2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
4 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
6 Written by Amaury Forgeot d'Arc and Antoine Pitrou
9 #define PY_SSIZE_T_CLEAN
11 #include "structmember.h"
12 #include "_iomodule.h"
16 PyDoc_STRVAR(textiobase_doc
,
17 "Base class for text I/O.\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
25 _unsupported(const char *message
)
27 PyErr_SetString(_PyIO_unsupported_operation
, message
);
31 PyDoc_STRVAR(textiobase_detach_doc
,
32 "Separate the underlying buffer from the TextIOBase and return it.\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
39 textiobase_detach(PyObject
*self
)
41 return _unsupported("detach");
44 PyDoc_STRVAR(textiobase_read_doc
,
45 "Read at most n characters from stream.\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
52 textiobase_read(PyObject
*self
, PyObject
*args
)
54 return _unsupported("read");
57 PyDoc_STRVAR(textiobase_readline_doc
,
58 "Read until newline or EOF.\n"
60 "Returns an empty string if EOF is hit immediately.\n"
64 textiobase_readline(PyObject
*self
, PyObject
*args
)
66 return _unsupported("readline");
69 PyDoc_STRVAR(textiobase_write_doc
,
70 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
76 textiobase_write(PyObject
*self
, PyObject
*args
)
78 return _unsupported("write");
81 PyDoc_STRVAR(textiobase_encoding_doc
,
82 "Encoding of the text stream.\n"
84 "Subclasses should override.\n"
88 textiobase_encoding_get(PyObject
*self
, void *context
)
93 PyDoc_STRVAR(textiobase_newlines_doc
,
94 "Line endings translated so far.\n"
96 "Only line endings translated during reading are considered.\n"
98 "Subclasses should override.\n"
102 textiobase_newlines_get(PyObject
*self
, void *context
)
107 PyDoc_STRVAR(textiobase_errors_doc
,
108 "The error setting of the decoder or encoder.\n"
110 "Subclasses should override.\n"
114 textiobase_errors_get(PyObject
*self
, void *context
)
120 static PyMethodDef textiobase_methods
[] = {
121 {"detach", (PyCFunction
)textiobase_detach
, METH_NOARGS
, textiobase_detach_doc
},
122 {"read", textiobase_read
, METH_VARARGS
, textiobase_read_doc
},
123 {"readline", textiobase_readline
, METH_VARARGS
, textiobase_readline_doc
},
124 {"write", textiobase_write
, METH_VARARGS
, textiobase_write_doc
},
128 static PyGetSetDef textiobase_getset
[] = {
129 {"encoding", (getter
)textiobase_encoding_get
, NULL
, textiobase_encoding_doc
},
130 {"newlines", (getter
)textiobase_newlines_get
, NULL
, textiobase_newlines_doc
},
131 {"errors", (getter
)textiobase_errors_get
, NULL
, textiobase_errors_doc
},
135 PyTypeObject PyTextIOBase_Type
= {
136 PyVarObject_HEAD_INIT(NULL
, 0)
137 "_io._TextIOBase", /*tp_name*/
147 0, /*tp_as_sequence*/
155 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /*tp_flags*/
156 textiobase_doc
, /* tp_doc */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
163 textiobase_methods
, /* tp_methods */
165 textiobase_getset
, /* tp_getset */
166 &PyIOBase_Type
, /* tp_base */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
177 /* IncrementalNewlineDecoder */
179 PyDoc_STRVAR(incrementalnewlinedecoder_doc
,
180 "Codec used when reading a file in universal newlines mode. It wraps\n"
181 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
182 "records the types of newlines encountered. When used with\n"
183 "translate=False, it ensures that the newline sequence is returned in\n"
184 "one piece. When used with decoder=None, it expects unicode strings as\n"
185 "decode input and translates newlines without first invoking an external\n"
193 signed int pendingcr
: 1;
194 signed int translate
: 1;
195 unsigned int seennl
: 3;
199 incrementalnewlinedecoder_init(nldecoder_object
*self
,
200 PyObject
*args
, PyObject
*kwds
)
204 PyObject
*errors
= NULL
;
205 char *kwlist
[] = {"decoder", "translate", "errors", NULL
};
207 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "Oi|O:IncrementalNewlineDecoder",
208 kwlist
, &decoder
, &translate
, &errors
))
211 self
->decoder
= decoder
;
214 if (errors
== NULL
) {
215 self
->errors
= PyUnicode_FromString("strict");
216 if (self
->errors
== NULL
)
221 self
->errors
= errors
;
224 self
->translate
= translate
;
232 incrementalnewlinedecoder_dealloc(nldecoder_object
*self
)
234 Py_CLEAR(self
->decoder
);
235 Py_CLEAR(self
->errors
);
236 Py_TYPE(self
)->tp_free((PyObject
*)self
);
240 check_decoded(PyObject
*decoded
)
244 if (!PyUnicode_Check(decoded
)) {
245 PyErr_Format(PyExc_TypeError
,
246 "decoder should return a string result, not '%.200s'",
247 Py_TYPE(decoded
)->tp_name
);
257 #define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
260 _PyIncrementalNewlineDecoder_decode(PyObject
*_self
,
261 PyObject
*input
, int final
)
264 Py_ssize_t output_len
;
265 nldecoder_object
*self
= (nldecoder_object
*) _self
;
267 if (self
->decoder
== NULL
) {
268 PyErr_SetString(PyExc_ValueError
,
269 "IncrementalNewlineDecoder.__init__ not called");
273 /* decode input (together with a pending \r left over from a previous pass, if any) */
274 if (self
->decoder
!= Py_None
) {
275 output
= PyObject_CallMethodObjArgs(self
->decoder
,
276 _PyIO_str_decode
, input
, final
? Py_True
: Py_False
, NULL
);
283 if (check_decoded(output
) < 0)
286 output_len
= PyUnicode_GET_SIZE(output
);
287 if (self
->pendingcr
&& (final
|| output_len
> 0)) {
289 PyObject
*modified
= PyUnicode_FromUnicode(NULL
, output_len
+ 1);
290 if (modified
== NULL
)
292 out
= PyUnicode_AS_UNICODE(modified
);
294 memcpy(out
+ 1, PyUnicode_AS_UNICODE(output
),
295 output_len
* sizeof(Py_UNICODE
));
302 /* retain last \r even when not translating data:
303 * then readline() is sure to get \r\n in one pass
307 && PyUnicode_AS_UNICODE(output
)[output_len
- 1] == '\r') {
309 if (Py_REFCNT(output
) == 1) {
310 if (PyUnicode_Resize(&output
, output_len
- 1) < 0)
314 PyObject
*modified
= PyUnicode_FromUnicode(
315 PyUnicode_AS_UNICODE(output
),
317 if (modified
== NULL
)
326 /* Record which newlines are read and do newline translation if desired,
331 int seennl
= self
->seennl
;
334 in_str
= PyUnicode_AS_UNICODE(output
);
335 len
= PyUnicode_GET_SIZE(output
);
340 /* If, up to now, newlines are consistently \n, do a quick check
341 for the \r *byte* with the libc's optimized memchr.
343 if (seennl
== SEEN_LF
|| seennl
== 0) {
344 only_lf
= (memchr(in_str
, '\r', len
* sizeof(Py_UNICODE
)) == NULL
);
348 /* If not already seen, quick scan for a possible "\n" character.
349 (there's nothing else to be done, even when in translation mode)
352 memchr(in_str
, '\n', len
* sizeof(Py_UNICODE
)) != NULL
) {
358 /* Fast loop for non-control characters */
370 /* Finished: we have scanned for newlines, and none of them
373 else if (!self
->translate
) {
375 /* We have already seen all newline types, no need to scan again */
376 if (seennl
== SEEN_ALL
)
382 /* Fast loop for non-control characters */
388 else if (c
== '\r') {
398 if (seennl
== SEEN_ALL
)
405 PyObject
*translated
= NULL
;
407 Py_UNICODE
*in
, *out
, *end
;
408 if (Py_REFCNT(output
) != 1) {
409 /* We could try to optimize this so that we only do a copy
410 when there is something to translate. On the other hand,
411 most decoders should only output non-shared strings, i.e.
412 translation is done in place. */
413 translated
= PyUnicode_FromUnicode(NULL
, len
);
414 if (translated
== NULL
)
416 assert(Py_REFCNT(translated
) == 1);
417 memcpy(PyUnicode_AS_UNICODE(translated
),
418 PyUnicode_AS_UNICODE(output
),
419 len
* sizeof(Py_UNICODE
));
424 out_str
= PyUnicode_AS_UNICODE(translated
);
430 /* Fast loop for non-control characters */
431 while ((c
= *in
++) > '\r')
452 if (translated
!= output
) {
456 if (out
- out_str
!= len
) {
457 if (PyUnicode_Resize(&output
, out
- out_str
) < 0)
461 self
->seennl
|= seennl
;
472 incrementalnewlinedecoder_decode(nldecoder_object
*self
,
473 PyObject
*args
, PyObject
*kwds
)
475 char *kwlist
[] = {"input", "final", NULL
};
479 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "O|i:IncrementalNewlineDecoder",
480 kwlist
, &input
, &final
))
482 return _PyIncrementalNewlineDecoder_decode((PyObject
*) self
, input
, final
);
486 incrementalnewlinedecoder_getstate(nldecoder_object
*self
, PyObject
*args
)
489 unsigned PY_LONG_LONG flag
;
491 if (self
->decoder
!= Py_None
) {
492 PyObject
*state
= PyObject_CallMethodObjArgs(self
->decoder
,
493 _PyIO_str_getstate
, NULL
);
496 if (!PyArg_Parse(state
, "(OK)", &buffer
, &flag
)) {
504 buffer
= PyBytes_FromString("");
510 return Py_BuildValue("NK", buffer
, flag
);
514 incrementalnewlinedecoder_setstate(nldecoder_object
*self
, PyObject
*state
)
517 unsigned PY_LONG_LONG flag
;
519 if (!PyArg_Parse(state
, "(OK)", &buffer
, &flag
))
522 self
->pendingcr
= (int) flag
& 1;
525 if (self
->decoder
!= Py_None
)
526 return PyObject_CallMethod(self
->decoder
,
527 "setstate", "((OK))", buffer
, flag
);
533 incrementalnewlinedecoder_reset(nldecoder_object
*self
, PyObject
*args
)
537 if (self
->decoder
!= Py_None
)
538 return PyObject_CallMethodObjArgs(self
->decoder
, _PyIO_str_reset
, NULL
);
544 incrementalnewlinedecoder_newlines_get(nldecoder_object
*self
, void *context
)
546 switch (self
->seennl
) {
548 return PyUnicode_FromString("\r");
550 return PyUnicode_FromString("\n");
552 return PyUnicode_FromString("\r\n");
553 case SEEN_CR
| SEEN_LF
:
554 return Py_BuildValue("ss", "\r", "\n");
555 case SEEN_CR
| SEEN_CRLF
:
556 return Py_BuildValue("ss", "\r", "\r\n");
557 case SEEN_LF
| SEEN_CRLF
:
558 return Py_BuildValue("ss", "\n", "\r\n");
559 case SEEN_CR
| SEEN_LF
| SEEN_CRLF
:
560 return Py_BuildValue("sss", "\r", "\n", "\r\n");
568 static PyMethodDef incrementalnewlinedecoder_methods
[] = {
569 {"decode", (PyCFunction
)incrementalnewlinedecoder_decode
, METH_VARARGS
|METH_KEYWORDS
},
570 {"getstate", (PyCFunction
)incrementalnewlinedecoder_getstate
, METH_NOARGS
},
571 {"setstate", (PyCFunction
)incrementalnewlinedecoder_setstate
, METH_O
},
572 {"reset", (PyCFunction
)incrementalnewlinedecoder_reset
, METH_NOARGS
},
576 static PyGetSetDef incrementalnewlinedecoder_getset
[] = {
577 {"newlines", (getter
)incrementalnewlinedecoder_newlines_get
, NULL
, NULL
},
581 PyTypeObject PyIncrementalNewlineDecoder_Type
= {
582 PyVarObject_HEAD_INIT(NULL
, 0)
583 "_io.IncrementalNewlineDecoder", /*tp_name*/
584 sizeof(nldecoder_object
), /*tp_basicsize*/
586 (destructor
)incrementalnewlinedecoder_dealloc
, /*tp_dealloc*/
593 0, /*tp_as_sequence*/
601 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /*tp_flags*/
602 incrementalnewlinedecoder_doc
, /* tp_doc */
605 0, /* tp_richcompare */
606 0, /*tp_weaklistoffset*/
609 incrementalnewlinedecoder_methods
, /* tp_methods */
611 incrementalnewlinedecoder_getset
, /* tp_getset */
614 0, /* tp_descr_get */
615 0, /* tp_descr_set */
616 0, /* tp_dictoffset */
617 (initproc
)incrementalnewlinedecoder_init
, /* tp_init */
619 PyType_GenericNew
, /* tp_new */
625 PyDoc_STRVAR(textiowrapper_doc
,
626 "Character and line based layer over a BufferedIOBase object, buffer.\n"
628 "encoding gives the name of the encoding that the stream will be\n"
629 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
631 "errors determines the strictness of encoding and decoding (see the\n"
632 "codecs.register) and defaults to \"strict\".\n"
634 "newline controls how line endings are handled. It can be None, '',\n"
635 "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
637 "* On input, if newline is None, universal newlines mode is\n"
638 " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
639 " these are translated into '\\n' before being returned to the\n"
640 " caller. If it is '', universal newline mode is enabled, but line\n"
641 " endings are returned to the caller untranslated. If it has any of\n"
642 " the other legal values, input lines are only terminated by the given\n"
643 " string, and the line ending is returned to the caller untranslated.\n"
645 "* On output, if newline is None, any '\\n' characters written are\n"
646 " translated to the system default line separator, os.linesep. If\n"
647 " newline is '', no translation takes place. If newline is any of the\n"
648 " other legal values, any '\\n' characters written are translated to\n"
649 " the given string.\n"
651 "If line_buffering is True, a call to flush is implied when a call to\n"
652 "write contains a newline character."
656 (*encodefunc_t
)(PyObject
*, PyObject
*);
661 int ok
; /* initialized? */
663 Py_ssize_t chunk_size
;
670 const char *writenl
; /* utf-8 encoded, NULL stands for \n */
677 /* Specialized encoding func (see below) */
678 encodefunc_t encodefunc
;
679 /* Whether or not it's the start of the stream */
680 char encoding_start_of_stream
;
682 /* Reads and writes are internally buffered in order to speed things up.
683 However, any read will first flush the write buffer if it isn't empty.
685 Please also note that text to be written is first encoded before being
686 buffered. This is necessary so that encoding errors are immediately
687 reported to the caller, but it unfortunately means that the
688 IncrementalEncoder (whose encode() method is always written in Python)
689 becomes a bottleneck for small writes.
691 PyObject
*decoded_chars
; /* buffer for text returned from decoder */
692 Py_ssize_t decoded_chars_used
; /* offset into _decoded_chars for read() */
693 PyObject
*pending_bytes
; /* list of bytes objects waiting to be
695 Py_ssize_t pending_bytes_count
;
697 /* snapshot is either None, or a tuple (dec_flags, next_input) where
698 * dec_flags is the second (integer) item of the decoder state and
699 * next_input is the chunk of input bytes that comes next after the
700 * snapshot point. We use this to reconstruct decoder states in tell().
703 /* Cache raw object if it's a FileIO object */
706 PyObject
*weakreflist
;
711 /* A couple of specialized cases in order to bypass the slow incremental
712 encoding methods for the most popular encodings. */
715 ascii_encode(textio
*self
, PyObject
*text
)
717 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text
),
718 PyUnicode_GET_SIZE(text
),
719 PyBytes_AS_STRING(self
->errors
));
723 utf16be_encode(textio
*self
, PyObject
*text
)
725 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text
),
726 PyUnicode_GET_SIZE(text
),
727 PyBytes_AS_STRING(self
->errors
), 1);
731 utf16le_encode(textio
*self
, PyObject
*text
)
733 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text
),
734 PyUnicode_GET_SIZE(text
),
735 PyBytes_AS_STRING(self
->errors
), -1);
739 utf16_encode(textio
*self
, PyObject
*text
)
741 if (!self
->encoding_start_of_stream
) {
742 /* Skip the BOM and use native byte ordering */
743 #if defined(WORDS_BIGENDIAN)
744 return utf16be_encode(self
, text
);
746 return utf16le_encode(self
, text
);
749 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text
),
750 PyUnicode_GET_SIZE(text
),
751 PyBytes_AS_STRING(self
->errors
), 0);
755 utf32be_encode(textio
*self
, PyObject
*text
)
757 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text
),
758 PyUnicode_GET_SIZE(text
),
759 PyBytes_AS_STRING(self
->errors
), 1);
763 utf32le_encode(textio
*self
, PyObject
*text
)
765 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text
),
766 PyUnicode_GET_SIZE(text
),
767 PyBytes_AS_STRING(self
->errors
), -1);
771 utf32_encode(textio
*self
, PyObject
*text
)
773 if (!self
->encoding_start_of_stream
) {
774 /* Skip the BOM and use native byte ordering */
775 #if defined(WORDS_BIGENDIAN)
776 return utf32be_encode(self
, text
);
778 return utf32le_encode(self
, text
);
781 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text
),
782 PyUnicode_GET_SIZE(text
),
783 PyBytes_AS_STRING(self
->errors
), 0);
787 utf8_encode(textio
*self
, PyObject
*text
)
789 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text
),
790 PyUnicode_GET_SIZE(text
),
791 PyBytes_AS_STRING(self
->errors
));
795 latin1_encode(textio
*self
, PyObject
*text
)
797 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text
),
798 PyUnicode_GET_SIZE(text
),
799 PyBytes_AS_STRING(self
->errors
));
802 /* Map normalized encoding names onto the specialized encoding funcs */
806 encodefunc_t encodefunc
;
809 static encodefuncentry encodefuncs
[] = {
810 {"ascii", (encodefunc_t
) ascii_encode
},
811 {"iso8859-1", (encodefunc_t
) latin1_encode
},
812 {"utf-8", (encodefunc_t
) utf8_encode
},
813 {"utf-16-be", (encodefunc_t
) utf16be_encode
},
814 {"utf-16-le", (encodefunc_t
) utf16le_encode
},
815 {"utf-16", (encodefunc_t
) utf16_encode
},
816 {"utf-32-be", (encodefunc_t
) utf32be_encode
},
817 {"utf-32-le", (encodefunc_t
) utf32le_encode
},
818 {"utf-32", (encodefunc_t
) utf32_encode
},
824 textiowrapper_init(textio
*self
, PyObject
*args
, PyObject
*kwds
)
826 char *kwlist
[] = {"buffer", "encoding", "errors",
827 "newline", "line_buffering",
829 PyObject
*buffer
, *raw
;
830 char *encoding
= NULL
;
832 char *newline
= NULL
;
833 int line_buffering
= 0;
840 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "O|zzzi:fileio",
841 kwlist
, &buffer
, &encoding
, &errors
,
842 &newline
, &line_buffering
))
845 if (newline
&& newline
[0] != '\0'
846 && !(newline
[0] == '\n' && newline
[1] == '\0')
847 && !(newline
[0] == '\r' && newline
[1] == '\0')
848 && !(newline
[0] == '\r' && newline
[1] == '\n' && newline
[2] == '\0')) {
849 PyErr_Format(PyExc_ValueError
,
850 "illegal newline value: %s", newline
);
854 Py_CLEAR(self
->buffer
);
855 Py_CLEAR(self
->encoding
);
856 Py_CLEAR(self
->encoder
);
857 Py_CLEAR(self
->decoder
);
858 Py_CLEAR(self
->readnl
);
859 Py_CLEAR(self
->decoded_chars
);
860 Py_CLEAR(self
->pending_bytes
);
861 Py_CLEAR(self
->snapshot
);
862 Py_CLEAR(self
->errors
);
864 self
->decoded_chars_used
= 0;
865 self
->pending_bytes_count
= 0;
866 self
->encodefunc
= NULL
;
867 self
->writenl
= NULL
;
869 if (encoding
== NULL
&& self
->encoding
== NULL
) {
870 if (_PyIO_locale_module
== NULL
) {
871 _PyIO_locale_module
= PyImport_ImportModule("locale");
872 if (_PyIO_locale_module
== NULL
)
873 goto catch_ImportError
;
879 self
->encoding
= PyObject_CallMethod(
880 _PyIO_locale_module
, "getpreferredencoding", NULL
);
881 if (self
->encoding
== NULL
) {
884 Importing locale can raise an ImportError because of
885 _functools, and locale.getpreferredencoding can raise a
886 ImportError if _locale is not available. These will happen
887 during module building.
889 if (PyErr_ExceptionMatches(PyExc_ImportError
)) {
891 self
->encoding
= PyString_FromString("ascii");
896 else if (!PyString_Check(self
->encoding
))
897 Py_CLEAR(self
->encoding
);
900 if (self
->encoding
!= NULL
)
901 encoding
= PyString_AsString(self
->encoding
);
902 else if (encoding
!= NULL
) {
903 self
->encoding
= PyString_FromString(encoding
);
904 if (self
->encoding
== NULL
)
908 PyErr_SetString(PyExc_IOError
,
909 "could not determine default encoding");
914 self
->errors
= PyBytes_FromString(errors
);
915 if (self
->errors
== NULL
)
918 self
->chunk_size
= 8192;
919 self
->readuniversal
= (newline
== NULL
|| newline
[0] == '\0');
920 self
->line_buffering
= line_buffering
;
921 self
->readtranslate
= (newline
== NULL
);
923 self
->readnl
= PyString_FromString(newline
);
924 if (self
->readnl
== NULL
)
927 self
->writetranslate
= (newline
== NULL
|| newline
[0] != '\0');
928 if (!self
->readuniversal
&& self
->writetranslate
) {
929 self
->writenl
= PyString_AsString(self
->readnl
);
930 if (!strcmp(self
->writenl
, "\n"))
931 self
->writenl
= NULL
;
935 self
->writenl
= "\r\n";
938 /* Build the decoder object */
939 res
= PyObject_CallMethod(buffer
, "readable", NULL
);
942 r
= PyObject_IsTrue(res
);
947 self
->decoder
= PyCodec_IncrementalDecoder(
949 if (self
->decoder
== NULL
)
952 if (self
->readuniversal
) {
953 PyObject
*incrementalDecoder
= PyObject_CallFunction(
954 (PyObject
*)&PyIncrementalNewlineDecoder_Type
,
955 "Oi", self
->decoder
, (int)self
->readtranslate
);
956 if (incrementalDecoder
== NULL
)
958 Py_CLEAR(self
->decoder
);
959 self
->decoder
= incrementalDecoder
;
963 /* Build the encoder object */
964 res
= PyObject_CallMethod(buffer
, "writable", NULL
);
967 r
= PyObject_IsTrue(res
);
973 self
->encoder
= PyCodec_IncrementalEncoder(
975 if (self
->encoder
== NULL
)
977 /* Get the normalized name of the codec */
978 ci
= _PyCodec_Lookup(encoding
);
981 res
= PyObject_GetAttrString(ci
, "name");
984 if (PyErr_ExceptionMatches(PyExc_AttributeError
))
989 else if (PyString_Check(res
)) {
990 encodefuncentry
*e
= encodefuncs
;
991 while (e
->name
!= NULL
) {
992 if (!strcmp(PyString_AS_STRING(res
), e
->name
)) {
993 self
->encodefunc
= e
->encodefunc
;
1002 self
->buffer
= buffer
;
1005 if (Py_TYPE(buffer
) == &PyBufferedReader_Type
||
1006 Py_TYPE(buffer
) == &PyBufferedWriter_Type
||
1007 Py_TYPE(buffer
) == &PyBufferedRandom_Type
) {
1008 raw
= PyObject_GetAttrString(buffer
, "raw");
1009 /* Cache the raw FileIO object to speed up 'closed' checks */
1011 if (PyErr_ExceptionMatches(PyExc_AttributeError
))
1016 else if (Py_TYPE(raw
) == &PyFileIO_Type
)
1022 res
= PyObject_CallMethod(buffer
, "seekable", NULL
);
1025 r
= PyObject_IsTrue(res
);
1029 self
->seekable
= self
->telling
= r
;
1031 self
->encoding_start_of_stream
= 0;
1032 if (self
->seekable
&& self
->encoder
) {
1033 PyObject
*cookieObj
;
1036 self
->encoding_start_of_stream
= 1;
1038 cookieObj
= PyObject_CallMethodObjArgs(buffer
, _PyIO_str_tell
, NULL
);
1039 if (cookieObj
== NULL
)
1042 cmp
= PyObject_RichCompareBool(cookieObj
, _PyIO_zero
, Py_EQ
);
1043 Py_DECREF(cookieObj
);
1049 self
->encoding_start_of_stream
= 0;
1050 res
= PyObject_CallMethodObjArgs(self
->encoder
, _PyIO_str_setstate
,
1066 _textiowrapper_clear(textio
*self
)
1068 if (self
->ok
&& _PyIOBase_finalize((PyObject
*) self
) < 0)
1071 Py_CLEAR(self
->buffer
);
1072 Py_CLEAR(self
->encoding
);
1073 Py_CLEAR(self
->encoder
);
1074 Py_CLEAR(self
->decoder
);
1075 Py_CLEAR(self
->readnl
);
1076 Py_CLEAR(self
->decoded_chars
);
1077 Py_CLEAR(self
->pending_bytes
);
1078 Py_CLEAR(self
->snapshot
);
1079 Py_CLEAR(self
->errors
);
1080 Py_CLEAR(self
->raw
);
1085 textiowrapper_dealloc(textio
*self
)
1087 if (_textiowrapper_clear(self
) < 0)
1089 _PyObject_GC_UNTRACK(self
);
1090 if (self
->weakreflist
!= NULL
)
1091 PyObject_ClearWeakRefs((PyObject
*)self
);
1092 Py_CLEAR(self
->dict
);
1093 Py_TYPE(self
)->tp_free((PyObject
*)self
);
1097 textiowrapper_traverse(textio
*self
, visitproc visit
, void *arg
)
1099 Py_VISIT(self
->buffer
);
1100 Py_VISIT(self
->encoding
);
1101 Py_VISIT(self
->encoder
);
1102 Py_VISIT(self
->decoder
);
1103 Py_VISIT(self
->readnl
);
1104 Py_VISIT(self
->decoded_chars
);
1105 Py_VISIT(self
->pending_bytes
);
1106 Py_VISIT(self
->snapshot
);
1107 Py_VISIT(self
->errors
);
1108 Py_VISIT(self
->raw
);
1110 Py_VISIT(self
->dict
);
1115 textiowrapper_clear(textio
*self
)
1117 if (_textiowrapper_clear(self
) < 0)
1119 Py_CLEAR(self
->dict
);
1124 textiowrapper_closed_get(textio
*self
, void *context
);
1126 /* This macro takes some shortcuts to make the common case faster. */
1127 #define CHECK_CLOSED(self) \
1131 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1132 if (self->raw != NULL) \
1133 r = _PyFileIO_closed(self->raw); \
1135 _res = textiowrapper_closed_get(self, NULL); \
1138 r = PyObject_IsTrue(_res); \
1144 PyErr_SetString(PyExc_ValueError, \
1145 "I/O operation on closed file."); \
1149 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1153 #define CHECK_INITIALIZED(self) \
1154 if (self->ok <= 0) { \
1155 PyErr_SetString(PyExc_ValueError, \
1156 "I/O operation on uninitialized object"); \
1160 #define CHECK_ATTACHED(self) \
1161 CHECK_INITIALIZED(self); \
1162 if (self->detached) { \
1163 PyErr_SetString(PyExc_ValueError, \
1164 "underlying buffer has been detached"); \
1168 #define CHECK_ATTACHED_INT(self) \
1169 if (self->ok <= 0) { \
1170 PyErr_SetString(PyExc_ValueError, \
1171 "I/O operation on uninitialized object"); \
1173 } else if (self->detached) { \
1174 PyErr_SetString(PyExc_ValueError, \
1175 "underlying buffer has been detached"); \
1181 textiowrapper_detach(textio
*self
)
1183 PyObject
*buffer
, *res
;
1184 CHECK_ATTACHED(self
);
1185 res
= PyObject_CallMethodObjArgs((PyObject
*)self
, _PyIO_str_flush
, NULL
);
1189 buffer
= self
->buffer
;
1190 self
->buffer
= NULL
;
1195 Py_LOCAL_INLINE(const Py_UNICODE
*)
1196 findchar(const Py_UNICODE
*s
, Py_ssize_t size
, Py_UNICODE ch
)
1198 /* like wcschr, but doesn't stop at NULL characters */
1199 while (size
-- > 0) {
1207 /* Flush the internal write buffer. This doesn't explicitly flush the
1208 underlying buffered object, though. */
1210 _textiowrapper_writeflush(textio
*self
)
1212 PyObject
*pending
, *b
, *ret
;
1214 if (self
->pending_bytes
== NULL
)
1217 pending
= self
->pending_bytes
;
1219 self
->pending_bytes_count
= 0;
1220 Py_CLEAR(self
->pending_bytes
);
1222 b
= _PyBytes_Join(_PyIO_empty_bytes
, pending
);
1228 ret
= PyObject_CallMethodObjArgs(self
->buffer
,
1229 _PyIO_str_write
, b
, NULL
);
1230 } while (ret
== NULL
&& _PyIO_trap_eintr());
1239 textiowrapper_write(textio
*self
, PyObject
*args
)
1242 PyObject
*text
; /* owned reference */
1248 CHECK_ATTACHED(self
);
1250 if (!PyArg_ParseTuple(args
, "U:write", &text
)) {
1256 if (self
->encoder
== NULL
) {
1257 PyErr_SetString(PyExc_IOError
, "not writable");
1263 textlen
= PyUnicode_GetSize(text
);
1265 if ((self
->writetranslate
&& self
->writenl
!= NULL
) || self
->line_buffering
)
1266 if (findchar(PyUnicode_AS_UNICODE(text
),
1267 PyUnicode_GET_SIZE(text
), '\n'))
1270 if (haslf
&& self
->writetranslate
&& self
->writenl
!= NULL
) {
1271 PyObject
*newtext
= PyObject_CallMethod(
1272 text
, "replace", "ss", "\n", self
->writenl
);
1274 if (newtext
== NULL
)
1279 if (self
->line_buffering
&&
1281 findchar(PyUnicode_AS_UNICODE(text
),
1282 PyUnicode_GET_SIZE(text
), '\r')))
1285 /* XXX What if we were just reading? */
1286 if (self
->encodefunc
!= NULL
) {
1287 b
= (*self
->encodefunc
)((PyObject
*) self
, text
);
1288 self
->encoding_start_of_stream
= 0;
1291 b
= PyObject_CallMethodObjArgs(self
->encoder
,
1292 _PyIO_str_encode
, text
, NULL
);
1297 if (self
->pending_bytes
== NULL
) {
1298 self
->pending_bytes
= PyList_New(0);
1299 if (self
->pending_bytes
== NULL
) {
1303 self
->pending_bytes_count
= 0;
1305 if (PyList_Append(self
->pending_bytes
, b
) < 0) {
1309 self
->pending_bytes_count
+= PyBytes_GET_SIZE(b
);
1311 if (self
->pending_bytes_count
> self
->chunk_size
|| needflush
) {
1312 if (_textiowrapper_writeflush(self
) < 0)
1317 ret
= PyObject_CallMethodObjArgs(self
->buffer
, _PyIO_str_flush
, NULL
);
1323 Py_CLEAR(self
->snapshot
);
1325 if (self
->decoder
) {
1326 ret
= PyObject_CallMethod(self
->decoder
, "reset", NULL
);
1332 return PyLong_FromSsize_t(textlen
);
1335 /* Steal a reference to chars and store it in the decoded_chars buffer;
1338 textiowrapper_set_decoded_chars(textio
*self
, PyObject
*chars
)
1340 Py_CLEAR(self
->decoded_chars
);
1341 self
->decoded_chars
= chars
;
1342 self
->decoded_chars_used
= 0;
1346 textiowrapper_get_decoded_chars(textio
*self
, Py_ssize_t n
)
1351 if (self
->decoded_chars
== NULL
)
1352 return PyUnicode_FromStringAndSize(NULL
, 0);
1354 avail
= (PyUnicode_GET_SIZE(self
->decoded_chars
)
1355 - self
->decoded_chars_used
);
1359 if (n
< 0 || n
> avail
)
1362 if (self
->decoded_chars_used
> 0 || n
< avail
) {
1363 chars
= PyUnicode_FromUnicode(
1364 PyUnicode_AS_UNICODE(self
->decoded_chars
)
1365 + self
->decoded_chars_used
, n
);
1370 chars
= self
->decoded_chars
;
1374 self
->decoded_chars_used
+= n
;
1378 /* Read and decode the next chunk of data from the BufferedReader.
1381 textiowrapper_read_chunk(textio
*self
)
1383 PyObject
*dec_buffer
= NULL
;
1384 PyObject
*dec_flags
= NULL
;
1385 PyObject
*input_chunk
= NULL
;
1386 PyObject
*decoded_chars
, *chunk_size
;
1389 /* The return value is True unless EOF was reached. The decoded string is
1390 * placed in self._decoded_chars (replacing its previous value). The
1391 * entire input chunk is sent to the decoder, though some of it may remain
1392 * buffered in the decoder, yet to be converted.
1395 if (self
->decoder
== NULL
) {
1396 PyErr_SetString(PyExc_IOError
, "not readable");
1400 if (self
->telling
) {
1401 /* To prepare for tell(), we need to snapshot a point in the file
1402 * where the decoder's input buffer is empty.
1405 PyObject
*state
= PyObject_CallMethodObjArgs(self
->decoder
,
1406 _PyIO_str_getstate
, NULL
);
1409 /* Given this, we know there was a valid snapshot point
1410 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1412 if (PyArg_Parse(state
, "(OO)", &dec_buffer
, &dec_flags
) < 0) {
1416 Py_INCREF(dec_buffer
);
1417 Py_INCREF(dec_flags
);
1421 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1422 chunk_size
= PyLong_FromSsize_t(self
->chunk_size
);
1423 if (chunk_size
== NULL
)
1425 input_chunk
= PyObject_CallMethodObjArgs(self
->buffer
,
1426 _PyIO_str_read1
, chunk_size
, NULL
);
1427 Py_DECREF(chunk_size
);
1428 if (input_chunk
== NULL
)
1430 if (!PyBytes_Check(input_chunk
)) {
1431 PyErr_Format(PyExc_TypeError
,
1432 "underlying read1() should have returned a bytes object, "
1433 "not '%.200s'", Py_TYPE(input_chunk
)->tp_name
);
1437 eof
= (PyBytes_Size(input_chunk
) == 0);
1439 if (Py_TYPE(self
->decoder
) == &PyIncrementalNewlineDecoder_Type
) {
1440 decoded_chars
= _PyIncrementalNewlineDecoder_decode(
1441 self
->decoder
, input_chunk
, eof
);
1444 decoded_chars
= PyObject_CallMethodObjArgs(self
->decoder
,
1445 _PyIO_str_decode
, input_chunk
, eof
? Py_True
: Py_False
, NULL
);
1448 if (check_decoded(decoded_chars
) < 0)
1450 textiowrapper_set_decoded_chars(self
, decoded_chars
);
1451 if (PyUnicode_GET_SIZE(decoded_chars
) > 0)
1454 if (self
->telling
) {
1455 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1456 * next input to be decoded is dec_buffer + input_chunk.
1458 PyObject
*next_input
= PyNumber_Add(dec_buffer
, input_chunk
);
1459 if (next_input
== NULL
)
1461 if (!PyBytes_Check(next_input
)) {
1462 PyErr_Format(PyExc_TypeError
,
1463 "decoder getstate() should have returned a bytes "
1464 "object, not '%.200s'",
1465 Py_TYPE(next_input
)->tp_name
);
1466 Py_DECREF(next_input
);
1469 Py_DECREF(dec_buffer
);
1470 Py_CLEAR(self
->snapshot
);
1471 self
->snapshot
= Py_BuildValue("NN", dec_flags
, next_input
);
1473 Py_DECREF(input_chunk
);
1478 Py_XDECREF(dec_buffer
);
1479 Py_XDECREF(dec_flags
);
1480 Py_XDECREF(input_chunk
);
1485 textiowrapper_read(textio
*self
, PyObject
*args
)
1488 PyObject
*result
= NULL
, *chunks
= NULL
;
1490 CHECK_ATTACHED(self
);
1492 if (!PyArg_ParseTuple(args
, "|O&:read", &_PyIO_ConvertSsize_t
, &n
))
1497 if (self
->decoder
== NULL
) {
1498 PyErr_SetString(PyExc_IOError
, "not readable");
1502 if (_textiowrapper_writeflush(self
) < 0)
1506 /* Read everything */
1507 PyObject
*bytes
= PyObject_CallMethod(self
->buffer
, "read", NULL
);
1508 PyObject
*decoded
, *final
;
1511 decoded
= PyObject_CallMethodObjArgs(self
->decoder
, _PyIO_str_decode
,
1512 bytes
, Py_True
, NULL
);
1514 if (check_decoded(decoded
) < 0)
1517 result
= textiowrapper_get_decoded_chars(self
, -1);
1519 if (result
== NULL
) {
1524 final
= PyUnicode_Concat(result
, decoded
);
1530 Py_CLEAR(self
->snapshot
);
1535 Py_ssize_t remaining
= n
;
1537 result
= textiowrapper_get_decoded_chars(self
, n
);
1540 remaining
-= PyUnicode_GET_SIZE(result
);
1542 /* Keep reading chunks until we have n characters to return */
1543 while (remaining
> 0) {
1544 res
= textiowrapper_read_chunk(self
);
1546 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1547 when EINTR occurs so we needn't do it ourselves. */
1548 if (_PyIO_trap_eintr()) {
1553 if (res
== 0) /* EOF */
1555 if (chunks
== NULL
) {
1556 chunks
= PyList_New(0);
1560 if (PyList_Append(chunks
, result
) < 0)
1563 result
= textiowrapper_get_decoded_chars(self
, remaining
);
1566 remaining
-= PyUnicode_GET_SIZE(result
);
1568 if (chunks
!= NULL
) {
1569 if (result
!= NULL
&& PyList_Append(chunks
, result
) < 0)
1572 result
= PyUnicode_Join(_PyIO_empty_str
, chunks
);
1586 /* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1587 that is to the NUL character. Otherwise the function will produce
1588 incorrect results. */
1590 find_control_char(Py_UNICODE
*start
, Py_UNICODE
*end
, Py_UNICODE ch
)
1592 Py_UNICODE
*s
= start
;
1605 _PyIO_find_line_ending(
1606 int translated
, int universal
, PyObject
*readnl
,
1607 Py_UNICODE
*start
, Py_UNICODE
*end
, Py_ssize_t
*consumed
)
1609 Py_ssize_t len
= end
- start
;
1612 /* Newlines are already translated, only search for \n */
1613 Py_UNICODE
*pos
= find_control_char(start
, end
, '\n');
1615 return pos
- start
+ 1;
1621 else if (universal
) {
1622 /* Universal newline search. Find any of \r, \r\n, \n
1623 * The decoder ensures that \r\n are not split in two pieces
1625 Py_UNICODE
*s
= start
;
1628 /* Fast path for non-control chars. The loop always ends
1629 since the Py_UNICODE storage is NUL-terminated. */
1641 return s
- start
+ 1;
1648 /* Non-universal mode. */
1649 Py_ssize_t readnl_len
= PyString_GET_SIZE(readnl
);
1650 unsigned char *nl
= (unsigned char *) PyString_AS_STRING(readnl
);
1651 if (readnl_len
== 1) {
1652 Py_UNICODE
*pos
= find_control_char(start
, end
, nl
[0]);
1654 return pos
- start
+ 1;
1659 Py_UNICODE
*s
= start
;
1660 Py_UNICODE
*e
= end
- readnl_len
+ 1;
1666 Py_UNICODE
*pos
= find_control_char(s
, end
, nl
[0]);
1667 if (pos
== NULL
|| pos
>= e
)
1669 for (i
= 1; i
< readnl_len
; i
++) {
1670 if (pos
[i
] != nl
[i
])
1673 if (i
== readnl_len
)
1674 return pos
- start
+ readnl_len
;
1677 pos
= find_control_char(e
, end
, nl
[0]);
1681 *consumed
= pos
- start
;
1688 _textiowrapper_readline(textio
*self
, Py_ssize_t limit
)
1690 PyObject
*line
= NULL
, *chunks
= NULL
, *remaining
= NULL
;
1691 Py_ssize_t start
, endpos
, chunked
, offset_to_buffer
;
1696 if (_textiowrapper_writeflush(self
) < 0)
1703 Py_ssize_t line_len
;
1704 Py_ssize_t consumed
= 0;
1706 /* First, get some data if necessary */
1708 while (!self
->decoded_chars
||
1709 !PyUnicode_GET_SIZE(self
->decoded_chars
)) {
1710 res
= textiowrapper_read_chunk(self
);
1712 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1713 when EINTR occurs so we needn't do it ourselves. */
1714 if (_PyIO_trap_eintr()) {
1724 textiowrapper_set_decoded_chars(self
, NULL
);
1725 Py_CLEAR(self
->snapshot
);
1726 start
= endpos
= offset_to_buffer
= 0;
1730 if (remaining
== NULL
) {
1731 line
= self
->decoded_chars
;
1732 start
= self
->decoded_chars_used
;
1733 offset_to_buffer
= 0;
1737 assert(self
->decoded_chars_used
== 0);
1738 line
= PyUnicode_Concat(remaining
, self
->decoded_chars
);
1740 offset_to_buffer
= PyUnicode_GET_SIZE(remaining
);
1741 Py_CLEAR(remaining
);
1746 ptr
= PyUnicode_AS_UNICODE(line
);
1747 line_len
= PyUnicode_GET_SIZE(line
);
1749 endpos
= _PyIO_find_line_ending(
1750 self
->readtranslate
, self
->readuniversal
, self
->readnl
,
1751 ptr
+ start
, ptr
+ line_len
, &consumed
);
1754 if (limit
>= 0 && (endpos
- start
) + chunked
>= limit
)
1755 endpos
= start
+ limit
- chunked
;
1759 /* We can put aside up to `endpos` */
1760 endpos
= consumed
+ start
;
1761 if (limit
>= 0 && (endpos
- start
) + chunked
>= limit
) {
1762 /* Didn't find line ending, but reached length limit */
1763 endpos
= start
+ limit
- chunked
;
1767 if (endpos
> start
) {
1768 /* No line ending seen yet - put aside current data */
1770 if (chunks
== NULL
) {
1771 chunks
= PyList_New(0);
1775 s
= PyUnicode_FromUnicode(ptr
+ start
, endpos
- start
);
1778 if (PyList_Append(chunks
, s
) < 0) {
1782 chunked
+= PyUnicode_GET_SIZE(s
);
1785 /* There may be some remaining bytes we'll have to prepend to the
1786 next chunk of data */
1787 if (endpos
< line_len
) {
1788 remaining
= PyUnicode_FromUnicode(
1789 ptr
+ endpos
, line_len
- endpos
);
1790 if (remaining
== NULL
)
1794 /* We have consumed the buffer */
1795 textiowrapper_set_decoded_chars(self
, NULL
);
1799 /* Our line ends in the current buffer */
1800 self
->decoded_chars_used
= endpos
- offset_to_buffer
;
1801 if (start
> 0 || endpos
< PyUnicode_GET_SIZE(line
)) {
1802 if (start
== 0 && Py_REFCNT(line
) == 1) {
1803 if (PyUnicode_Resize(&line
, endpos
) < 0)
1807 PyObject
*s
= PyUnicode_FromUnicode(
1808 PyUnicode_AS_UNICODE(line
) + start
, endpos
- start
);
1816 if (remaining
!= NULL
) {
1817 if (chunks
== NULL
) {
1818 chunks
= PyList_New(0);
1822 if (PyList_Append(chunks
, remaining
) < 0)
1824 Py_CLEAR(remaining
);
1826 if (chunks
!= NULL
) {
1827 if (line
!= NULL
&& PyList_Append(chunks
, line
) < 0)
1830 line
= PyUnicode_Join(_PyIO_empty_str
, chunks
);
1836 line
= PyUnicode_FromStringAndSize(NULL
, 0);
1842 Py_XDECREF(remaining
);
1848 textiowrapper_readline(textio
*self
, PyObject
*args
)
1850 PyObject
*limitobj
= NULL
;
1851 Py_ssize_t limit
= -1;
1853 CHECK_ATTACHED(self
);
1854 if (!PyArg_ParseTuple(args
, "|O:readline", &limitobj
)) {
1858 if (!PyNumber_Check(limitobj
)) {
1859 PyErr_Format(PyExc_TypeError
,
1860 "integer argument expected, got '%.200s'",
1861 Py_TYPE(limitobj
)->tp_name
);
1864 limit
= PyNumber_AsSsize_t(limitobj
, PyExc_OverflowError
);
1865 if (limit
== -1 && PyErr_Occurred())
1868 return _textiowrapper_readline(self
, limit
);
1882 To speed up cookie packing/unpacking, we store the fields in a temporary
1883 string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
1884 The following macros define at which offsets in the intermediary byte
1885 string the various CookieStruct fields will be stored.
1888 #define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
1890 #if defined(WORDS_BIGENDIAN)
1892 # define IS_LITTLE_ENDIAN 0
1894 /* We want the least significant byte of start_pos to also be the least
1895 significant byte of the cookie, which means that in big-endian mode we
1896 must copy the fields in reverse order. */
1898 # define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
1899 # define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
1900 # define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
1901 # define OFF_CHARS_TO_SKIP (sizeof(char))
1902 # define OFF_NEED_EOF 0
1906 # define IS_LITTLE_ENDIAN 1
1908 /* Little-endian mode: the least significant byte of start_pos will
1909 naturally end up the least significant byte of the cookie. */
1911 # define OFF_START_POS 0
1912 # define OFF_DEC_FLAGS (sizeof(Py_off_t))
1913 # define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
1914 # define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
1915 # define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
1920 textiowrapper_parse_cookie(cookie_type
*cookie
, PyObject
*cookieObj
)
1922 unsigned char buffer
[COOKIE_BUF_LEN
];
1923 PyLongObject
*cookieLong
= (PyLongObject
*)PyNumber_Long(cookieObj
);
1924 if (cookieLong
== NULL
)
1927 if (_PyLong_AsByteArray(cookieLong
, buffer
, sizeof(buffer
),
1928 IS_LITTLE_ENDIAN
, 0) < 0) {
1929 Py_DECREF(cookieLong
);
1932 Py_DECREF(cookieLong
);
1934 memcpy(&cookie
->start_pos
, buffer
+ OFF_START_POS
, sizeof(cookie
->start_pos
));
1935 memcpy(&cookie
->dec_flags
, buffer
+ OFF_DEC_FLAGS
, sizeof(cookie
->dec_flags
));
1936 memcpy(&cookie
->bytes_to_feed
, buffer
+ OFF_BYTES_TO_FEED
, sizeof(cookie
->bytes_to_feed
));
1937 memcpy(&cookie
->chars_to_skip
, buffer
+ OFF_CHARS_TO_SKIP
, sizeof(cookie
->chars_to_skip
));
1938 memcpy(&cookie
->need_eof
, buffer
+ OFF_NEED_EOF
, sizeof(cookie
->need_eof
));
1944 textiowrapper_build_cookie(cookie_type
*cookie
)
1946 unsigned char buffer
[COOKIE_BUF_LEN
];
1948 memcpy(buffer
+ OFF_START_POS
, &cookie
->start_pos
, sizeof(cookie
->start_pos
));
1949 memcpy(buffer
+ OFF_DEC_FLAGS
, &cookie
->dec_flags
, sizeof(cookie
->dec_flags
));
1950 memcpy(buffer
+ OFF_BYTES_TO_FEED
, &cookie
->bytes_to_feed
, sizeof(cookie
->bytes_to_feed
));
1951 memcpy(buffer
+ OFF_CHARS_TO_SKIP
, &cookie
->chars_to_skip
, sizeof(cookie
->chars_to_skip
));
1952 memcpy(buffer
+ OFF_NEED_EOF
, &cookie
->need_eof
, sizeof(cookie
->need_eof
));
1954 return _PyLong_FromByteArray(buffer
, sizeof(buffer
), IS_LITTLE_ENDIAN
, 0);
1956 #undef IS_LITTLE_ENDIAN
1959 _textiowrapper_decoder_setstate(textio
*self
, cookie_type
*cookie
)
1962 /* When seeking to the start of the stream, we call decoder.reset()
1963 rather than decoder.getstate().
1964 This is for a few decoders such as utf-16 for which the state value
1965 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1966 utf-16, that we are expecting a BOM).
1968 if (cookie
->start_pos
== 0 && cookie
->dec_flags
== 0)
1969 res
= PyObject_CallMethodObjArgs(self
->decoder
, _PyIO_str_reset
, NULL
);
1971 res
= PyObject_CallMethod(self
->decoder
, "setstate",
1972 "((si))", "", cookie
->dec_flags
);
1980 _textiowrapper_encoder_setstate(textio
*self
, cookie_type
*cookie
)
1983 /* Same as _textiowrapper_decoder_setstate() above. */
1984 if (cookie
->start_pos
== 0 && cookie
->dec_flags
== 0) {
1985 res
= PyObject_CallMethodObjArgs(self
->encoder
, _PyIO_str_reset
, NULL
);
1986 self
->encoding_start_of_stream
= 1;
1989 res
= PyObject_CallMethodObjArgs(self
->encoder
, _PyIO_str_setstate
,
1991 self
->encoding_start_of_stream
= 0;
2000 textiowrapper_seek(textio
*self
, PyObject
*args
)
2002 PyObject
*cookieObj
, *posobj
;
2008 CHECK_ATTACHED(self
);
2010 if (!PyArg_ParseTuple(args
, "O|i:seek", &cookieObj
, &whence
))
2014 Py_INCREF(cookieObj
);
2016 if (!self
->seekable
) {
2017 PyErr_SetString(PyExc_IOError
,
2018 "underlying stream is not seekable");
2023 /* seek relative to current position */
2024 cmp
= PyObject_RichCompareBool(cookieObj
, _PyIO_zero
, Py_EQ
);
2029 PyErr_SetString(PyExc_IOError
,
2030 "can't do nonzero cur-relative seeks");
2034 /* Seeking to the current position should attempt to
2035 * sync the underlying buffer with the current position.
2037 Py_DECREF(cookieObj
);
2038 cookieObj
= PyObject_CallMethod((PyObject
*)self
, "tell", NULL
);
2039 if (cookieObj
== NULL
)
2042 else if (whence
== 2) {
2043 /* seek relative to end of file */
2045 cmp
= PyObject_RichCompareBool(cookieObj
, _PyIO_zero
, Py_EQ
);
2050 PyErr_SetString(PyExc_IOError
,
2051 "can't do nonzero end-relative seeks");
2055 res
= PyObject_CallMethod((PyObject
*)self
, "flush", NULL
);
2060 textiowrapper_set_decoded_chars(self
, NULL
);
2061 Py_CLEAR(self
->snapshot
);
2062 if (self
->decoder
) {
2063 res
= PyObject_CallMethod(self
->decoder
, "reset", NULL
);
2069 res
= PyObject_CallMethod(self
->buffer
, "seek", "ii", 0, 2);
2070 Py_XDECREF(cookieObj
);
2073 else if (whence
!= 0) {
2074 PyErr_Format(PyExc_ValueError
,
2075 "invalid whence (%d, should be 0, 1 or 2)", whence
);
2079 cmp
= PyObject_RichCompareBool(cookieObj
, _PyIO_zero
, Py_LT
);
2084 PyObject
*repr
= PyObject_Repr(cookieObj
);
2086 PyErr_Format(PyExc_ValueError
,
2087 "negative seek position %s",
2088 PyString_AS_STRING(repr
));
2094 res
= PyObject_CallMethodObjArgs((PyObject
*)self
, _PyIO_str_flush
, NULL
);
2099 /* The strategy of seek() is to go back to the safe start point
2100 * and replay the effect of read(chars_to_skip) from there.
2102 if (textiowrapper_parse_cookie(&cookie
, cookieObj
) < 0)
2105 /* Seek back to the safe start point. */
2106 posobj
= PyLong_FromOff_t(cookie
.start_pos
);
2109 res
= PyObject_CallMethodObjArgs(self
->buffer
,
2110 _PyIO_str_seek
, posobj
, NULL
);
2116 textiowrapper_set_decoded_chars(self
, NULL
);
2117 Py_CLEAR(self
->snapshot
);
2119 /* Restore the decoder to its state from the safe start point. */
2120 if (self
->decoder
) {
2121 if (_textiowrapper_decoder_setstate(self
, &cookie
) < 0)
2125 if (cookie
.chars_to_skip
) {
2126 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2127 PyObject
*input_chunk
= PyObject_CallMethod(
2128 self
->buffer
, "read", "i", cookie
.bytes_to_feed
);
2131 if (input_chunk
== NULL
)
2134 if (!PyBytes_Check(input_chunk
)) {
2135 PyErr_Format(PyExc_TypeError
,
2136 "underlying read() should have returned a bytes "
2137 "object, not '%.200s'",
2138 Py_TYPE(input_chunk
)->tp_name
);
2139 Py_DECREF(input_chunk
);
2143 self
->snapshot
= Py_BuildValue("iN", cookie
.dec_flags
, input_chunk
);
2144 if (self
->snapshot
== NULL
) {
2145 Py_DECREF(input_chunk
);
2149 decoded
= PyObject_CallMethod(self
->decoder
, "decode",
2150 "Oi", input_chunk
, (int)cookie
.need_eof
);
2152 if (check_decoded(decoded
) < 0)
2155 textiowrapper_set_decoded_chars(self
, decoded
);
2157 /* Skip chars_to_skip of the decoded characters. */
2158 if (PyUnicode_GetSize(self
->decoded_chars
) < cookie
.chars_to_skip
) {
2159 PyErr_SetString(PyExc_IOError
, "can't restore logical file position");
2162 self
->decoded_chars_used
= cookie
.chars_to_skip
;
2165 self
->snapshot
= Py_BuildValue("is", cookie
.dec_flags
, "");
2166 if (self
->snapshot
== NULL
)
2170 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2171 if (self
->encoder
) {
2172 if (_textiowrapper_encoder_setstate(self
, &cookie
) < 0)
2177 Py_XDECREF(cookieObj
);
2183 textiowrapper_tell(textio
*self
, PyObject
*args
)
2186 PyObject
*posobj
= NULL
;
2187 cookie_type cookie
= {0,0,0,0,0};
2188 PyObject
*next_input
;
2189 Py_ssize_t chars_to_skip
, chars_decoded
;
2190 PyObject
*saved_state
= NULL
;
2191 char *input
, *input_end
;
2193 CHECK_ATTACHED(self
);
2196 if (!self
->seekable
) {
2197 PyErr_SetString(PyExc_IOError
,
2198 "underlying stream is not seekable");
2201 if (!self
->telling
) {
2202 PyErr_SetString(PyExc_IOError
,
2203 "telling position disabled by next() call");
2207 if (_textiowrapper_writeflush(self
) < 0)
2209 res
= PyObject_CallMethod((PyObject
*)self
, "flush", NULL
);
2214 posobj
= PyObject_CallMethod(self
->buffer
, "tell", NULL
);
2218 if (self
->decoder
== NULL
|| self
->snapshot
== NULL
) {
2219 assert (self
->decoded_chars
== NULL
|| PyUnicode_GetSize(self
->decoded_chars
) == 0);
2223 #if defined(HAVE_LARGEFILE_SUPPORT)
2224 cookie
.start_pos
= PyLong_AsLongLong(posobj
);
2226 cookie
.start_pos
= PyLong_AsLong(posobj
);
2228 if (PyErr_Occurred())
2231 /* Skip backward to the snapshot point (see _read_chunk). */
2232 if (!PyArg_Parse(self
->snapshot
, "(iO)", &cookie
.dec_flags
, &next_input
))
2235 assert (PyBytes_Check(next_input
));
2237 cookie
.start_pos
-= PyBytes_GET_SIZE(next_input
);
2239 /* How many decoded characters have been used up since the snapshot? */
2240 if (self
->decoded_chars_used
== 0) {
2241 /* We haven't moved from the snapshot point. */
2243 return textiowrapper_build_cookie(&cookie
);
2246 chars_to_skip
= self
->decoded_chars_used
;
2248 /* Starting from the snapshot position, we will walk the decoder
2249 * forward until it gives us enough decoded characters.
2251 saved_state
= PyObject_CallMethodObjArgs(self
->decoder
,
2252 _PyIO_str_getstate
, NULL
);
2253 if (saved_state
== NULL
)
2256 /* Note our initial start point. */
2257 if (_textiowrapper_decoder_setstate(self
, &cookie
) < 0)
2260 /* Feed the decoder one byte at a time. As we go, note the
2261 * nearest "safe start point" before the current location
2262 * (a point where the decoder has nothing buffered, so seek()
2263 * can safely start from there and advance to this location).
2266 input
= PyBytes_AS_STRING(next_input
);
2267 input_end
= input
+ PyBytes_GET_SIZE(next_input
);
2268 while (input
< input_end
) {
2271 Py_ssize_t dec_buffer_len
;
2274 PyObject
*decoded
= PyObject_CallMethod(
2275 self
->decoder
, "decode", "s#", input
, (Py_ssize_t
)1);
2276 if (check_decoded(decoded
) < 0)
2278 chars_decoded
+= PyUnicode_GET_SIZE(decoded
);
2281 cookie
.bytes_to_feed
+= 1;
2283 state
= PyObject_CallMethodObjArgs(self
->decoder
,
2284 _PyIO_str_getstate
, NULL
);
2287 if (!PyArg_Parse(state
, "(s#i)", &dec_buffer
, &dec_buffer_len
, &dec_flags
)) {
2293 if (dec_buffer_len
== 0 && chars_decoded
<= chars_to_skip
) {
2294 /* Decoder buffer is empty, so this is a safe start point. */
2295 cookie
.start_pos
+= cookie
.bytes_to_feed
;
2296 chars_to_skip
-= chars_decoded
;
2297 cookie
.dec_flags
= dec_flags
;
2298 cookie
.bytes_to_feed
= 0;
2301 if (chars_decoded
>= chars_to_skip
)
2305 if (input
== input_end
) {
2306 /* We didn't get enough decoded data; signal EOF to get more. */
2307 PyObject
*decoded
= PyObject_CallMethod(
2308 self
->decoder
, "decode", "si", "", /* final = */ 1);
2309 if (check_decoded(decoded
) < 0)
2311 chars_decoded
+= PyUnicode_GET_SIZE(decoded
);
2313 cookie
.need_eof
= 1;
2315 if (chars_decoded
< chars_to_skip
) {
2316 PyErr_SetString(PyExc_IOError
,
2317 "can't reconstruct logical file position");
2324 res
= PyObject_CallMethod(self
->decoder
, "setstate", "(O)", saved_state
);
2325 Py_DECREF(saved_state
);
2330 /* The returned cookie corresponds to the last safe start point. */
2331 cookie
.chars_to_skip
= Py_SAFE_DOWNCAST(chars_to_skip
, Py_ssize_t
, int);
2332 return textiowrapper_build_cookie(&cookie
);
2337 PyObject
*type
, *value
, *traceback
;
2338 PyErr_Fetch(&type
, &value
, &traceback
);
2340 res
= PyObject_CallMethod(self
->decoder
, "setstate", "(O)", saved_state
);
2341 _PyErr_ReplaceException(type
, value
, traceback
);
2342 Py_DECREF(saved_state
);
2349 textiowrapper_truncate(textio
*self
, PyObject
*args
)
2351 PyObject
*pos
= Py_None
;
2354 CHECK_ATTACHED(self
)
2355 if (!PyArg_ParseTuple(args
, "|O:truncate", &pos
)) {
2359 res
= PyObject_CallMethodObjArgs((PyObject
*) self
, _PyIO_str_flush
, NULL
);
2364 return PyObject_CallMethodObjArgs(self
->buffer
, _PyIO_str_truncate
, pos
, NULL
);
2368 textiowrapper_repr(textio
*self
)
2370 PyObject
*nameobj
, *res
;
2371 PyObject
*namerepr
= NULL
, *encrepr
= NULL
;
2373 CHECK_INITIALIZED(self
);
2375 nameobj
= PyObject_GetAttrString((PyObject
*) self
, "name");
2376 if (nameobj
== NULL
) {
2377 if (PyErr_ExceptionMatches(PyExc_Exception
))
2381 encrepr
= PyObject_Repr(self
->encoding
);
2382 res
= PyString_FromFormat("<_io.TextIOWrapper encoding=%s>",
2383 PyString_AS_STRING(encrepr
));
2386 encrepr
= PyObject_Repr(self
->encoding
);
2387 namerepr
= PyObject_Repr(nameobj
);
2388 res
= PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>",
2389 PyString_AS_STRING(namerepr
),
2390 PyString_AS_STRING(encrepr
));
2393 Py_XDECREF(namerepr
);
2394 Py_XDECREF(encrepr
);
2398 Py_XDECREF(namerepr
);
2399 Py_XDECREF(encrepr
);
2407 textiowrapper_fileno(textio
*self
, PyObject
*args
)
2409 CHECK_ATTACHED(self
);
2410 return PyObject_CallMethod(self
->buffer
, "fileno", NULL
);
2414 textiowrapper_seekable(textio
*self
, PyObject
*args
)
2416 CHECK_ATTACHED(self
);
2417 return PyObject_CallMethod(self
->buffer
, "seekable", NULL
);
2421 textiowrapper_readable(textio
*self
, PyObject
*args
)
2423 CHECK_ATTACHED(self
);
2424 return PyObject_CallMethod(self
->buffer
, "readable", NULL
);
2428 textiowrapper_writable(textio
*self
, PyObject
*args
)
2430 CHECK_ATTACHED(self
);
2431 return PyObject_CallMethod(self
->buffer
, "writable", NULL
);
2435 textiowrapper_isatty(textio
*self
, PyObject
*args
)
2437 CHECK_ATTACHED(self
);
2438 return PyObject_CallMethod(self
->buffer
, "isatty", NULL
);
2442 textiowrapper_flush(textio
*self
, PyObject
*args
)
2444 CHECK_ATTACHED(self
);
2446 self
->telling
= self
->seekable
;
2447 if (_textiowrapper_writeflush(self
) < 0)
2449 return PyObject_CallMethod(self
->buffer
, "flush", NULL
);
2453 textiowrapper_close(textio
*self
, PyObject
*args
)
2457 CHECK_ATTACHED(self
);
2459 res
= textiowrapper_closed_get(self
, NULL
);
2462 r
= PyObject_IsTrue(res
);
2468 Py_RETURN_NONE
; /* stream already closed */
2471 PyObject
*exc
= NULL
, *val
, *tb
;
2472 res
= PyObject_CallMethod((PyObject
*)self
, "flush", NULL
);
2474 PyErr_Fetch(&exc
, &val
, &tb
);
2478 res
= PyObject_CallMethod(self
->buffer
, "close", NULL
);
2480 _PyErr_ReplaceException(exc
, val
, tb
);
2488 textiowrapper_iternext(textio
*self
)
2492 CHECK_ATTACHED(self
);
2495 if (Py_TYPE(self
) == &PyTextIOWrapper_Type
) {
2496 /* Skip method call overhead for speed */
2497 line
= _textiowrapper_readline(self
, -1);
2500 line
= PyObject_CallMethodObjArgs((PyObject
*)self
,
2501 _PyIO_str_readline
, NULL
);
2502 if (line
&& !PyUnicode_Check(line
)) {
2503 PyErr_Format(PyExc_IOError
,
2504 "readline() should have returned an str object, "
2505 "not '%.200s'", Py_TYPE(line
)->tp_name
);
2514 if (PyUnicode_GET_SIZE(line
) == 0) {
2515 /* Reached EOF or would have blocked */
2517 Py_CLEAR(self
->snapshot
);
2518 self
->telling
= self
->seekable
;
2526 textiowrapper_name_get(textio
*self
, void *context
)
2528 CHECK_ATTACHED(self
);
2529 return PyObject_GetAttrString(self
->buffer
, "name");
2533 textiowrapper_closed_get(textio
*self
, void *context
)
2535 CHECK_ATTACHED(self
);
2536 return PyObject_GetAttr(self
->buffer
, _PyIO_str_closed
);
2540 textiowrapper_newlines_get(textio
*self
, void *context
)
2543 CHECK_ATTACHED(self
);
2544 if (self
->decoder
== NULL
)
2546 res
= PyObject_GetAttr(self
->decoder
, _PyIO_str_newlines
);
2548 if (PyErr_ExceptionMatches(PyExc_AttributeError
)) {
2560 textiowrapper_errors_get(textio
*self
, void *context
)
2562 CHECK_INITIALIZED(self
);
2563 Py_INCREF(self
->errors
);
2564 return self
->errors
;
2568 textiowrapper_chunk_size_get(textio
*self
, void *context
)
2570 CHECK_ATTACHED(self
);
2571 return PyLong_FromSsize_t(self
->chunk_size
);
2575 textiowrapper_chunk_size_set(textio
*self
, PyObject
*arg
, void *context
)
2578 CHECK_ATTACHED_INT(self
);
2579 n
= PyNumber_AsSsize_t(arg
, PyExc_TypeError
);
2580 if (n
== -1 && PyErr_Occurred())
2583 PyErr_SetString(PyExc_ValueError
,
2584 "a strictly positive integer is required");
2587 self
->chunk_size
= n
;
2591 static PyMethodDef textiowrapper_methods
[] = {
2592 {"detach", (PyCFunction
)textiowrapper_detach
, METH_NOARGS
},
2593 {"write", (PyCFunction
)textiowrapper_write
, METH_VARARGS
},
2594 {"read", (PyCFunction
)textiowrapper_read
, METH_VARARGS
},
2595 {"readline", (PyCFunction
)textiowrapper_readline
, METH_VARARGS
},
2596 {"flush", (PyCFunction
)textiowrapper_flush
, METH_NOARGS
},
2597 {"close", (PyCFunction
)textiowrapper_close
, METH_NOARGS
},
2599 {"fileno", (PyCFunction
)textiowrapper_fileno
, METH_NOARGS
},
2600 {"seekable", (PyCFunction
)textiowrapper_seekable
, METH_NOARGS
},
2601 {"readable", (PyCFunction
)textiowrapper_readable
, METH_NOARGS
},
2602 {"writable", (PyCFunction
)textiowrapper_writable
, METH_NOARGS
},
2603 {"isatty", (PyCFunction
)textiowrapper_isatty
, METH_NOARGS
},
2605 {"seek", (PyCFunction
)textiowrapper_seek
, METH_VARARGS
},
2606 {"tell", (PyCFunction
)textiowrapper_tell
, METH_NOARGS
},
2607 {"truncate", (PyCFunction
)textiowrapper_truncate
, METH_VARARGS
},
2611 static PyMemberDef textiowrapper_members
[] = {
2612 {"encoding", T_OBJECT
, offsetof(textio
, encoding
), READONLY
},
2613 {"buffer", T_OBJECT
, offsetof(textio
, buffer
), READONLY
},
2614 {"line_buffering", T_BOOL
, offsetof(textio
, line_buffering
), READONLY
},
2618 static PyGetSetDef textiowrapper_getset
[] = {
2619 {"name", (getter
)textiowrapper_name_get
, NULL
, NULL
},
2620 {"closed", (getter
)textiowrapper_closed_get
, NULL
, NULL
},
2621 /* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2623 {"newlines", (getter
)textiowrapper_newlines_get
, NULL
, NULL
},
2624 {"errors", (getter
)textiowrapper_errors_get
, NULL
, NULL
},
2625 {"_CHUNK_SIZE", (getter
)textiowrapper_chunk_size_get
,
2626 (setter
)textiowrapper_chunk_size_set
, NULL
},
2630 PyTypeObject PyTextIOWrapper_Type
= {
2631 PyVarObject_HEAD_INIT(NULL
, 0)
2632 "_io.TextIOWrapper", /*tp_name*/
2633 sizeof(textio
), /*tp_basicsize*/
2635 (destructor
)textiowrapper_dealloc
, /*tp_dealloc*/
2640 (reprfunc
)textiowrapper_repr
,/*tp_repr*/
2642 0, /*tp_as_sequence*/
2643 0, /*tp_as_mapping*/
2650 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
2651 | Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
2652 textiowrapper_doc
, /* tp_doc */
2653 (traverseproc
)textiowrapper_traverse
, /* tp_traverse */
2654 (inquiry
)textiowrapper_clear
, /* tp_clear */
2655 0, /* tp_richcompare */
2656 offsetof(textio
, weakreflist
), /*tp_weaklistoffset*/
2658 (iternextfunc
)textiowrapper_iternext
, /* tp_iternext */
2659 textiowrapper_methods
, /* tp_methods */
2660 textiowrapper_members
, /* tp_members */
2661 textiowrapper_getset
, /* tp_getset */
2664 0, /* tp_descr_get */
2665 0, /* tp_descr_set */
2666 offsetof(textio
, dict
), /*tp_dictoffset*/
2667 (initproc
)textiowrapper_init
, /* tp_init */
2669 PyType_GenericNew
, /* tp_new */