+++ /dev/null
-/*\r
- An implementation of Text I/O as defined by PEP 3116 - "New I/O"\r
-\r
- Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.\r
-\r
- Written by Amaury Forgeot d'Arc and Antoine Pitrou\r
-*/\r
-\r
-#define PY_SSIZE_T_CLEAN\r
-#include "Python.h"\r
-#include "structmember.h"\r
-#include "_iomodule.h"\r
-\r
-/* TextIOBase */\r
-\r
-PyDoc_STRVAR(textiobase_doc,\r
-    "Base class for text I/O.\n"\r
-    "\n"\r
-    "This class provides a character and line based interface to stream\n"\r
-    "I/O. There is no readinto method because Python's character strings\n"\r
-    "are immutable. There is no public constructor.\n"\r
-    );\r
-\r
-/* Set the exception stored in _PyIO_unsupported_operation with `message`\r
-   as its text. Always returns NULL, so a caller can simply write\r
-   `return _unsupported("name");`. */\r
-static PyObject *\r
-_unsupported(const char *message)\r
-{\r
-    PyObject *exc_type = _PyIO_unsupported_operation;\r
-\r
-    PyErr_SetString(exc_type, message);\r
-    return NULL;\r
-}\r
-\r
-PyDoc_STRVAR(textiobase_detach_doc,\r
-    "Separate the underlying buffer from the TextIOBase and return it.\n"\r
-    "\n"\r
-    "After the underlying buffer has been detached, the TextIO is in an\n"\r
-    "unusable state.\n"\r
-    );\r
-\r
-/* Base-class placeholder for detach(): unconditionally fails by way of\r
-   _unsupported("detach"). */\r
-static PyObject *\r
-textiobase_detach(PyObject *self)\r
-{\r
-    PyObject *failure;\r
-\r
-    failure = _unsupported("detach");\r
-    return failure;\r
-}\r
-\r
-PyDoc_STRVAR(textiobase_read_doc,\r
-    "Read at most n characters from stream.\n"\r
-    "\n"\r
-    "Read from underlying buffer until we have n characters or we hit EOF.\n"\r
-    "If n is negative or omitted, read until EOF.\n"\r
-    );\r
-\r
-/* Base-class placeholder for read(): the arguments are ignored and the\r
-   call always fails by way of _unsupported("read"). */\r
-static PyObject *\r
-textiobase_read(PyObject *self, PyObject *args)\r
-{\r
-    PyObject *failure;\r
-\r
-    failure = _unsupported("read");\r
-    return failure;\r
-}\r
-\r
-PyDoc_STRVAR(textiobase_readline_doc,\r
-    "Read until newline or EOF.\n"\r
-    "\n"\r
-    "Returns an empty string if EOF is hit immediately.\n"\r
-    );\r
-\r
-/* Base-class placeholder for readline(): the arguments are ignored and the\r
-   call always fails by way of _unsupported("readline"). */\r
-static PyObject *\r
-textiobase_readline(PyObject *self, PyObject *args)\r
-{\r
-    PyObject *failure;\r
-\r
-    failure = _unsupported("readline");\r
-    return failure;\r
-}\r
-\r
-PyDoc_STRVAR(textiobase_write_doc,\r
-    "Write string to stream.\n"\r
-    "Returns the number of characters written (which is always equal to\n"\r
-    "the length of the string).\n"\r
-    );\r
-\r
-/* Base-class placeholder for write(): the arguments are ignored and the\r
-   call always fails by way of _unsupported("write"). */\r
-static PyObject *\r
-textiobase_write(PyObject *self, PyObject *args)\r
-{\r
-    PyObject *failure;\r
-\r
-    failure = _unsupported("write");\r
-    return failure;\r
-}\r
-\r
-PyDoc_STRVAR(textiobase_encoding_doc,\r
-    "Encoding of the text stream.\n"\r
-    "\n"\r
-    "Subclasses should override.\n"\r
-    );\r
-\r
-/* Getter for `encoding` on the base class: always a new reference to None. */\r
-static PyObject *\r
-textiobase_encoding_get(PyObject *self, void *context)\r
-{\r
-    PyObject *none = Py_None;\r
-\r
-    Py_INCREF(none);\r
-    return none;\r
-}\r
-\r
-PyDoc_STRVAR(textiobase_newlines_doc,\r
-    "Line endings translated so far.\n"\r
-    "\n"\r
-    "Only line endings translated during reading are considered.\n"\r
-    "\n"\r
-    "Subclasses should override.\n"\r
-    );\r
-\r
-/* Getter for `newlines` on the base class: always a new reference to None. */\r
-static PyObject *\r
-textiobase_newlines_get(PyObject *self, void *context)\r
-{\r
-    PyObject *none = Py_None;\r
-\r
-    Py_INCREF(none);\r
-    return none;\r
-}\r
-\r
-PyDoc_STRVAR(textiobase_errors_doc,\r
-    "The error setting of the decoder or encoder.\n"\r
-    "\n"\r
-    "Subclasses should override.\n"\r
-    );\r
-\r
-/* Getter for `errors` on the base class: always a new reference to None. */\r
-static PyObject *\r
-textiobase_errors_get(PyObject *self, void *context)\r
-{\r
-    PyObject *none = Py_None;\r
-\r
-    Py_INCREF(none);\r
-    return none;\r
-}\r
-\r
-\r
-/* Method table for _TextIOBase. Each entry points at one of the stub\r
-   functions defined earlier in this file, all of which fail through\r
-   _unsupported(). The {NULL, NULL} entry terminates the table. */\r
-static PyMethodDef textiobase_methods[] = {\r
- {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},\r
- {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},\r
- {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},\r
- {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},\r
- {NULL, NULL}\r
-};\r
-\r
-/* Attribute table for _TextIOBase. Only getters are supplied (the setter\r
-   slot is NULL), and each getter returns None. */\r
-static PyGetSetDef textiobase_getset[] = {\r
- {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},\r
- {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},\r
- {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},\r
- {NULL}\r
-};\r
-\r
-/* Type object for _io._TextIOBase. It derives from PyIOBase_Type (tp_base),\r
-   may itself be subclassed (Py_TPFLAGS_BASETYPE), and contributes only the\r
-   method and getset tables above plus its docstring. The initializer is\r
-   positional, so the order of the slots below must not change.\r
-   NOTE(review): tp_basicsize and tp_new are 0 here; presumably they are\r
-   filled in from the base type when the type is readied -- confirm. */\r
-PyTypeObject PyTextIOBase_Type = {\r
- PyVarObject_HEAD_INIT(NULL, 0)\r
- "_io._TextIOBase", /*tp_name*/\r
- 0, /*tp_basicsize*/\r
- 0, /*tp_itemsize*/\r
- 0, /*tp_dealloc*/\r
- 0, /*tp_print*/\r
- 0, /*tp_getattr*/\r
- 0, /*tp_setattr*/\r
- 0, /*tp_compare */\r
- 0, /*tp_repr*/\r
- 0, /*tp_as_number*/\r
- 0, /*tp_as_sequence*/\r
- 0, /*tp_as_mapping*/\r
- 0, /*tp_hash */\r
- 0, /*tp_call*/\r
- 0, /*tp_str*/\r
- 0, /*tp_getattro*/\r
- 0, /*tp_setattro*/\r
- 0, /*tp_as_buffer*/\r
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/\r
- textiobase_doc, /* tp_doc */\r
- 0, /* tp_traverse */\r
- 0, /* tp_clear */\r
- 0, /* tp_richcompare */\r
- 0, /* tp_weaklistoffset */\r
- 0, /* tp_iter */\r
- 0, /* tp_iternext */\r
- textiobase_methods, /* tp_methods */\r
- 0, /* tp_members */\r
- textiobase_getset, /* tp_getset */\r
- &PyIOBase_Type, /* tp_base */\r
- 0, /* tp_dict */\r
- 0, /* tp_descr_get */\r
- 0, /* tp_descr_set */\r
- 0, /* tp_dictoffset */\r
- 0, /* tp_init */\r
- 0, /* tp_alloc */\r
- 0, /* tp_new */\r
-};\r
-\r
-\r
-/* IncrementalNewlineDecoder */\r
-\r
-PyDoc_STRVAR(incrementalnewlinedecoder_doc,\r
-    "Codec used when reading a file in universal newlines mode. It wraps\n"\r
-    "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"\r
-    "records the types of newlines encountered. When used with\n"\r
-    "translate=False, it ensures that the newline sequence is returned in\n"\r
-    "one piece. When used with decoder=None, it expects unicode strings as\n"\r
-    "decode input and translates newlines without first invoking an external\n"\r
-    "decoder.\n"\r
-    );\r
-\r
-/* Instance layout of IncrementalNewlineDecoder.\r
-\r
-   decoder    the wrapped incremental decoder, or None for pass-through;\r
-              NULL until __init__ has run\r
-   errors     the `errors` argument given to __init__ (default "strict")\r
-   pendingcr  set when a trailing '\r' was held back from the last result\r
-   translate  set when newlines are to be rewritten to '\n'\r
-   seennl     bitwise OR of the SEEN_* flags for newline kinds met so far\r
-\r
-   The one-bit flags are unsigned: a signed one-bit field can only hold\r
-   0 and -1, so storing 1 into it is not portable. The code only ever\r
-   tests these flags for truth, so the change is invisible to users. */\r
-typedef struct {\r
-    PyObject_HEAD\r
-    PyObject *decoder;\r
-    PyObject *errors;\r
-    unsigned int pendingcr: 1;\r
-    unsigned int translate: 1;\r
-    unsigned int seennl: 3;\r
-} nldecoder_object;\r
-\r
-/* tp_init for IncrementalNewlineDecoder(decoder, translate, errors='strict').\r
-\r
-   decoder    any object; stored as-is (None selects pass-through mode in\r
-              the decode routine)\r
-   translate  int; any non-zero value enables newline translation\r
-   errors     optional object; defaults to the string "strict"\r
-\r
-   Returns 0 on success, -1 with an exception set on failure. Nothing in\r
-   `self` is modified unless every step has succeeded, and references held\r
-   from an earlier __init__ call are released rather than leaked. */\r
-static int\r
-incrementalnewlinedecoder_init(nldecoder_object *self,\r
-                               PyObject *args, PyObject *kwds)\r
-{\r
-    PyObject *decoder;\r
-    int translate;\r
-    PyObject *errors = NULL;\r
-    PyObject *previous;\r
-    char *kwlist[] = {"decoder", "translate", "errors", NULL};\r
-\r
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",\r
-                                     kwlist, &decoder, &translate, &errors))\r
-        return -1;\r
-\r
-    /* Obtain an owned reference to `errors` before touching self. */\r
-    if (errors == NULL) {\r
-        errors = PyUnicode_FromString("strict");\r
-        if (errors == NULL)\r
-            return -1;\r
-    }\r
-    else {\r
-        Py_INCREF(errors);\r
-    }\r
-\r
-    /* Install the new value first, then drop the old one, so that the\r
-       field never refers to a released object. */\r
-    previous = self->decoder;\r
-    Py_INCREF(decoder);\r
-    self->decoder = decoder;\r
-    Py_XDECREF(previous);\r
-\r
-    previous = self->errors;\r
-    self->errors = errors;\r
-    Py_XDECREF(previous);\r
-\r
-    /* translate is a one-bit field: normalise so that e.g. 2 means "on". */\r
-    self->translate = translate ? 1 : 0;\r
-    self->seennl = 0;\r
-    self->pendingcr = 0;\r
-\r
-    return 0;\r
-}\r
-\r
-/* tp_dealloc: release both owned references, then free the object through\r
-   its type's tp_free slot. */\r
-static void\r
-incrementalnewlinedecoder_dealloc(nldecoder_object *self)\r
-{\r
-    PyObject *as_object = (PyObject *)self;\r
-\r
-    Py_CLEAR(self->decoder);\r
-    Py_CLEAR(self->errors);\r
-    Py_TYPE(as_object)->tp_free(as_object);\r
-}\r
-\r
-/* Validate the result of a decoder call.\r
-\r
-   Returns 0 if `decoded` is a unicode object. Returns -1 if it is NULL\r
-   (the pending exception is left alone) or if it is some other type, in\r
-   which case TypeError is set and the reference to `decoded` is released. */\r
-static int\r
-check_decoded(PyObject *decoded)\r
-{\r
-    if (decoded != NULL) {\r
-        if (PyUnicode_Check(decoded))\r
-            return 0;\r
-        PyErr_Format(PyExc_TypeError,\r
-                     "decoder should return a string result, not '%.200s'",\r
-                     Py_TYPE(decoded)->tp_name);\r
-        Py_DECREF(decoded);\r
-    }\r
-    return -1;\r
-}\r
-\r
-/* Bit flags stored in nldecoder_object.seennl, one per newline kind that\r
-   has been encountered; SEEN_ALL is all three combined. */\r
-#define SEEN_CR 1\r
-#define SEEN_LF 2\r
-#define SEEN_CRLF 4\r
-#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)\r
-\r
-/* Decode `input` and post-process newlines.\r
-\r
-   _self  an nldecoder_object (cast without checking)\r
-   input  passed to the wrapped decoder's decode method, or used directly\r
-          as the decoded text when the wrapped decoder is None\r
-   final  non-zero when no more input will follow\r
-\r
-   Steps, in order:\r
-     1. obtain the decoded text and check that it is a unicode object;\r
-     2. if a '\r' was held back by the previous call, put it back in front;\r
-     3. unless `final`, strip a trailing '\r' and remember it in pendingcr;\r
-     4. scan the text, OR-ing the newline kinds found into self->seennl,\r
-        and, when self->translate is set, rewrite "\r\n" and "\r" to "\n".\r
-\r
-   Returns a new reference to the resulting unicode object, or NULL with an\r
-   exception set. ValueError is raised if __init__ never ran.\r
-\r
-   NOTE(review): the three scan loops below only stop once the cursor has\r
-   moved past `end`, i.e. they read the element at index `len`. They\r
-   presumably rely on the unicode buffer being NUL-terminated -- confirm\r
-   before changing how the buffers are produced. */\r
-PyObject *\r
-_PyIncrementalNewlineDecoder_decode(PyObject *_self,\r
- PyObject *input, int final)\r
-{\r
- PyObject *output;\r
- Py_ssize_t output_len;\r
- nldecoder_object *self = (nldecoder_object *) _self;\r
-\r
- if (self->decoder == NULL) {\r
- PyErr_SetString(PyExc_ValueError,\r
- "IncrementalNewlineDecoder.__init__ not called");\r
- return NULL;\r
- }\r
-\r
- /* decode input (with the eventual \r from a previous pass) */\r
- if (self->decoder != Py_None) {\r
- output = PyObject_CallMethodObjArgs(self->decoder,\r
- _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);\r
- }\r
- else {\r
- output = input;\r
- Py_INCREF(output);\r
- }\r
-\r
- /* check_decoded() releases `output` itself when the type is wrong */\r
- if (check_decoded(output) < 0)\r
- return NULL;\r
-\r
- output_len = PyUnicode_GET_SIZE(output);\r
- /* Re-attach the held-back '\r', but only once there is something to\r
- attach it to or this is the last call. */\r
- if (self->pendingcr && (final || output_len > 0)) {\r
- Py_UNICODE *out;\r
- PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);\r
- if (modified == NULL)\r
- goto error;\r
- out = PyUnicode_AS_UNICODE(modified);\r
- out[0] = '\r';\r
- memcpy(out + 1, PyUnicode_AS_UNICODE(output),\r
- output_len * sizeof(Py_UNICODE));\r
- Py_DECREF(output);\r
- output = modified;\r
- self->pendingcr = 0;\r
- output_len++;\r
- }\r
-\r
- /* retain last \r even when not translating data:\r
- * then readline() is sure to get \r\n in one pass\r
- */\r
- if (!final) {\r
- if (output_len > 0\r
- && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {\r
-\r
- /* Shrink in place only when nobody else holds a reference;\r
- otherwise build a shortened copy. */\r
- if (Py_REFCNT(output) == 1) {\r
- if (PyUnicode_Resize(&output, output_len - 1) < 0)\r
- goto error;\r
- }\r
- else {\r
- PyObject *modified = PyUnicode_FromUnicode(\r
- PyUnicode_AS_UNICODE(output),\r
- output_len - 1);\r
- if (modified == NULL)\r
- goto error;\r
- Py_DECREF(output);\r
- output = modified;\r
- }\r
- self->pendingcr = 1;\r
- }\r
- }\r
-\r
- /* Record which newlines are read and do newline translation if desired,\r
- all in one pass. */\r
- {\r
- Py_UNICODE *in_str;\r
- Py_ssize_t len;\r
- int seennl = self->seennl;\r
- int only_lf = 0;\r
-\r
- in_str = PyUnicode_AS_UNICODE(output);\r
- len = PyUnicode_GET_SIZE(output);\r
-\r
- if (len == 0)\r
- return output;\r
-\r
- /* If, up to now, newlines are consistently \n, do a quick check\r
- for the \r *byte* with the libc's optimized memchr.\r
- */\r
- /* The check is byte-wise, so a 0x0D byte inside some other code unit\r
- also clears only_lf; that merely selects the slower exact scan. */\r
- if (seennl == SEEN_LF || seennl == 0) {\r
- only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);\r
- }\r
-\r
- if (only_lf) {\r
- /* If not already seen, quick scan for a possible "\n" character.\r
- (there's nothing else to be done, even when in translation mode)\r
- */\r
- if (seennl == 0 &&\r
- memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {\r
- Py_UNICODE *s, *end;\r
- s = in_str;\r
- end = in_str + len;\r
- for (;;) {\r
- Py_UNICODE c;\r
- /* Fast loop for non-control characters */\r
- while (*s > '\n')\r
- s++;\r
- c = *s++;\r
- if (c == '\n') {\r
- seennl |= SEEN_LF;\r
- break;\r
- }\r
- if (s > end)\r
- break;\r
- }\r
- }\r
- /* Finished: we have scanned for newlines, and none of them\r
- need translating */\r
- }\r
- else if (!self->translate) {\r
- Py_UNICODE *s, *end;\r
- /* We have already seen all newline types, no need to scan again */\r
- if (seennl == SEEN_ALL)\r
- goto endscan;\r
- s = in_str;\r
- end = in_str + len;\r
- for (;;) {\r
- Py_UNICODE c;\r
- /* Fast loop for non-control characters */\r
- while (*s > '\r')\r
- s++;\r
- c = *s++;\r
- if (c == '\n')\r
- seennl |= SEEN_LF;\r
- else if (c == '\r') {\r
- if (*s == '\n') {\r
- seennl |= SEEN_CRLF;\r
- s++;\r
- }\r
- else\r
- seennl |= SEEN_CR;\r
- }\r
- if (s > end)\r
- break;\r
- if (seennl == SEEN_ALL)\r
- break;\r
- }\r
- endscan:\r
- ;\r
- }\r
- else {\r
- PyObject *translated = NULL;\r
- Py_UNICODE *out_str;\r
- Py_UNICODE *in, *out, *end;\r
- if (Py_REFCNT(output) != 1) {\r
- /* We could try to optimize this so that we only do a copy\r
- when there is something to translate. On the other hand,\r
- most decoders should only output non-shared strings, i.e.\r
- translation is done in place. */\r
- translated = PyUnicode_FromUnicode(NULL, len);\r
- if (translated == NULL)\r
- goto error;\r
- assert(Py_REFCNT(translated) == 1);\r
- memcpy(PyUnicode_AS_UNICODE(translated),\r
- PyUnicode_AS_UNICODE(output),\r
- len * sizeof(Py_UNICODE));\r
- }\r
- else {\r
- translated = output;\r
- }\r
- /* `in` reads from the original text, `out` writes into `translated`;\r
- out never runs ahead of in, so in-place translation is safe. */\r
- out_str = PyUnicode_AS_UNICODE(translated);\r
- in = in_str;\r
- out = out_str;\r
- end = in_str + len;\r
- for (;;) {\r
- Py_UNICODE c;\r
- /* Fast loop for non-control characters */\r
- while ((c = *in++) > '\r')\r
- *out++ = c;\r
- if (c == '\n') {\r
- *out++ = c;\r
- seennl |= SEEN_LF;\r
- continue;\r
- }\r
- if (c == '\r') {\r
- if (*in == '\n') {\r
- in++;\r
- seennl |= SEEN_CRLF;\r
- }\r
- else\r
- seennl |= SEEN_CR;\r
- *out++ = '\n';\r
- continue;\r
- }\r
- if (in > end)\r
- break;\r
- *out++ = c;\r
- }\r
- if (translated != output) {\r
- Py_DECREF(output);\r
- output = translated;\r
- }\r
- /* Each "\r\n" collapsed into one character: trim the result. */\r
- if (out - out_str != len) {\r
- if (PyUnicode_Resize(&output, out - out_str) < 0)\r
- goto error;\r
- }\r
- }\r
- self->seennl |= seennl;\r
- }\r
-\r
- return output;\r
-\r
- error:\r
- Py_DECREF(output);\r
- return NULL;\r
-}\r
-\r
-/* Python-level decode(input, final=0): unpack the arguments and delegate to\r
-   _PyIncrementalNewlineDecoder_decode(). Returns its result, or NULL if the\r
-   arguments could not be parsed. */\r
-static PyObject *\r
-incrementalnewlinedecoder_decode(nldecoder_object *self,\r
-                                 PyObject *args, PyObject *kwds)\r
-{\r
-    char *kwlist[] = {"input", "final", NULL};\r
-    PyObject *input;\r
-    int final = 0;\r
-    int parsed;\r
-\r
-    parsed = PyArg_ParseTupleAndKeywords(args, kwds,\r
-                                         "O|i:IncrementalNewlineDecoder",\r
-                                         kwlist, &input, &final);\r
-    if (parsed)\r
-        return _PyIncrementalNewlineDecoder_decode((PyObject *) self,\r
-                                                   input, final);\r
-    return NULL;\r
-}\r
-\r
-/* getstate(): return (buffer, flag).\r
-\r
-   The pair comes from the wrapped decoder's getstate(), or is (b"", 0) when\r
-   the wrapped decoder is None. The flag is then shifted left by one bit and\r
-   the pendingcr state is stored in the freed-up lowest bit. */\r
-static PyObject *\r
-incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)\r
-{\r
-    PyObject *buffer;\r
-    unsigned PY_LONG_LONG flag;\r
-\r
-    if (self->decoder == Py_None) {\r
-        buffer = PyBytes_FromString("");\r
-        flag = 0;\r
-    }\r
-    else {\r
-        int parsed;\r
-        PyObject *state = PyObject_CallMethodObjArgs(self->decoder,\r
-                                                     _PyIO_str_getstate, NULL);\r
-        if (state == NULL)\r
-            return NULL;\r
-        parsed = PyArg_Parse(state, "(OK)", &buffer, &flag);\r
-        /* buffer is borrowed from state: own it before state goes away */\r
-        if (parsed)\r
-            Py_INCREF(buffer);\r
-        Py_DECREF(state);\r
-        if (!parsed)\r
-            return NULL;\r
-    }\r
-\r
-    flag <<= 1;\r
-    if (self->pendingcr)\r
-        flag |= 1;\r
-    /* "N" hands our reference to buffer over to the new tuple */\r
-    return Py_BuildValue("NK", buffer, flag);\r
-}\r
-\r
-/* setstate((buffer, flag)): inverse of getstate().\r
-\r
-   The lowest bit of flag is restored into pendingcr; the remaining bits,\r
-   shifted back down, are forwarded with buffer to the wrapped decoder's\r
-   setstate(). Returns that call's result, or None when the wrapped decoder\r
-   is None. */\r
-static PyObject *\r
-incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)\r
-{\r
-    PyObject *buffer;\r
-    unsigned PY_LONG_LONG flag;\r
-\r
-    if (!PyArg_Parse(state, "(OK)", &buffer, &flag))\r
-        return NULL;\r
-\r
-    self->pendingcr = (int) flag & 1;\r
-    flag >>= 1;\r
-\r
-    if (self->decoder == Py_None) {\r
-        Py_RETURN_NONE;\r
-    }\r
-    return PyObject_CallMethod(self->decoder,\r
-                               "setstate", "((OK))", buffer, flag);\r
-}\r
-\r
-/* reset(): forget the newline kinds seen and any held-back '\r', then reset\r
-   the wrapped decoder. Returns the wrapped reset() result, or None when the\r
-   wrapped decoder is None. */\r
-static PyObject *\r
-incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)\r
-{\r
-    self->seennl = 0;\r
-    self->pendingcr = 0;\r
-\r
-    if (self->decoder == Py_None) {\r
-        Py_RETURN_NONE;\r
-    }\r
-    return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);\r
-}\r
-\r
-/* Getter for `newlines`: map the SEEN_* bits to a Python value.\r
-\r
-   One kind seen    -> that newline as a unicode string\r
-   Several kinds    -> a tuple built with Py_BuildValue "s" items\r
-   Nothing seen yet -> None */\r
-static PyObject *\r
-incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)\r
-{\r
-    const unsigned int seen = self->seennl;\r
-\r
-    if (seen == SEEN_CR)\r
-        return PyUnicode_FromString("\r");\r
-    if (seen == SEEN_LF)\r
-        return PyUnicode_FromString("\n");\r
-    if (seen == SEEN_CRLF)\r
-        return PyUnicode_FromString("\r\n");\r
-    if (seen == (SEEN_CR | SEEN_LF))\r
-        return Py_BuildValue("ss", "\r", "\n");\r
-    if (seen == (SEEN_CR | SEEN_CRLF))\r
-        return Py_BuildValue("ss", "\r", "\r\n");\r
-    if (seen == (SEEN_LF | SEEN_CRLF))\r
-        return Py_BuildValue("ss", "\n", "\r\n");\r
-    if (seen == (SEEN_CR | SEEN_LF | SEEN_CRLF))\r
-        return Py_BuildValue("sss", "\r", "\n", "\r\n");\r
-\r
-    Py_RETURN_NONE;\r
-}\r
-\r
-\r
-/* Method table for IncrementalNewlineDecoder. No docstrings are supplied\r
-   (the fourth member of each entry is left out). setstate takes exactly one\r
-   argument (METH_O); decode accepts keywords. */\r
-static PyMethodDef incrementalnewlinedecoder_methods[] = {\r
- {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},\r
- {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},\r
- {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},\r
- {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},\r
- {NULL}\r
-};\r
-\r
-/* Attribute table: `newlines` is read-only (no setter) and undocumented. */\r
-static PyGetSetDef incrementalnewlinedecoder_getset[] = {\r
- {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},\r
- {NULL}\r
-};\r
-\r
-/* Type object for _io.IncrementalNewlineDecoder. Instances are\r
-   nldecoder_object structs created by PyType_GenericNew and set up by\r
-   incrementalnewlinedecoder_init. No tp_traverse/tp_clear is provided.\r
-   The initializer is positional, so the slot order must not change. */\r
-PyTypeObject PyIncrementalNewlineDecoder_Type = {\r
- PyVarObject_HEAD_INIT(NULL, 0)\r
- "_io.IncrementalNewlineDecoder", /*tp_name*/\r
- sizeof(nldecoder_object), /*tp_basicsize*/\r
- 0, /*tp_itemsize*/\r
- (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/\r
- 0, /*tp_print*/\r
- 0, /*tp_getattr*/\r
- 0, /*tp_setattr*/\r
- 0, /*tp_compare */\r
- 0, /*tp_repr*/\r
- 0, /*tp_as_number*/\r
- 0, /*tp_as_sequence*/\r
- 0, /*tp_as_mapping*/\r
- 0, /*tp_hash */\r
- 0, /*tp_call*/\r
- 0, /*tp_str*/\r
- 0, /*tp_getattro*/\r
- 0, /*tp_setattro*/\r
- 0, /*tp_as_buffer*/\r
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/\r
- incrementalnewlinedecoder_doc, /* tp_doc */\r
- 0, /* tp_traverse */\r
- 0, /* tp_clear */\r
- 0, /* tp_richcompare */\r
- 0, /*tp_weaklistoffset*/\r
- 0, /* tp_iter */\r
- 0, /* tp_iternext */\r
- incrementalnewlinedecoder_methods, /* tp_methods */\r
- 0, /* tp_members */\r
- incrementalnewlinedecoder_getset, /* tp_getset */\r
- 0, /* tp_base */\r
- 0, /* tp_dict */\r
- 0, /* tp_descr_get */\r
- 0, /* tp_descr_set */\r
- 0, /* tp_dictoffset */\r
- (initproc)incrementalnewlinedecoder_init, /* tp_init */\r
- 0, /* tp_alloc */\r
- PyType_GenericNew, /* tp_new */\r
-};\r
-\r
-\r
-/* TextIOWrapper */\r
-\r
-PyDoc_STRVAR(textiowrapper_doc,\r
- "Character and line based layer over a BufferedIOBase object, buffer.\n"\r
- "\n"\r
- "encoding gives the name of the encoding that the stream will be\n"\r
- "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"\r
- "\n"\r
- "errors determines the strictness of encoding and decoding (see the\n"\r
- "codecs.register) and defaults to \"strict\".\n"\r
- "\n"\r
- "newline controls how line endings are handled. It can be None, '',\n"\r
- "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"\r
- "\n"\r
- "* On input, if newline is None, universal newlines mode is\n"\r
- " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"\r
- " these are translated into '\\n' before being returned to the\n"\r
- " caller. If it is '', universal newline mode is enabled, but line\n"\r
- " endings are returned to the caller untranslated. If it has any of\n"\r
- " the other legal values, input lines are only terminated by the given\n"\r
- " string, and the line ending is returned to the caller untranslated.\n"\r
- "\n"\r
- "* On output, if newline is None, any '\\n' characters written are\n"\r
- " translated to the system default line separator, os.linesep. If\n"\r
- " newline is '', no translation takes place. If newline is any of the\n"\r
- " other legal values, any '\\n' characters written are translated to\n"\r
- " the given string.\n"\r
- "\n"\r
- "If line_buffering is True, a call to flush is implied when a call to\n"\r
- "write contains a newline character."\r
- );\r
-\r
-/* Signature through which the specialized encoders are called. The\r
-   encoders themselves take a `textio *` first argument and are cast to\r
-   this type when stored in the lookup table. */\r
-typedef PyObject *\r
- (*encodefunc_t)(PyObject *, PyObject *);\r
-\r
-/* Instance layout of TextIOWrapper. */\r
-typedef struct\r
-{\r
- PyObject_HEAD\r
- int ok; /* initialized? */\r
- int detached;\r
- Py_ssize_t chunk_size;\r
- PyObject *buffer;\r
- PyObject *encoding;\r
- PyObject *encoder;\r
- PyObject *decoder;\r
- PyObject *readnl;\r
- PyObject *errors;\r
- const char *writenl; /* utf-8 encoded, NULL stands for \n */\r
- char line_buffering;\r
- char readuniversal;\r
- char readtranslate;\r
- char writetranslate;\r
- char seekable;\r
- char telling;\r
- /* Specialized encoding func (see below) */\r
- encodefunc_t encodefunc;\r
- /* Whether or not it's the start of the stream */\r
- char encoding_start_of_stream;\r
-\r
- /* Reads and writes are internally buffered in order to speed things up.\r
- However, any read will first flush the write buffer if it isn't empty.\r
-\r
- Please also note that text to be written is first encoded before being\r
- buffered. This is necessary so that encoding errors are immediately\r
- reported to the caller, but it unfortunately means that the\r
- IncrementalEncoder (whose encode() method is always written in Python)\r
- becomes a bottleneck for small writes.\r
- */\r
- PyObject *decoded_chars; /* buffer for text returned from decoder */\r
- Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */\r
- PyObject *pending_bytes; /* list of bytes objects waiting to be\r
- written, or NULL */\r
- Py_ssize_t pending_bytes_count;\r
- PyObject *snapshot;\r
- /* snapshot is either None, or a tuple (dec_flags, next_input) where\r
- * dec_flags is the second (integer) item of the decoder state and\r
- * next_input is the chunk of input bytes that comes next after the\r
- * snapshot point. We use this to reconstruct decoder states in tell().\r
- */\r
-\r
- /* Cache raw object if it's a FileIO object */\r
- PyObject *raw;\r
-\r
- PyObject *weakreflist;\r
- PyObject *dict;\r
-} textio;\r
-\r
-\r
-/* A couple of specialized cases in order to bypass the slow incremental\r
- encoding methods for the most popular encodings. */\r
-\r
-static PyObject *\r
-ascii_encode(textio *self, PyObject *text)\r
-{\r
- return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),\r
- PyUnicode_GET_SIZE(text),\r
- PyBytes_AS_STRING(self->errors));\r
-}\r
-\r
-/* Encode `text` as UTF-16 with byteorder argument 1. */\r
-static PyObject *\r
-utf16be_encode(textio *self, PyObject *text)\r
-{\r
-    const Py_UNICODE *chars = PyUnicode_AS_UNICODE(text);\r
-    Py_ssize_t nchars = PyUnicode_GET_SIZE(text);\r
-    const char *errmode = PyBytes_AS_STRING(self->errors);\r
-\r
-    return PyUnicode_EncodeUTF16(chars, nchars, errmode, 1);\r
-}\r
-\r
-/* Encode `text` as UTF-16 with byteorder argument -1. */\r
-static PyObject *\r
-utf16le_encode(textio *self, PyObject *text)\r
-{\r
-    const Py_UNICODE *chars = PyUnicode_AS_UNICODE(text);\r
-    Py_ssize_t nchars = PyUnicode_GET_SIZE(text);\r
-    const char *errmode = PyBytes_AS_STRING(self->errors);\r
-\r
-    return PyUnicode_EncodeUTF16(chars, nchars, errmode, -1);\r
-}\r
-\r
-/* Encode `text` as UTF-16.\r
-\r
-   At the start of the stream the byteorder argument is 0. Afterwards the\r
-   BOM is skipped and native byte ordering is used, by passing the same\r
-   byteorder value that utf16be_encode (1) or utf16le_encode (-1) passes,\r
-   chosen by WORDS_BIGENDIAN. */\r
-static PyObject *\r
-utf16_encode(textio *self, PyObject *text)\r
-{\r
-    int byteorder = 0;\r
-\r
-    if (!self->encoding_start_of_stream) {\r
-#if defined(WORDS_BIGENDIAN)\r
-        byteorder = 1;\r
-#else\r
-        byteorder = -1;\r
-#endif\r
-    }\r
-    return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),\r
-                                 PyUnicode_GET_SIZE(text),\r
-                                 PyBytes_AS_STRING(self->errors), byteorder);\r
-}\r
-\r
-/* Encode `text` as UTF-32 with byteorder argument 1. */\r
-static PyObject *\r
-utf32be_encode(textio *self, PyObject *text)\r
-{\r
-    const Py_UNICODE *chars = PyUnicode_AS_UNICODE(text);\r
-    Py_ssize_t nchars = PyUnicode_GET_SIZE(text);\r
-    const char *errmode = PyBytes_AS_STRING(self->errors);\r
-\r
-    return PyUnicode_EncodeUTF32(chars, nchars, errmode, 1);\r
-}\r
-\r
-/* Encode `text` as UTF-32 with byteorder argument -1. */\r
-static PyObject *\r
-utf32le_encode(textio *self, PyObject *text)\r
-{\r
-    const Py_UNICODE *chars = PyUnicode_AS_UNICODE(text);\r
-    Py_ssize_t nchars = PyUnicode_GET_SIZE(text);\r
-    const char *errmode = PyBytes_AS_STRING(self->errors);\r
-\r
-    return PyUnicode_EncodeUTF32(chars, nchars, errmode, -1);\r
-}\r
-\r
-/* Encode `text` as UTF-32.\r
-\r
-   At the start of the stream the byteorder argument is 0. Afterwards the\r
-   BOM is skipped and native byte ordering is used, by passing the same\r
-   byteorder value that utf32be_encode (1) or utf32le_encode (-1) passes,\r
-   chosen by WORDS_BIGENDIAN. */\r
-static PyObject *\r
-utf32_encode(textio *self, PyObject *text)\r
-{\r
-    int byteorder = 0;\r
-\r
-    if (!self->encoding_start_of_stream) {\r
-#if defined(WORDS_BIGENDIAN)\r
-        byteorder = 1;\r
-#else\r
-        byteorder = -1;\r
-#endif\r
-    }\r
-    return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),\r
-                                 PyUnicode_GET_SIZE(text),\r
-                                 PyBytes_AS_STRING(self->errors), byteorder);\r
-}\r
-\r
-static PyObject *\r
-utf8_encode(textio *self, PyObject *text)\r
-{\r
- return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),\r
- PyUnicode_GET_SIZE(text),\r
- PyBytes_AS_STRING(self->errors));\r
-}\r
-\r
-static PyObject *\r
-latin1_encode(textio *self, PyObject *text)\r
-{\r
- return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),\r
- PyUnicode_GET_SIZE(text),\r
- PyBytes_AS_STRING(self->errors));\r
-}\r
-\r
-/* Map normalized encoding names onto the specialized encoding funcs */\r
-\r
-/* One row of the lookup table: the codec's normalized name and the fast\r
-   encoder to use for it. */\r
-typedef struct {\r
- const char *name;\r
- encodefunc_t encodefunc;\r
-} encodefuncentry;\r
-\r
-/* Searched linearly with strcmp() by textiowrapper_init against the `name`\r
-   attribute of the looked-up codec; the {NULL, NULL} row ends the table.\r
-   The casts are needed because the encoders take `textio *`, not\r
-   `PyObject *`, as their first parameter. */\r
-static encodefuncentry encodefuncs[] = {\r
- {"ascii", (encodefunc_t) ascii_encode},\r
- {"iso8859-1", (encodefunc_t) latin1_encode},\r
- {"utf-8", (encodefunc_t) utf8_encode},\r
- {"utf-16-be", (encodefunc_t) utf16be_encode},\r
- {"utf-16-le", (encodefunc_t) utf16le_encode},\r
- {"utf-16", (encodefunc_t) utf16_encode},\r
- {"utf-32-be", (encodefunc_t) utf32be_encode},\r
- {"utf-32-le", (encodefunc_t) utf32le_encode},\r
- {"utf-32", (encodefunc_t) utf32_encode},\r
- {NULL, NULL}\r
-};\r
-\r
-\r
-/* tp_init for TextIOWrapper(buffer, encoding=None, errors=None,\r
-                             newline=None, line_buffering=0).\r
-\r
-   buffer          the underlying object; its readable(), writable(),\r
-                   seekable() and (when needed) tell() methods are called\r
-   encoding        codec name; when omitted, locale.getpreferredencoding()\r
-                   is used, falling back to "ascii" if that raises\r
-                   ImportError\r
-   errors          error handler name; "strict" when omitted\r
-   newline         NULL/None, "", "\n", "\r" or "\r\n"; anything else raises\r
-                   ValueError\r
-   line_buffering  stored as-is in self->line_buffering\r
-\r
-   All previously held state is released first, so the function may be\r
-   called again on an existing object.\r
-\r
-   Returns 0 on success and sets self->ok. Returns -1 with an exception set\r
-   on failure, leaving self->ok at 0; references already stored in `self`\r
-   stay there and are released by the clear/dealloc routines. */\r
-static int\r
-textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)\r
-{\r
-    char *kwlist[] = {"buffer", "encoding", "errors",\r
-                      "newline", "line_buffering",\r
-                      NULL};\r
-    PyObject *buffer, *raw;\r
-    char *encoding = NULL;\r
-    char *errors = NULL;\r
-    char *newline = NULL;\r
-    int line_buffering = 0;\r
-\r
-    PyObject *res;\r
-    int r;\r
-\r
-    self->ok = 0;\r
-    self->detached = 0;\r
-    /* The name after ':' is what argument errors report to the user. */\r
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:TextIOWrapper",\r
-                                     kwlist, &buffer, &encoding, &errors,\r
-                                     &newline, &line_buffering))\r
-        return -1;\r
-\r
-    if (newline && newline[0] != '\0'\r
-        && !(newline[0] == '\n' && newline[1] == '\0')\r
-        && !(newline[0] == '\r' && newline[1] == '\0')\r
-        && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {\r
-        PyErr_Format(PyExc_ValueError,\r
-                     "illegal newline value: %s", newline);\r
-        return -1;\r
-    }\r
-\r
-    Py_CLEAR(self->buffer);\r
-    Py_CLEAR(self->encoding);\r
-    Py_CLEAR(self->encoder);\r
-    Py_CLEAR(self->decoder);\r
-    Py_CLEAR(self->readnl);\r
-    Py_CLEAR(self->decoded_chars);\r
-    Py_CLEAR(self->pending_bytes);\r
-    Py_CLEAR(self->snapshot);\r
-    Py_CLEAR(self->errors);\r
-    Py_CLEAR(self->raw);\r
-    self->decoded_chars_used = 0;\r
-    self->pending_bytes_count = 0;\r
-    self->encodefunc = NULL;\r
-    self->writenl = NULL;\r
-\r
-    if (encoding == NULL && self->encoding == NULL) {\r
-        /* No explicit encoding: ask the locale module (imported lazily). */\r
-        if (_PyIO_locale_module == NULL)\r
-            _PyIO_locale_module = PyImport_ImportModule("locale");\r
-        if (_PyIO_locale_module != NULL)\r
-            self->encoding = PyObject_CallMethod(\r
-                _PyIO_locale_module, "getpreferredencoding", NULL);\r
-\r
-        if (self->encoding == NULL) {\r
-            /*\r
-              Importing locale can raise a ImportError because of\r
-              _functools, and locale.getpreferredencoding can raise a\r
-              ImportError if _locale is not available.  These will happen\r
-              during module building.\r
-            */\r
-            if (!PyErr_ExceptionMatches(PyExc_ImportError))\r
-                goto error;\r
-            PyErr_Clear();\r
-            self->encoding = PyString_FromString("ascii");\r
-            if (self->encoding == NULL)\r
-                goto error;\r
-        }\r
-        else if (!PyString_Check(self->encoding))\r
-            Py_CLEAR(self->encoding);\r
-    }\r
-    if (self->encoding != NULL)\r
-        encoding = PyString_AsString(self->encoding);\r
-    else if (encoding != NULL) {\r
-        self->encoding = PyString_FromString(encoding);\r
-        if (self->encoding == NULL)\r
-            goto error;\r
-    }\r
-    else {\r
-        /* Must stop here: going on would hand a NULL encoding name to the\r
-           codec lookups below while an exception is already pending. */\r
-        PyErr_SetString(PyExc_IOError,\r
-                        "could not determine default encoding");\r
-        goto error;\r
-    }\r
-\r
-    if (errors == NULL)\r
-        errors = "strict";\r
-    self->errors = PyBytes_FromString(errors);\r
-    if (self->errors == NULL)\r
-        goto error;\r
-\r
-    self->chunk_size = 8192;\r
-    self->readuniversal = (newline == NULL || newline[0] == '\0');\r
-    self->line_buffering = line_buffering;\r
-    self->readtranslate = (newline == NULL);\r
-    if (newline) {\r
-        self->readnl = PyString_FromString(newline);\r
-        if (self->readnl == NULL)\r
-            goto error;\r
-    }\r
-    self->writetranslate = (newline == NULL || newline[0] != '\0');\r
-    if (!self->readuniversal && self->writetranslate) {\r
-        /* writenl borrows the character data owned by self->readnl */\r
-        self->writenl = PyString_AsString(self->readnl);\r
-        if (!strcmp(self->writenl, "\n"))\r
-            self->writenl = NULL;\r
-    }\r
-#ifdef MS_WINDOWS\r
-    else\r
-        self->writenl = "\r\n";\r
-#endif\r
-\r
-    /* Build the decoder object */\r
-    res = PyObject_CallMethod(buffer, "readable", NULL);\r
-    if (res == NULL)\r
-        goto error;\r
-    r = PyObject_IsTrue(res);\r
-    Py_DECREF(res);\r
-    if (r == -1)\r
-        goto error;\r
-    if (r == 1) {\r
-        self->decoder = PyCodec_IncrementalDecoder(\r
-            encoding, errors);\r
-        if (self->decoder == NULL)\r
-            goto error;\r
-\r
-        if (self->readuniversal) {\r
-            /* Wrap the codec's decoder so that newlines get handled. */\r
-            PyObject *incrementalDecoder = PyObject_CallFunction(\r
-                (PyObject *)&PyIncrementalNewlineDecoder_Type,\r
-                "Oi", self->decoder, (int)self->readtranslate);\r
-            if (incrementalDecoder == NULL)\r
-                goto error;\r
-            Py_CLEAR(self->decoder);\r
-            self->decoder = incrementalDecoder;\r
-        }\r
-    }\r
-\r
-    /* Build the encoder object */\r
-    res = PyObject_CallMethod(buffer, "writable", NULL);\r
-    if (res == NULL)\r
-        goto error;\r
-    r = PyObject_IsTrue(res);\r
-    Py_DECREF(res);\r
-    if (r == -1)\r
-        goto error;\r
-    if (r == 1) {\r
-        PyObject *ci;\r
-        self->encoder = PyCodec_IncrementalEncoder(\r
-            encoding, errors);\r
-        if (self->encoder == NULL)\r
-            goto error;\r
-        /* Get the normalized name of the codec */\r
-        ci = _PyCodec_Lookup(encoding);\r
-        if (ci == NULL)\r
-            goto error;\r
-        res = PyObject_GetAttrString(ci, "name");\r
-        Py_DECREF(ci);\r
-        if (res == NULL) {\r
-            /* A codec without a name simply gets no fast path. */\r
-            if (PyErr_ExceptionMatches(PyExc_AttributeError))\r
-                PyErr_Clear();\r
-            else\r
-                goto error;\r
-        }\r
-        else if (PyString_Check(res)) {\r
-            encodefuncentry *e = encodefuncs;\r
-            while (e->name != NULL) {\r
-                if (!strcmp(PyString_AS_STRING(res), e->name)) {\r
-                    self->encodefunc = e->encodefunc;\r
-                    break;\r
-                }\r
-                e++;\r
-            }\r
-        }\r
-        Py_XDECREF(res);\r
-    }\r
-\r
-    self->buffer = buffer;\r
-    Py_INCREF(buffer);\r
-\r
-    if (Py_TYPE(buffer) == &PyBufferedReader_Type ||\r
-        Py_TYPE(buffer) == &PyBufferedWriter_Type ||\r
-        Py_TYPE(buffer) == &PyBufferedRandom_Type) {\r
-        raw = PyObject_GetAttrString(buffer, "raw");\r
-        /* Cache the raw FileIO object to speed up 'closed' checks */\r
-        if (raw == NULL) {\r
-            if (PyErr_ExceptionMatches(PyExc_AttributeError))\r
-                PyErr_Clear();\r
-            else\r
-                goto error;\r
-        }\r
-        else if (Py_TYPE(raw) == &PyFileIO_Type)\r
-            self->raw = raw;        /* keeps the new reference */\r
-        else\r
-            Py_DECREF(raw);\r
-    }\r
-\r
-    res = PyObject_CallMethod(buffer, "seekable", NULL);\r
-    if (res == NULL)\r
-        goto error;\r
-    r = PyObject_IsTrue(res);\r
-    Py_DECREF(res);\r
-    if (r < 0)\r
-        goto error;\r
-    self->seekable = self->telling = r;\r
-\r
-    self->encoding_start_of_stream = 0;\r
-    if (self->seekable && self->encoder) {\r
-        PyObject *cookieObj;\r
-        int cmp;\r
-\r
-        self->encoding_start_of_stream = 1;\r
-\r
-        cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);\r
-        if (cookieObj == NULL)\r
-            goto error;\r
-\r
-        cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);\r
-        Py_DECREF(cookieObj);\r
-        if (cmp < 0) {\r
-            goto error;\r
-        }\r
-\r
-        if (cmp == 0) {\r
-            /* tell() did not compare equal to _PyIO_zero: we are not at the\r
-               start of the stream, so pass _PyIO_zero to the encoder's\r
-               setstate(). */\r
-            self->encoding_start_of_stream = 0;\r
-            res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,\r
-                                             _PyIO_zero, NULL);\r
-            if (res == NULL)\r
-                goto error;\r
-            Py_DECREF(res);\r
-        }\r
-    }\r
-\r
-    self->ok = 1;\r
-    return 0;\r
-\r
-  error:\r
-    return -1;\r
-}\r
-\r
-/* Shared teardown for tp_clear and tp_dealloc.\r
-\r
-   If the object was initialized, _PyIOBase_finalize() is run first; if it\r
-   fails, -1 is returned and nothing is released. Otherwise the object is\r
-   marked uninitialized, every owned reference except `dict` is dropped, and\r
-   0 is returned. */\r
-static int\r
-_textiowrapper_clear(textio *self)\r
-{\r
-    if (self->ok) {\r
-        if (_PyIOBase_finalize((PyObject *) self) < 0)\r
-            return -1;\r
-    }\r
-    self->ok = 0;\r
-\r
-    Py_CLEAR(self->buffer);\r
-    Py_CLEAR(self->encoding);\r
-    Py_CLEAR(self->encoder);\r
-    Py_CLEAR(self->decoder);\r
-    Py_CLEAR(self->readnl);\r
-    Py_CLEAR(self->decoded_chars);\r
-    Py_CLEAR(self->pending_bytes);\r
-    Py_CLEAR(self->snapshot);\r
-    Py_CLEAR(self->errors);\r
-    Py_CLEAR(self->raw);\r
-\r
-    return 0;\r
-}\r
-\r
-/* tp_dealloc. If _textiowrapper_clear() fails, the function returns without\r
-   freeing anything. Otherwise the object is untracked from the GC, its weak\r
-   references are cleared, its dict is released and the memory is freed. */\r
-static void\r
-textiowrapper_dealloc(textio *self)\r
-{\r
-    if (_textiowrapper_clear(self) >= 0) {\r
-        _PyObject_GC_UNTRACK(self);\r
-        if (self->weakreflist != NULL)\r
-            PyObject_ClearWeakRefs((PyObject *)self);\r
-        Py_CLEAR(self->dict);\r
-        Py_TYPE(self)->tp_free((PyObject *)self);\r
-    }\r
-}\r
-\r
-/* tp_traverse: report every object reference the wrapper owns to the\r
-   garbage collector -- the same members that _textiowrapper_clear()\r
-   releases, plus the instance dict. */\r
-static int\r
-textiowrapper_traverse(textio *self, visitproc visit, void *arg)\r
-{\r
- Py_VISIT(self->buffer);\r
- Py_VISIT(self->encoding);\r
- Py_VISIT(self->encoder);\r
- Py_VISIT(self->decoder);\r
- Py_VISIT(self->readnl);\r
- Py_VISIT(self->decoded_chars);\r
- Py_VISIT(self->pending_bytes);\r
- Py_VISIT(self->snapshot);\r
- Py_VISIT(self->errors);\r
- Py_VISIT(self->raw);\r
-\r
- Py_VISIT(self->dict);\r
- return 0;\r
-}\r
-\r
-/* tp_clear: run the shared teardown and, if it succeeded, also release the\r
-   instance dict. Returns 0 on success, -1 if the teardown failed. */\r
-static int\r
-textiowrapper_clear(textio *self)\r
-{\r
-    int status = _textiowrapper_clear(self);\r
-\r
-    if (status >= 0) {\r
-        Py_CLEAR(self->dict);\r
-        return 0;\r
-    }\r
-    return -1;\r
-}\r
-\r
-/* Forward declaration: CHECK_CLOSED below expands to a call to this getter,\r
-   which is defined later in the file. */\r
-static PyObject *\r
-textiowrapper_closed_get(textio *self, void *context);\r
-\r
-/* This macro takes some shortcuts to make the common case faster. */\r
-/* CHECK_CLOSED(self): make the enclosing function return NULL, with an\r
-   exception set, when the stream is closed. It may only be used in\r
-   functions that return a pointer.\r
-\r
-   - Exact TextIOWrapper instances with a cached raw object ask\r
-     _PyFileIO_closed() directly.\r
-   - Exact instances without one evaluate textiowrapper_closed_get() and\r
-     test its truth value.\r
-   - Subclass instances go through _PyIOBase_check_closed(). */\r
-#define CHECK_CLOSED(self) \\r
- do { \\r
- int r; \\r
- PyObject *_res; \\r
- if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \\r
- if (self->raw != NULL) \\r
- r = _PyFileIO_closed(self->raw); \\r
- else { \\r
- _res = textiowrapper_closed_get(self, NULL); \\r
- if (_res == NULL) \\r
- return NULL; \\r
- r = PyObject_IsTrue(_res); \\r
- Py_DECREF(_res); \\r
- if (r < 0) \\r
- return NULL; \\r
- } \\r
- if (r > 0) { \\r
- PyErr_SetString(PyExc_ValueError, \\r
- "I/O operation on closed file."); \\r
- return NULL; \\r
- } \\r
- } \\r
- else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \\r
- return NULL; \\r
- } while (0)\r
-\r
-/* CHECK_INITIALIZED(self): make the enclosing function return NULL with\r
-   ValueError set unless self->ok is positive.\r
-   NOTE(review): this expands to a bare `if` statement rather than a\r
-   do { } while (0) block, so it is not safe directly before an `else`. */\r
-#define CHECK_INITIALIZED(self) \\r
- if (self->ok <= 0) { \\r
- PyErr_SetString(PyExc_ValueError, \\r
- "I/O operation on uninitialized object"); \\r
- return NULL; \\r
- }\r
-\r
-/* CHECK_ATTACHED(self): CHECK_INITIALIZED plus a check that the underlying\r
-   buffer has not been detached; returns NULL with ValueError set otherwise.\r
-   NOTE(review): this expands to two statements, so it must not be used as\r
-   the unbraced body of an `if` or a loop. */\r
-#define CHECK_ATTACHED(self) \\r
- CHECK_INITIALIZED(self); \\r
- if (self->detached) { \\r
- PyErr_SetString(PyExc_ValueError, \\r
- "underlying buffer has been detached"); \\r
- return NULL; \\r
- }\r
-\r
-/* CHECK_ATTACHED_INT(self): the same two checks as CHECK_ATTACHED, for\r
-   functions that return int: the failure value is -1 instead of NULL. */\r
-#define CHECK_ATTACHED_INT(self) \\r
- if (self->ok <= 0) { \\r
- PyErr_SetString(PyExc_ValueError, \\r
- "I/O operation on uninitialized object"); \\r
- return -1; \\r
- } else if (self->detached) { \\r
- PyErr_SetString(PyExc_ValueError, \\r
- "underlying buffer has been detached"); \\r
- return -1; \\r
- }\r
-\r
-\r
-/* Flush the wrapper, then give the underlying buffer object to the\r
-   caller.  The wrapper's own reference is transferred to the return\r
-   value and the wrapper is marked as detached.\r
-   Returns NULL with an exception set if the wrapper is not attached or\r
-   if flush() fails (in which case the buffer stays attached). */\r
-static PyObject *\r
-textiowrapper_detach(textio *self)\r
-{\r
-    PyObject *flush_result;\r
-    PyObject *detached_buffer;\r
-\r
-    CHECK_ATTACHED(self);\r
-\r
-    flush_result = PyObject_CallMethodObjArgs((PyObject *)self,\r
-                                              _PyIO_str_flush, NULL);\r
-    if (flush_result == NULL)\r
-        return NULL;\r
-    Py_DECREF(flush_result);\r
-\r
-    /* Move our reference out of the struct instead of copying it. */\r
-    detached_buffer = self->buffer;\r
-    self->buffer = NULL;\r
-    self->detached = 1;\r
-    return detached_buffer;\r
-}\r
-\r
-/* Return a pointer to the first occurrence of `ch` within the first\r
-   `size` code units of `s`, or NULL if there is none.  Like wcschr, but\r
-   it does not stop at NUL characters.  A non-positive `size` never reads\r
-   from `s`. */\r
-Py_LOCAL_INLINE(const Py_UNICODE *)\r
-findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)\r
-{\r
-    Py_ssize_t i;\r
-\r
-    for (i = 0; i < size; i++) {\r
-        if (s[i] == ch)\r
-            return s + i;\r
-    }\r
-    return NULL;\r
-}\r
-\r
-/* Flush the internal write buffer. This doesn't explicitly flush the\r
- underlying buffered object, though. */\r
-static int\r
-_textiowrapper_writeflush(textio *self)\r
-{\r
- PyObject *pending, *b, *ret;\r
-\r
- if (self->pending_bytes == NULL)\r
- return 0;\r
-\r
- pending = self->pending_bytes;\r
- Py_INCREF(pending);\r
- self->pending_bytes_count = 0;\r
- Py_CLEAR(self->pending_bytes);\r
-\r
- b = _PyBytes_Join(_PyIO_empty_bytes, pending);\r
- Py_DECREF(pending);\r
- if (b == NULL)\r
- return -1;\r
- ret = NULL;\r
- do {\r
- ret = PyObject_CallMethodObjArgs(self->buffer,\r
- _PyIO_str_write, b, NULL);\r
- } while (ret == NULL && _PyIO_trap_eintr());\r
- Py_DECREF(b);\r
- if (ret == NULL)\r
- return -1;\r
- Py_DECREF(ret);\r
- return 0;\r
-}\r
-\r
-static PyObject *\r
-textiowrapper_write(textio *self, PyObject *args)\r
-{ /* write(text): encode a unicode string and queue the resulting bytes\r
-     in self->pending_bytes.  The queue is flushed to the buffer when it\r
-     grows beyond chunk_size or when line buffering requires it.\r
-     Returns the length of `text` as passed in (measured before newline\r
-     translation), or NULL with an exception set. */\r
- PyObject *ret;\r
- PyObject *text; /* owned reference */\r
- PyObject *b;\r
- Py_ssize_t textlen;\r
- int haslf = 0;\r
- int needflush = 0;\r
-\r
- CHECK_ATTACHED(self);\r
-\r
- if (!PyArg_ParseTuple(args, "U:write", &text)) {\r
- return NULL;\r
- }\r
-\r
- CHECK_CLOSED(self);\r
-\r
- if (self->encoder == NULL) {\r
- PyErr_SetString(PyExc_IOError, "not writable");\r
- return NULL;\r
- }\r
-\r
- Py_INCREF(text); /* from here on every exit path must release `text` */\r
-\r
- textlen = PyUnicode_GetSize(text); /* return value: length before translation */\r
-\r
- if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)\r
- if (findchar(PyUnicode_AS_UNICODE(text),\r
- PyUnicode_GET_SIZE(text), '\n'))\r
- haslf = 1;\r
-\r
- if (haslf && self->writetranslate && self->writenl != NULL) { /* translate "\n" to the configured newline */\r
- PyObject *newtext = PyObject_CallMethod(\r
- text, "replace", "ss", "\n", self->writenl);\r
- Py_DECREF(text);\r
- if (newtext == NULL)\r
- return NULL;\r
- text = newtext;\r
- }\r
-\r
- if (self->line_buffering &&\r
- (haslf ||\r
- findchar(PyUnicode_AS_UNICODE(text),\r
- PyUnicode_GET_SIZE(text), '\r')))\r
- needflush = 1; /* line-buffered and the text contains a "\n" or "\r" */\r
-\r
- /* XXX What if we were just reading? */\r
- if (self->encodefunc != NULL) { /* fast path: dedicated C encode function */\r
- b = (*self->encodefunc)((PyObject *) self, text);\r
- self->encoding_start_of_stream = 0;\r
- }\r
- else\r
- b = PyObject_CallMethodObjArgs(self->encoder,\r
- _PyIO_str_encode, text, NULL);\r
- Py_DECREF(text);\r
- if (b == NULL)\r
- return NULL;\r
-\r
- if (self->pending_bytes == NULL) { /* create the queue lazily */\r
- self->pending_bytes = PyList_New(0);\r
- if (self->pending_bytes == NULL) {\r
- Py_DECREF(b);\r
- return NULL;\r
- }\r
- self->pending_bytes_count = 0;\r
- }\r
- if (PyList_Append(self->pending_bytes, b) < 0) {\r
- Py_DECREF(b);\r
- return NULL;\r
- }\r
- self->pending_bytes_count += PyBytes_GET_SIZE(b); /* NOTE(review): assumes the encoder returned a bytes object; not checked here */\r
- Py_DECREF(b);\r
- if (self->pending_bytes_count > self->chunk_size || needflush) {\r
- if (_textiowrapper_writeflush(self) < 0)\r
- return NULL;\r
- }\r
-\r
- if (needflush) { /* also flush the underlying buffer object */\r
- ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);\r
- if (ret == NULL)\r
- return NULL;\r
- Py_DECREF(ret);\r
- }\r
-\r
- Py_CLEAR(self->snapshot); /* discard the read-side snapshot after a write */\r
-\r
- if (self->decoder) {\r
- ret = PyObject_CallMethod(self->decoder, "reset", NULL);\r
- if (ret == NULL)\r
- return NULL;\r
- Py_DECREF(ret);\r
- }\r
-\r
- return PyLong_FromSsize_t(textlen);\r
-}\r
-\r
-/* Replace the decoded-characters buffer with `chars` and rewind the\r
-   "used" offset to 0.  Steals a reference to `chars`; the previous buffer\r
-   is released.  Callers in this file also pass NULL to empty the buffer.\r
- */\r
-static void\r
-textiowrapper_set_decoded_chars(textio *self, PyObject *chars)\r
-{\r
- Py_CLEAR(self->decoded_chars);\r
- self->decoded_chars = chars;\r
- self->decoded_chars_used = 0;\r
-}\r
-\r
-/* Take up to `n` characters from the unread part of the decoded buffer\r
-   and advance the "used" offset accordingly.  A negative `n`, or an `n`\r
-   larger than what is available, means "everything that is left".\r
-   Returns a new reference to a unicode object (empty when there is no\r
-   buffer), or NULL on allocation failure. */\r
-static PyObject *\r
-textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)\r
-{\r
-    PyObject *buf = self->decoded_chars;\r
-    PyObject *out;\r
-    Py_ssize_t used, avail;\r
-\r
-    if (buf == NULL)\r
-        return PyUnicode_FromStringAndSize(NULL, 0);\r
-\r
-    used = self->decoded_chars_used;\r
-    avail = PyUnicode_GET_SIZE(buf) - used;\r
-\r
-    assert(avail >= 0);\r
-\r
-    if (n < 0 || n > avail)\r
-        n = avail;\r
-\r
-    if (used <= 0 && n >= avail) {\r
-        /* The request covers the whole buffer: share it. */\r
-        Py_INCREF(buf);\r
-        out = buf;\r
-    }\r
-    else {\r
-        /* Only a slice is wanted: copy it out. */\r
-        out = PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(buf) + used, n);\r
-        if (out == NULL)\r
-            return NULL;\r
-    }\r
-\r
-    self->decoded_chars_used = used + n;\r
-    return out;\r
-}\r
-\r
-/* Read and decode the next chunk of data from the BufferedReader.\r
- *\r
- * Returns 1 when decoded data was produced or more input may follow,\r
- * 0 when EOF was reached (empty read and nothing decoded), and -1 with an\r
- * exception set on error.  The decoded string replaces\r
- * self->decoded_chars.  When self->telling is set, self->snapshot is\r
- * updated to (decoder flags, bytes fed since the last safe point).\r
- */\r
-static int\r
-textiowrapper_read_chunk(textio *self)\r
-{\r
-    PyObject *dec_buffer = NULL;\r
-    PyObject *dec_flags = NULL;\r
-    PyObject *input_chunk = NULL;\r
-    PyObject *decoded_chars, *chunk_size;\r
-    int eof;\r
-\r
-    /* The return value is True unless EOF was reached.  The decoded string is\r
-     * placed in self._decoded_chars (replacing its previous value).  The\r
-     * entire input chunk is sent to the decoder, though some of it may remain\r
-     * buffered in the decoder, yet to be converted.\r
-     */\r
-\r
-    if (self->decoder == NULL) {\r
-        PyErr_SetString(PyExc_IOError, "not readable");\r
-        return -1;\r
-    }\r
-\r
-    if (self->telling) {\r
-        /* To prepare for tell(), we need to snapshot a point in the file\r
-         * where the decoder's input buffer is empty.\r
-         */\r
-\r
-        PyObject *state = PyObject_CallMethodObjArgs(self->decoder,\r
-                                                     _PyIO_str_getstate, NULL);\r
-        if (state == NULL)\r
-            return -1;\r
-        /* Given this, we know there was a valid snapshot point\r
-         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).\r
-         *\r
-         * PyArg_Parse() returns 0 on failure, never a negative value, so\r
-         * the result must be tested for falsity.  Otherwise a malformed\r
-         * state would leave the two pointers NULL and crash in Py_INCREF.\r
-         */\r
-        if (!PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags)) {\r
-            Py_DECREF(state);\r
-            return -1;\r
-        }\r
-        /* The parsed values are borrowed from `state`; keep them alive. */\r
-        Py_INCREF(dec_buffer);\r
-        Py_INCREF(dec_flags);\r
-        Py_DECREF(state);\r
-    }\r
-\r
-    /* Read a chunk, decode it, and put the result in self._decoded_chars. */\r
-    chunk_size = PyLong_FromSsize_t(self->chunk_size);\r
-    if (chunk_size == NULL)\r
-        goto fail;\r
-    input_chunk = PyObject_CallMethodObjArgs(self->buffer,\r
-                                             _PyIO_str_read1, chunk_size, NULL);\r
-    Py_DECREF(chunk_size);\r
-    if (input_chunk == NULL)\r
-        goto fail;\r
-    if (!PyBytes_Check(input_chunk)) {\r
-        PyErr_Format(PyExc_TypeError,\r
-                     "underlying read1() should have returned a bytes object, "\r
-                     "not '%.200s'", Py_TYPE(input_chunk)->tp_name);\r
-        goto fail;\r
-    }\r
-\r
-    /* An empty read means EOF, unless the decoder still yields text below. */\r
-    eof = (PyBytes_Size(input_chunk) == 0);\r
-\r
-    if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {\r
-        decoded_chars = _PyIncrementalNewlineDecoder_decode(\r
-            self->decoder, input_chunk, eof);\r
-    }\r
-    else {\r
-        decoded_chars = PyObject_CallMethodObjArgs(self->decoder,\r
-            _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);\r
-    }\r
-\r
-    if (check_decoded(decoded_chars) < 0)\r
-        goto fail;\r
-    textiowrapper_set_decoded_chars(self, decoded_chars);\r
-    if (PyUnicode_GET_SIZE(decoded_chars) > 0)\r
-        eof = 0;\r
-\r
-    if (self->telling) {\r
-        /* At the snapshot point, len(dec_buffer) bytes before the read, the\r
-         * next input to be decoded is dec_buffer + input_chunk.\r
-         */\r
-        PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);\r
-        if (next_input == NULL)\r
-            goto fail;\r
-        if (!PyBytes_Check(next_input)) {\r
-            PyErr_Format(PyExc_TypeError,\r
-                         "decoder getstate() should have returned a bytes "\r
-                         "object, not '%.200s'",\r
-                         Py_TYPE(next_input)->tp_name);\r
-            Py_DECREF(next_input);\r
-            goto fail;\r
-        }\r
-        Py_CLEAR(dec_buffer);\r
-        Py_CLEAR(self->snapshot);\r
-        /* "N" hands our references to dec_flags and next_input over to\r
-         * Py_BuildValue(), so they must not be released again below. */\r
-        self->snapshot = Py_BuildValue("NN", dec_flags, next_input);\r
-        dec_flags = NULL;\r
-        if (self->snapshot == NULL)\r
-            goto fail;  /* do not report success with an exception set */\r
-    }\r
-    Py_DECREF(input_chunk);\r
-\r
-    return (eof == 0);\r
-\r
-  fail:\r
-    Py_XDECREF(dec_buffer);\r
-    Py_XDECREF(dec_flags);\r
-    Py_XDECREF(input_chunk);\r
-    return -1;\r
-}\r
-\r
-/* read([n]): return up to `n` decoded characters as a unicode object.\r
- *\r
- * With `n` negative or omitted, the rest of the underlying buffer is read\r
- * and decoded in one go and appended to whatever is still pending in the\r
- * decoded buffer.  Otherwise chunks are read until `n` characters are\r
- * available or EOF is hit.  Pending writes are flushed first.\r
- * Returns NULL with an exception set on error.\r
- */\r
-static PyObject *\r
-textiowrapper_read(textio *self, PyObject *args)\r
-{\r
-    Py_ssize_t n = -1;\r
-    PyObject *result = NULL, *chunks = NULL;\r
-\r
-    CHECK_ATTACHED(self);\r
-\r
-    if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))\r
-        return NULL;\r
-\r
-    CHECK_CLOSED(self);\r
-\r
-    if (self->decoder == NULL) {\r
-        PyErr_SetString(PyExc_IOError, "not readable");\r
-        return NULL;\r
-    }\r
-\r
-    if (_textiowrapper_writeflush(self) < 0)\r
-        return NULL;\r
-\r
-    if (n < 0) {\r
-        /* Read everything */\r
-        PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);\r
-        PyObject *decoded, *final;\r
-        if (bytes == NULL)\r
-            goto fail;\r
-        decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,\r
-                                             bytes, Py_True, NULL);\r
-        Py_DECREF(bytes);\r
-        if (check_decoded(decoded) < 0)\r
-            goto fail;\r
-\r
-        result = textiowrapper_get_decoded_chars(self, -1);\r
-\r
-        if (result == NULL) {\r
-            Py_DECREF(decoded);\r
-            return NULL;\r
-        }\r
-\r
-        final = PyUnicode_Concat(result, decoded);\r
-        /* Py_CLEAR (not Py_DECREF) so that the `fail` path below does not\r
-         * release `result` a second time when the concatenation fails. */\r
-        Py_CLEAR(result);\r
-        Py_DECREF(decoded);\r
-        if (final == NULL)\r
-            goto fail;\r
-\r
-        Py_CLEAR(self->snapshot);\r
-        return final;\r
-    }\r
-    else {\r
-        int res = 1;\r
-        Py_ssize_t remaining = n;\r
-\r
-        /* Start with whatever is already decoded. */\r
-        result = textiowrapper_get_decoded_chars(self, n);\r
-        if (result == NULL)\r
-            goto fail;\r
-        remaining -= PyUnicode_GET_SIZE(result);\r
-\r
-        /* Keep reading chunks until we have n characters to return */\r
-        while (remaining > 0) {\r
-            res = textiowrapper_read_chunk(self);\r
-            if (res < 0) {\r
-                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()\r
-                   when EINTR occurs so we needn't do it ourselves. */\r
-                if (_PyIO_trap_eintr()) {\r
-                    continue;\r
-                }\r
-                goto fail;\r
-            }\r
-            if (res == 0)  /* EOF */\r
-                break;\r
-            if (chunks == NULL) {\r
-                chunks = PyList_New(0);\r
-                if (chunks == NULL)\r
-                    goto fail;\r
-            }\r
-            if (PyList_Append(chunks, result) < 0)\r
-                goto fail;\r
-            Py_DECREF(result);\r
-            result = textiowrapper_get_decoded_chars(self, remaining);\r
-            if (result == NULL)\r
-                goto fail;\r
-            remaining -= PyUnicode_GET_SIZE(result);\r
-        }\r
-        if (chunks != NULL) {\r
-            /* Several pieces were collected: join them into one string. */\r
-            if (result != NULL && PyList_Append(chunks, result) < 0)\r
-                goto fail;\r
-            Py_CLEAR(result);\r
-            result = PyUnicode_Join(_PyIO_empty_str, chunks);\r
-            if (result == NULL)\r
-                goto fail;\r
-            Py_CLEAR(chunks);\r
-        }\r
-        return result;\r
-    }\r
-  fail:\r
-    Py_XDECREF(result);\r
-    Py_XDECREF(chunks);\r
-    return NULL;\r
-}\r
-\r
-\r
-/* Scan forward from `start` for the character `ch` and return a pointer\r
-   to it, or NULL once `end` is reached without a match.\r
-   NOTE: `end` must point to the real end of the Py_UNICODE storage,\r
-   that is to the NUL character. Otherwise the function will produce\r
-   incorrect results. */\r
-static Py_UNICODE *\r
-find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)\r
-{\r
-    Py_UNICODE *cur = start;\r
-\r
-    while (1) {\r
-        if (*cur > ch) {\r
-            /* Fast skip over anything above the target. */\r
-            cur++;\r
-            continue;\r
-        }\r
-        if (*cur == ch)\r
-            return cur;\r
-        if (cur == end)\r
-            return NULL;\r
-        cur++;\r
-    }\r
-}\r
-\r
-Py_ssize_t\r
-_PyIO_find_line_ending(\r
- int translated, int universal, PyObject *readnl,\r
- Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)\r
-{ /* Locate the first line ending in [start, end).\r
-     Returns the offset, relative to `start`, just past the line ending\r
-     when one is found.  Otherwise returns -1 and stores in *consumed how\r
-     many characters from `start` hold no part of a line ending.\r
-     Mode selection: `translated` searches for "\n" only; else `universal`\r
-     accepts "\r", "\r\n" and "\n"; else the byte string `readnl` is the\r
-     terminator.  The scans rely on the storage being NUL-terminated at\r
-     `end`. */\r
- Py_ssize_t len = end - start;\r
-\r
- if (translated) {\r
- /* Newlines are already translated, only search for \n */\r
- Py_UNICODE *pos = find_control_char(start, end, '\n');\r
- if (pos != NULL)\r
- return pos - start + 1;\r
- else {\r
- *consumed = len;\r
- return -1;\r
- }\r
- }\r
- else if (universal) {\r
- /* Universal newline search. Find any of \r, \r\n, \n\r
- * The decoder ensures that \r\n are not split in two pieces\r
- */\r
- Py_UNICODE *s = start;\r
- for (;;) {\r
- Py_UNICODE ch;\r
- /* Fast path for non-control chars. The loop always ends\r
- since the Py_UNICODE storage is NUL-terminated. */\r
- while (*s > '\r')\r
- s++;\r
- if (s >= end) {\r
- *consumed = len;\r
- return -1;\r
- }\r
- ch = *s++;\r
- if (ch == '\n')\r
- return s - start;\r
- if (ch == '\r') {\r
- if (*s == '\n') /* "\r\n" counts as a single line ending */\r
- return s - start + 1;\r
- else\r
- return s - start;\r
- }\r
- }\r
- }\r
- else {\r
- /* Non-universal mode. */\r
- Py_ssize_t readnl_len = PyString_GET_SIZE(readnl);\r
- unsigned char *nl = (unsigned char *) PyString_AS_STRING(readnl);\r
- if (readnl_len == 1) {\r
- Py_UNICODE *pos = find_control_char(start, end, nl[0]);\r
- if (pos != NULL)\r
- return pos - start + 1;\r
- *consumed = len;\r
- return -1;\r
- }\r
- else {\r
- Py_UNICODE *s = start;\r
- Py_UNICODE *e = end - readnl_len + 1; /* last position where a full terminator could still start, plus one */\r
- Py_UNICODE *pos;\r
- if (e < s)\r
- e = s;\r
- while (s < e) {\r
- Py_ssize_t i;\r
- Py_UNICODE *pos = find_control_char(s, end, nl[0]); /* NOTE: shadows the outer `pos` */\r
- if (pos == NULL || pos >= e)\r
- break;\r
- for (i = 1; i < readnl_len; i++) {\r
- if (pos[i] != nl[i])\r
- break;\r
- }\r
- if (i == readnl_len)\r
- return pos - start + readnl_len;\r
- s = pos + 1;\r
- }\r
- pos = find_control_char(e, end, nl[0]); /* a terminator may start in the tail and continue in the next chunk */\r
- if (pos == NULL)\r
- *consumed = len;\r
- else\r
- *consumed = pos - start;\r
- return -1;\r
- }\r
- }\r
-}\r
-\r
-static PyObject *\r
-_textiowrapper_readline(textio *self, Py_ssize_t limit)\r
-{ /* Read one line, reading further chunks as needed.\r
-     A non-negative `limit` caps the number of characters returned; a\r
-     negative one means no cap.  Returns a new unicode object (empty at\r
-     EOF) or NULL with an exception set.\r
-     Locals: `chunks` collects pieces put aside while no line ending has\r
-     been seen; `chunked` is their total length; `remaining` holds a tail\r
-     of the previous buffer to be prepended to the next chunk;\r
-     `offset_to_buffer` is the length of that prepended tail. */\r
- PyObject *line = NULL, *chunks = NULL, *remaining = NULL;\r
- Py_ssize_t start, endpos, chunked, offset_to_buffer;\r
- int res;\r
-\r
- CHECK_CLOSED(self);\r
-\r
- if (_textiowrapper_writeflush(self) < 0)\r
- return NULL;\r
-\r
- chunked = 0;\r
-\r
- while (1) {\r
- Py_UNICODE *ptr;\r
- Py_ssize_t line_len;\r
- Py_ssize_t consumed = 0;\r
-\r
- /* First, get some data if necessary */\r
- res = 1;\r
- while (!self->decoded_chars ||\r
- !PyUnicode_GET_SIZE(self->decoded_chars)) {\r
- res = textiowrapper_read_chunk(self);\r
- if (res < 0) {\r
- /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()\r
- when EINTR occurs so we needn't do it ourselves. */\r
- if (_PyIO_trap_eintr()) {\r
- continue;\r
- }\r
- goto error;\r
- }\r
- if (res == 0)\r
- break;\r
- }\r
- if (res == 0) {\r
- /* end of file */\r
- textiowrapper_set_decoded_chars(self, NULL);\r
- Py_CLEAR(self->snapshot);\r
- start = endpos = offset_to_buffer = 0;\r
- break;\r
- }\r
-\r
- if (remaining == NULL) { /* search directly in the decoded buffer */\r
- line = self->decoded_chars;\r
- start = self->decoded_chars_used;\r
- offset_to_buffer = 0;\r
- Py_INCREF(line);\r
- }\r
- else { /* prepend the tail left over from the previous buffer */\r
- assert(self->decoded_chars_used == 0);\r
- line = PyUnicode_Concat(remaining, self->decoded_chars);\r
- start = 0;\r
- offset_to_buffer = PyUnicode_GET_SIZE(remaining);\r
- Py_CLEAR(remaining);\r
- if (line == NULL)\r
- goto error;\r
- }\r
-\r
- ptr = PyUnicode_AS_UNICODE(line);\r
- line_len = PyUnicode_GET_SIZE(line);\r
-\r
- endpos = _PyIO_find_line_ending(\r
- self->readtranslate, self->readuniversal, self->readnl,\r
- ptr + start, ptr + line_len, &consumed);\r
- if (endpos >= 0) { /* line ending found in this buffer */\r
- endpos += start;\r
- if (limit >= 0 && (endpos - start) + chunked >= limit)\r
- endpos = start + limit - chunked;\r
- break;\r
- }\r
-\r
- /* We can put aside up to `endpos` */\r
- endpos = consumed + start;\r
- if (limit >= 0 && (endpos - start) + chunked >= limit) {\r
- /* Didn't find line ending, but reached length limit */\r
- endpos = start + limit - chunked;\r
- break;\r
- }\r
-\r
- if (endpos > start) {\r
- /* No line ending seen yet - put aside current data */\r
- PyObject *s;\r
- if (chunks == NULL) {\r
- chunks = PyList_New(0);\r
- if (chunks == NULL)\r
- goto error;\r
- }\r
- s = PyUnicode_FromUnicode(ptr + start, endpos - start);\r
- if (s == NULL)\r
- goto error;\r
- if (PyList_Append(chunks, s) < 0) {\r
- Py_DECREF(s);\r
- goto error;\r
- }\r
- chunked += PyUnicode_GET_SIZE(s);\r
- Py_DECREF(s);\r
- }\r
- /* There may be some remaining characters we'll have to prepend to the\r
- next chunk of data */\r
- if (endpos < line_len) {\r
- remaining = PyUnicode_FromUnicode(\r
- ptr + endpos, line_len - endpos);\r
- if (remaining == NULL)\r
- goto error;\r
- }\r
- Py_CLEAR(line);\r
- /* We have consumed the buffer */\r
- textiowrapper_set_decoded_chars(self, NULL);\r
- }\r
-\r
- if (line != NULL) {\r
- /* Our line ends in the current buffer */\r
- self->decoded_chars_used = endpos - offset_to_buffer; /* convert back to decoded_chars coordinates */\r
- if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {\r
- if (start == 0 && Py_REFCNT(line) == 1) { /* sole owner: shrink in place */\r
- if (PyUnicode_Resize(&line, endpos) < 0)\r
- goto error;\r
- }\r
- else {\r
- PyObject *s = PyUnicode_FromUnicode(\r
- PyUnicode_AS_UNICODE(line) + start, endpos - start);\r
- Py_CLEAR(line);\r
- if (s == NULL)\r
- goto error;\r
- line = s;\r
- }\r
- }\r
- }\r
- if (remaining != NULL) { /* leftover tail (EOF path) belongs to the result too */\r
- if (chunks == NULL) {\r
- chunks = PyList_New(0);\r
- if (chunks == NULL)\r
- goto error;\r
- }\r
- if (PyList_Append(chunks, remaining) < 0)\r
- goto error;\r
- Py_CLEAR(remaining);\r
- }\r
- if (chunks != NULL) { /* join the pieces that were put aside with the final piece */\r
- if (line != NULL && PyList_Append(chunks, line) < 0)\r
- goto error;\r
- Py_CLEAR(line);\r
- line = PyUnicode_Join(_PyIO_empty_str, chunks);\r
- if (line == NULL)\r
- goto error;\r
- Py_DECREF(chunks);\r
- }\r
- if (line == NULL)\r
- line = PyUnicode_FromStringAndSize(NULL, 0); /* nothing read: return an empty string */\r
-\r
- return line;\r
-\r
- error:\r
- Py_XDECREF(chunks);\r
- Py_XDECREF(remaining);\r
- Py_XDECREF(line);\r
- return NULL;\r
-}\r
-\r
-/* readline([limit]): parse the optional limit and delegate to\r
-   _textiowrapper_readline().  A missing limit is passed on as -1.\r
-   Raises TypeError for a non-numeric limit; a limit that does not fit in\r
-   Py_ssize_t is reported by PyNumber_AsSsize_t(). */\r
-static PyObject *\r
-textiowrapper_readline(textio *self, PyObject *args)\r
-{\r
-    Py_ssize_t limit = -1;\r
-    PyObject *limitobj = NULL;\r
-\r
-    CHECK_ATTACHED(self);\r
-    if (!PyArg_ParseTuple(args, "|O:readline", &limitobj))\r
-        return NULL;\r
-\r
-    /* No explicit limit: keep the default. */\r
-    if (limitobj == NULL)\r
-        return _textiowrapper_readline(self, limit);\r
-\r
-    if (!PyNumber_Check(limitobj)) {\r
-        PyErr_Format(PyExc_TypeError,\r
-                     "integer argument expected, got '%.200s'",\r
-                     Py_TYPE(limitobj)->tp_name);\r
-        return NULL;\r
-    }\r
-    limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError);\r
-    if (limit == -1 && PyErr_Occurred())\r
-        return NULL;\r
-    return _textiowrapper_readline(self, limit);\r
-}\r
-\r
-/* Seek and Tell */\r
-\r
-typedef struct { /* unpacked form of the opaque integer exchanged by tell() and seek() */\r
- Py_off_t start_pos; /* buffer position of the safe start point seek() returns to */\r
- int dec_flags; /* decoder state flags to restore at start_pos */\r
- int bytes_to_feed; /* bytes to read from start_pos and feed to the decoder */\r
- int chars_to_skip; /* decoded characters to skip after feeding */\r
- char need_eof; /* non-zero: decode the fed bytes with final=true */\r
-} cookie_type;\r
-\r
-/*\r
- To speed up cookie packing/unpacking, we store the fields in a temporary\r
- string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).\r
- The following macros define at which offsets in the intermediary byte\r
- string the various CookieStruct fields will be stored.\r
- */\r
-\r
-#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char)) /* sum of the field sizes, without padding */\r
-\r
-#if defined(WORDS_BIGENDIAN)\r
-\r
-# define IS_LITTLE_ENDIAN 0\r
-\r
-/* We want the least significant byte of start_pos to also be the least\r
- significant byte of the cookie, which means that in big-endian mode we\r
- must copy the fields in reverse order. */\r
-\r
-# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))\r
-# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))\r
-# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))\r
-# define OFF_CHARS_TO_SKIP (sizeof(char))\r
-# define OFF_NEED_EOF 0\r
-\r
-#else\r
-\r
-# define IS_LITTLE_ENDIAN 1\r
-\r
-/* Little-endian mode: the least significant byte of start_pos will\r
- naturally end up the least significant byte of the cookie. */\r
-\r
-# define OFF_START_POS 0\r
-# define OFF_DEC_FLAGS (sizeof(Py_off_t))\r
-# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))\r
-# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))\r
-# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))\r
-\r
-#endif\r
-\r
-/* Unpack the integer `cookieObj` into the fields of `cookie`.\r
-   The object is converted with PyNumber_Long(), serialized into a byte\r
-   buffer of COOKIE_BUF_LEN bytes and each field is copied out from its\r
-   OFF_* offset.  Returns 0 on success, -1 with an exception set if the\r
-   conversion or the serialization fails. */\r
-static int\r
-textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)\r
-{\r
-    unsigned char buffer[COOKIE_BUF_LEN];\r
-    PyLongObject *cookieLong;\r
-    int status;\r
-\r
-    cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);\r
-    if (cookieLong == NULL)\r
-        return -1;\r
-\r
-    status = _PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),\r
-                                 IS_LITTLE_ENDIAN, 0);\r
-    /* The long is no longer needed whatever the outcome. */\r
-    Py_DECREF(cookieLong);\r
-    if (status < 0)\r
-        return -1;\r
-\r
-    memcpy(&cookie->start_pos, buffer + OFF_START_POS,\r
-           sizeof(cookie->start_pos));\r
-    memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS,\r
-           sizeof(cookie->dec_flags));\r
-    memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED,\r
-           sizeof(cookie->bytes_to_feed));\r
-    memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP,\r
-           sizeof(cookie->chars_to_skip));\r
-    memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF,\r
-           sizeof(cookie->need_eof));\r
-\r
-    return 0;\r
-}\r
-\r
-/* Pack the fields of `cookie` into a single Python long.\r
-   Each field is copied to its OFF_* offset in a COOKIE_BUF_LEN byte\r
-   buffer, which is then read as an unsigned integer.  Returns a new\r
-   reference, or NULL on failure. */\r
-static PyObject *\r
-textiowrapper_build_cookie(cookie_type *cookie)\r
-{\r
-    unsigned char buffer[COOKIE_BUF_LEN];\r
-    PyObject *packed;\r
-\r
-    memcpy(buffer + OFF_START_POS, &cookie->start_pos,\r
-           sizeof(cookie->start_pos));\r
-    memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags,\r
-           sizeof(cookie->dec_flags));\r
-    memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed,\r
-           sizeof(cookie->bytes_to_feed));\r
-    memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip,\r
-           sizeof(cookie->chars_to_skip));\r
-    memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof,\r
-           sizeof(cookie->need_eof));\r
-\r
-    packed = _PyLong_FromByteArray(buffer, sizeof(buffer),\r
-                                   IS_LITTLE_ENDIAN, 0);\r
-    return packed;\r
-}\r
-#undef IS_LITTLE_ENDIAN\r
-\r
-/* Put the decoder into the state described by `cookie`.\r
-   Returns 0 on success, -1 with an exception set on failure. */\r
-static int\r
-_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)\r
-{\r
-    PyObject *outcome;\r
-    int at_stream_start = (cookie->start_pos == 0 && cookie->dec_flags == 0);\r
-\r
-    /* When seeking to the start of the stream, we call decoder.reset()\r
-       rather than decoder.setstate().\r
-       This is for a few decoders such as utf-16 for which the state value\r
-       at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of\r
-       utf-16, that we are expecting a BOM).\r
-    */\r
-    if (at_stream_start) {\r
-        outcome = PyObject_CallMethodObjArgs(self->decoder,\r
-                                             _PyIO_str_reset, NULL);\r
-    }\r
-    else {\r
-        outcome = PyObject_CallMethod(self->decoder, "setstate",\r
-                                      "((si))", "", cookie->dec_flags);\r
-    }\r
-\r
-    if (outcome == NULL)\r
-        return -1;\r
-    Py_DECREF(outcome);\r
-    return 0;\r
-}\r
-\r
-/* Encoder counterpart of _textiowrapper_decoder_setstate(): reset the\r
-   encoder when the cookie denotes the start of the stream, otherwise set\r
-   its state to zero.  self->encoding_start_of_stream is updated to match\r
-   even when the call fails.\r
-   Returns 0 on success, -1 with an exception set on failure. */\r
-static int\r
-_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)\r
-{\r
-    PyObject *outcome;\r
-\r
-    if (cookie->start_pos != 0 || cookie->dec_flags != 0) {\r
-        /* Somewhere inside the stream. */\r
-        outcome = PyObject_CallMethodObjArgs(self->encoder,\r
-                                             _PyIO_str_setstate,\r
-                                             _PyIO_zero, NULL);\r
-        self->encoding_start_of_stream = 0;\r
-    }\r
-    else {\r
-        /* Back at the very beginning. */\r
-        outcome = PyObject_CallMethodObjArgs(self->encoder,\r
-                                             _PyIO_str_reset, NULL);\r
-        self->encoding_start_of_stream = 1;\r
-    }\r
-\r
-    if (outcome == NULL)\r
-        return -1;\r
-    Py_DECREF(outcome);\r
-    return 0;\r
-}\r
-\r
-static PyObject *\r
-textiowrapper_seek(textio *self, PyObject *args)\r
-{ /* seek(cookie, whence=0).\r
-     whence 0: `cookie` is a value produced by tell(); the buffer is moved\r
-     to the cookie's safe start point and the decoder is replayed from it.\r
-     whence 1: only a zero offset is accepted; it is turned into a\r
-     whence-0 seek to the current tell() position.\r
-     whence 2: only a zero offset is accepted; the buffer is moved to its\r
-     end and the result of that call is returned.\r
-     Returns a new reference (the cookie for whence 0 and 1), or NULL with\r
-     an exception set. */\r
- PyObject *cookieObj, *posobj;\r
- cookie_type cookie;\r
- int whence = 0;\r
- PyObject *res;\r
- int cmp;\r
-\r
- CHECK_ATTACHED(self);\r
-\r
- if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))\r
- return NULL;\r
- CHECK_CLOSED(self);\r
-\r
- Py_INCREF(cookieObj); /* owned from here on; released at `fail` or returned */\r
-\r
- if (!self->seekable) {\r
- PyErr_SetString(PyExc_IOError,\r
- "underlying stream is not seekable");\r
- goto fail;\r
- }\r
-\r
- if (whence == 1) {\r
- /* seek relative to current position */\r
- cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);\r
- if (cmp < 0)\r
- goto fail;\r
-\r
- if (cmp == 0) {\r
- PyErr_SetString(PyExc_IOError,\r
- "can't do nonzero cur-relative seeks");\r
- goto fail;\r
- }\r
-\r
- /* Seeking to the current position should attempt to\r
- * sync the underlying buffer with the current position.\r
- */\r
- Py_DECREF(cookieObj);\r
- cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);\r
- if (cookieObj == NULL)\r
- goto fail;\r
- } /* falls through to the absolute seek below with the tell() cookie */\r
- else if (whence == 2) {\r
- /* seek relative to end of file */\r
-\r
- cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);\r
- if (cmp < 0)\r
- goto fail;\r
-\r
- if (cmp == 0) {\r
- PyErr_SetString(PyExc_IOError,\r
- "can't do nonzero end-relative seeks");\r
- goto fail;\r
- }\r
-\r
- res = PyObject_CallMethod((PyObject *)self, "flush", NULL);\r
- if (res == NULL)\r
- goto fail;\r
- Py_DECREF(res);\r
-\r
- textiowrapper_set_decoded_chars(self, NULL); /* drop all read-side state */\r
- Py_CLEAR(self->snapshot);\r
- if (self->decoder) {\r
- res = PyObject_CallMethod(self->decoder, "reset", NULL);\r
- if (res == NULL)\r
- goto fail;\r
- Py_DECREF(res);\r
- }\r
-\r
- res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);\r
- Py_XDECREF(cookieObj);\r
- return res;\r
- }\r
- else if (whence != 0) {\r
- PyErr_Format(PyExc_ValueError,\r
- "invalid whence (%d, should be 0, 1 or 2)", whence);\r
- goto fail;\r
- }\r
-\r
- cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);\r
- if (cmp < 0)\r
- goto fail;\r
-\r
- if (cmp == 1) {\r
- PyObject *repr = PyObject_Repr(cookieObj);\r
- if (repr != NULL) {\r
- PyErr_Format(PyExc_ValueError,\r
- "negative seek position %s",\r
- PyString_AS_STRING(repr));\r
- Py_DECREF(repr);\r
- }\r
- goto fail;\r
- }\r
-\r
- res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);\r
- if (res == NULL)\r
- goto fail;\r
- Py_DECREF(res);\r
-\r
- /* The strategy of seek() is to go back to the safe start point\r
- * and replay the effect of read(chars_to_skip) from there.\r
- */\r
- if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)\r
- goto fail;\r
-\r
- /* Seek back to the safe start point. */\r
- posobj = PyLong_FromOff_t(cookie.start_pos);\r
- if (posobj == NULL)\r
- goto fail;\r
- res = PyObject_CallMethodObjArgs(self->buffer,\r
- _PyIO_str_seek, posobj, NULL);\r
- Py_DECREF(posobj);\r
- if (res == NULL)\r
- goto fail;\r
- Py_DECREF(res);\r
-\r
- textiowrapper_set_decoded_chars(self, NULL);\r
- Py_CLEAR(self->snapshot);\r
-\r
- /* Restore the decoder to its state from the safe start point. */\r
- if (self->decoder) {\r
- if (_textiowrapper_decoder_setstate(self, &cookie) < 0)\r
- goto fail;\r
- }\r
-\r
- if (cookie.chars_to_skip) {\r
- /* Just like _read_chunk, feed the decoder and save a snapshot. */\r
- PyObject *input_chunk = PyObject_CallMethod(\r
- self->buffer, "read", "i", cookie.bytes_to_feed);\r
- PyObject *decoded;\r
-\r
- if (input_chunk == NULL)\r
- goto fail;\r
-\r
- if (!PyBytes_Check(input_chunk)) {\r
- PyErr_Format(PyExc_TypeError,\r
- "underlying read() should have returned a bytes "\r
- "object, not '%.200s'",\r
- Py_TYPE(input_chunk)->tp_name);\r
- Py_DECREF(input_chunk);\r
- goto fail;\r
- }\r
-\r
- self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk); /* "N": the snapshot takes over our reference to input_chunk */\r
- if (self->snapshot == NULL) {\r
- Py_DECREF(input_chunk);\r
- goto fail;\r
- }\r
-\r
- decoded = PyObject_CallMethod(self->decoder, "decode",\r
- "Oi", input_chunk, (int)cookie.need_eof); /* input_chunk is kept alive by self->snapshot here */\r
-\r
- if (check_decoded(decoded) < 0)\r
- goto fail;\r
-\r
- textiowrapper_set_decoded_chars(self, decoded);\r
-\r
- /* Skip chars_to_skip of the decoded characters. */\r
- if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {\r
- PyErr_SetString(PyExc_IOError, "can't restore logical file position");\r
- goto fail;\r
- }\r
- self->decoded_chars_used = cookie.chars_to_skip;\r
- }\r
- else { /* nothing to replay: snapshot with an empty input */\r
- self->snapshot = Py_BuildValue("is", cookie.dec_flags, "");\r
- if (self->snapshot == NULL)\r
- goto fail;\r
- }\r
-\r
- /* Finally, reset the encoder (merely useful for proper BOM handling) */\r
- if (self->encoder) {\r
- if (_textiowrapper_encoder_setstate(self, &cookie) < 0)\r
- goto fail;\r
- }\r
- return cookieObj;\r
- fail:\r
- Py_XDECREF(cookieObj);\r
- return NULL;\r
-\r
-}\r
-\r
-static PyObject *\r
-textiowrapper_tell(textio *self, PyObject *args)\r
-{ /* tell(): return an opaque position that seek() can restore.\r
-     Without a decoder or a snapshot the buffer's own tell() value is\r
-     returned.  Otherwise the position is rebuilt by replaying, one byte\r
-     at a time, the input recorded in the snapshot through the decoder,\r
-     and a packed cookie is returned.  The decoder state is saved before\r
-     the replay and restored afterwards, on the error path too.\r
-     Returns NULL with an exception set on error. */\r
- PyObject *res;\r
- PyObject *posobj = NULL;\r
- cookie_type cookie = {0,0,0,0,0};\r
- PyObject *next_input;\r
- Py_ssize_t chars_to_skip, chars_decoded;\r
- PyObject *saved_state = NULL;\r
- char *input, *input_end;\r
-\r
- CHECK_ATTACHED(self);\r
- CHECK_CLOSED(self);\r
-\r
- if (!self->seekable) {\r
- PyErr_SetString(PyExc_IOError,\r
- "underlying stream is not seekable");\r
- goto fail;\r
- }\r
- if (!self->telling) {\r
- PyErr_SetString(PyExc_IOError,\r
- "telling position disabled by next() call");\r
- goto fail;\r
- }\r
-\r
- if (_textiowrapper_writeflush(self) < 0)\r
- return NULL;\r
- res = PyObject_CallMethod((PyObject *)self, "flush", NULL);\r
- if (res == NULL)\r
- goto fail;\r
- Py_DECREF(res);\r
-\r
- posobj = PyObject_CallMethod(self->buffer, "tell", NULL);\r
- if (posobj == NULL)\r
- goto fail;\r
-\r
- if (self->decoder == NULL || self->snapshot == NULL) { /* no decoding state to account for */\r
- assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);\r
- return posobj;\r
- }\r
-\r
-#if defined(HAVE_LARGEFILE_SUPPORT)\r
- cookie.start_pos = PyLong_AsLongLong(posobj);\r
-#else\r
- cookie.start_pos = PyLong_AsLong(posobj);\r
-#endif\r
- if (PyErr_Occurred())\r
- goto fail;\r
-\r
- /* Skip backward to the snapshot point (see _read_chunk). */\r
- if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input)) /* next_input is borrowed from the snapshot */\r
- goto fail;\r
-\r
- assert (PyBytes_Check(next_input));\r
-\r
- cookie.start_pos -= PyBytes_GET_SIZE(next_input);\r
-\r
- /* How many decoded characters have been used up since the snapshot? */\r
- if (self->decoded_chars_used == 0) {\r
- /* We haven't moved from the snapshot point. */\r
- Py_DECREF(posobj);\r
- return textiowrapper_build_cookie(&cookie);\r
- }\r
-\r
- chars_to_skip = self->decoded_chars_used;\r
-\r
- /* Starting from the snapshot position, we will walk the decoder\r
- * forward until it gives us enough decoded characters.\r
- */\r
- saved_state = PyObject_CallMethodObjArgs(self->decoder,\r
- _PyIO_str_getstate, NULL);\r
- if (saved_state == NULL)\r
- goto fail;\r
-\r
- /* Note our initial start point. */\r
- if (_textiowrapper_decoder_setstate(self, &cookie) < 0)\r
- goto fail;\r
-\r
- /* Feed the decoder one byte at a time. As we go, note the\r
- * nearest "safe start point" before the current location\r
- * (a point where the decoder has nothing buffered, so seek()\r
- * can safely start from there and advance to this location).\r
- */\r
- chars_decoded = 0;\r
- input = PyBytes_AS_STRING(next_input);\r
- input_end = input + PyBytes_GET_SIZE(next_input);\r
- while (input < input_end) {\r
- PyObject *state;\r
- char *dec_buffer;\r
- Py_ssize_t dec_buffer_len;\r
- int dec_flags;\r
-\r
- PyObject *decoded = PyObject_CallMethod(\r
- self->decoder, "decode", "s#", input, (Py_ssize_t)1);\r
- if (check_decoded(decoded) < 0)\r
- goto fail;\r
- chars_decoded += PyUnicode_GET_SIZE(decoded);\r
- Py_DECREF(decoded);\r
-\r
- cookie.bytes_to_feed += 1;\r
-\r
- state = PyObject_CallMethodObjArgs(self->decoder,\r
- _PyIO_str_getstate, NULL);\r
- if (state == NULL)\r
- goto fail;\r
- if (!PyArg_Parse(state, "(s#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {\r
- Py_DECREF(state);\r
- goto fail;\r
- }\r
- Py_DECREF(state); /* only dec_buffer_len and dec_flags are used below, not dec_buffer */\r
-\r
- if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {\r
- /* Decoder buffer is empty, so this is a safe start point. */\r
- cookie.start_pos += cookie.bytes_to_feed;\r
- chars_to_skip -= chars_decoded;\r
- cookie.dec_flags = dec_flags;\r
- cookie.bytes_to_feed = 0;\r
- chars_decoded = 0;\r
- }\r
- if (chars_decoded >= chars_to_skip)\r
- break; /* leaves `input` short of input_end, so the EOF step below is skipped */\r
- input++;\r
- }\r
- if (input == input_end) {\r
- /* We didn't get enough decoded data; signal EOF to get more. */\r
- PyObject *decoded = PyObject_CallMethod(\r
- self->decoder, "decode", "si", "", /* final = */ 1);\r
- if (check_decoded(decoded) < 0)\r
- goto fail;\r
- chars_decoded += PyUnicode_GET_SIZE(decoded);\r
- Py_DECREF(decoded);\r
- cookie.need_eof = 1;\r
-\r
- if (chars_decoded < chars_to_skip) {\r
- PyErr_SetString(PyExc_IOError,\r
- "can't reconstruct logical file position");\r
- goto fail;\r
- }\r
- }\r
-\r
- /* finally: restore the decoder state saved before the replay */\r
- Py_XDECREF(posobj);\r
- res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);\r
- Py_DECREF(saved_state);\r
- if (res == NULL)\r
- return NULL;\r
- Py_DECREF(res);\r
-\r
- /* The returned cookie corresponds to the last safe start point. */\r
- cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);\r
- return textiowrapper_build_cookie(&cookie);\r
-\r
- fail:\r
- Py_XDECREF(posobj);\r
- if (saved_state) { /* restore the decoder state on the error path as well */\r
- PyObject *type, *value, *traceback;\r
- PyErr_Fetch(&type, &value, &traceback);\r
-\r
- res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);\r
- _PyErr_ReplaceException(type, value, traceback);\r
- Py_DECREF(saved_state);\r
- Py_XDECREF(res);\r
- }\r
- return NULL;\r
-}\r
-\r
-static PyObject *\r
-textiowrapper_truncate(textio *self, PyObject *args)\r
-{\r
- PyObject *pos = Py_None;\r
- PyObject *res;\r
-\r
- CHECK_ATTACHED(self)\r
- if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {\r
- return NULL;\r
- }\r
-\r
- res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);\r
- if (res == NULL)\r
- return NULL;\r
- Py_DECREF(res);\r
-\r
- return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);\r
-}\r
-\r
-static PyObject *\r
-textiowrapper_repr(textio *self)\r
-{\r
- PyObject *nameobj, *res;\r
- PyObject *namerepr = NULL, *encrepr = NULL;\r
-\r
- CHECK_INITIALIZED(self);\r
-\r
- nameobj = PyObject_GetAttrString((PyObject *) self, "name");\r
- if (nameobj == NULL) {\r
- if (PyErr_ExceptionMatches(PyExc_Exception))\r
- PyErr_Clear();\r
- else\r
- goto error;\r
- encrepr = PyObject_Repr(self->encoding);\r
- res = PyString_FromFormat("<_io.TextIOWrapper encoding=%s>",\r
- PyString_AS_STRING(encrepr));\r
- }\r
- else {\r
- encrepr = PyObject_Repr(self->encoding);\r
- namerepr = PyObject_Repr(nameobj);\r
- res = PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>",\r
- PyString_AS_STRING(namerepr),\r
- PyString_AS_STRING(encrepr));\r
- Py_DECREF(nameobj);\r
- }\r
- Py_XDECREF(namerepr);\r
- Py_XDECREF(encrepr);\r
- return res;\r
-\r
-error:\r
- Py_XDECREF(namerepr);\r
- Py_XDECREF(encrepr);\r
- return NULL;\r
-}\r
-\r
-\r
-/* Inquiries */\r
-\r
-static PyObject *\r
-textiowrapper_fileno(textio *self, PyObject *args)\r
-{\r
- CHECK_ATTACHED(self);\r
- return PyObject_CallMethod(self->buffer, "fileno", NULL);\r
-}\r
-\r
-static PyObject *\r
-textiowrapper_seekable(textio *self, PyObject *args)\r
-{\r
- CHECK_ATTACHED(self);\r
- return PyObject_CallMethod(self->buffer, "seekable", NULL);\r
-}\r
-\r
-static PyObject *\r
-textiowrapper_readable(textio *self, PyObject *args)\r
-{\r
- CHECK_ATTACHED(self);\r
- return PyObject_CallMethod(self->buffer, "readable", NULL);\r
-}\r
-\r
-static PyObject *\r
-textiowrapper_writable(textio *self, PyObject *args)\r
-{\r
- CHECK_ATTACHED(self);\r
- return PyObject_CallMethod(self->buffer, "writable", NULL);\r
-}\r
-\r
-static PyObject *\r
-textiowrapper_isatty(textio *self, PyObject *args)\r
-{\r
- CHECK_ATTACHED(self);\r
- return PyObject_CallMethod(self->buffer, "isatty", NULL);\r
-}\r
-\r
-static PyObject *\r
-textiowrapper_flush(textio *self, PyObject *args)\r
-{\r
- CHECK_ATTACHED(self);\r
- CHECK_CLOSED(self);\r
- self->telling = self->seekable;\r
- if (_textiowrapper_writeflush(self) < 0)\r
- return NULL;\r
- return PyObject_CallMethod(self->buffer, "flush", NULL);\r
-}\r
-\r
-static PyObject *\r
-textiowrapper_close(textio *self, PyObject *args)\r
-{\r
- PyObject *res;\r
- int r;\r
- CHECK_ATTACHED(self);\r
-\r
- res = textiowrapper_closed_get(self, NULL);\r
- if (res == NULL)\r
- return NULL;\r
- r = PyObject_IsTrue(res);\r
- Py_DECREF(res);\r
- if (r < 0)\r
- return NULL;\r
-\r
- if (r > 0) {\r
- Py_RETURN_NONE; /* stream already closed */\r
- }\r
- else {\r
- PyObject *exc = NULL, *val, *tb;\r
- res = PyObject_CallMethod((PyObject *)self, "flush", NULL);\r
- if (res == NULL)\r
- PyErr_Fetch(&exc, &val, &tb);\r
- else\r
- Py_DECREF(res);\r
-\r
- res = PyObject_CallMethod(self->buffer, "close", NULL);\r
- if (exc != NULL) {\r
- _PyErr_ReplaceException(exc, val, tb);\r
- Py_CLEAR(res);\r
- }\r
- return res;\r
- }\r
-}\r
-\r
-static PyObject *\r
-textiowrapper_iternext(textio *self)\r
-{\r
- PyObject *line;\r
-\r
- CHECK_ATTACHED(self);\r
-\r
- self->telling = 0;\r
- if (Py_TYPE(self) == &PyTextIOWrapper_Type) {\r
- /* Skip method call overhead for speed */\r
- line = _textiowrapper_readline(self, -1);\r
- }\r
- else {\r
- line = PyObject_CallMethodObjArgs((PyObject *)self,\r
- _PyIO_str_readline, NULL);\r
- if (line && !PyUnicode_Check(line)) {\r
- PyErr_Format(PyExc_IOError,\r
- "readline() should have returned an str object, "\r
- "not '%.200s'", Py_TYPE(line)->tp_name);\r
- Py_DECREF(line);\r
- return NULL;\r
- }\r
- }\r
-\r
- if (line == NULL)\r
- return NULL;\r
-\r
- if (PyUnicode_GET_SIZE(line) == 0) {\r
- /* Reached EOF or would have blocked */\r
- Py_DECREF(line);\r
- Py_CLEAR(self->snapshot);\r
- self->telling = self->seekable;\r
- return NULL;\r
- }\r
-\r
- return line;\r
-}\r
-\r
-static PyObject *\r
-textiowrapper_name_get(textio *self, void *context)\r
-{\r
- CHECK_ATTACHED(self);\r
- return PyObject_GetAttrString(self->buffer, "name");\r
-}\r
-\r
-static PyObject *\r
-textiowrapper_closed_get(textio *self, void *context)\r
-{\r
- CHECK_ATTACHED(self);\r
- return PyObject_GetAttr(self->buffer, _PyIO_str_closed);\r
-}\r
-\r
-static PyObject *\r
-textiowrapper_newlines_get(textio *self, void *context)\r
-{\r
- PyObject *res;\r
- CHECK_ATTACHED(self);\r
- if (self->decoder == NULL)\r
- Py_RETURN_NONE;\r
- res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);\r
- if (res == NULL) {\r
- if (PyErr_ExceptionMatches(PyExc_AttributeError)) {\r
- PyErr_Clear();\r
- Py_RETURN_NONE;\r
- }\r
- else {\r
- return NULL;\r
- }\r
- }\r
- return res;\r
-}\r
-\r
-static PyObject *\r
-textiowrapper_errors_get(textio *self, void *context)\r
-{\r
- CHECK_INITIALIZED(self);\r
- Py_INCREF(self->errors);\r
- return self->errors;\r
-}\r
-\r
-static PyObject *\r
-textiowrapper_chunk_size_get(textio *self, void *context)\r
-{\r
- CHECK_ATTACHED(self);\r
- return PyLong_FromSsize_t(self->chunk_size);\r
-}\r
-\r
-static int\r
-textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)\r
-{\r
- Py_ssize_t n;\r
- CHECK_ATTACHED_INT(self);\r
- n = PyNumber_AsSsize_t(arg, PyExc_TypeError);\r
- if (n == -1 && PyErr_Occurred())\r
- return -1;\r
- if (n <= 0) {\r
- PyErr_SetString(PyExc_ValueError,\r
- "a strictly positive integer is required");\r
- return -1;\r
- }\r
- self->chunk_size = n;\r
- return 0;\r
-}\r
-\r
-static PyMethodDef textiowrapper_methods[] = {\r
- {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},\r
- {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},\r
- {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},\r
- {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},\r
- {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},\r
- {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},\r
-\r
- {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},\r
- {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},\r
- {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},\r
- {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},\r
- {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},\r
-\r
- {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},\r
- {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},\r
- {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},\r
- {NULL, NULL}\r
-};\r
-\r
-static PyMemberDef textiowrapper_members[] = {\r
- {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},\r
- {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},\r
- {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},\r
- {NULL}\r
-};\r
-\r
-static PyGetSetDef textiowrapper_getset[] = {\r
- {"name", (getter)textiowrapper_name_get, NULL, NULL},\r
- {"closed", (getter)textiowrapper_closed_get, NULL, NULL},\r
-/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},\r
-*/\r
- {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},\r
- {"errors", (getter)textiowrapper_errors_get, NULL, NULL},\r
- {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,\r
- (setter)textiowrapper_chunk_size_set, NULL},\r
- {NULL}\r
-};\r
-\r
-PyTypeObject PyTextIOWrapper_Type = {\r
- PyVarObject_HEAD_INIT(NULL, 0)\r
- "_io.TextIOWrapper", /*tp_name*/\r
- sizeof(textio), /*tp_basicsize*/\r
- 0, /*tp_itemsize*/\r
- (destructor)textiowrapper_dealloc, /*tp_dealloc*/\r
- 0, /*tp_print*/\r
- 0, /*tp_getattr*/\r
- 0, /*tps_etattr*/\r
- 0, /*tp_compare */\r
- (reprfunc)textiowrapper_repr,/*tp_repr*/\r
- 0, /*tp_as_number*/\r
- 0, /*tp_as_sequence*/\r
- 0, /*tp_as_mapping*/\r
- 0, /*tp_hash */\r
- 0, /*tp_call*/\r
- 0, /*tp_str*/\r
- 0, /*tp_getattro*/\r
- 0, /*tp_setattro*/\r
- 0, /*tp_as_buffer*/\r
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE\r
- | Py_TPFLAGS_HAVE_GC, /*tp_flags*/\r
- textiowrapper_doc, /* tp_doc */\r
- (traverseproc)textiowrapper_traverse, /* tp_traverse */\r
- (inquiry)textiowrapper_clear, /* tp_clear */\r
- 0, /* tp_richcompare */\r
- offsetof(textio, weakreflist), /*tp_weaklistoffset*/\r
- 0, /* tp_iter */\r
- (iternextfunc)textiowrapper_iternext, /* tp_iternext */\r
- textiowrapper_methods, /* tp_methods */\r
- textiowrapper_members, /* tp_members */\r
- textiowrapper_getset, /* tp_getset */\r
- 0, /* tp_base */\r
- 0, /* tp_dict */\r
- 0, /* tp_descr_get */\r
- 0, /* tp_descr_set */\r
- offsetof(textio, dict), /*tp_dictoffset*/\r
- (initproc)textiowrapper_init, /* tp_init */\r
- 0, /* tp_alloc */\r
- PyType_GenericNew, /* tp_new */\r
-};\r