1 #define PY_SSIZE_T_CLEAN
3 #include "structmember.h"
6 /* Implementation note: the buffer is always at least one character longer
7 than the enclosed string, for proper functioning of _PyIO_find_line_ending.
14 Py_ssize_t string_size
;
17 char ok
; /* initialized? */
26 PyObject
*weakreflist
;
29 #define CHECK_INITIALIZED(self) \
30 if (self->ok <= 0) { \
31 PyErr_SetString(PyExc_ValueError, \
32 "I/O operation on uninitialized object"); \
36 #define CHECK_CLOSED(self) \
38 PyErr_SetString(PyExc_ValueError, \
39 "I/O operation on closed file"); \
43 PyDoc_STRVAR(stringio_doc
,
44 "Text I/O implementation using an in-memory buffer.\n"
46 "The initial_value argument sets the value of object. The newline\n"
47 "argument is like the one of TextIOWrapper's constructor.");
50 /* Internal routine for changing the size, in terms of characters, of the
51 buffer of StringIO objects. The caller should ensure that the 'size'
52 argument is non-negative. Returns 0 on success, -1 otherwise. */
54 resize_buffer(stringio
*self
, size_t size
)
56 /* Here, unsigned types are used to avoid dealing with signed integer
57 overflow, which is undefined in C. */
58 size_t alloc
= self
->buf_size
;
59 Py_UNICODE
*new_buf
= NULL
;
61 assert(self
->buf
!= NULL
);
63 /* Reserve one more char for line ending detection. */
65 /* For simplicity, stay in the range of the signed type. Anyway, Python
66 doesn't allow strings to be longer than this. */
67 if (size
> PY_SSIZE_T_MAX
)
70 if (size
< alloc
/ 2) {
71 /* Major downsize; resize down to exact size. */
74 else if (size
< alloc
) {
75 /* Within allocated size; quick exit */
78 else if (size
<= alloc
* 1.125) {
79 /* Moderate upsize; overallocate similar to list_resize() */
80 alloc
= size
+ (size
>> 3) + (size
< 9 ? 3 : 6);
83 /* Major upsize; resize up to exact size */
87 if (alloc
> ((size_t)-1) / sizeof(Py_UNICODE
))
89 new_buf
= (Py_UNICODE
*)PyMem_Realloc(self
->buf
,
90 alloc
* sizeof(Py_UNICODE
));
91 if (new_buf
== NULL
) {
95 self
->buf_size
= alloc
;
101 PyErr_SetString(PyExc_OverflowError
,
102 "new buffer size too large");
106 /* Internal routine for writing a whole PyUnicode object to the buffer of a
107 StringIO object. Returns 0 on success, or -1 on error. */
109 write_str(stringio
*self
, PyObject
*obj
)
113 PyObject
*decoded
= NULL
;
114 assert(self
->buf
!= NULL
);
115 assert(self
->pos
>= 0);
117 if (self
->decoder
!= NULL
) {
118 decoded
= _PyIncrementalNewlineDecoder_decode(
119 self
->decoder
, obj
, 1 /* always final */);
126 PyObject
*translated
= PyUnicode_Replace(
127 decoded
, _PyIO_str_nl
, self
->writenl
, -1);
129 decoded
= translated
;
134 assert(PyUnicode_Check(decoded
));
135 str
= PyUnicode_AS_UNICODE(decoded
);
136 len
= PyUnicode_GET_SIZE(decoded
);
140 /* This overflow check is not strictly necessary. However, it saves us from
141 dealing with funky things like comparing an unsigned and a signed
143 if (self
->pos
> PY_SSIZE_T_MAX
- len
) {
144 PyErr_SetString(PyExc_OverflowError
,
145 "new position too large");
148 if (self
->pos
+ len
> self
->string_size
) {
149 if (resize_buffer(self
, self
->pos
+ len
) < 0)
153 if (self
->pos
> self
->string_size
) {
154 /* In case of overseek, pad with null bytes the buffer region between
155 the end of stream and the current position.
158 | |<---used--->|<----------available----------->|
159 | | <--to pad-->|<---to write---> |
163 memset(self
->buf
+ self
->string_size
, '\0',
164 (self
->pos
- self
->string_size
) * sizeof(Py_UNICODE
));
167 /* Copy the data to the internal buffer, overwriting some of the
168 existing data if self->pos < self->string_size. */
169 memcpy(self
->buf
+ self
->pos
, str
, len
* sizeof(Py_UNICODE
));
172 /* Set the new length of the internal string if it has changed. */
173 if (self
->string_size
< self
->pos
) {
174 self
->string_size
= self
->pos
;
185 PyDoc_STRVAR(stringio_getvalue_doc
,
186 "Retrieve the entire contents of the object.");
189 stringio_getvalue(stringio
*self
)
191 CHECK_INITIALIZED(self
);
193 return PyUnicode_FromUnicode(self
->buf
, self
->string_size
);
196 PyDoc_STRVAR(stringio_tell_doc
,
197 "Tell the current file position.");
200 stringio_tell(stringio
*self
)
202 CHECK_INITIALIZED(self
);
204 return PyLong_FromSsize_t(self
->pos
);
207 PyDoc_STRVAR(stringio_read_doc
,
208 "Read at most n characters, returned as a string.\n"
210 "If the argument is negative or omitted, read until EOF\n"
211 "is reached. Return an empty string at EOF.\n");
214 stringio_read(stringio
*self
, PyObject
*args
)
218 PyObject
*arg
= Py_None
;
220 CHECK_INITIALIZED(self
);
221 if (!PyArg_ParseTuple(args
, "|O:read", &arg
))
225 if (PyNumber_Check(arg
)) {
226 size
= PyNumber_AsSsize_t(arg
, PyExc_OverflowError
);
227 if (size
== -1 && PyErr_Occurred())
230 else if (arg
== Py_None
) {
231 /* Read until EOF is reached, by default. */
235 PyErr_Format(PyExc_TypeError
, "integer argument expected, got '%s'",
236 Py_TYPE(arg
)->tp_name
);
240 /* adjust invalid sizes */
241 n
= self
->string_size
- self
->pos
;
242 if (size
< 0 || size
> n
) {
248 output
= self
->buf
+ self
->pos
;
250 return PyUnicode_FromUnicode(output
, size
);
253 /* Internal helper, used by stringio_readline and stringio_iternext */
255 _stringio_readline(stringio
*self
, Py_ssize_t limit
)
257 Py_UNICODE
*start
, *end
, old_char
;
258 Py_ssize_t len
, consumed
;
260 /* In case of overseek, return the empty string */
261 if (self
->pos
>= self
->string_size
)
262 return PyUnicode_FromString("");
264 start
= self
->buf
+ self
->pos
;
265 if (limit
< 0 || limit
> self
->string_size
- self
->pos
)
266 limit
= self
->string_size
- self
->pos
;
271 len
= _PyIO_find_line_ending(
272 self
->readtranslate
, self
->readuniversal
, self
->readnl
,
273 start
, end
, &consumed
);
275 /* If we haven't found any line ending, we just return everything
276 (`consumed` is ignored). */
280 return PyUnicode_FromUnicode(start
, len
);
283 PyDoc_STRVAR(stringio_readline_doc
,
284 "Read until newline or EOF.\n"
286 "Returns an empty string if EOF is hit immediately.\n");
289 stringio_readline(stringio
*self
, PyObject
*args
)
291 PyObject
*arg
= Py_None
;
292 Py_ssize_t limit
= -1;
294 CHECK_INITIALIZED(self
);
295 if (!PyArg_ParseTuple(args
, "|O:readline", &arg
))
299 if (PyNumber_Check(arg
)) {
300 limit
= PyNumber_AsSsize_t(arg
, PyExc_OverflowError
);
301 if (limit
== -1 && PyErr_Occurred())
304 else if (arg
!= Py_None
) {
305 PyErr_Format(PyExc_TypeError
, "integer argument expected, got '%s'",
306 Py_TYPE(arg
)->tp_name
);
309 return _stringio_readline(self
, limit
);
313 stringio_iternext(stringio
*self
)
317 CHECK_INITIALIZED(self
);
320 if (Py_TYPE(self
) == &PyStringIO_Type
) {
321 /* Skip method call overhead for speed */
322 line
= _stringio_readline(self
, -1);
325 /* XXX is subclassing StringIO really supported? */
326 line
= PyObject_CallMethodObjArgs((PyObject
*)self
,
327 _PyIO_str_readline
, NULL
);
328 if (line
&& !PyUnicode_Check(line
)) {
329 PyErr_Format(PyExc_IOError
,
330 "readline() should have returned an str object, "
331 "not '%.200s'", Py_TYPE(line
)->tp_name
);
340 if (PyUnicode_GET_SIZE(line
) == 0) {
349 PyDoc_STRVAR(stringio_truncate_doc
,
350 "Truncate size to pos.\n"
352 "The pos argument defaults to the current file position, as\n"
353 "returned by tell(). The current file position is unchanged.\n"
354 "Returns the new absolute position.\n");
357 stringio_truncate(stringio
*self
, PyObject
*args
)
360 PyObject
*arg
= Py_None
;
362 CHECK_INITIALIZED(self
);
363 if (!PyArg_ParseTuple(args
, "|O:truncate", &arg
))
367 if (PyNumber_Check(arg
)) {
368 size
= PyNumber_AsSsize_t(arg
, PyExc_OverflowError
);
369 if (size
== -1 && PyErr_Occurred())
372 else if (arg
== Py_None
) {
373 /* Truncate to current position if no argument is passed. */
377 PyErr_Format(PyExc_TypeError
, "integer argument expected, got '%s'",
378 Py_TYPE(arg
)->tp_name
);
383 PyErr_Format(PyExc_ValueError
,
384 "Negative size value %zd", size
);
388 if (size
< self
->string_size
) {
389 if (resize_buffer(self
, size
) < 0)
391 self
->string_size
= size
;
394 return PyLong_FromSsize_t(size
);
397 PyDoc_STRVAR(stringio_seek_doc
,
398 "Change stream position.\n"
400 "Seek to character offset pos relative to position indicated by whence:\n"
401 " 0 Start of stream (the default). pos should be >= 0;\n"
402 " 1 Current position - pos must be 0;\n"
403 " 2 End of stream - pos must be 0.\n"
404 "Returns the new absolute position.\n");
407 stringio_seek(stringio
*self
, PyObject
*args
)
413 CHECK_INITIALIZED(self
);
414 if (!PyArg_ParseTuple(args
, "O|i:seek", &posobj
, &mode
))
417 pos
= PyNumber_AsSsize_t(posobj
, PyExc_OverflowError
);
418 if (pos
== -1 && PyErr_Occurred())
423 if (mode
!= 0 && mode
!= 1 && mode
!= 2) {
424 PyErr_Format(PyExc_ValueError
,
425 "Invalid whence (%i, should be 0, 1 or 2)", mode
);
428 else if (pos
< 0 && mode
== 0) {
429 PyErr_Format(PyExc_ValueError
,
430 "Negative seek position %zd", pos
);
433 else if (mode
!= 0 && pos
!= 0) {
434 PyErr_SetString(PyExc_IOError
,
435 "Can't do nonzero cur-relative seeks");
439 /* mode 0: offset relative to beginning of the string.
440 mode 1: no change to current position.
441 mode 2: change position to end of file. */
445 else if (mode
== 2) {
446 pos
= self
->string_size
;
451 return PyLong_FromSsize_t(self
->pos
);
454 PyDoc_STRVAR(stringio_write_doc
,
455 "Write string to file.\n"
457 "Returns the number of characters written, which is always equal to\n"
458 "the length of the string.\n");
461 stringio_write(stringio
*self
, PyObject
*obj
)
465 CHECK_INITIALIZED(self
);
466 if (!PyUnicode_Check(obj
)) {
467 PyErr_Format(PyExc_TypeError
, "unicode argument expected, got '%s'",
468 Py_TYPE(obj
)->tp_name
);
472 size
= PyUnicode_GET_SIZE(obj
);
474 if (size
> 0 && write_str(self
, obj
) < 0)
477 return PyLong_FromSsize_t(size
);
480 PyDoc_STRVAR(stringio_close_doc
,
481 "Close the IO object. Attempting any further operation after the\n"
482 "object is closed will raise a ValueError.\n"
484 "This method has no effect if the file is already closed.\n");
487 stringio_close(stringio
*self
)
490 /* Free up some memory */
491 if (resize_buffer(self
, 0) < 0)
493 Py_CLEAR(self
->readnl
);
494 Py_CLEAR(self
->writenl
);
495 Py_CLEAR(self
->decoder
);
500 stringio_traverse(stringio
*self
, visitproc visit
, void *arg
)
502 Py_VISIT(self
->dict
);
507 stringio_clear(stringio
*self
)
509 Py_CLEAR(self
->dict
);
514 stringio_dealloc(stringio
*self
)
516 _PyObject_GC_UNTRACK(self
);
519 PyMem_Free(self
->buf
);
522 Py_CLEAR(self
->readnl
);
523 Py_CLEAR(self
->writenl
);
524 Py_CLEAR(self
->decoder
);
525 Py_CLEAR(self
->dict
);
526 if (self
->weakreflist
!= NULL
)
527 PyObject_ClearWeakRefs((PyObject
*) self
);
528 Py_TYPE(self
)->tp_free(self
);
532 stringio_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
536 assert(type
!= NULL
&& type
->tp_alloc
!= NULL
);
537 self
= (stringio
*)type
->tp_alloc(type
, 0);
541 /* tp_alloc initializes all the fields to zero. So we don't have to
542 initialize them here. */
544 self
->buf
= (Py_UNICODE
*)PyMem_Malloc(0);
545 if (self
->buf
== NULL
) {
547 return PyErr_NoMemory();
550 return (PyObject
*)self
;
554 stringio_init(stringio
*self
, PyObject
*args
, PyObject
*kwds
)
556 char *kwlist
[] = {"initial_value", "newline", NULL
};
557 PyObject
*value
= NULL
;
558 char *newline
= "\n";
560 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|Oz:__init__", kwlist
,
564 if (newline
&& newline
[0] != '\0'
565 && !(newline
[0] == '\n' && newline
[1] == '\0')
566 && !(newline
[0] == '\r' && newline
[1] == '\0')
567 && !(newline
[0] == '\r' && newline
[1] == '\n' && newline
[2] == '\0')) {
568 PyErr_Format(PyExc_ValueError
,
569 "illegal newline value: %s", newline
);
572 if (value
&& value
!= Py_None
&& !PyUnicode_Check(value
)) {
573 PyErr_Format(PyExc_TypeError
,
574 "initial_value must be unicode or None, not %.200s",
575 Py_TYPE(value
)->tp_name
);
581 Py_CLEAR(self
->readnl
);
582 Py_CLEAR(self
->writenl
);
583 Py_CLEAR(self
->decoder
);
586 self
->readnl
= PyString_FromString(newline
);
587 if (self
->readnl
== NULL
)
590 self
->readuniversal
= (newline
== NULL
|| newline
[0] == '\0');
591 self
->readtranslate
= (newline
== NULL
);
592 /* If newline == "", we don't translate anything.
593 If newline == "\n" or newline == None, we translate to "\n", which is
595 (for newline == None, TextIOWrapper translates to os.linesep, but it
596 is pointless for StringIO)
598 if (newline
!= NULL
&& newline
[0] == '\r') {
599 self
->writenl
= PyUnicode_FromString(newline
);
602 if (self
->readuniversal
) {
603 self
->decoder
= PyObject_CallFunction(
604 (PyObject
*)&PyIncrementalNewlineDecoder_Type
,
605 "Oi", Py_None
, (int) self
->readtranslate
);
606 if (self
->decoder
== NULL
)
610 /* Now everything is set up, resize buffer to size of initial value,
612 self
->string_size
= 0;
613 if (value
&& value
!= Py_None
) {
614 Py_ssize_t len
= PyUnicode_GetSize(value
);
615 /* This is a heuristic, for newline translation might change
616 the string length. */
617 if (resize_buffer(self
, len
) < 0)
620 if (write_str(self
, value
) < 0)
624 if (resize_buffer(self
, 0) < 0)
634 /* Properties and pseudo-properties */
636 PyDoc_STRVAR(stringio_readable_doc
,
637 "readable() -> bool. Returns True if the IO object can be read.");
639 PyDoc_STRVAR(stringio_writable_doc
,
640 "writable() -> bool. Returns True if the IO object can be written.");
642 PyDoc_STRVAR(stringio_seekable_doc
,
643 "seekable() -> bool. Returns True if the IO object can be seeked.");
646 stringio_seekable(stringio
*self
, PyObject
*args
)
648 CHECK_INITIALIZED(self
);
654 stringio_readable(stringio
*self
, PyObject
*args
)
656 CHECK_INITIALIZED(self
);
662 stringio_writable(stringio
*self
, PyObject
*args
)
664 CHECK_INITIALIZED(self
);
671 The implementation of __getstate__ is similar to the one for BytesIO,
672 except that we also save the newline parameter. For __setstate__ and unlike
673 BytesIO, we call __init__ to restore the object's state. Doing so allows us
674 to avoid decoding the complex newline state while keeping the object
675 representation compact.
677 See comment in bytesio.c regarding why only pickle protocol 2 and onward are
682 stringio_getstate(stringio
*self
)
684 PyObject
*initvalue
= stringio_getvalue(self
);
688 if (initvalue
== NULL
)
690 if (self
->dict
== NULL
) {
695 dict
= PyDict_Copy(self
->dict
);
700 state
= Py_BuildValue("(OOnN)", initvalue
,
701 self
->readnl
? self
->readnl
: Py_None
,
703 Py_DECREF(initvalue
);
708 stringio_setstate(stringio
*self
, PyObject
*state
)
711 PyObject
*position_obj
;
715 assert(state
!= NULL
);
718 /* We allow the state tuple to be longer than 4, because we may need
719 someday to extend the object's state without breaking
720 backward-compatibility. */
721 if (!PyTuple_Check(state
) || Py_SIZE(state
) < 4) {
722 PyErr_Format(PyExc_TypeError
,
723 "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
724 Py_TYPE(self
)->tp_name
, Py_TYPE(state
)->tp_name
);
728 /* Initialize the object's state. */
729 initarg
= PyTuple_GetSlice(state
, 0, 2);
732 if (stringio_init(self
, initarg
, NULL
) < 0) {
738 /* Restore the buffer state. Even if __init__ did initialize the buffer,
739 we have to initialize it again since __init__ may translates the
740 newlines in the inital_value string. We clearly do not want that
741 because the string value in the state tuple has already been translated
742 once by __init__. So we do not take any chance and replace object's
743 buffer completely. */
745 Py_UNICODE
*buf
= PyUnicode_AS_UNICODE(PyTuple_GET_ITEM(state
, 0));
746 Py_ssize_t bufsize
= PyUnicode_GET_SIZE(PyTuple_GET_ITEM(state
, 0));
747 if (resize_buffer(self
, bufsize
) < 0)
749 memcpy(self
->buf
, buf
, bufsize
* sizeof(Py_UNICODE
));
750 self
->string_size
= bufsize
;
753 /* Set carefully the position value. Alternatively, we could use the seek
754 method instead of modifying self->pos directly to better protect the
755 object internal state against errneous (or malicious) inputs. */
756 position_obj
= PyTuple_GET_ITEM(state
, 2);
757 if (!PyIndex_Check(position_obj
)) {
758 PyErr_Format(PyExc_TypeError
,
759 "third item of state must be an integer, got %.200s",
760 Py_TYPE(position_obj
)->tp_name
);
763 pos
= PyNumber_AsSsize_t(position_obj
, PyExc_OverflowError
);
764 if (pos
== -1 && PyErr_Occurred())
767 PyErr_SetString(PyExc_ValueError
,
768 "position value cannot be negative");
773 /* Set the dictionary of the instance variables. */
774 dict
= PyTuple_GET_ITEM(state
, 3);
775 if (dict
!= Py_None
) {
776 if (!PyDict_Check(dict
)) {
777 PyErr_Format(PyExc_TypeError
,
778 "fourth item of state should be a dict, got a %.200s",
779 Py_TYPE(dict
)->tp_name
);
783 /* Alternatively, we could replace the internal dictionary
784 completely. However, it seems more practical to just update it. */
785 if (PyDict_Update(self
->dict
, dict
) < 0)
799 stringio_closed(stringio
*self
, void *context
)
801 CHECK_INITIALIZED(self
);
802 return PyBool_FromLong(self
->closed
);
806 stringio_line_buffering(stringio
*self
, void *context
)
808 CHECK_INITIALIZED(self
);
814 stringio_newlines(stringio
*self
, void *context
)
816 CHECK_INITIALIZED(self
);
818 if (self
->decoder
== NULL
)
820 return PyObject_GetAttr(self
->decoder
, _PyIO_str_newlines
);
823 static struct PyMethodDef stringio_methods
[] = {
824 {"close", (PyCFunction
)stringio_close
, METH_NOARGS
, stringio_close_doc
},
825 {"getvalue", (PyCFunction
)stringio_getvalue
, METH_NOARGS
, stringio_getvalue_doc
},
826 {"read", (PyCFunction
)stringio_read
, METH_VARARGS
, stringio_read_doc
},
827 {"readline", (PyCFunction
)stringio_readline
, METH_VARARGS
, stringio_readline_doc
},
828 {"tell", (PyCFunction
)stringio_tell
, METH_NOARGS
, stringio_tell_doc
},
829 {"truncate", (PyCFunction
)stringio_truncate
, METH_VARARGS
, stringio_truncate_doc
},
830 {"seek", (PyCFunction
)stringio_seek
, METH_VARARGS
, stringio_seek_doc
},
831 {"write", (PyCFunction
)stringio_write
, METH_O
, stringio_write_doc
},
833 {"seekable", (PyCFunction
)stringio_seekable
, METH_NOARGS
, stringio_seekable_doc
},
834 {"readable", (PyCFunction
)stringio_readable
, METH_NOARGS
, stringio_readable_doc
},
835 {"writable", (PyCFunction
)stringio_writable
, METH_NOARGS
, stringio_writable_doc
},
837 {"__getstate__", (PyCFunction
)stringio_getstate
, METH_NOARGS
},
838 {"__setstate__", (PyCFunction
)stringio_setstate
, METH_O
},
839 {NULL
, NULL
} /* sentinel */
842 static PyGetSetDef stringio_getset
[] = {
843 {"closed", (getter
)stringio_closed
, NULL
, NULL
},
844 {"newlines", (getter
)stringio_newlines
, NULL
, NULL
},
845 /* (following comments straight off of the original Python wrapper:)
846 XXX Cruft to support the TextIOWrapper API. This would only
847 be meaningful if StringIO supported the buffer attribute.
848 Hopefully, a better solution, than adding these pseudo-attributes,
851 {"line_buffering", (getter
)stringio_line_buffering
, NULL
, NULL
},
855 PyTypeObject PyStringIO_Type
= {
856 PyVarObject_HEAD_INIT(NULL
, 0)
857 "_io.StringIO", /*tp_name*/
858 sizeof(stringio
), /*tp_basicsize*/
860 (destructor
)stringio_dealloc
, /*tp_dealloc*/
867 0, /*tp_as_sequence*/
875 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
876 | Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
877 stringio_doc
, /*tp_doc*/
878 (traverseproc
)stringio_traverse
, /*tp_traverse*/
879 (inquiry
)stringio_clear
, /*tp_clear*/
880 0, /*tp_richcompare*/
881 offsetof(stringio
, weakreflist
), /*tp_weaklistoffset*/
883 (iternextfunc
)stringio_iternext
, /*tp_iternext*/
884 stringio_methods
, /*tp_methods*/
886 stringio_getset
, /*tp_getset*/
891 offsetof(stringio
, dict
), /*tp_dictoffset*/
892 (initproc
)stringio_init
, /*tp_init*/
894 stringio_new
, /*tp_new*/