5 This module provides the low-level underpinnings of a CSV reading/writing
6 module. Users should not use this module directly, but import the csv.py
9 **** For people modifying this code, please note that as of this writing
10 **** (2003-03-23), it is intended that this code should work with Python
15 #define MODULE_VERSION "1.0"
18 #include "structmember.h"
21 /* begin 2.2 compatibility macros */
23 /* Define macros for inline documentation. */
24 #define PyDoc_VAR(name) static char name[]
25 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
26 #ifdef WITH_DOC_STRINGS
27 #define PyDoc_STR(str) str
29 #define PyDoc_STR(str) ""
31 #endif /* ifndef PyDoc_STRVAR */
33 #ifndef PyMODINIT_FUNC
34 # if defined(__cplusplus)
35 # define PyMODINIT_FUNC extern "C" void
36 # else /* __cplusplus */
37 # define PyMODINIT_FUNC void
38 # endif /* __cplusplus */
42 #define Py_CLEAR(op) \
45 PyObject *tmp = (PyObject *)(op); \
52 #define Py_VISIT(op) \
55 int vret = visit((PyObject *)(op), arg); \
62 /* end 2.2 compatibility macros */
64 #define IS_BASESTRING(o) \
65 PyObject_TypeCheck(o, &PyBaseString_Type)
67 static PyObject
*error_obj
; /* CSV exception */
68 static PyObject
*dialects
; /* Dialect registry */
69 static long field_limit
= 128 * 1024; /* max parsed field size */
72 START_RECORD
, START_FIELD
, ESCAPED_CHAR
, IN_FIELD
,
73 IN_QUOTED_FIELD
, ESCAPE_IN_QUOTED_FIELD
, QUOTE_IN_QUOTED_FIELD
,
78 QUOTE_MINIMAL
, QUOTE_ALL
, QUOTE_NONNUMERIC
, QUOTE_NONE
86 static StyleDesc quote_styles
[] = {
87 { QUOTE_MINIMAL
, "QUOTE_MINIMAL" },
88 { QUOTE_ALL
, "QUOTE_ALL" },
89 { QUOTE_NONNUMERIC
, "QUOTE_NONNUMERIC" },
90 { QUOTE_NONE
, "QUOTE_NONE" },
97 int doublequote
; /* is " represented by ""? */
98 char delimiter
; /* field separator */
99 char quotechar
; /* quote character */
100 char escapechar
; /* escape character */
101 int skipinitialspace
; /* ignore spaces following delimiter? */
102 PyObject
*lineterminator
; /* string to write between records */
103 int quoting
; /* style of quoting to write */
105 int strict
; /* raise exception on bad CSV */
108 staticforward PyTypeObject Dialect_Type
;
113 PyObject
*input_iter
; /* iterate over this for input lines */
115 DialectObj
*dialect
; /* parsing dialect */
117 PyObject
*fields
; /* field list for current record */
118 ParserState state
; /* current CSV parse state */
119 char *field
; /* build current field in here */
120 int field_size
; /* size of allocated buffer */
121 int field_len
; /* length of current field */
122 int numeric_field
; /* treat field as numeric */
123 unsigned long line_num
; /* Source-file line number */
126 staticforward PyTypeObject Reader_Type
;
128 #define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
133 PyObject
*writeline
; /* write output lines to this file */
135 DialectObj
*dialect
; /* parsing dialect */
137 char *rec
; /* buffer for parser.join */
138 int rec_size
; /* size of allocated record */
139 int rec_len
; /* length of record */
140 int num_fields
; /* number of fields in record */
143 staticforward PyTypeObject Writer_Type
;
150 get_dialect_from_registry(PyObject
* name_obj
)
152 PyObject
*dialect_obj
;
154 dialect_obj
= PyDict_GetItem(dialects
, name_obj
);
155 if (dialect_obj
== NULL
) {
156 if (!PyErr_Occurred())
157 PyErr_Format(error_obj
, "unknown dialect");
160 Py_INCREF(dialect_obj
);
165 get_string(PyObject
*str
)
172 get_nullchar_as_None(char c
)
179 return PyString_FromStringAndSize((char*)&c
, 1);
183 Dialect_get_lineterminator(DialectObj
*self
)
185 return get_string(self
->lineterminator
);
189 Dialect_get_escapechar(DialectObj
*self
)
191 return get_nullchar_as_None(self
->escapechar
);
195 Dialect_get_quotechar(DialectObj
*self
)
197 return get_nullchar_as_None(self
->quotechar
);
201 Dialect_get_quoting(DialectObj
*self
)
203 return PyInt_FromLong(self
->quoting
);
207 _set_bool(const char *name
, int *target
, PyObject
*src
, int dflt
)
212 int b
= PyObject_IsTrue(src
);
221 _set_int(const char *name
, int *target
, PyObject
*src
, int dflt
)
226 if (!PyInt_Check(src
)) {
227 PyErr_Format(PyExc_TypeError
,
228 "\"%s\" must be an integer", name
);
231 *target
= PyInt_AsLong(src
);
237 _set_char(const char *name
, char *target
, PyObject
*src
, char dflt
)
243 if (src
!= Py_None
) {
245 if (!PyString_Check(src
)) {
246 PyErr_Format(PyExc_TypeError
,
247 "\"%s\" must be string, not %.200s", name
,
248 src
->ob_type
->tp_name
);
251 len
= PyString_GET_SIZE(src
);
253 PyErr_Format(PyExc_TypeError
,
254 "\"%s\" must be an 1-character string",
259 *target
= *PyString_AS_STRING(src
);
266 _set_str(const char *name
, PyObject
**target
, PyObject
*src
, const char *dflt
)
269 *target
= PyString_FromString(dflt
);
273 else if (!IS_BASESTRING(src
)) {
274 PyErr_Format(PyExc_TypeError
,
275 "\"%s\" must be a string", name
);
288 dialect_check_quoting(int quoting
)
290 StyleDesc
*qs
= quote_styles
;
292 for (qs
= quote_styles
; qs
->name
; qs
++) {
293 if (qs
->style
== quoting
)
296 PyErr_Format(PyExc_TypeError
, "bad \"quoting\" value");
300 #define D_OFF(x) offsetof(DialectObj, x)
302 static struct PyMemberDef Dialect_memberlist
[] = {
303 { "delimiter", T_CHAR
, D_OFF(delimiter
), READONLY
},
304 { "skipinitialspace", T_INT
, D_OFF(skipinitialspace
), READONLY
},
305 { "doublequote", T_INT
, D_OFF(doublequote
), READONLY
},
306 { "strict", T_INT
, D_OFF(strict
), READONLY
},
310 static PyGetSetDef Dialect_getsetlist
[] = {
311 { "escapechar", (getter
)Dialect_get_escapechar
},
312 { "lineterminator", (getter
)Dialect_get_lineterminator
},
313 { "quotechar", (getter
)Dialect_get_quotechar
},
314 { "quoting", (getter
)Dialect_get_quoting
},
319 Dialect_dealloc(DialectObj
*self
)
321 Py_XDECREF(self
->lineterminator
);
322 Py_TYPE(self
)->tp_free((PyObject
*)self
);
325 static char *dialect_kws
[] = {
339 dialect_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwargs
)
342 PyObject
*ret
= NULL
;
343 PyObject
*dialect
= NULL
;
344 PyObject
*delimiter
= NULL
;
345 PyObject
*doublequote
= NULL
;
346 PyObject
*escapechar
= NULL
;
347 PyObject
*lineterminator
= NULL
;
348 PyObject
*quotechar
= NULL
;
349 PyObject
*quoting
= NULL
;
350 PyObject
*skipinitialspace
= NULL
;
351 PyObject
*strict
= NULL
;
353 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
,
354 "|OOOOOOOOO", dialect_kws
,
366 if (dialect
!= NULL
) {
367 if (IS_BASESTRING(dialect
)) {
368 dialect
= get_dialect_from_registry(dialect
);
374 /* Can we reuse this instance? */
375 if (PyObject_TypeCheck(dialect
, &Dialect_Type
) &&
379 lineterminator
== 0 &&
382 skipinitialspace
== 0 &&
387 self
= (DialectObj
*)type
->tp_alloc(type
, 0);
392 self
->lineterminator
= NULL
;
394 Py_XINCREF(delimiter
);
395 Py_XINCREF(doublequote
);
396 Py_XINCREF(escapechar
);
397 Py_XINCREF(lineterminator
);
398 Py_XINCREF(quotechar
);
400 Py_XINCREF(skipinitialspace
);
402 if (dialect
!= NULL
) {
403 #define DIALECT_GETATTR(v, n) \
405 v = PyObject_GetAttrString(dialect, n)
406 DIALECT_GETATTR(delimiter
, "delimiter");
407 DIALECT_GETATTR(doublequote
, "doublequote");
408 DIALECT_GETATTR(escapechar
, "escapechar");
409 DIALECT_GETATTR(lineterminator
, "lineterminator");
410 DIALECT_GETATTR(quotechar
, "quotechar");
411 DIALECT_GETATTR(quoting
, "quoting");
412 DIALECT_GETATTR(skipinitialspace
, "skipinitialspace");
413 DIALECT_GETATTR(strict
, "strict");
417 /* check types and convert to C values */
418 #define DIASET(meth, name, target, src, dflt) \
419 if (meth(name, target, src, dflt)) \
421 DIASET(_set_char
, "delimiter", &self
->delimiter
, delimiter
, ',');
422 DIASET(_set_bool
, "doublequote", &self
->doublequote
, doublequote
, 1);
423 DIASET(_set_char
, "escapechar", &self
->escapechar
, escapechar
, 0);
424 DIASET(_set_str
, "lineterminator", &self
->lineterminator
, lineterminator
, "\r\n");
425 DIASET(_set_char
, "quotechar", &self
->quotechar
, quotechar
, '"');
426 DIASET(_set_int
, "quoting", &self
->quoting
, quoting
, QUOTE_MINIMAL
);
427 DIASET(_set_bool
, "skipinitialspace", &self
->skipinitialspace
, skipinitialspace
, 0);
428 DIASET(_set_bool
, "strict", &self
->strict
, strict
, 0);
430 /* validate options */
431 if (dialect_check_quoting(self
->quoting
))
433 if (self
->delimiter
== 0) {
434 PyErr_SetString(PyExc_TypeError
,
435 "\"delimiter\" must be an 1-character string");
438 if (quotechar
== Py_None
&& quoting
== NULL
)
439 self
->quoting
= QUOTE_NONE
;
440 if (self
->quoting
!= QUOTE_NONE
&& self
->quotechar
== 0) {
441 PyErr_SetString(PyExc_TypeError
,
442 "quotechar must be set if quoting enabled");
445 if (self
->lineterminator
== 0) {
446 PyErr_SetString(PyExc_TypeError
, "lineterminator must be set");
450 ret
= (PyObject
*)self
;
455 Py_XDECREF(delimiter
);
456 Py_XDECREF(doublequote
);
457 Py_XDECREF(escapechar
);
458 Py_XDECREF(lineterminator
);
459 Py_XDECREF(quotechar
);
461 Py_XDECREF(skipinitialspace
);
467 PyDoc_STRVAR(Dialect_Type_doc
,
470 "The Dialect type records CSV parsing and generation options.\n");
472 static PyTypeObject Dialect_Type
= {
473 PyVarObject_HEAD_INIT(NULL
, 0)
474 "_csv.Dialect", /* tp_name */
475 sizeof(DialectObj
), /* tp_basicsize */
478 (destructor
)Dialect_dealloc
, /* tp_dealloc */
479 (printfunc
)0, /* tp_print */
480 (getattrfunc
)0, /* tp_getattr */
481 (setattrfunc
)0, /* tp_setattr */
482 (cmpfunc
)0, /* tp_compare */
483 (reprfunc
)0, /* tp_repr */
484 0, /* tp_as_number */
485 0, /* tp_as_sequence */
486 0, /* tp_as_mapping */
487 (hashfunc
)0, /* tp_hash */
488 (ternaryfunc
)0, /* tp_call */
489 (reprfunc
)0, /* tp_str */
492 0, /* tp_as_buffer */
493 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /* tp_flags */
494 Dialect_Type_doc
, /* tp_doc */
497 0, /* tp_richcompare */
498 0, /* tp_weaklistoffset */
502 Dialect_memberlist
, /* tp_members */
503 Dialect_getsetlist
, /* tp_getset */
506 0, /* tp_descr_get */
507 0, /* tp_descr_set */
508 0, /* tp_dictoffset */
511 dialect_new
, /* tp_new */
516 * Return an instance of the dialect type, given a Python instance or kwarg
517 * description of the dialect
520 _call_dialect(PyObject
*dialect_inst
, PyObject
*kwargs
)
525 ctor_args
= Py_BuildValue(dialect_inst
? "(O)" : "()", dialect_inst
);
526 if (ctor_args
== NULL
)
528 dialect
= PyObject_Call((PyObject
*)&Dialect_Type
, ctor_args
, kwargs
);
529 Py_DECREF(ctor_args
);
537 parse_save_field(ReaderObj
*self
)
541 field
= PyString_FromStringAndSize(self
->field
, self
->field_len
);
545 if (self
->numeric_field
) {
548 self
->numeric_field
= 0;
549 tmp
= PyNumber_Float(field
);
557 PyList_Append(self
->fields
, field
);
563 parse_grow_buff(ReaderObj
*self
)
565 if (self
->field_size
== 0) {
566 self
->field_size
= 4096;
567 if (self
->field
!= NULL
)
568 PyMem_Free(self
->field
);
569 self
->field
= PyMem_Malloc(self
->field_size
);
572 if (self
->field_size
> INT_MAX
/ 2) {
576 self
->field_size
*= 2;
577 self
->field
= PyMem_Realloc(self
->field
, self
->field_size
);
579 if (self
->field
== NULL
) {
587 parse_add_char(ReaderObj
*self
, char c
)
589 if (self
->field_len
>= field_limit
) {
590 PyErr_Format(error_obj
, "field larger than field limit (%ld)",
594 if (self
->field_len
== self
->field_size
&& !parse_grow_buff(self
))
596 self
->field
[self
->field_len
++] = c
;
601 parse_process_char(ReaderObj
*self
, char c
)
603 DialectObj
*dialect
= self
->dialect
;
605 switch (self
->state
) {
607 /* start of record */
609 /* empty line - return [] */
611 else if (c
== '\n' || c
== '\r') {
612 self
->state
= EAT_CRNL
;
615 /* normal character - handle as START_FIELD */
616 self
->state
= START_FIELD
;
619 /* expecting field */
620 if (c
== '\n' || c
== '\r' || c
== '\0') {
621 /* save empty field - return [fields] */
622 if (parse_save_field(self
) < 0)
624 self
->state
= (c
== '\0' ? START_RECORD
: EAT_CRNL
);
626 else if (c
== dialect
->quotechar
&&
627 dialect
->quoting
!= QUOTE_NONE
) {
628 /* start quoted field */
629 self
->state
= IN_QUOTED_FIELD
;
631 else if (c
== dialect
->escapechar
) {
632 /* possible escaped character */
633 self
->state
= ESCAPED_CHAR
;
635 else if (c
== ' ' && dialect
->skipinitialspace
)
636 /* ignore space at start of field */
638 else if (c
== dialect
->delimiter
) {
639 /* save empty field */
640 if (parse_save_field(self
) < 0)
644 /* begin new unquoted field */
645 if (dialect
->quoting
== QUOTE_NONNUMERIC
)
646 self
->numeric_field
= 1;
647 if (parse_add_char(self
, c
) < 0)
649 self
->state
= IN_FIELD
;
656 if (parse_add_char(self
, c
) < 0)
658 self
->state
= IN_FIELD
;
662 /* in unquoted field */
663 if (c
== '\n' || c
== '\r' || c
== '\0') {
664 /* end of line - return [fields] */
665 if (parse_save_field(self
) < 0)
667 self
->state
= (c
== '\0' ? START_RECORD
: EAT_CRNL
);
669 else if (c
== dialect
->escapechar
) {
670 /* possible escaped character */
671 self
->state
= ESCAPED_CHAR
;
673 else if (c
== dialect
->delimiter
) {
674 /* save field - wait for new field */
675 if (parse_save_field(self
) < 0)
677 self
->state
= START_FIELD
;
680 /* normal character - save in field */
681 if (parse_add_char(self
, c
) < 0)
686 case IN_QUOTED_FIELD
:
687 /* in quoted field */
690 else if (c
== dialect
->escapechar
) {
691 /* Possible escape character */
692 self
->state
= ESCAPE_IN_QUOTED_FIELD
;
694 else if (c
== dialect
->quotechar
&&
695 dialect
->quoting
!= QUOTE_NONE
) {
696 if (dialect
->doublequote
) {
697 /* doublequote; " represented by "" */
698 self
->state
= QUOTE_IN_QUOTED_FIELD
;
701 /* end of quote part of field */
702 self
->state
= IN_FIELD
;
706 /* normal character - save in field */
707 if (parse_add_char(self
, c
) < 0)
712 case ESCAPE_IN_QUOTED_FIELD
:
715 if (parse_add_char(self
, c
) < 0)
717 self
->state
= IN_QUOTED_FIELD
;
720 case QUOTE_IN_QUOTED_FIELD
:
721 /* doublequote - seen a quote in a quoted field */
722 if (dialect
->quoting
!= QUOTE_NONE
&&
723 c
== dialect
->quotechar
) {
725 if (parse_add_char(self
, c
) < 0)
727 self
->state
= IN_QUOTED_FIELD
;
729 else if (c
== dialect
->delimiter
) {
730 /* save field - wait for new field */
731 if (parse_save_field(self
) < 0)
733 self
->state
= START_FIELD
;
735 else if (c
== '\n' || c
== '\r' || c
== '\0') {
736 /* end of line - return [fields] */
737 if (parse_save_field(self
) < 0)
739 self
->state
= (c
== '\0' ? START_RECORD
: EAT_CRNL
);
741 else if (!dialect
->strict
) {
742 if (parse_add_char(self
, c
) < 0)
744 self
->state
= IN_FIELD
;
748 PyErr_Format(error_obj
, "'%c' expected after '%c'",
756 if (c
== '\n' || c
== '\r')
759 self
->state
= START_RECORD
;
761 PyErr_Format(error_obj
, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
771 parse_reset(ReaderObj
*self
)
773 Py_XDECREF(self
->fields
);
774 self
->fields
= PyList_New(0);
775 if (self
->fields
== NULL
)
778 self
->state
= START_RECORD
;
779 self
->numeric_field
= 0;
784 Reader_iternext(ReaderObj
*self
)
787 PyObject
*fields
= NULL
;
791 if (parse_reset(self
) < 0)
794 lineobj
= PyIter_Next(self
->input_iter
);
795 if (lineobj
== NULL
) {
796 /* End of input OR exception */
797 if (!PyErr_Occurred() && (self
->field_len
!= 0 ||
798 self
->state
== IN_QUOTED_FIELD
)) {
799 if (self
->dialect
->strict
)
800 PyErr_SetString(error_obj
, "unexpected end of data");
801 else if (parse_save_field(self
) >= 0 )
808 line
= PyString_AsString(lineobj
);
809 linelen
= PyString_Size(lineobj
);
811 if (line
== NULL
|| linelen
< 0) {
819 PyErr_Format(error_obj
,
820 "line contains NULL byte");
823 if (parse_process_char(self
, c
) < 0) {
829 if (parse_process_char(self
, 0) < 0)
831 } while (self
->state
!= START_RECORD
);
833 fields
= self
->fields
;
840 Reader_dealloc(ReaderObj
*self
)
842 PyObject_GC_UnTrack(self
);
843 Py_XDECREF(self
->dialect
);
844 Py_XDECREF(self
->input_iter
);
845 Py_XDECREF(self
->fields
);
846 if (self
->field
!= NULL
)
847 PyMem_Free(self
->field
);
848 PyObject_GC_Del(self
);
852 Reader_traverse(ReaderObj
*self
, visitproc visit
, void *arg
)
854 Py_VISIT(self
->dialect
);
855 Py_VISIT(self
->input_iter
);
856 Py_VISIT(self
->fields
);
861 Reader_clear(ReaderObj
*self
)
863 Py_CLEAR(self
->dialect
);
864 Py_CLEAR(self
->input_iter
);
865 Py_CLEAR(self
->fields
);
869 PyDoc_STRVAR(Reader_Type_doc
,
872 "Reader objects are responsible for reading and parsing tabular data\n"
876 static struct PyMethodDef Reader_methods
[] = {
879 #define R_OFF(x) offsetof(ReaderObj, x)
881 static struct PyMemberDef Reader_memberlist
[] = {
882 { "dialect", T_OBJECT
, R_OFF(dialect
), RO
},
883 { "line_num", T_ULONG
, R_OFF(line_num
), RO
},
888 static PyTypeObject Reader_Type
= {
889 PyVarObject_HEAD_INIT(NULL
, 0)
890 "_csv.reader", /*tp_name*/
891 sizeof(ReaderObj
), /*tp_basicsize*/
894 (destructor
)Reader_dealloc
, /*tp_dealloc*/
895 (printfunc
)0, /*tp_print*/
896 (getattrfunc
)0, /*tp_getattr*/
897 (setattrfunc
)0, /*tp_setattr*/
898 (cmpfunc
)0, /*tp_compare*/
899 (reprfunc
)0, /*tp_repr*/
901 0, /*tp_as_sequence*/
903 (hashfunc
)0, /*tp_hash*/
904 (ternaryfunc
)0, /*tp_call*/
905 (reprfunc
)0, /*tp_str*/
909 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
|
910 Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
911 Reader_Type_doc
, /*tp_doc*/
912 (traverseproc
)Reader_traverse
, /*tp_traverse*/
913 (inquiry
)Reader_clear
, /*tp_clear*/
914 0, /*tp_richcompare*/
915 0, /*tp_weaklistoffset*/
916 PyObject_SelfIter
, /*tp_iter*/
917 (getiterfunc
)Reader_iternext
, /*tp_iternext*/
918 Reader_methods
, /*tp_methods*/
919 Reader_memberlist
, /*tp_members*/
925 csv_reader(PyObject
*module
, PyObject
*args
, PyObject
*keyword_args
)
927 PyObject
* iterator
, * dialect
= NULL
;
928 ReaderObj
* self
= PyObject_GC_New(ReaderObj
, &Reader_Type
);
933 self
->dialect
= NULL
;
935 self
->input_iter
= NULL
;
937 self
->field_size
= 0;
940 if (parse_reset(self
) < 0) {
945 if (!PyArg_UnpackTuple(args
, "", 1, 2, &iterator
, &dialect
)) {
949 self
->input_iter
= PyObject_GetIter(iterator
);
950 if (self
->input_iter
== NULL
) {
951 PyErr_SetString(PyExc_TypeError
,
952 "argument 1 must be an iterator");
956 self
->dialect
= (DialectObj
*)_call_dialect(dialect
, keyword_args
);
957 if (self
->dialect
== NULL
) {
962 PyObject_GC_Track(self
);
963 return (PyObject
*)self
;
969 /* ---------------------------------------------------------------- */
971 join_reset(WriterObj
*self
)
974 self
->num_fields
= 0;
977 #define MEM_INCR 32768
979 /* Calculate new record length or append field to record. Return new
983 join_append_data(WriterObj
*self
, char *field
, int quote_empty
,
984 int *quoted
, int copy_phase
)
986 DialectObj
*dialect
= self
->dialect
;
993 self->rec[rec_len] = c;\
997 lineterm
= PyString_AsString(dialect
->lineterminator
);
998 if (lineterm
== NULL
)
1001 rec_len
= self
->rec_len
;
1003 /* If this is not the first field we need a field separator */
1004 if (self
->num_fields
> 0)
1005 ADDCH(dialect
->delimiter
);
1007 /* Handle preceding quote */
1008 if (copy_phase
&& *quoted
)
1009 ADDCH(dialect
->quotechar
);
1011 /* Copy/count field data */
1014 int want_escape
= 0;
1019 if (c
== dialect
->delimiter
||
1020 c
== dialect
->escapechar
||
1021 c
== dialect
->quotechar
||
1022 strchr(lineterm
, c
)) {
1023 if (dialect
->quoting
== QUOTE_NONE
)
1026 if (c
== dialect
->quotechar
) {
1027 if (dialect
->doublequote
)
1028 ADDCH(dialect
->quotechar
);
1036 if (!dialect
->escapechar
) {
1037 PyErr_Format(error_obj
,
1038 "need to escape, but no escapechar set");
1041 ADDCH(dialect
->escapechar
);
1044 /* Copy field character into record buffer.
1049 /* If field is empty check if it needs to be quoted.
1051 if (i
== 0 && quote_empty
) {
1052 if (dialect
->quoting
== QUOTE_NONE
) {
1053 PyErr_Format(error_obj
,
1054 "single empty field record must be quoted");
1063 ADDCH(dialect
->quotechar
);
1072 join_check_rec_size(WriterObj
*self
, int rec_len
)
1075 if (rec_len
< 0 || rec_len
> INT_MAX
- MEM_INCR
) {
1080 if (rec_len
> self
->rec_size
) {
1081 if (self
->rec_size
== 0) {
1082 self
->rec_size
= (rec_len
/ MEM_INCR
+ 1) * MEM_INCR
;
1083 if (self
->rec
!= NULL
)
1084 PyMem_Free(self
->rec
);
1085 self
->rec
= PyMem_Malloc(self
->rec_size
);
1088 char *old_rec
= self
->rec
;
1090 self
->rec_size
= (rec_len
/ MEM_INCR
+ 1) * MEM_INCR
;
1091 self
->rec
= PyMem_Realloc(self
->rec
, self
->rec_size
);
1092 if (self
->rec
== NULL
)
1093 PyMem_Free(old_rec
);
1095 if (self
->rec
== NULL
) {
1104 join_append(WriterObj
*self
, char *field
, int *quoted
, int quote_empty
)
1108 rec_len
= join_append_data(self
, field
, quote_empty
, quoted
, 0);
1112 /* grow record buffer if necessary */
1113 if (!join_check_rec_size(self
, rec_len
))
1116 self
->rec_len
= join_append_data(self
, field
, quote_empty
, quoted
, 1);
1123 join_append_lineterminator(WriterObj
*self
)
1128 terminator_len
= PyString_Size(self
->dialect
->lineterminator
);
1129 if (terminator_len
== -1)
1132 /* grow record buffer if necessary */
1133 if (!join_check_rec_size(self
, self
->rec_len
+ terminator_len
))
1136 terminator
= PyString_AsString(self
->dialect
->lineterminator
);
1137 if (terminator
== NULL
)
1139 memmove(self
->rec
+ self
->rec_len
, terminator
, terminator_len
);
1140 self
->rec_len
+= terminator_len
;
1145 PyDoc_STRVAR(csv_writerow_doc
,
1146 "writerow(sequence)\n"
1148 "Construct and write a CSV record from a sequence of fields. Non-string\n"
1149 "elements will be converted to string.");
1152 csv_writerow(WriterObj
*self
, PyObject
*seq
)
1154 DialectObj
*dialect
= self
->dialect
;
1157 if (!PySequence_Check(seq
))
1158 return PyErr_Format(error_obj
, "sequence expected");
1160 len
= PySequence_Length(seq
);
1164 /* Join all fields in internal buffer.
1167 for (i
= 0; i
< len
; i
++) {
1172 field
= PySequence_GetItem(seq
, i
);
1176 switch (dialect
->quoting
) {
1177 case QUOTE_NONNUMERIC
:
1178 quoted
= !PyNumber_Check(field
);
1188 if (PyString_Check(field
)) {
1189 append_ok
= join_append(self
,
1190 PyString_AS_STRING(field
),
1194 else if (field
== Py_None
) {
1195 append_ok
= join_append(self
, "", "ed
, len
== 1);
1201 if (PyFloat_Check(field
)) {
1202 str
= PyObject_Repr(field
);
1204 str
= PyObject_Str(field
);
1210 append_ok
= join_append(self
, PyString_AS_STRING(str
),
1218 /* Add line terminator.
1220 if (!join_append_lineterminator(self
))
1223 return PyObject_CallFunction(self
->writeline
,
1224 "(s#)", self
->rec
, self
->rec_len
);
1227 PyDoc_STRVAR(csv_writerows_doc
,
1228 "writerows(sequence of sequences)\n"
1230 "Construct and write a series of sequences to a csv file. Non-string\n"
1231 "elements will be converted to string.");
1234 csv_writerows(WriterObj
*self
, PyObject
*seqseq
)
1236 PyObject
*row_iter
, *row_obj
, *result
;
1238 row_iter
= PyObject_GetIter(seqseq
);
1239 if (row_iter
== NULL
) {
1240 PyErr_SetString(PyExc_TypeError
,
1241 "writerows() argument must be iterable");
1244 while ((row_obj
= PyIter_Next(row_iter
))) {
1245 result
= csv_writerow(self
, row_obj
);
1248 Py_DECREF(row_iter
);
1254 Py_DECREF(row_iter
);
1255 if (PyErr_Occurred())
1261 static struct PyMethodDef Writer_methods
[] = {
1262 { "writerow", (PyCFunction
)csv_writerow
, METH_O
, csv_writerow_doc
},
1263 { "writerows", (PyCFunction
)csv_writerows
, METH_O
, csv_writerows_doc
},
1267 #define W_OFF(x) offsetof(WriterObj, x)
1269 static struct PyMemberDef Writer_memberlist
[] = {
1270 { "dialect", T_OBJECT
, W_OFF(dialect
), RO
},
1275 Writer_dealloc(WriterObj
*self
)
1277 PyObject_GC_UnTrack(self
);
1278 Py_XDECREF(self
->dialect
);
1279 Py_XDECREF(self
->writeline
);
1280 if (self
->rec
!= NULL
)
1281 PyMem_Free(self
->rec
);
1282 PyObject_GC_Del(self
);
1286 Writer_traverse(WriterObj
*self
, visitproc visit
, void *arg
)
1288 Py_VISIT(self
->dialect
);
1289 Py_VISIT(self
->writeline
);
1294 Writer_clear(WriterObj
*self
)
1296 Py_CLEAR(self
->dialect
);
1297 Py_CLEAR(self
->writeline
);
1301 PyDoc_STRVAR(Writer_Type_doc
,
1304 "Writer objects are responsible for generating tabular data\n"
1305 "in CSV format from sequence input.\n"
1308 static PyTypeObject Writer_Type
= {
1309 PyVarObject_HEAD_INIT(NULL
, 0)
1310 "_csv.writer", /*tp_name*/
1311 sizeof(WriterObj
), /*tp_basicsize*/
1314 (destructor
)Writer_dealloc
, /*tp_dealloc*/
1315 (printfunc
)0, /*tp_print*/
1316 (getattrfunc
)0, /*tp_getattr*/
1317 (setattrfunc
)0, /*tp_setattr*/
1318 (cmpfunc
)0, /*tp_compare*/
1319 (reprfunc
)0, /*tp_repr*/
1321 0, /*tp_as_sequence*/
1322 0, /*tp_as_mapping*/
1323 (hashfunc
)0, /*tp_hash*/
1324 (ternaryfunc
)0, /*tp_call*/
1325 (reprfunc
)0, /*tp_str*/
1329 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
|
1330 Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
1332 (traverseproc
)Writer_traverse
, /*tp_traverse*/
1333 (inquiry
)Writer_clear
, /*tp_clear*/
1334 0, /*tp_richcompare*/
1335 0, /*tp_weaklistoffset*/
1336 (getiterfunc
)0, /*tp_iter*/
1337 (getiterfunc
)0, /*tp_iternext*/
1338 Writer_methods
, /*tp_methods*/
1339 Writer_memberlist
, /*tp_members*/
1344 csv_writer(PyObject
*module
, PyObject
*args
, PyObject
*keyword_args
)
1346 PyObject
* output_file
, * dialect
= NULL
;
1347 WriterObj
* self
= PyObject_GC_New(WriterObj
, &Writer_Type
);
1352 self
->dialect
= NULL
;
1353 self
->writeline
= NULL
;
1358 self
->num_fields
= 0;
1360 if (!PyArg_UnpackTuple(args
, "", 1, 2, &output_file
, &dialect
)) {
1364 self
->writeline
= PyObject_GetAttrString(output_file
, "write");
1365 if (self
->writeline
== NULL
|| !PyCallable_Check(self
->writeline
)) {
1366 PyErr_SetString(PyExc_TypeError
,
1367 "argument 1 must have a \"write\" method");
1371 self
->dialect
= (DialectObj
*)_call_dialect(dialect
, keyword_args
);
1372 if (self
->dialect
== NULL
) {
1376 PyObject_GC_Track(self
);
1377 return (PyObject
*)self
;
1384 csv_list_dialects(PyObject
*module
, PyObject
*args
)
1386 return PyDict_Keys(dialects
);
1390 csv_register_dialect(PyObject
*module
, PyObject
*args
, PyObject
*kwargs
)
1392 PyObject
*name_obj
, *dialect_obj
= NULL
;
1395 if (!PyArg_UnpackTuple(args
, "", 1, 2, &name_obj
, &dialect_obj
))
1397 if (!IS_BASESTRING(name_obj
)) {
1398 PyErr_SetString(PyExc_TypeError
,
1399 "dialect name must be a string or unicode");
1402 dialect
= _call_dialect(dialect_obj
, kwargs
);
1403 if (dialect
== NULL
)
1405 if (PyDict_SetItem(dialects
, name_obj
, dialect
) < 0) {
1415 csv_unregister_dialect(PyObject
*module
, PyObject
*name_obj
)
1417 if (PyDict_DelItem(dialects
, name_obj
) < 0)
1418 return PyErr_Format(error_obj
, "unknown dialect");
1424 csv_get_dialect(PyObject
*module
, PyObject
*name_obj
)
1426 return get_dialect_from_registry(name_obj
);
1430 csv_field_size_limit(PyObject
*module
, PyObject
*args
)
1432 PyObject
*new_limit
= NULL
;
1433 long old_limit
= field_limit
;
1435 if (!PyArg_UnpackTuple(args
, "field_size_limit", 0, 1, &new_limit
))
1437 if (new_limit
!= NULL
) {
1438 if (!PyInt_Check(new_limit
)) {
1439 PyErr_Format(PyExc_TypeError
,
1440 "limit must be an integer");
1443 field_limit
= PyInt_AsLong(new_limit
);
1445 return PyInt_FromLong(old_limit
);
1452 PyDoc_STRVAR(csv_module_doc
,
1453 "CSV parsing and writing.\n"
1455 "This module provides classes that assist in the reading and writing\n"
1456 "of Comma Separated Value (CSV) files, and implements the interface\n"
1457 "described by PEP 305. Although many CSV files are simple to parse,\n"
1458 "the format is not formally defined by a stable specification and\n"
1459 "is subtle enough that parsing lines of a CSV file with something\n"
1460 "like line.split(\",\") is bound to fail. The module supports three\n"
1461 "basic APIs: reading, writing, and registration of dialects.\n"
1464 "DIALECT REGISTRATION:\n"
1466 "Readers and writers support a dialect argument, which is a convenient\n"
1467 "handle on a group of settings. When the dialect argument is a string,\n"
1468 "it identifies one of the dialects previously registered with the module.\n"
1469 "If it is a class or instance, the attributes of the argument are used as\n"
1470 "the settings for the reader or writer:\n"
1473 " delimiter = ','\n"
1474 " quotechar = '\"'\n"
1475 " escapechar = None\n"
1476 " doublequote = True\n"
1477 " skipinitialspace = False\n"
1478 " lineterminator = '\\r\\n'\n"
1479 " quoting = QUOTE_MINIMAL\n"
1483 " * quotechar - specifies a one-character string to use as the \n"
1484 " quoting character. It defaults to '\"'.\n"
1485 " * delimiter - specifies a one-character string to use as the \n"
1486 " field separator. It defaults to ','.\n"
1487 " * skipinitialspace - specifies how to interpret whitespace which\n"
1488 " immediately follows a delimiter. It defaults to False, which\n"
1489 " means that whitespace immediately following a delimiter is part\n"
1490 " of the following field.\n"
1491 " * lineterminator - specifies the character sequence which should \n"
1492 " terminate rows.\n"
1493 " * quoting - controls when quotes should be generated by the writer.\n"
1494 " It can take on any of the following module constants:\n"
1496 " csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1497 " field contains either the quotechar or the delimiter\n"
1498 " csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1499 " csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1500 " fields which do not parse as integers or floating point\n"
1502 " csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1503 " * escapechar - specifies a one-character string used to escape \n"
1504 " the delimiter when quoting is set to QUOTE_NONE.\n"
1505 " * doublequote - controls the handling of quotes inside fields. When\n"
1506 " True, two consecutive quotes are interpreted as one during read,\n"
1507 " and when writing, each quote character embedded in the data is\n"
1508 " written as two quotes\n");
1510 PyDoc_STRVAR(csv_reader_doc
,
1511 " csv_reader = reader(iterable [, dialect='excel']\n"
1512 " [optional keyword args])\n"
1513 " for row in csv_reader:\n"
1516 "The \"iterable\" argument can be any object that returns a line\n"
1517 "of input for each iteration, such as a file object or a list. The\n"
1518 "optional \"dialect\" parameter is discussed below. The function\n"
1519 "also accepts optional keyword arguments which override settings\n"
1520 "provided by the dialect.\n"
1522 "The returned object is an iterator. Each iteration returns a row\n"
1523 "of the CSV file (which can span multiple input lines):\n");
1525 PyDoc_STRVAR(csv_writer_doc
,
1526 " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1527 " [optional keyword args])\n"
1528 " for row in sequence:\n"
1529 " csv_writer.writerow(row)\n"
1533 " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1534 " [optional keyword args])\n"
1535 " csv_writer.writerows(rows)\n"
1537 "The \"fileobj\" argument can be any object that supports the file API.\n");
1539 PyDoc_STRVAR(csv_list_dialects_doc
,
1540 "Return a list of all know dialect names.\n"
1541 " names = csv.list_dialects()");
1543 PyDoc_STRVAR(csv_get_dialect_doc
,
1544 "Return the dialect instance associated with name.\n"
1545 " dialect = csv.get_dialect(name)");
1547 PyDoc_STRVAR(csv_register_dialect_doc
,
1548 "Create a mapping from a string name to a dialect class.\n"
1549 " dialect = csv.register_dialect(name, dialect)");
1551 PyDoc_STRVAR(csv_unregister_dialect_doc
,
1552 "Delete the name/dialect mapping associated with a string name.\n"
1553 " csv.unregister_dialect(name)");
1555 PyDoc_STRVAR(csv_field_size_limit_doc
,
1556 "Sets an upper limit on parsed fields.\n"
1557 " csv.field_size_limit([limit])\n"
1559 "Returns old limit. If limit is not given, no new limit is set and\n"
1560 "the old limit is returned");
1562 static struct PyMethodDef csv_methods
[] = {
1563 { "reader", (PyCFunction
)csv_reader
,
1564 METH_VARARGS
| METH_KEYWORDS
, csv_reader_doc
},
1565 { "writer", (PyCFunction
)csv_writer
,
1566 METH_VARARGS
| METH_KEYWORDS
, csv_writer_doc
},
1567 { "list_dialects", (PyCFunction
)csv_list_dialects
,
1568 METH_NOARGS
, csv_list_dialects_doc
},
1569 { "register_dialect", (PyCFunction
)csv_register_dialect
,
1570 METH_VARARGS
| METH_KEYWORDS
, csv_register_dialect_doc
},
1571 { "unregister_dialect", (PyCFunction
)csv_unregister_dialect
,
1572 METH_O
, csv_unregister_dialect_doc
},
1573 { "get_dialect", (PyCFunction
)csv_get_dialect
,
1574 METH_O
, csv_get_dialect_doc
},
1575 { "field_size_limit", (PyCFunction
)csv_field_size_limit
,
1576 METH_VARARGS
, csv_field_size_limit_doc
},
1586 if (PyType_Ready(&Dialect_Type
) < 0)
1589 if (PyType_Ready(&Reader_Type
) < 0)
1592 if (PyType_Ready(&Writer_Type
) < 0)
1595 /* Create the module and add the functions */
1596 module
= Py_InitModule3("_csv", csv_methods
, csv_module_doc
);
1600 /* Add version to the module. */
1601 if (PyModule_AddStringConstant(module
, "__version__",
1602 MODULE_VERSION
) == -1)
1605 /* Add _dialects dictionary */
1606 dialects
= PyDict_New();
1607 if (dialects
== NULL
)
1609 if (PyModule_AddObject(module
, "_dialects", dialects
))
1612 /* Add quote styles into dictionary */
1613 for (style
= quote_styles
; style
->name
; style
++) {
1614 if (PyModule_AddIntConstant(module
, style
->name
,
1615 style
->style
) == -1)
1619 /* Add the Dialect type */
1620 Py_INCREF(&Dialect_Type
);
1621 if (PyModule_AddObject(module
, "Dialect", (PyObject
*)&Dialect_Type
))
1624 /* Add the CSV exception object to the module. */
1625 error_obj
= PyErr_NewException("_csv.Error", NULL
, NULL
);
1626 if (error_obj
== NULL
)
1628 PyModule_AddObject(module
, "Error", error_obj
);