AppPkg/Applications/Python/Python-2.7.10/Modules/_csv.c

   1 /* csv module */
   2
   3 /*
   4
   5 This module provides the low-level underpinnings of a CSV reading/writing
   6 module.  Users should not use this module directly, but import the csv.py
   7 module instead.
   8
   9 **** For people modifying this code, please note that as of this writing
  10 **** (2003-03-23), it is intended that this code should work with Python
  11 **** 2.2.
  12
  13 */
  14
  15 #define MODULE_VERSION "1.0"
  16
  17 #include "Python.h"
  18 #include "structmember.h"
  19
  20
/* begin 2.2 compatibility macros */
/*
 * Each macro below is defined only when the Python headers have not
 * already supplied it, so the definitions here act purely as fallbacks.
 */
#ifndef PyDoc_STRVAR
/* Define macros for inline documentation. */
/* PyDoc_VAR declares a static char array; PyDoc_STRVAR also initialises it. */
#define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
/* Without WITH_DOC_STRINGS every docstring collapses to the empty string. */
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str) str
#else
#define PyDoc_STR(str) ""
#endif
#endif /* ifndef PyDoc_STRVAR */

/* Return type (and C linkage under C++) of the module init function. */
#ifndef PyMODINIT_FUNC
#       if defined(__cplusplus)
#               define PyMODINIT_FUNC extern "C" void
#       else /* __cplusplus */
#               define PyMODINIT_FUNC void
#       endif /* __cplusplus */
#endif

/*
 * Py_CLEAR: drop the reference held in `op`.  The slot is set to NULL
 * before the DECREF so the slot never holds a pointer to an object that
 * is being released.  Does nothing when `op` is already NULL.
 */
#ifndef Py_CLEAR
#define Py_CLEAR(op)                                            \
    do {                                                        \
        if (op) {                                               \
            PyObject *tmp = (PyObject *)(op);                   \
            (op) = NULL;                                        \
            Py_DECREF(tmp);                                     \
        }                                                       \
    } while (0)
#endif
/*
 * Py_VISIT: helper for tp_traverse implementations.  It expects the
 * enclosing function to have variables named `visit` and `arg`, and it
 * returns from that function as soon as a visit yields a non-zero value.
 */
#ifndef Py_VISIT
#define Py_VISIT(op)                                                    \
    do {                                                                \
        if (op) {                                                       \
            int vret = visit((PyObject *)(op), arg);                    \
            if (vret)                                                   \
                return vret;                                            \
        }                                                               \
    } while (0)
#endif

/* end 2.2 compatibility macros */
  63
/* Non-zero when `o` is an instance of PyBaseString_Type or of a subtype. */
#define IS_BASESTRING(o) \
    PyObject_TypeCheck(o, &PyBaseString_Type)

/* Module-wide state shared by the functions in this file. */
static PyObject *error_obj;     /* CSV exception */
static PyObject *dialects;      /* Dialect registry (dict looked up by name) */
static long field_limit = 128 * 1024;   /* max parsed field size */
  70
  71 typedef enum {
  72     START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
  73     IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
  74     EAT_CRNL
  75 } ParserState;
  76
  77 typedef enum {
  78     QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
  79 } QuoteStyle;
  80
  81 typedef struct {
  82     QuoteStyle style;
  83     char *name;
  84 } StyleDesc;
  85
  86 static StyleDesc quote_styles[] = {
  87     { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
  88     { QUOTE_ALL,        "QUOTE_ALL" },
  89     { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
  90     { QUOTE_NONE,       "QUOTE_NONE" },
  91     { 0 }
  92 };
  93
/*
 * Instance data of a _csv.Dialect object.  The character options use
 * '\0' to mean "not set"; the Python-level getters report that as None.
 */
typedef struct {
    PyObject_HEAD

    int doublequote;            /* is " represented by ""? */
    char delimiter;             /* field separator */
    char quotechar;             /* quote character ('\0' if none) */
    char escapechar;            /* escape character ('\0' if none) */
    int skipinitialspace;       /* ignore spaces following delimiter? */
    PyObject *lineterminator; /* string to write between records (owned ref) */
    int quoting;                /* quoting style, one of the QuoteStyle values */

    int strict;                 /* raise exception on bad CSV */
} DialectObj;

/* The type object is defined further down, after its methods. */
staticforward PyTypeObject Dialect_Type;
 109
/*
 * Instance data of a _csv.reader object: the input iterator, the dialect
 * in force, and the parser's working state for the record being read.
 */
typedef struct {
    PyObject_HEAD

    PyObject *input_iter;   /* iterate over this for input lines */

    DialectObj *dialect;    /* parsing dialect */

    PyObject *fields;           /* field list for current record */
    ParserState state;          /* current CSV parse state */
    char *field;                /* build current field in here (PyMem buffer,
                                   no terminating NUL is stored) */
    int field_size;             /* size of allocated buffer */
    int field_len;              /* length of current field */
    int numeric_field;          /* treat field as numeric (converted to
                                   float when the field is saved) */
    unsigned long line_num;     /* Source-file line number: bumped once per
                                   item taken from input_iter */
} ReaderObj;

/* The type object is defined further down, after its methods. */
staticforward PyTypeObject Reader_Type;

/* Exact type test: subclasses of Reader_Type do not match. */
#define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
 129
/*
 * Instance data of the writer object.  The code that uses these fields
 * lies outside this part of the file, so the field notes below are the
 * original author's descriptions.
 */
typedef struct {
    PyObject_HEAD

    PyObject *writeline;    /* write output lines to this file */

    DialectObj *dialect;    /* parsing dialect */

    char *rec;                  /* buffer for parser.join */
    int rec_size;               /* size of allocated record */
    int rec_len;                /* length of record */
    int num_fields;             /* number of fields in record */
} WriterObj;

/* The type object is defined later in the file. */
staticforward PyTypeObject Writer_Type;
 144
 145 /*
 146  * DIALECT class
 147  */
 148
 149 static PyObject *
 150 get_dialect_from_registry(PyObject * name_obj)
 151 {
 152     PyObject *dialect_obj;
 153
 154     dialect_obj = PyDict_GetItem(dialects, name_obj);
 155     if (dialect_obj == NULL) {
 156         if (!PyErr_Occurred())
 157             PyErr_Format(error_obj, "unknown dialect");
 158     }
 159     else
 160         Py_INCREF(dialect_obj);
 161     return dialect_obj;
 162 }
 163
 164 static PyObject *
 165 get_string(PyObject *str)
 166 {
 167     Py_XINCREF(str);
 168     return str;
 169 }
 170
 171 static PyObject *
 172 get_nullchar_as_None(char c)
 173 {
 174     if (c == '\0') {
 175         Py_INCREF(Py_None);
 176         return Py_None;
 177     }
 178     else
 179         return PyString_FromStringAndSize((char*)&c, 1);
 180 }
 181
 182 static PyObject *
 183 Dialect_get_lineterminator(DialectObj *self)
 184 {
 185     return get_string(self->lineterminator);
 186 }
 187
 188 static PyObject *
 189 Dialect_get_escapechar(DialectObj *self)
 190 {
 191     return get_nullchar_as_None(self->escapechar);
 192 }
 193
 194 static PyObject *
 195 Dialect_get_quotechar(DialectObj *self)
 196 {
 197     return get_nullchar_as_None(self->quotechar);
 198 }
 199
 200 static PyObject *
 201 Dialect_get_quoting(DialectObj *self)
 202 {
 203     return PyInt_FromLong(self->quoting);
 204 }
 205
 206 static int
 207 _set_bool(const char *name, int *target, PyObject *src, int dflt)
 208 {
 209     if (src == NULL)
 210         *target = dflt;
 211     else {
 212         int b = PyObject_IsTrue(src);
 213         if (b < 0)
 214             return -1;
 215         *target = b;
 216     }
 217     return 0;
 218 }
 219
 220 static int
 221 _set_int(const char *name, int *target, PyObject *src, int dflt)
 222 {
 223     if (src == NULL)
 224         *target = dflt;
 225     else {
 226         if (!PyInt_Check(src)) {
 227             PyErr_Format(PyExc_TypeError,
 228                          "\"%s\" must be an integer", name);
 229             return -1;
 230         }
 231         *target = PyInt_AsLong(src);
 232     }
 233     return 0;
 234 }
 235
 236 static int
 237 _set_char(const char *name, char *target, PyObject *src, char dflt)
 238 {
 239     if (src == NULL)
 240         *target = dflt;
 241     else {
 242         *target = '\0';
 243         if (src != Py_None) {
 244             Py_ssize_t len;
 245             if (!PyString_Check(src)) {
 246                 PyErr_Format(PyExc_TypeError,
 247                     "\"%s\" must be string, not %.200s", name,
 248                     src->ob_type->tp_name);
 249                 return -1;
 250             }
 251             len = PyString_GET_SIZE(src);
 252             if (len > 1) {
 253                 PyErr_Format(PyExc_TypeError,
 254                     "\"%s\" must be an 1-character string",
 255                     name);
 256                 return -1;
 257             }
 258             if (len > 0)
 259                 *target = *PyString_AS_STRING(src);
 260         }
 261     }
 262     return 0;
 263 }
 264
 265 static int
 266 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
 267 {
 268     if (src == NULL)
 269         *target = PyString_FromString(dflt);
 270     else {
 271         if (src == Py_None)
 272             *target = NULL;
 273         else if (!IS_BASESTRING(src)) {
 274             PyErr_Format(PyExc_TypeError,
 275                          "\"%s\" must be a string", name);
 276             return -1;
 277         }
 278         else {
 279             Py_XDECREF(*target);
 280             Py_INCREF(src);
 281             *target = src;
 282         }
 283     }
 284     return 0;
 285 }
 286
 287 static int
 288 dialect_check_quoting(int quoting)
 289 {
 290     StyleDesc *qs = quote_styles;
 291
 292     for (qs = quote_styles; qs->name; qs++) {
 293         if (qs->style == quoting)
 294             return 0;
 295     }
 296     PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
 297     return -1;
 298 }
 299
/* Byte offset of field `x` within DialectObj, for the member table. */
#define D_OFF(x) offsetof(DialectObj, x)

/* Read-only attributes exposed directly from DialectObj fields. */
static struct PyMemberDef Dialect_memberlist[] = {
    { "delimiter",          T_CHAR, D_OFF(delimiter), READONLY },
    { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
    { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
    { "strict",             T_INT, D_OFF(strict), READONLY },
    { NULL }
};

/*
 * Attributes computed by getter functions.  No setter is supplied for
 * any of them.
 */
static PyGetSetDef Dialect_getsetlist[] = {
    { "escapechar",             (getter)Dialect_get_escapechar},
    { "lineterminator",         (getter)Dialect_get_lineterminator},
    { "quotechar",              (getter)Dialect_get_quotechar},
    { "quoting",                (getter)Dialect_get_quoting},
    {NULL},
};
 317
 318 static void
 319 Dialect_dealloc(DialectObj *self)
 320 {
 321     Py_XDECREF(self->lineterminator);
 322     Py_TYPE(self)->tp_free((PyObject *)self);
 323 }
 324
/*
 * Keyword names accepted by dialect_new().  The order must stay in step
 * with the output pointers passed to PyArg_ParseTupleAndKeywords there.
 */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
 337
 338 static PyObject *
 339 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
 340 {
 341     DialectObj *self;
 342     PyObject *ret = NULL;
 343     PyObject *dialect = NULL;
 344     PyObject *delimiter = NULL;
 345     PyObject *doublequote = NULL;
 346     PyObject *escapechar = NULL;
 347     PyObject *lineterminator = NULL;
 348     PyObject *quotechar = NULL;
 349     PyObject *quoting = NULL;
 350     PyObject *skipinitialspace = NULL;
 351     PyObject *strict = NULL;
 352
 353     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
 354                                      "|OOOOOOOOO", dialect_kws,
 355                                      &dialect,
 356                                      &delimiter,
 357                                      &doublequote,
 358                                      &escapechar,
 359                                      &lineterminator,
 360                                      &quotechar,
 361                                      &quoting,
 362                                      &skipinitialspace,
 363                                      &strict))
 364         return NULL;
 365
 366     if (dialect != NULL) {
 367         if (IS_BASESTRING(dialect)) {
 368             dialect = get_dialect_from_registry(dialect);
 369             if (dialect == NULL)
 370                 return NULL;
 371         }
 372         else
 373             Py_INCREF(dialect);
 374         /* Can we reuse this instance? */
 375         if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
 376             delimiter == 0 &&
 377             doublequote == 0 &&
 378             escapechar == 0 &&
 379             lineterminator == 0 &&
 380             quotechar == 0 &&
 381             quoting == 0 &&
 382             skipinitialspace == 0 &&
 383             strict == 0)
 384             return dialect;
 385     }
 386
 387     self = (DialectObj *)type->tp_alloc(type, 0);
 388     if (self == NULL) {
 389         Py_XDECREF(dialect);
 390         return NULL;
 391     }
 392     self->lineterminator = NULL;
 393
 394     Py_XINCREF(delimiter);
 395     Py_XINCREF(doublequote);
 396     Py_XINCREF(escapechar);
 397     Py_XINCREF(lineterminator);
 398     Py_XINCREF(quotechar);
 399     Py_XINCREF(quoting);
 400     Py_XINCREF(skipinitialspace);
 401     Py_XINCREF(strict);
 402     if (dialect != NULL) {
 403 #define DIALECT_GETATTR(v, n) \
 404         if (v == NULL) \
 405             v = PyObject_GetAttrString(dialect, n)
 406         DIALECT_GETATTR(delimiter, "delimiter");
 407         DIALECT_GETATTR(doublequote, "doublequote");
 408         DIALECT_GETATTR(escapechar, "escapechar");
 409         DIALECT_GETATTR(lineterminator, "lineterminator");
 410         DIALECT_GETATTR(quotechar, "quotechar");
 411         DIALECT_GETATTR(quoting, "quoting");
 412         DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
 413         DIALECT_GETATTR(strict, "strict");
 414         PyErr_Clear();
 415     }
 416
 417     /* check types and convert to C values */
 418 #define DIASET(meth, name, target, src, dflt) \
 419     if (meth(name, target, src, dflt)) \
 420         goto err
 421     DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
 422     DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
 423     DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
 424     DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
 425     DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
 426     DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
 427     DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
 428     DIASET(_set_bool, "strict", &self->strict, strict, 0);
 429
 430     /* validate options */
 431     if (dialect_check_quoting(self->quoting))
 432         goto err;
 433     if (self->delimiter == 0) {
 434         PyErr_SetString(PyExc_TypeError,
 435                         "\"delimiter\" must be an 1-character string");
 436         goto err;
 437     }
 438     if (quotechar == Py_None && quoting == NULL)
 439         self->quoting = QUOTE_NONE;
 440     if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
 441         PyErr_SetString(PyExc_TypeError,
 442                         "quotechar must be set if quoting enabled");
 443         goto err;
 444     }
 445     if (self->lineterminator == 0) {
 446         PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
 447         goto err;
 448     }
 449
 450     ret = (PyObject *)self;
 451     Py_INCREF(self);
 452 err:
 453     Py_XDECREF(self);
 454     Py_XDECREF(dialect);
 455     Py_XDECREF(delimiter);
 456     Py_XDECREF(doublequote);
 457     Py_XDECREF(escapechar);
 458     Py_XDECREF(lineterminator);
 459     Py_XDECREF(quotechar);
 460     Py_XDECREF(quoting);
 461     Py_XDECREF(skipinitialspace);
 462     Py_XDECREF(strict);
 463     return ret;
 464 }
 465
 466
PyDoc_STRVAR(Dialect_Type_doc,
"CSV dialect\n"
"\n"
"The Dialect type records CSV parsing and generation options.\n");

/*
 * Type object for _csv.Dialect.  Instances are created by dialect_new;
 * attributes come from Dialect_memberlist and Dialect_getsetlist.  The
 * type is subclassable (Py_TPFLAGS_BASETYPE) and defines no methods.
 */
static PyTypeObject Dialect_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_csv.Dialect",                         /* tp_name */
    sizeof(DialectObj),                     /* tp_basicsize */
    0,                                      /* tp_itemsize */
    /*  methods  */
    (destructor)Dialect_dealloc,            /* tp_dealloc */
    (printfunc)0,                           /* tp_print */
    (getattrfunc)0,                         /* tp_getattr */
    (setattrfunc)0,                         /* tp_setattr */
    (cmpfunc)0,                             /* tp_compare */
    (reprfunc)0,                            /* tp_repr */
    0,                                      /* tp_as_number */
    0,                                      /* tp_as_sequence */
    0,                                      /* tp_as_mapping */
    (hashfunc)0,                            /* tp_hash */
    (ternaryfunc)0,                         /* tp_call */
    (reprfunc)0,                                /* tp_str */
    0,                                      /* tp_getattro */
    0,                                      /* tp_setattro */
    0,                                      /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    Dialect_Type_doc,                       /* tp_doc */
    0,                                      /* tp_traverse */
    0,                                      /* tp_clear */
    0,                                      /* tp_richcompare */
    0,                                      /* tp_weaklistoffset */
    0,                                      /* tp_iter */
    0,                                      /* tp_iternext */
    0,                                          /* tp_methods */
    Dialect_memberlist,                     /* tp_members */
    Dialect_getsetlist,                     /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    dialect_new,                                /* tp_new */
    0,                                          /* tp_free */
};
 514
 515 /*
 516  * Return an instance of the dialect type, given a Python instance or kwarg
 517  * description of the dialect
 518  */
 519 static PyObject *
 520 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
 521 {
 522     PyObject *ctor_args;
 523     PyObject *dialect;
 524
 525     ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
 526     if (ctor_args == NULL)
 527         return NULL;
 528     dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
 529     Py_DECREF(ctor_args);
 530     return dialect;
 531 }
 532
 533 /*
 534  * READER
 535  */
 536 static int
 537 parse_save_field(ReaderObj *self)
 538 {
 539     PyObject *field;
 540
 541     field = PyString_FromStringAndSize(self->field, self->field_len);
 542     if (field == NULL)
 543         return -1;
 544     self->field_len = 0;
 545     if (self->numeric_field) {
 546         PyObject *tmp;
 547
 548         self->numeric_field = 0;
 549         tmp = PyNumber_Float(field);
 550         if (tmp == NULL) {
 551             Py_DECREF(field);
 552             return -1;
 553         }
 554         Py_DECREF(field);
 555         field = tmp;
 556     }
 557     PyList_Append(self->fields, field);
 558     Py_DECREF(field);
 559     return 0;
 560 }
 561
 562 static int
 563 parse_grow_buff(ReaderObj *self)
 564 {
 565     if (self->field_size == 0) {
 566         self->field_size = 4096;
 567         if (self->field != NULL)
 568             PyMem_Free(self->field);
 569         self->field = PyMem_Malloc(self->field_size);
 570     }
 571     else {
 572         if (self->field_size > INT_MAX / 2) {
 573             PyErr_NoMemory();
 574             return 0;
 575         }
 576         self->field_size *= 2;
 577         self->field = PyMem_Realloc(self->field, self->field_size);
 578     }
 579     if (self->field == NULL) {
 580         PyErr_NoMemory();
 581         return 0;
 582     }
 583     return 1;
 584 }
 585
 586 static int
 587 parse_add_char(ReaderObj *self, char c)
 588 {
 589     if (self->field_len >= field_limit) {
 590         PyErr_Format(error_obj, "field larger than field limit (%ld)",
 591                      field_limit);
 592         return -1;
 593     }
 594     if (self->field_len == self->field_size && !parse_grow_buff(self))
 595         return -1;
 596     self->field[self->field_len++] = c;
 597     return 0;
 598 }
 599
/*
 * Advance the reader's state machine by one input character.
 *
 * `c` is either a byte of the current input line or '\0'.  Reader_iternext
 * passes '\0' once after the last byte of every line, so inside this
 * function '\0' acts as the "end of this line" marker.
 *
 * Finished fields are stored through parse_save_field(); characters that
 * belong to the current field are collected through parse_add_char().
 * A record is complete when the state returns to START_RECORD.
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
parse_process_char(ReaderObj *self, char c)
{
    DialectObj *dialect = self->dialect;

    switch (self->state) {
    case START_RECORD:
        /* start of record */
        if (c == '\0')
            /* empty line - return [] */
            break;
        else if (c == '\n' || c == '\r') {
            self->state = EAT_CRNL;
            break;
        }
        /* normal character - handle as START_FIELD */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* expecting field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* save empty field - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            /* start quoted field */
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == ' ' && dialect->skipinitialspace)
            /* ignore space at start of field */
            ;
        else if (c == dialect->delimiter) {
            /* save empty field */
            if (parse_save_field(self) < 0)
                return -1;
        }
        else {
            /* begin new unquoted field */
            if (dialect->quoting == QUOTE_NONNUMERIC)
                self->numeric_field = 1;
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        break;

    case ESCAPED_CHAR:
        /* an escape at the very end of a line stores a newline */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case IN_FIELD:
        /* in unquoted field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case IN_QUOTED_FIELD:
        /* in quoted field */
        if (c == '\0')
            /* end of line inside quotes: stay in this state so that the
               caller keeps feeding lines into the same field */
            ;
        else if (c == dialect->escapechar) {
            /* Possible escape character */
            self->state = ESCAPE_IN_QUOTED_FIELD;
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            if (dialect->doublequote) {
                /* doublequote; " represented by "" */
                self->state = QUOTE_IN_QUOTED_FIELD;
            }
            else {
                /* end of quote part of field */
                self->state = IN_FIELD;
            }
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* an escape at the very end of a line stores a newline */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* doublequote - seen a quote in a quoted field */
        if (dialect->quoting != QUOTE_NONE &&
            c == dialect->quotechar) {
            /* save "" as " */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (!dialect->strict) {
            /* lenient mode: keep the character and carry on unquoted */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        else {
            /* illegal */
            PyErr_Format(error_obj, "'%c' expected after '%c'",
                            dialect->delimiter,
                            dialect->quotechar);
            return -1;
        }
        break;

    case EAT_CRNL:
        /* only further line-ending characters or the end-of-line marker
           may follow a line ending */
        if (c == '\n' || c == '\r')
            ;
        else if (c == '\0')
            self->state = START_RECORD;
        else {
            PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
            return -1;
        }
        break;

    }
    return 0;
}
 769
 770 static int
 771 parse_reset(ReaderObj *self)
 772 {
 773     Py_XDECREF(self->fields);
 774     self->fields = PyList_New(0);
 775     if (self->fields == NULL)
 776         return -1;
 777     self->field_len = 0;
 778     self->state = START_RECORD;
 779     self->numeric_field = 0;
 780     return 0;
 781 }
 782
 783 static PyObject *
 784 Reader_iternext(ReaderObj *self)
 785 {
 786     PyObject *lineobj;
 787     PyObject *fields = NULL;
 788     char *line, c;
 789     int linelen;
 790
 791     if (parse_reset(self) < 0)
 792         return NULL;
 793     do {
 794         lineobj = PyIter_Next(self->input_iter);
 795         if (lineobj == NULL) {
 796             /* End of input OR exception */
 797             if (!PyErr_Occurred() && (self->field_len != 0 ||
 798                                       self->state == IN_QUOTED_FIELD)) {
 799                 if (self->dialect->strict)
 800                     PyErr_SetString(error_obj, "unexpected end of data");
 801                 else if (parse_save_field(self) >= 0 )
 802                     break;
 803             }
 804             return NULL;
 805         }
 806         ++self->line_num;
 807
 808         line = PyString_AsString(lineobj);
 809         linelen = PyString_Size(lineobj);
 810
 811         if (line == NULL || linelen < 0) {
 812             Py_DECREF(lineobj);
 813             return NULL;
 814         }
 815         while (linelen--) {
 816             c = *line++;
 817             if (c == '\0') {
 818                 Py_DECREF(lineobj);
 819                 PyErr_Format(error_obj,
 820                              "line contains NULL byte");
 821                 goto err;
 822             }
 823             if (parse_process_char(self, c) < 0) {
 824                 Py_DECREF(lineobj);
 825                 goto err;
 826             }
 827         }
 828         Py_DECREF(lineobj);
 829         if (parse_process_char(self, 0) < 0)
 830             goto err;
 831     } while (self->state != START_RECORD);
 832
 833     fields = self->fields;
 834     self->fields = NULL;
 835 err:
 836     return fields;
 837 }
 838
 839 static void
 840 Reader_dealloc(ReaderObj *self)
 841 {
 842     PyObject_GC_UnTrack(self);
 843     Py_XDECREF(self->dialect);
 844     Py_XDECREF(self->input_iter);
 845     Py_XDECREF(self->fields);
 846     if (self->field != NULL)
 847         PyMem_Free(self->field);
 848     PyObject_GC_Del(self);
 849 }
 850
/*
 * tp_traverse for the reader: report each owned object reference to the
 * garbage collector.  Py_VISIT uses the `visit` and `arg` parameters
 * implicitly and returns early if a visit yields a non-zero value.
 */
static int
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->input_iter);
    Py_VISIT(self->fields);
    return 0;
}
 859
/*
 * tp_clear for the reader: drop every owned object reference, leaving
 * the corresponding members NULL.  Always returns 0.
 */
static int
Reader_clear(ReaderObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->input_iter);
    Py_CLEAR(self->fields);
    return 0;
}
 868
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);

/* The reader has no methods of its own; the table holds only the sentinel. */
static struct PyMethodDef Reader_methods[] = {
    { NULL, NULL }
};
/* Byte offset of field `x` within ReaderObj, for the member table. */
#define R_OFF(x) offsetof(ReaderObj, x)

/* Read-only attributes exposed directly from ReaderObj fields. */
static struct PyMemberDef Reader_memberlist[] = {
    { "dialect", T_OBJECT, R_OFF(dialect), RO },
    { "line_num", T_ULONG, R_OFF(line_num), RO },
    { NULL }
};
 886
 887
 888 static PyTypeObject Reader_Type = {
 889     PyVarObject_HEAD_INIT(NULL, 0)
 890     "_csv.reader",                          /*tp_name*/
 891     sizeof(ReaderObj),                      /*tp_basicsize*/
 892     0,                                      /*tp_itemsize*/
 893     /* methods */
 894     (destructor)Reader_dealloc,             /*tp_dealloc*/
 895     (printfunc)0,                           /*tp_print*/
 896     (getattrfunc)0,                         /*tp_getattr*/
 897     (setattrfunc)0,                         /*tp_setattr*/
 898     (cmpfunc)0,                             /*tp_compare*/
 899     (reprfunc)0,                            /*tp_repr*/
 900     0,                                      /*tp_as_number*/
 901     0,                                      /*tp_as_sequence*/
 902     0,                                      /*tp_as_mapping*/
 903     (hashfunc)0,                            /*tp_hash*/
 904     (ternaryfunc)0,                         /*tp_call*/
 905     (reprfunc)0,                                /*tp_str*/
 906     0,                                      /*tp_getattro*/
 907     0,                                      /*tp_setattro*/
 908     0,                                      /*tp_as_buffer*/
 909     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
 910         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
 911     Reader_Type_doc,                        /*tp_doc*/
 912     (traverseproc)Reader_traverse,          /*tp_traverse*/
 913     (inquiry)Reader_clear,                  /*tp_clear*/
 914     0,                                      /*tp_richcompare*/
 915     0,                                      /*tp_weaklistoffset*/
 916     PyObject_SelfIter,                          /*tp_iter*/
 917     (getiterfunc)Reader_iternext,           /*tp_iternext*/
 918     Reader_methods,                         /*tp_methods*/
 919     Reader_memberlist,                      /*tp_members*/
 920     0,                                      /*tp_getset*/
 921
 922 };
 923
 924 static PyObject *
 925 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
 926 {
 927     PyObject * iterator, * dialect = NULL;
 928     ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
 929
 930     if (!self)
 931         return NULL;
 932
 933     self->dialect = NULL;
 934     self->fields = NULL;
 935     self->input_iter = NULL;
 936     self->field = NULL;
 937     self->field_size = 0;
 938     self->line_num = 0;
 939
 940     if (parse_reset(self) < 0) {
 941         Py_DECREF(self);
 942         return NULL;
 943     }
 944
 945     if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
 946         Py_DECREF(self);
 947         return NULL;
 948     }
 949     self->input_iter = PyObject_GetIter(iterator);
 950     if (self->input_iter == NULL) {
 951         PyErr_SetString(PyExc_TypeError,
 952                         "argument 1 must be an iterator");
 953         Py_DECREF(self);
 954         return NULL;
 955     }
 956     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
 957     if (self->dialect == NULL) {
 958         Py_DECREF(self);
 959         return NULL;
 960     }
 961
 962     PyObject_GC_Track(self);
 963     return (PyObject *)self;
 964 }
 965
 966 /*
 967  * WRITER
 968  */
 969 /* ---------------------------------------------------------------- */
 970 static void
 971 join_reset(WriterObj *self)
 972 {
 973     self->rec_len = 0;
 974     self->num_fields = 0;
 975 }
 976
 977 #define MEM_INCR 32768
 978
 979 /* Calculate new record length or append field to record.  Return new
 980  * record length.
 981  */
 982 static int
 983 join_append_data(WriterObj *self, char *field, int quote_empty,
 984                  int *quoted, int copy_phase)
 985 {
 986     DialectObj *dialect = self->dialect;
 987     int i, rec_len;
 988     char *lineterm;
 989
 990 #define ADDCH(c) \
 991     do {\
 992         if (copy_phase) \
 993             self->rec[rec_len] = c;\
 994         rec_len++;\
 995     } while(0)
 996
 997     lineterm = PyString_AsString(dialect->lineterminator);
 998     if (lineterm == NULL)
 999         return -1;
1000
1001     rec_len = self->rec_len;
1002
1003     /* If this is not the first field we need a field separator */
1004     if (self->num_fields > 0)
1005         ADDCH(dialect->delimiter);
1006
1007     /* Handle preceding quote */
1008     if (copy_phase && *quoted)
1009         ADDCH(dialect->quotechar);
1010
1011     /* Copy/count field data */
1012     for (i = 0;; i++) {
1013         char c = field[i];
1014         int want_escape = 0;
1015
1016         if (c == '\0')
1017             break;
1018
1019         if (c == dialect->delimiter ||
1020             c == dialect->escapechar ||
1021             c == dialect->quotechar ||
1022             strchr(lineterm, c)) {
1023             if (dialect->quoting == QUOTE_NONE)
1024                 want_escape = 1;
1025             else {
1026                 if (c == dialect->quotechar) {
1027                     if (dialect->doublequote)
1028                         ADDCH(dialect->quotechar);
1029                     else
1030                         want_escape = 1;
1031                 }
1032                 if (!want_escape)
1033                     *quoted = 1;
1034             }
1035             if (want_escape) {
1036                 if (!dialect->escapechar) {
1037                     PyErr_Format(error_obj,
1038                                  "need to escape, but no escapechar set");
1039                     return -1;
1040                 }
1041                 ADDCH(dialect->escapechar);
1042             }
1043         }
1044         /* Copy field character into record buffer.
1045          */
1046         ADDCH(c);
1047     }
1048
1049     /* If field is empty check if it needs to be quoted.
1050      */
1051     if (i == 0 && quote_empty) {
1052         if (dialect->quoting == QUOTE_NONE) {
1053             PyErr_Format(error_obj,
1054                          "single empty field record must be quoted");
1055             return -1;
1056         }
1057         else
1058             *quoted = 1;
1059     }
1060
1061     if (*quoted) {
1062         if (copy_phase)
1063             ADDCH(dialect->quotechar);
1064         else
1065             rec_len += 2;
1066     }
1067     return rec_len;
1068 #undef ADDCH
1069 }
1070
1071 static int
1072 join_check_rec_size(WriterObj *self, int rec_len)
1073 {
1074
1075     if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) {
1076         PyErr_NoMemory();
1077         return 0;
1078     }
1079
1080     if (rec_len > self->rec_size) {
1081         if (self->rec_size == 0) {
1082             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1083             if (self->rec != NULL)
1084                 PyMem_Free(self->rec);
1085             self->rec = PyMem_Malloc(self->rec_size);
1086         }
1087         else {
1088             char *old_rec = self->rec;
1089
1090             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1091             self->rec = PyMem_Realloc(self->rec, self->rec_size);
1092             if (self->rec == NULL)
1093                 PyMem_Free(old_rec);
1094         }
1095         if (self->rec == NULL) {
1096             PyErr_NoMemory();
1097             return 0;
1098         }
1099     }
1100     return 1;
1101 }
1102
1103 static int
1104 join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1105 {
1106     int rec_len;
1107
1108     rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1109     if (rec_len < 0)
1110         return 0;
1111
1112     /* grow record buffer if necessary */
1113     if (!join_check_rec_size(self, rec_len))
1114         return 0;
1115
1116     self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1117     self->num_fields++;
1118
1119     return 1;
1120 }
1121
1122 static int
1123 join_append_lineterminator(WriterObj *self)
1124 {
1125     int terminator_len;
1126     char *terminator;
1127
1128     terminator_len = PyString_Size(self->dialect->lineterminator);
1129     if (terminator_len == -1)
1130         return 0;
1131
1132     /* grow record buffer if necessary */
1133     if (!join_check_rec_size(self, self->rec_len + terminator_len))
1134         return 0;
1135
1136     terminator = PyString_AsString(self->dialect->lineterminator);
1137     if (terminator == NULL)
1138         return 0;
1139     memmove(self->rec + self->rec_len, terminator, terminator_len);
1140     self->rec_len += terminator_len;
1141
1142     return 1;
1143 }
1144
/* Docstring attached to the "writerow" entry of Writer_methods. */
PyDoc_STRVAR(csv_writerow_doc,
"writerow(sequence)\n"
"\n"
"Construct and write a CSV record from a sequence of fields.  Non-string\n"
"elements will be converted to string.");
1150
1151 static PyObject *
1152 csv_writerow(WriterObj *self, PyObject *seq)
1153 {
1154     DialectObj *dialect = self->dialect;
1155     int len, i;
1156
1157     if (!PySequence_Check(seq))
1158         return PyErr_Format(error_obj, "sequence expected");
1159
1160     len = PySequence_Length(seq);
1161     if (len < 0)
1162         return NULL;
1163
1164     /* Join all fields in internal buffer.
1165      */
1166     join_reset(self);
1167     for (i = 0; i < len; i++) {
1168         PyObject *field;
1169         int append_ok;
1170         int quoted;
1171
1172         field = PySequence_GetItem(seq, i);
1173         if (field == NULL)
1174             return NULL;
1175
1176         switch (dialect->quoting) {
1177         case QUOTE_NONNUMERIC:
1178             quoted = !PyNumber_Check(field);
1179             break;
1180         case QUOTE_ALL:
1181             quoted = 1;
1182             break;
1183         default:
1184             quoted = 0;
1185             break;
1186         }
1187
1188         if (PyString_Check(field)) {
1189             append_ok = join_append(self,
1190                                     PyString_AS_STRING(field),
1191                                     &quoted, len == 1);
1192             Py_DECREF(field);
1193         }
1194         else if (field == Py_None) {
1195             append_ok = join_append(self, "", &quoted, len == 1);
1196             Py_DECREF(field);
1197         }
1198         else {
1199             PyObject *str;
1200
1201             if (PyFloat_Check(field)) {
1202                 str = PyObject_Repr(field);
1203             } else {
1204                 str = PyObject_Str(field);
1205             }
1206             Py_DECREF(field);
1207             if (str == NULL)
1208                 return NULL;
1209
1210             append_ok = join_append(self, PyString_AS_STRING(str),
1211                                     &quoted, len == 1);
1212             Py_DECREF(str);
1213         }
1214         if (!append_ok)
1215             return NULL;
1216     }
1217
1218     /* Add line terminator.
1219      */
1220     if (!join_append_lineterminator(self))
1221         return 0;
1222
1223     return PyObject_CallFunction(self->writeline,
1224                                  "(s#)", self->rec, self->rec_len);
1225 }
1226
/* Docstring attached to the "writerows" entry of Writer_methods. */
PyDoc_STRVAR(csv_writerows_doc,
"writerows(sequence of sequences)\n"
"\n"
"Construct and write a series of sequences to a csv file.  Non-string\n"
"elements will be converted to string.");
1232
1233 static PyObject *
1234 csv_writerows(WriterObj *self, PyObject *seqseq)
1235 {
1236     PyObject *row_iter, *row_obj, *result;
1237
1238     row_iter = PyObject_GetIter(seqseq);
1239     if (row_iter == NULL) {
1240         PyErr_SetString(PyExc_TypeError,
1241                         "writerows() argument must be iterable");
1242         return NULL;
1243     }
1244     while ((row_obj = PyIter_Next(row_iter))) {
1245         result = csv_writerow(self, row_obj);
1246         Py_DECREF(row_obj);
1247         if (!result) {
1248             Py_DECREF(row_iter);
1249             return NULL;
1250         }
1251         else
1252              Py_DECREF(result);
1253     }
1254     Py_DECREF(row_iter);
1255     if (PyErr_Occurred())
1256         return NULL;
1257     Py_INCREF(Py_None);
1258     return Py_None;
1259 }
1260
/* Method table installed as tp_methods of Writer_Type.  Both methods use
 * METH_O, i.e. they receive exactly one argument object. */
static struct PyMethodDef Writer_methods[] = {
    { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
    { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
    { NULL, NULL }
};

/* Byte offset of field x inside WriterObj, used by the member table. */
#define W_OFF(x) offsetof(WriterObj, x)

/* Member table installed as tp_members of Writer_Type; the single entry
 * is flagged RO. */
static struct PyMemberDef Writer_memberlist[] = {
    { "dialect", T_OBJECT, W_OFF(dialect), RO },
    { NULL }
};
1273
1274 static void
1275 Writer_dealloc(WriterObj *self)
1276 {
1277     PyObject_GC_UnTrack(self);
1278     Py_XDECREF(self->dialect);
1279     Py_XDECREF(self->writeline);
1280     if (self->rec != NULL)
1281         PyMem_Free(self->rec);
1282     PyObject_GC_Del(self);
1283 }
1284
/* tp_traverse for writer objects: visits the two object references the
 * writer holds, the dialect and the write callable.  Py_VISIT relies on
 * the parameter names `visit` and `arg`. */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->writeline);
    return 0;
}
1292
/* tp_clear for writer objects: releases the dialect and the write callable
 * and resets both fields.  Always returns 0. */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->writeline);
    return 0;
}
1300
/* Docstring used for the tp_doc slot of Writer_Type. */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);
1307
1308 static PyTypeObject Writer_Type = {
1309     PyVarObject_HEAD_INIT(NULL, 0)
1310     "_csv.writer",                          /*tp_name*/
1311     sizeof(WriterObj),                      /*tp_basicsize*/
1312     0,                                      /*tp_itemsize*/
1313     /* methods */
1314     (destructor)Writer_dealloc,             /*tp_dealloc*/
1315     (printfunc)0,                           /*tp_print*/
1316     (getattrfunc)0,                         /*tp_getattr*/
1317     (setattrfunc)0,                         /*tp_setattr*/
1318     (cmpfunc)0,                             /*tp_compare*/
1319     (reprfunc)0,                            /*tp_repr*/
1320     0,                                      /*tp_as_number*/
1321     0,                                      /*tp_as_sequence*/
1322     0,                                      /*tp_as_mapping*/
1323     (hashfunc)0,                            /*tp_hash*/
1324     (ternaryfunc)0,                         /*tp_call*/
1325     (reprfunc)0,                            /*tp_str*/
1326     0,                                      /*tp_getattro*/
1327     0,                                      /*tp_setattro*/
1328     0,                                      /*tp_as_buffer*/
1329     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1330         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
1331     Writer_Type_doc,
1332     (traverseproc)Writer_traverse,          /*tp_traverse*/
1333     (inquiry)Writer_clear,                  /*tp_clear*/
1334     0,                                      /*tp_richcompare*/
1335     0,                                      /*tp_weaklistoffset*/
1336     (getiterfunc)0,                         /*tp_iter*/
1337     (getiterfunc)0,                         /*tp_iternext*/
1338     Writer_methods,                         /*tp_methods*/
1339     Writer_memberlist,                      /*tp_members*/
1340     0,                                      /*tp_getset*/
1341 };
1342
1343 static PyObject *
1344 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1345 {
1346     PyObject * output_file, * dialect = NULL;
1347     WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1348
1349     if (!self)
1350         return NULL;
1351
1352     self->dialect = NULL;
1353     self->writeline = NULL;
1354
1355     self->rec = NULL;
1356     self->rec_size = 0;
1357     self->rec_len = 0;
1358     self->num_fields = 0;
1359
1360     if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1361         Py_DECREF(self);
1362         return NULL;
1363     }
1364     self->writeline = PyObject_GetAttrString(output_file, "write");
1365     if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1366         PyErr_SetString(PyExc_TypeError,
1367                         "argument 1 must have a \"write\" method");
1368         Py_DECREF(self);
1369         return NULL;
1370     }
1371     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1372     if (self->dialect == NULL) {
1373         Py_DECREF(self);
1374         return NULL;
1375     }
1376     PyObject_GC_Track(self);
1377     return (PyObject *)self;
1378 }
1379
1380 /*
1381  * DIALECT REGISTRY
1382  */
1383 static PyObject *
1384 csv_list_dialects(PyObject *module, PyObject *args)
1385 {
1386     return PyDict_Keys(dialects);
1387 }
1388
1389 static PyObject *
1390 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1391 {
1392     PyObject *name_obj, *dialect_obj = NULL;
1393     PyObject *dialect;
1394
1395     if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1396         return NULL;
1397     if (!IS_BASESTRING(name_obj)) {
1398         PyErr_SetString(PyExc_TypeError,
1399                         "dialect name must be a string or unicode");
1400         return NULL;
1401     }
1402     dialect = _call_dialect(dialect_obj, kwargs);
1403     if (dialect == NULL)
1404         return NULL;
1405     if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1406         Py_DECREF(dialect);
1407         return NULL;
1408     }
1409     Py_DECREF(dialect);
1410     Py_INCREF(Py_None);
1411     return Py_None;
1412 }
1413
1414 static PyObject *
1415 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1416 {
1417     if (PyDict_DelItem(dialects, name_obj) < 0)
1418         return PyErr_Format(error_obj, "unknown dialect");
1419     Py_INCREF(Py_None);
1420     return Py_None;
1421 }
1422
1423 static PyObject *
1424 csv_get_dialect(PyObject *module, PyObject *name_obj)
1425 {
1426     return get_dialect_from_registry(name_obj);
1427 }
1428
1429 static PyObject *
1430 csv_field_size_limit(PyObject *module, PyObject *args)
1431 {
1432     PyObject *new_limit = NULL;
1433     long old_limit = field_limit;
1434
1435     if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1436         return NULL;
1437     if (new_limit != NULL) {
1438         if (!PyInt_Check(new_limit)) {
1439             PyErr_Format(PyExc_TypeError,
1440                          "limit must be an integer");
1441             return NULL;
1442         }
1443         field_limit = PyInt_AsLong(new_limit);
1444     }
1445     return PyInt_FromLong(old_limit);
1446 }
1447
1448 /*
1449  * MODULE
1450  */
1451
/* Module docstring, passed to Py_InitModule3() in init_csv().  It gives an
 * overview of the module, an example dialect class and a description of
 * each dialect setting. */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305.  Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail.  The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings.  When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
"    class excel:\n"
"        delimiter = ','\n"
"        quotechar = '\"'\n"
"        escapechar = None\n"
"        doublequote = True\n"
"        skipinitialspace = False\n"
"        lineterminator = '\\r\\n'\n"
"        quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
"    * quotechar - specifies a one-character string to use as the \n"
"        quoting character.  It defaults to '\"'.\n"
"    * delimiter - specifies a one-character string to use as the \n"
"        field separator.  It defaults to ','.\n"
"    * skipinitialspace - specifies how to interpret whitespace which\n"
"        immediately follows a delimiter.  It defaults to False, which\n"
"        means that whitespace immediately following a delimiter is part\n"
"        of the following field.\n"
"    * lineterminator -  specifies the character sequence which should \n"
"        terminate rows.\n"
"    * quoting - controls when quotes should be generated by the writer.\n"
"        It can take on any of the following module constants:\n"
"\n"
"        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
"            field contains either the quotechar or the delimiter\n"
"        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
"        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
"            fields which do not parse as integers or floating point\n"
"            numbers.\n"
"        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
"    * escapechar - specifies a one-character string used to escape \n"
"        the delimiter when quoting is set to QUOTE_NONE.\n"
"    * doublequote - controls the handling of quotes inside fields.  When\n"
"        True, two consecutive quotes are interpreted as one during read,\n"
"        and when writing, each quote character embedded in the data is\n"
"        written as two quotes\n");
1509
/* Docstring attached to the "reader" entry of csv_methods. */
PyDoc_STRVAR(csv_reader_doc,
"    csv_reader = reader(iterable [, dialect='excel']\n"
"                        [optional keyword args])\n"
"    for row in csv_reader:\n"
"        process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list.  The\n"
"optional \"dialect\" parameter is discussed below.  The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator.  Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines):\n");
1524
/* Docstring attached to the "writer" entry of csv_methods. */
PyDoc_STRVAR(csv_writer_doc,
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    for row in sequence:\n"
"        csv_writer.writerow(row)\n"
"\n"
"    [or]\n"
"\n"
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1538
1539 PyDoc_STRVAR(csv_list_dialects_doc,
1540 "Return a list of all know dialect names.\n"
1541 "    names = csv.list_dialects()");
1542
/* Docstring attached to the "get_dialect" entry of csv_methods. */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
"    dialect = csv.get_dialect(name)");
1546
1547 PyDoc_STRVAR(csv_register_dialect_doc,
1548 "Create a mapping from a string name to a dialect class.\n"
1549 "    dialect = csv.register_dialect(name, dialect)");
1550
/* Docstring attached to the "unregister_dialect" entry of csv_methods. */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
"    csv.unregister_dialect(name)");
1554
/* Docstring attached to the "field_size_limit" entry of csv_methods. */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
"    csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1561
/* Module-level function table, passed to Py_InitModule3() in init_csv().
 * reader, writer and register_dialect accept keyword arguments; the
 * remaining entries take no arguments (METH_NOARGS), exactly one object
 * (METH_O) or a positional tuple (METH_VARARGS). */
static struct PyMethodDef csv_methods[] = {
    { "reader", (PyCFunction)csv_reader,
        METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
    { "writer", (PyCFunction)csv_writer,
        METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
    { "list_dialects", (PyCFunction)csv_list_dialects,
        METH_NOARGS, csv_list_dialects_doc},
    { "register_dialect", (PyCFunction)csv_register_dialect,
        METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
    { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
        METH_O, csv_unregister_dialect_doc},
    { "get_dialect", (PyCFunction)csv_get_dialect,
        METH_O, csv_get_dialect_doc},
    { "field_size_limit", (PyCFunction)csv_field_size_limit,
        METH_VARARGS, csv_field_size_limit_doc},
    { NULL, NULL }
};
1579
1580 PyMODINIT_FUNC
1581 init_csv(void)
1582 {
1583     PyObject *module;
1584     StyleDesc *style;
1585
1586     if (PyType_Ready(&Dialect_Type) < 0)
1587         return;
1588
1589     if (PyType_Ready(&Reader_Type) < 0)
1590         return;
1591
1592     if (PyType_Ready(&Writer_Type) < 0)
1593         return;
1594
1595     /* Create the module and add the functions */
1596     module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1597     if (module == NULL)
1598         return;
1599
1600     /* Add version to the module. */
1601     if (PyModule_AddStringConstant(module, "__version__",
1602                                    MODULE_VERSION) == -1)
1603         return;
1604
1605     /* Add _dialects dictionary */
1606     dialects = PyDict_New();
1607     if (dialects == NULL)
1608         return;
1609     if (PyModule_AddObject(module, "_dialects", dialects))
1610         return;
1611
1612     /* Add quote styles into dictionary */
1613     for (style = quote_styles; style->name; style++) {
1614         if (PyModule_AddIntConstant(module, style->name,
1615                                     style->style) == -1)
1616             return;
1617     }
1618
1619     /* Add the Dialect type */
1620     Py_INCREF(&Dialect_Type);
1621     if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1622         return;
1623
1624     /* Add the CSV exception object to the module. */
1625     error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1626     if (error_obj == NULL)
1627         return;
1628     PyModule_AddObject(module, "Error", error_obj);
1629 }