AppPkg/Applications/Python/Python-2.7.2/Modules/_csv.c

   1 /* csv module */
   2
   3 /*
   4
   5 This module provides the low-level underpinnings of a CSV reading/writing
   6 module.  Users should not use this module directly, but import the csv.py
   7 module instead.
   8
   9 **** For people modifying this code, please note that as of this writing
  10 **** (2003-03-23), it is intended that this code should work with Python
  11 **** 2.2.
  12
  13 */
  14
/* Version string of this extension module. */
#define MODULE_VERSION "1.0"
  16
  17 #include "Python.h"
  18 #include "structmember.h"
  19
  20
/* begin 2.2 compatibility macros */
/* Every macro in this section is defined only if the included Python
 * headers have not already supplied it. */
#ifndef PyDoc_STRVAR
/* Define macros for inline documentation. */
#define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str) str
#else
/* Without WITH_DOC_STRINGS every doc string collapses to "". */
#define PyDoc_STR(str) ""
#endif
#endif /* ifndef PyDoc_STRVAR */

/* Return type (and C linkage under C++) for the module init function. */
#ifndef PyMODINIT_FUNC
#       if defined(__cplusplus)
#               define PyMODINIT_FUNC extern "C" void
#       else /* __cplusplus */
#               define PyMODINIT_FUNC void
#       endif /* __cplusplus */
#endif

/* Release the reference stored in `op`, if any.  The slot is set to NULL
 * before the reference is dropped. */
#ifndef Py_CLEAR
#define Py_CLEAR(op)                                            \
    do {                                                        \
        if (op) {                                               \
            PyObject *tmp = (PyObject *)(op);                   \
            (op) = NULL;                                        \
            Py_DECREF(tmp);                                     \
        }                                                       \
    } while (0)
#endif
/* Call `visit` on `op` when it is non-NULL and return from the enclosing
 * function if the call yields a non-zero result.  The names `visit` and
 * `arg` must be in scope at the point of use. */
#ifndef Py_VISIT
#define Py_VISIT(op)                                                    \
    do {                                                                \
        if (op) {                                                       \
            int vret = visit((PyObject *)(op), arg);                    \
            if (vret)                                                   \
                return vret;                                            \
        }                                                               \
    } while (0)
#endif

/* end 2.2 compatibility macros */
  63
/* True when `o` is an instance of PyBaseString_Type or of a subtype. */
#define IS_BASESTRING(o) \
    PyObject_TypeCheck(o, &PyBaseString_Type)

/* Module-level state shared by the dialect and reader code. */
static PyObject *error_obj;     /* CSV exception */
static PyObject *dialects;      /* Dialect registry */
/* parse_add_char() refuses to grow a field once its length reaches this. */
static long field_limit = 128 * 1024;   /* max parsed field size */
  70
/* States of the character-driven parser implemented by
 * parse_process_char(). */
typedef enum {
    START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
    IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL
} ParserState;

/* Quoting modes accepted for a dialect's "quoting" option. */
typedef enum {
    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting mode with its symbolic name. */
typedef struct {
    QuoteStyle style;
    char *name;
} StyleDesc;

/* Table of the valid quoting modes.  The final entry has a NULL name and
 * acts as the terminator that dialect_check_quoting() stops on. */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0 }
};
  93
/* Instance layout of the Dialect type: the parsing/formatting options
 * converted to C values by dialect_new(). */
typedef struct {
    PyObject_HEAD

    int doublequote;            /* is " represented by ""? */
    char delimiter;             /* field separator */
    char quotechar;             /* quote character */
    char escapechar;            /* escape character */
    int skipinitialspace;       /* ignore spaces following delimiter? */
    PyObject *lineterminator; /* string to write between records */
    int quoting;                /* style of quoting to write */

    int strict;                 /* raise exception on bad CSV */
} DialectObj;

/* Forward declaration; the type object is defined after dialect_new(). */
staticforward PyTypeObject Dialect_Type;
 109
/* Instance layout of the reader type: the input iterator, the dialect in
 * effect, and the parser's working state for the record being built. */
typedef struct {
    PyObject_HEAD

    PyObject *input_iter;   /* iterate over this for input lines */

    DialectObj *dialect;    /* parsing dialect */

    PyObject *fields;           /* field list for current record */
    ParserState state;          /* current CSV parse state */
    char *field;                /* build current field in here */
    int field_size;             /* size of allocated buffer */
    int field_len;              /* length of current field */
    int numeric_field;          /* treat field as numeric */
    unsigned long line_num;     /* Source-file line number */
} ReaderObj;

/* Forward declaration; the type object is defined after the reader code. */
staticforward PyTypeObject Reader_Type;

/* Exact type test: true only when `v`'s type is Reader_Type itself. */
#define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
 129
/* Instance layout of the writer type: the output callable, the dialect in
 * effect, and the buffer in which a record is assembled. */
typedef struct {
    PyObject_HEAD

    PyObject *writeline;    /* write output lines to this file */

    DialectObj *dialect;    /* parsing dialect */

    char *rec;                  /* buffer for parser.join */
    int rec_size;               /* size of allocated record */
    int rec_len;                /* length of record */
    int num_fields;             /* number of fields in record */
} WriterObj;

/* Forward declaration of the writer type object. */
staticforward PyTypeObject Writer_Type;
 144
 145 /*
 146  * DIALECT class
 147  */
 148
 149 static PyObject *
 150 get_dialect_from_registry(PyObject * name_obj)
 151 {
 152     PyObject *dialect_obj;
 153
 154     dialect_obj = PyDict_GetItem(dialects, name_obj);
 155     if (dialect_obj == NULL) {
 156         if (!PyErr_Occurred())
 157             PyErr_Format(error_obj, "unknown dialect");
 158     }
 159     else
 160         Py_INCREF(dialect_obj);
 161     return dialect_obj;
 162 }
 163
 164 static PyObject *
 165 get_string(PyObject *str)
 166 {
 167     Py_XINCREF(str);
 168     return str;
 169 }
 170
 171 static PyObject *
 172 get_nullchar_as_None(char c)
 173 {
 174     if (c == '\0') {
 175         Py_INCREF(Py_None);
 176         return Py_None;
 177     }
 178     else
 179         return PyString_FromStringAndSize((char*)&c, 1);
 180 }
 181
 182 static PyObject *
 183 Dialect_get_lineterminator(DialectObj *self)
 184 {
 185     return get_string(self->lineterminator);
 186 }
 187
 188 static PyObject *
 189 Dialect_get_escapechar(DialectObj *self)
 190 {
 191     return get_nullchar_as_None(self->escapechar);
 192 }
 193
 194 static PyObject *
 195 Dialect_get_quotechar(DialectObj *self)
 196 {
 197     return get_nullchar_as_None(self->quotechar);
 198 }
 199
 200 static PyObject *
 201 Dialect_get_quoting(DialectObj *self)
 202 {
 203     return PyInt_FromLong(self->quoting);
 204 }
 205
 206 static int
 207 _set_bool(const char *name, int *target, PyObject *src, int dflt)
 208 {
 209     if (src == NULL)
 210         *target = dflt;
 211     else
 212         *target = PyObject_IsTrue(src);
 213     return 0;
 214 }
 215
 216 static int
 217 _set_int(const char *name, int *target, PyObject *src, int dflt)
 218 {
 219     if (src == NULL)
 220         *target = dflt;
 221     else {
 222         if (!PyInt_Check(src)) {
 223             PyErr_Format(PyExc_TypeError,
 224                          "\"%s\" must be an integer", name);
 225             return -1;
 226         }
 227         *target = PyInt_AsLong(src);
 228     }
 229     return 0;
 230 }
 231
 232 static int
 233 _set_char(const char *name, char *target, PyObject *src, char dflt)
 234 {
 235     if (src == NULL)
 236         *target = dflt;
 237     else {
 238         if (src == Py_None || PyString_Size(src) == 0)
 239             *target = '\0';
 240         else if (!PyString_Check(src) || PyString_Size(src) != 1) {
 241             PyErr_Format(PyExc_TypeError,
 242                          "\"%s\" must be an 1-character string",
 243                          name);
 244             return -1;
 245         }
 246         else {
 247             char *s = PyString_AsString(src);
 248             if (s == NULL)
 249                 return -1;
 250             *target = s[0];
 251         }
 252     }
 253     return 0;
 254 }
 255
 256 static int
 257 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
 258 {
 259     if (src == NULL)
 260         *target = PyString_FromString(dflt);
 261     else {
 262         if (src == Py_None)
 263             *target = NULL;
 264         else if (!IS_BASESTRING(src)) {
 265             PyErr_Format(PyExc_TypeError,
 266                          "\"%s\" must be an string", name);
 267             return -1;
 268         }
 269         else {
 270             Py_XDECREF(*target);
 271             Py_INCREF(src);
 272             *target = src;
 273         }
 274     }
 275     return 0;
 276 }
 277
 278 static int
 279 dialect_check_quoting(int quoting)
 280 {
 281     StyleDesc *qs = quote_styles;
 282
 283     for (qs = quote_styles; qs->name; qs++) {
 284         if (qs->style == quoting)
 285             return 0;
 286     }
 287     PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
 288     return -1;
 289 }
 290
/* Byte offset of field `x` inside DialectObj. */
#define D_OFF(x) offsetof(DialectObj, x)

/* Read-only attributes served directly from DialectObj fields. */
static struct PyMemberDef Dialect_memberlist[] = {
    { "delimiter",          T_CHAR, D_OFF(delimiter), READONLY },
    { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
    { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
    { "strict",             T_INT, D_OFF(strict), READONLY },
    { NULL }
};

/* Attributes computed by getter functions; no setters are supplied. */
static PyGetSetDef Dialect_getsetlist[] = {
    { "escapechar",             (getter)Dialect_get_escapechar},
    { "lineterminator",         (getter)Dialect_get_lineterminator},
    { "quotechar",              (getter)Dialect_get_quotechar},
    { "quoting",                (getter)Dialect_get_quoting},
    {NULL},
};
 308
 309 static void
 310 Dialect_dealloc(DialectObj *self)
 311 {
 312     Py_XDECREF(self->lineterminator);
 313     Py_TYPE(self)->tp_free((PyObject *)self);
 314 }
 315
/* Keyword names accepted by dialect_new().  The order must match the
 * order of the output pointers passed to PyArg_ParseTupleAndKeywords()
 * there. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
 328
/*
 * tp_new for the Dialect type.
 *
 * Accepts an optional base `dialect` (a registered name or any object with
 * dialect attributes) plus optional per-option overrides.  An option's
 * value is taken, in order of preference, from the explicit argument, from
 * the same-named attribute of `dialect`, or from a built-in default.
 *
 * If `dialect` is already a Dialect instance and no overrides are given,
 * that instance is returned instead of building a new one.
 *
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *
dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
    DialectObj *self;
    PyObject *ret = NULL;
    PyObject *dialect = NULL;
    PyObject *delimiter = NULL;
    PyObject *doublequote = NULL;
    PyObject *escapechar = NULL;
    PyObject *lineterminator = NULL;
    PyObject *quotechar = NULL;
    PyObject *quoting = NULL;
    PyObject *skipinitialspace = NULL;
    PyObject *strict = NULL;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs,
                                     "|OOOOOOOOO", dialect_kws,
                                     &dialect,
                                     &delimiter,
                                     &doublequote,
                                     &escapechar,
                                     &lineterminator,
                                     &quotechar,
                                     &quoting,
                                     &skipinitialspace,
                                     &strict))
        return NULL;

    if (dialect != NULL) {
        /* From here on `dialect` is an owned reference: either the new
         * reference returned by the registry lookup or one taken here. */
        if (IS_BASESTRING(dialect)) {
            dialect = get_dialect_from_registry(dialect);
            if (dialect == NULL)
                return NULL;
        }
        else
            Py_INCREF(dialect);
        /* Can we reuse this instance? */
        if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
            delimiter == 0 &&
            doublequote == 0 &&
            escapechar == 0 &&
            lineterminator == 0 &&
            quotechar == 0 &&
            quoting == 0 &&
            skipinitialspace == 0 &&
            strict == 0)
            return dialect;
    }

    self = (DialectObj *)type->tp_alloc(type, 0);
    if (self == NULL) {
        Py_XDECREF(dialect);
        return NULL;
    }
    /* Start from NULL so the dealloc on the error path and the release in
     * _set_str() both see a defined value. */
    self->lineterminator = NULL;

    /* Take references to the explicitly supplied options so that every
     * non-NULL option pointer is owned, whether it came from the caller or
     * from the attribute lookups below; all are released at `err`. */
    Py_XINCREF(delimiter);
    Py_XINCREF(doublequote);
    Py_XINCREF(escapechar);
    Py_XINCREF(lineterminator);
    Py_XINCREF(quotechar);
    Py_XINCREF(quoting);
    Py_XINCREF(skipinitialspace);
    Py_XINCREF(strict);
    if (dialect != NULL) {
/* Fill an option that was not given explicitly from the base dialect. */
#define DIALECT_GETATTR(v, n) \
        if (v == NULL) \
            v = PyObject_GetAttrString(dialect, n)
        DIALECT_GETATTR(delimiter, "delimiter");
        DIALECT_GETATTR(doublequote, "doublequote");
        DIALECT_GETATTR(escapechar, "escapechar");
        DIALECT_GETATTR(lineterminator, "lineterminator");
        DIALECT_GETATTR(quotechar, "quotechar");
        DIALECT_GETATTR(quoting, "quoting");
        DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
        DIALECT_GETATTR(strict, "strict");
        /* A failed lookup leaves the option NULL, so the default applies;
         * discard whatever error the lookups raised. */
        PyErr_Clear();
    }

    /* check types and convert to C values */
/* Run one _set_* helper and jump to cleanup if it reports failure. */
#define DIASET(meth, name, target, src, dflt) \
    if (meth(name, target, src, dflt)) \
        goto err
    DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
    DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
    DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
    DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
    DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
    DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
    DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
    DIASET(_set_bool, "strict", &self->strict, strict, 0);

    /* validate options */
    if (dialect_check_quoting(self->quoting))
        goto err;
    if (self->delimiter == 0) {
        PyErr_SetString(PyExc_TypeError, "delimiter must be set");
        goto err;
    }
    /* quotechar=None with no quoting value available means "no quoting". */
    if (quotechar == Py_None && quoting == NULL)
        self->quoting = QUOTE_NONE;
    if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
        PyErr_SetString(PyExc_TypeError,
                        "quotechar must be set if quoting enabled");
        goto err;
    }
    if (self->lineterminator == 0) {
        PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
        goto err;
    }

    /* Success: the extra reference taken here is the one returned.  The
     * unconditional decref below balances it; on the failure paths that
     * same decref drops the only reference and frees the object. */
    ret = (PyObject *)self;
    Py_INCREF(self);
err:
    Py_XDECREF(self);
    Py_XDECREF(dialect);
    Py_XDECREF(delimiter);
    Py_XDECREF(doublequote);
    Py_XDECREF(escapechar);
    Py_XDECREF(lineterminator);
    Py_XDECREF(quotechar);
    Py_XDECREF(quoting);
    Py_XDECREF(skipinitialspace);
    Py_XDECREF(strict);
    return ret;
}
 455
 456
/* Doc string attached to the Dialect type below. */
PyDoc_STRVAR(Dialect_Type_doc,
"CSV dialect\n"
"\n"
"The Dialect type records CSV parsing and generation options.\n");

/* Type object for _csv.Dialect.  Instances are built by dialect_new() and
 * destroyed by Dialect_dealloc(); attributes come from Dialect_memberlist
 * and Dialect_getsetlist.  All other slots are left empty here. */
static PyTypeObject Dialect_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_csv.Dialect",                         /* tp_name */
    sizeof(DialectObj),                     /* tp_basicsize */
    0,                                      /* tp_itemsize */
    /*  methods  */
    (destructor)Dialect_dealloc,            /* tp_dealloc */
    (printfunc)0,                           /* tp_print */
    (getattrfunc)0,                         /* tp_getattr */
    (setattrfunc)0,                         /* tp_setattr */
    (cmpfunc)0,                             /* tp_compare */
    (reprfunc)0,                            /* tp_repr */
    0,                                      /* tp_as_number */
    0,                                      /* tp_as_sequence */
    0,                                      /* tp_as_mapping */
    (hashfunc)0,                            /* tp_hash */
    (ternaryfunc)0,                         /* tp_call */
    (reprfunc)0,                                /* tp_str */
    0,                                      /* tp_getattro */
    0,                                      /* tp_setattro */
    0,                                      /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    Dialect_Type_doc,                       /* tp_doc */
    0,                                      /* tp_traverse */
    0,                                      /* tp_clear */
    0,                                      /* tp_richcompare */
    0,                                      /* tp_weaklistoffset */
    0,                                      /* tp_iter */
    0,                                      /* tp_iternext */
    0,                                          /* tp_methods */
    Dialect_memberlist,                     /* tp_members */
    Dialect_getsetlist,                     /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    dialect_new,                                /* tp_new */
    0,                                          /* tp_free */
};
 504
 505 /*
 506  * Return an instance of the dialect type, given a Python instance or kwarg
 507  * description of the dialect
 508  */
 509 static PyObject *
 510 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
 511 {
 512     PyObject *ctor_args;
 513     PyObject *dialect;
 514
 515     ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
 516     if (ctor_args == NULL)
 517         return NULL;
 518     dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
 519     Py_DECREF(ctor_args);
 520     return dialect;
 521 }
 522
 523 /*
 524  * READER
 525  */
 526 static int
 527 parse_save_field(ReaderObj *self)
 528 {
 529     PyObject *field;
 530
 531     field = PyString_FromStringAndSize(self->field, self->field_len);
 532     if (field == NULL)
 533         return -1;
 534     self->field_len = 0;
 535     if (self->numeric_field) {
 536         PyObject *tmp;
 537
 538         self->numeric_field = 0;
 539         tmp = PyNumber_Float(field);
 540         if (tmp == NULL) {
 541             Py_DECREF(field);
 542             return -1;
 543         }
 544         Py_DECREF(field);
 545         field = tmp;
 546     }
 547     PyList_Append(self->fields, field);
 548     Py_DECREF(field);
 549     return 0;
 550 }
 551
 552 static int
 553 parse_grow_buff(ReaderObj *self)
 554 {
 555     if (self->field_size == 0) {
 556         self->field_size = 4096;
 557         if (self->field != NULL)
 558             PyMem_Free(self->field);
 559         self->field = PyMem_Malloc(self->field_size);
 560     }
 561     else {
 562         if (self->field_size > INT_MAX / 2) {
 563             PyErr_NoMemory();
 564             return 0;
 565         }
 566         self->field_size *= 2;
 567         self->field = PyMem_Realloc(self->field, self->field_size);
 568     }
 569     if (self->field == NULL) {
 570         PyErr_NoMemory();
 571         return 0;
 572     }
 573     return 1;
 574 }
 575
 576 static int
 577 parse_add_char(ReaderObj *self, char c)
 578 {
 579     if (self->field_len >= field_limit) {
 580         PyErr_Format(error_obj, "field larger than field limit (%ld)",
 581                      field_limit);
 582         return -1;
 583     }
 584     if (self->field_len == self->field_size && !parse_grow_buff(self))
 585         return -1;
 586     self->field[self->field_len++] = c;
 587     return 0;
 588 }
 589
/*
 * Advance the reader's state machine by one input character.
 *
 * `c` is the next character of the current line.  The value '\0' is used
 * as an end-of-line marker: Reader_iternext() passes it once after the
 * last character of every line and rejects lines that contain a real NUL.
 *
 * Completed fields are appended to self->fields via parse_save_field().
 * The state is START_RECORD again once a record has been completed.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
parse_process_char(ReaderObj *self, char c)
{
    DialectObj *dialect = self->dialect;

    switch (self->state) {
    case START_RECORD:
        /* start of record */
        if (c == '\0')
            /* empty line - return [] */
            break;
        else if (c == '\n' || c == '\r') {
            self->state = EAT_CRNL;
            break;
        }
        /* normal character - handle as START_FIELD */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* expecting field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* save empty field - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            /* start quoted field */
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == ' ' && dialect->skipinitialspace)
            /* ignore space at start of field */
            ;
        else if (c == dialect->delimiter) {
            /* save empty field */
            if (parse_save_field(self) < 0)
                return -1;
        }
        else {
            /* begin new unquoted field */
            if (dialect->quoting == QUOTE_NONNUMERIC)
                self->numeric_field = 1;
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        break;

    case ESCAPED_CHAR:
        /* an escape at end of line stores a newline in the field */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case IN_FIELD:
        /* in unquoted field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case IN_QUOTED_FIELD:
        /* in quoted field */
        if (c == '\0')
            /* end of line inside quotes: stay in this state so the field
             * continues on the next line */
            ;
        else if (c == dialect->escapechar) {
            /* Possible escape character */
            self->state = ESCAPE_IN_QUOTED_FIELD;
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            if (dialect->doublequote) {
                /* doublequote; " represented by "" */
                self->state = QUOTE_IN_QUOTED_FIELD;
            }
            else {
                /* end of quote part of field */
                self->state = IN_FIELD;
            }
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* an escape at end of line stores a newline in the field */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* doublequote - seen a quote in an quoted field */
        if (dialect->quoting != QUOTE_NONE &&
            c == dialect->quotechar) {
            /* save "" as " */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (!dialect->strict) {
            /* non-strict: keep the character and continue unquoted */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        else {
            /* illegal */
            PyErr_Format(error_obj, "'%c' expected after '%c'",
                            dialect->delimiter,
                            dialect->quotechar);
            return -1;
        }
        break;

    case EAT_CRNL:
        /* after a line break only further line-break characters or the
         * end-of-line marker are acceptable */
        if (c == '\n' || c == '\r')
            ;
        else if (c == '\0')
            self->state = START_RECORD;
        else {
            PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
            return -1;
        }
        break;

    }
    return 0;
}
 759
 760 static int
 761 parse_reset(ReaderObj *self)
 762 {
 763     Py_XDECREF(self->fields);
 764     self->fields = PyList_New(0);
 765     if (self->fields == NULL)
 766         return -1;
 767     self->field_len = 0;
 768     self->state = START_RECORD;
 769     self->numeric_field = 0;
 770     return 0;
 771 }
 772
 773 static PyObject *
 774 Reader_iternext(ReaderObj *self)
 775 {
 776     PyObject *lineobj;
 777     PyObject *fields = NULL;
 778     char *line, c;
 779     int linelen;
 780
 781     if (parse_reset(self) < 0)
 782         return NULL;
 783     do {
 784         lineobj = PyIter_Next(self->input_iter);
 785         if (lineobj == NULL) {
 786             /* End of input OR exception */
 787             if (!PyErr_Occurred() && self->field_len != 0)
 788                 PyErr_Format(error_obj,
 789                              "newline inside string");
 790             return NULL;
 791         }
 792         ++self->line_num;
 793
 794         line = PyString_AsString(lineobj);
 795         linelen = PyString_Size(lineobj);
 796
 797         if (line == NULL || linelen < 0) {
 798             Py_DECREF(lineobj);
 799             return NULL;
 800         }
 801         while (linelen--) {
 802             c = *line++;
 803             if (c == '\0') {
 804                 Py_DECREF(lineobj);
 805                 PyErr_Format(error_obj,
 806                              "line contains NULL byte");
 807                 goto err;
 808             }
 809             if (parse_process_char(self, c) < 0) {
 810                 Py_DECREF(lineobj);
 811                 goto err;
 812             }
 813         }
 814         Py_DECREF(lineobj);
 815         if (parse_process_char(self, 0) < 0)
 816             goto err;
 817     } while (self->state != START_RECORD);
 818
 819     fields = self->fields;
 820     self->fields = NULL;
 821 err:
 822     return fields;
 823 }
 824
 825 static void
 826 Reader_dealloc(ReaderObj *self)
 827 {
 828     PyObject_GC_UnTrack(self);
 829     Py_XDECREF(self->dialect);
 830     Py_XDECREF(self->input_iter);
 831     Py_XDECREF(self->fields);
 832     if (self->field != NULL)
 833         PyMem_Free(self->field);
 834     PyObject_GC_Del(self);
 835 }
 836
/* tp_traverse for the reader: report each object reference the reader
 * holds to the visitor.  Py_VISIT returns early with the visitor's result
 * if that result is non-zero. */
static int
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->input_iter);
    Py_VISIT(self->fields);
    return 0;
}
 845
/* tp_clear for the reader: drop every object reference the reader holds,
 * leaving the corresponding members NULL.  Always returns 0. */
static int
Reader_clear(ReaderObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->input_iter);
    Py_CLEAR(self->fields);
    return 0;
}
 854
/* Doc string attached to the reader type. */
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);

/* The reader defines no methods of its own; the table holds only the
 * terminating entry. */
static struct PyMethodDef Reader_methods[] = {
    { NULL, NULL }
};
/* Byte offset of field `x` inside ReaderObj. */
#define R_OFF(x) offsetof(ReaderObj, x)

/* Read-only attributes served directly from ReaderObj fields. */
static struct PyMemberDef Reader_memberlist[] = {
    { "dialect", T_OBJECT, R_OFF(dialect), RO },
    { "line_num", T_ULONG, R_OFF(line_num), RO },
    { NULL }
};
 872
 873
 874 static PyTypeObject Reader_Type = {
 875     PyVarObject_HEAD_INIT(NULL, 0)
 876     "_csv.reader",                          /*tp_name*/
 877     sizeof(ReaderObj),                      /*tp_basicsize*/
 878     0,                                      /*tp_itemsize*/
 879     /* methods */
 880     (destructor)Reader_dealloc,             /*tp_dealloc*/
 881     (printfunc)0,                           /*tp_print*/
 882     (getattrfunc)0,                         /*tp_getattr*/
 883     (setattrfunc)0,                         /*tp_setattr*/
 884     (cmpfunc)0,                             /*tp_compare*/
 885     (reprfunc)0,                            /*tp_repr*/
 886     0,                                      /*tp_as_number*/
 887     0,                                      /*tp_as_sequence*/
 888     0,                                      /*tp_as_mapping*/
 889     (hashfunc)0,                            /*tp_hash*/
 890     (ternaryfunc)0,                         /*tp_call*/
 891     (reprfunc)0,                                /*tp_str*/
 892     0,                                      /*tp_getattro*/
 893     0,                                      /*tp_setattro*/
 894     0,                                      /*tp_as_buffer*/
 895     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
 896         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
 897     Reader_Type_doc,                        /*tp_doc*/
 898     (traverseproc)Reader_traverse,          /*tp_traverse*/
 899     (inquiry)Reader_clear,                  /*tp_clear*/
 900     0,                                      /*tp_richcompare*/
 901     0,                                      /*tp_weaklistoffset*/
 902     PyObject_SelfIter,                          /*tp_iter*/
 903     (getiterfunc)Reader_iternext,           /*tp_iternext*/
 904     Reader_methods,                         /*tp_methods*/
 905     Reader_memberlist,                      /*tp_members*/
 906     0,                                      /*tp_getset*/
 907
 908 };
 909
 910 static PyObject *
 911 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
 912 {
 913     PyObject * iterator, * dialect = NULL;
 914     ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
 915
 916     if (!self)
 917         return NULL;
 918
 919     self->dialect = NULL;
 920     self->fields = NULL;
 921     self->input_iter = NULL;
 922     self->field = NULL;
 923     self->field_size = 0;
 924     self->line_num = 0;
 925
 926     if (parse_reset(self) < 0) {
 927         Py_DECREF(self);
 928         return NULL;
 929     }
 930
 931     if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
 932         Py_DECREF(self);
 933         return NULL;
 934     }
 935     self->input_iter = PyObject_GetIter(iterator);
 936     if (self->input_iter == NULL) {
 937         PyErr_SetString(PyExc_TypeError,
 938                         "argument 1 must be an iterator");
 939         Py_DECREF(self);
 940         return NULL;
 941     }
 942     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
 943     if (self->dialect == NULL) {
 944         Py_DECREF(self);
 945         return NULL;
 946     }
 947
 948     PyObject_GC_Track(self);
 949     return (PyObject *)self;
 950 }
 951
 952 /*
 953  * WRITER
 954  */
 955 /* ---------------------------------------------------------------- */
 956 static void
 957 join_reset(WriterObj *self)
 958 {
 959     self->rec_len = 0;
 960     self->num_fields = 0;
 961 }
 962
 963 #define MEM_INCR 32768
 964
 965 /* Calculate new record length or append field to record.  Return new
 966  * record length.
 967  */
 968 static int
 969 join_append_data(WriterObj *self, char *field, int quote_empty,
 970                  int *quoted, int copy_phase)
 971 {
 972     DialectObj *dialect = self->dialect;
 973     int i, rec_len;
 974     char *lineterm;
 975
 976 #define ADDCH(c) \
 977     do {\
 978         if (copy_phase) \
 979             self->rec[rec_len] = c;\
 980         rec_len++;\
 981     } while(0)
 982
 983     lineterm = PyString_AsString(dialect->lineterminator);
 984     if (lineterm == NULL)
 985         return -1;
 986
 987     rec_len = self->rec_len;
 988
 989     /* If this is not the first field we need a field separator */
 990     if (self->num_fields > 0)
 991         ADDCH(dialect->delimiter);
 992
 993     /* Handle preceding quote */
 994     if (copy_phase && *quoted)
 995         ADDCH(dialect->quotechar);
 996
 997     /* Copy/count field data */
 998     for (i = 0;; i++) {
 999         char c = field[i];
1000         int want_escape = 0;
1001
1002         if (c == '\0')
1003             break;
1004
1005         if (c == dialect->delimiter ||
1006             c == dialect->escapechar ||
1007             c == dialect->quotechar ||
1008             strchr(lineterm, c)) {
1009             if (dialect->quoting == QUOTE_NONE)
1010                 want_escape = 1;
1011             else {
1012                 if (c == dialect->quotechar) {
1013                     if (dialect->doublequote)
1014                         ADDCH(dialect->quotechar);
1015                     else
1016                         want_escape = 1;
1017                 }
1018                 if (!want_escape)
1019                     *quoted = 1;
1020             }
1021             if (want_escape) {
1022                 if (!dialect->escapechar) {
1023                     PyErr_Format(error_obj,
1024                                  "need to escape, but no escapechar set");
1025                     return -1;
1026                 }
1027                 ADDCH(dialect->escapechar);
1028             }
1029         }
1030         /* Copy field character into record buffer.
1031          */
1032         ADDCH(c);
1033     }
1034
1035     /* If field is empty check if it needs to be quoted.
1036      */
1037     if (i == 0 && quote_empty) {
1038         if (dialect->quoting == QUOTE_NONE) {
1039             PyErr_Format(error_obj,
1040                          "single empty field record must be quoted");
1041             return -1;
1042         }
1043         else
1044             *quoted = 1;
1045     }
1046
1047     if (*quoted) {
1048         if (copy_phase)
1049             ADDCH(dialect->quotechar);
1050         else
1051             rec_len += 2;
1052     }
1053     return rec_len;
1054 #undef ADDCH
1055 }
1056
1057 static int
1058 join_check_rec_size(WriterObj *self, int rec_len)
1059 {
1060
1061     if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) {
1062         PyErr_NoMemory();
1063         return 0;
1064     }
1065
1066     if (rec_len > self->rec_size) {
1067         if (self->rec_size == 0) {
1068             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1069             if (self->rec != NULL)
1070                 PyMem_Free(self->rec);
1071             self->rec = PyMem_Malloc(self->rec_size);
1072         }
1073         else {
1074             char *old_rec = self->rec;
1075
1076             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1077             self->rec = PyMem_Realloc(self->rec, self->rec_size);
1078             if (self->rec == NULL)
1079                 PyMem_Free(old_rec);
1080         }
1081         if (self->rec == NULL) {
1082             PyErr_NoMemory();
1083             return 0;
1084         }
1085     }
1086     return 1;
1087 }
1088
1089 static int
1090 join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1091 {
1092     int rec_len;
1093
1094     rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1095     if (rec_len < 0)
1096         return 0;
1097
1098     /* grow record buffer if necessary */
1099     if (!join_check_rec_size(self, rec_len))
1100         return 0;
1101
1102     self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1103     self->num_fields++;
1104
1105     return 1;
1106 }
1107
1108 static int
1109 join_append_lineterminator(WriterObj *self)
1110 {
1111     int terminator_len;
1112     char *terminator;
1113
1114     terminator_len = PyString_Size(self->dialect->lineterminator);
1115     if (terminator_len == -1)
1116         return 0;
1117
1118     /* grow record buffer if necessary */
1119     if (!join_check_rec_size(self, self->rec_len + terminator_len))
1120         return 0;
1121
1122     terminator = PyString_AsString(self->dialect->lineterminator);
1123     if (terminator == NULL)
1124         return 0;
1125     memmove(self->rec + self->rec_len, terminator, terminator_len);
1126     self->rec_len += terminator_len;
1127
1128     return 1;
1129 }
1130
1131 PyDoc_STRVAR(csv_writerow_doc,
1132 "writerow(sequence)\n"
1133 "\n"
1134 "Construct and write a CSV record from a sequence of fields.  Non-string\n"
1135 "elements will be converted to string.");
1136
1137 static PyObject *
1138 csv_writerow(WriterObj *self, PyObject *seq)
1139 {
1140     DialectObj *dialect = self->dialect;
1141     int len, i;
1142
1143     if (!PySequence_Check(seq))
1144         return PyErr_Format(error_obj, "sequence expected");
1145
1146     len = PySequence_Length(seq);
1147     if (len < 0)
1148         return NULL;
1149
1150     /* Join all fields in internal buffer.
1151      */
1152     join_reset(self);
1153     for (i = 0; i < len; i++) {
1154         PyObject *field;
1155         int append_ok;
1156         int quoted;
1157
1158         field = PySequence_GetItem(seq, i);
1159         if (field == NULL)
1160             return NULL;
1161
1162         switch (dialect->quoting) {
1163         case QUOTE_NONNUMERIC:
1164             quoted = !PyNumber_Check(field);
1165             break;
1166         case QUOTE_ALL:
1167             quoted = 1;
1168             break;
1169         default:
1170             quoted = 0;
1171             break;
1172         }
1173
1174         if (PyString_Check(field)) {
1175             append_ok = join_append(self,
1176                                     PyString_AS_STRING(field),
1177                                     &quoted, len == 1);
1178             Py_DECREF(field);
1179         }
1180         else if (field == Py_None) {
1181             append_ok = join_append(self, "", &quoted, len == 1);
1182             Py_DECREF(field);
1183         }
1184         else {
1185             PyObject *str;
1186
1187             str = PyObject_Str(field);
1188             Py_DECREF(field);
1189             if (str == NULL)
1190                 return NULL;
1191
1192             append_ok = join_append(self, PyString_AS_STRING(str),
1193                                     &quoted, len == 1);
1194             Py_DECREF(str);
1195         }
1196         if (!append_ok)
1197             return NULL;
1198     }
1199
1200     /* Add line terminator.
1201      */
1202     if (!join_append_lineterminator(self))
1203         return 0;
1204
1205     return PyObject_CallFunction(self->writeline,
1206                                  "(s#)", self->rec, self->rec_len);
1207 }
1208
1209 PyDoc_STRVAR(csv_writerows_doc,
1210 "writerows(sequence of sequences)\n"
1211 "\n"
1212 "Construct and write a series of sequences to a csv file.  Non-string\n"
1213 "elements will be converted to string.");
1214
1215 static PyObject *
1216 csv_writerows(WriterObj *self, PyObject *seqseq)
1217 {
1218     PyObject *row_iter, *row_obj, *result;
1219
1220     row_iter = PyObject_GetIter(seqseq);
1221     if (row_iter == NULL) {
1222         PyErr_SetString(PyExc_TypeError,
1223                         "writerows() argument must be iterable");
1224         return NULL;
1225     }
1226     while ((row_obj = PyIter_Next(row_iter))) {
1227         result = csv_writerow(self, row_obj);
1228         Py_DECREF(row_obj);
1229         if (!result) {
1230             Py_DECREF(row_iter);
1231             return NULL;
1232         }
1233         else
1234              Py_DECREF(result);
1235     }
1236     Py_DECREF(row_iter);
1237     if (PyErr_Occurred())
1238         return NULL;
1239     Py_INCREF(Py_None);
1240     return Py_None;
1241 }
1242
/* Method table of the writer type (installed through tp_methods of
 * Writer_Type).  Both methods take exactly one argument (METH_O). */
static struct PyMethodDef Writer_methods[] = {
    { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
    { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
    { NULL, NULL }      /* sentinel */
};
1248
/* Byte offset of member x inside WriterObj. */
#define W_OFF(x) offsetof(WriterObj, x)

/* Attribute table of the writer type (installed through tp_members of
 * Writer_Type): exposes the dialect object as a read-only attribute. */
static struct PyMemberDef Writer_memberlist[] = {
    { "dialect", T_OBJECT, W_OFF(dialect), RO },
    { NULL }            /* sentinel */
};
1255
/* tp_dealloc for writer objects: stop GC tracking, drop the references to
 * the dialect and the write callable, free the record buffer and finally
 * release the object itself. */
static void
Writer_dealloc(WriterObj *self)
{
    /* Untrack first so the collector never sees a half-torn-down object. */
    PyObject_GC_UnTrack(self);
    /* XDECREF: either member may still be NULL if construction failed
     * part-way through csv_writer(). */
    Py_XDECREF(self->dialect);
    Py_XDECREF(self->writeline);
    if (self->rec != NULL)
        PyMem_Free(self->rec);
    PyObject_GC_Del(self);
}
1266
/* tp_traverse for writer objects: report the two object references held
 * by a writer (dialect and write callable) to the garbage collector. */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->writeline);
    return 0;
}
1274
/* tp_clear for writer objects: release the dialect and the write callable.
 * Py_CLEAR resets each member to NULL before dropping the reference.
 * Always returns 0. */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->writeline);
    return 0;
}
1282
/* Docstring of the writer type (tp_doc slot of Writer_Type). */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);

/* Type object for "_csv.writer".  The type takes part in cyclic garbage
 * collection (Py_TPFLAGS_HAVE_GC with traverse/clear handlers) and can be
 * subclassed (Py_TPFLAGS_BASETYPE).  The initializer is positional, so the
 * order of the entries must match the PyTypeObject slot order. */
static PyTypeObject Writer_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_csv.writer",                          /*tp_name*/
    sizeof(WriterObj),                      /*tp_basicsize*/
    0,                                      /*tp_itemsize*/
    /* methods */
    (destructor)Writer_dealloc,             /*tp_dealloc*/
    (printfunc)0,                           /*tp_print*/
    (getattrfunc)0,                         /*tp_getattr*/
    (setattrfunc)0,                         /*tp_setattr*/
    (cmpfunc)0,                             /*tp_compare*/
    (reprfunc)0,                            /*tp_repr*/
    0,                                      /*tp_as_number*/
    0,                                      /*tp_as_sequence*/
    0,                                      /*tp_as_mapping*/
    (hashfunc)0,                            /*tp_hash*/
    (ternaryfunc)0,                         /*tp_call*/
    (reprfunc)0,                            /*tp_str*/
    0,                                      /*tp_getattro*/
    0,                                      /*tp_setattro*/
    0,                                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
    Writer_Type_doc,                        /*tp_doc*/
    (traverseproc)Writer_traverse,          /*tp_traverse*/
    (inquiry)Writer_clear,                  /*tp_clear*/
    0,                                      /*tp_richcompare*/
    0,                                      /*tp_weaklistoffset*/
    (getiterfunc)0,                         /*tp_iter*/
    (getiterfunc)0,                         /*tp_iternext*/
    Writer_methods,                         /*tp_methods*/
    Writer_memberlist,                      /*tp_members*/
    0,                                      /*tp_getset*/
};
1324
1325 static PyObject *
1326 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1327 {
1328     PyObject * output_file, * dialect = NULL;
1329     WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1330
1331     if (!self)
1332         return NULL;
1333
1334     self->dialect = NULL;
1335     self->writeline = NULL;
1336
1337     self->rec = NULL;
1338     self->rec_size = 0;
1339     self->rec_len = 0;
1340     self->num_fields = 0;
1341
1342     if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1343         Py_DECREF(self);
1344         return NULL;
1345     }
1346     self->writeline = PyObject_GetAttrString(output_file, "write");
1347     if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1348         PyErr_SetString(PyExc_TypeError,
1349                         "argument 1 must have a \"write\" method");
1350         Py_DECREF(self);
1351         return NULL;
1352     }
1353     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1354     if (self->dialect == NULL) {
1355         Py_DECREF(self);
1356         return NULL;
1357     }
1358     PyObject_GC_Track(self);
1359     return (PyObject *)self;
1360 }
1361
1362 /*
1363  * DIALECT REGISTRY
1364  */
/* list_dialects(): return the keys of the module-level `dialects`
 * dictionary, as produced by PyDict_Keys().  `args` is unused. */
static PyObject *
csv_list_dialects(PyObject *module, PyObject *args)
{
    return PyDict_Keys(dialects);
}
1370
1371 static PyObject *
1372 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1373 {
1374     PyObject *name_obj, *dialect_obj = NULL;
1375     PyObject *dialect;
1376
1377     if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1378         return NULL;
1379     if (!IS_BASESTRING(name_obj)) {
1380         PyErr_SetString(PyExc_TypeError,
1381                         "dialect name must be a string or unicode");
1382         return NULL;
1383     }
1384     dialect = _call_dialect(dialect_obj, kwargs);
1385     if (dialect == NULL)
1386         return NULL;
1387     if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1388         Py_DECREF(dialect);
1389         return NULL;
1390     }
1391     Py_DECREF(dialect);
1392     Py_INCREF(Py_None);
1393     return Py_None;
1394 }
1395
1396 static PyObject *
1397 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1398 {
1399     if (PyDict_DelItem(dialects, name_obj) < 0)
1400         return PyErr_Format(error_obj, "unknown dialect");
1401     Py_INCREF(Py_None);
1402     return Py_None;
1403 }
1404
/* get_dialect(name): thin wrapper that forwards name to
 * get_dialect_from_registry() and returns its result unchanged. */
static PyObject *
csv_get_dialect(PyObject *module, PyObject *name_obj)
{
    return get_dialect_from_registry(name_obj);
}
1410
1411 static PyObject *
1412 csv_field_size_limit(PyObject *module, PyObject *args)
1413 {
1414     PyObject *new_limit = NULL;
1415     long old_limit = field_limit;
1416
1417     if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1418         return NULL;
1419     if (new_limit != NULL) {
1420         if (!PyInt_Check(new_limit)) {
1421             PyErr_Format(PyExc_TypeError,
1422                          "limit must be an integer");
1423             return NULL;
1424         }
1425         field_limit = PyInt_AsLong(new_limit);
1426     }
1427     return PyInt_FromLong(old_limit);
1428 }
1429
1430 /*
1431  * MODULE
1432  */
1433
/* Module docstring; passed to Py_InitModule3() in init_csv(). */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305.  Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail.  The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings.  When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
"    class excel:\n"
"        delimiter = ','\n"
"        quotechar = '\"'\n"
"        escapechar = None\n"
"        doublequote = True\n"
"        skipinitialspace = False\n"
"        lineterminator = '\\r\\n'\n"
"        quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
"    * quotechar - specifies a one-character string to use as the \n"
"        quoting character.  It defaults to '\"'.\n"
"    * delimiter - specifies a one-character string to use as the \n"
"        field separator.  It defaults to ','.\n"
"    * skipinitialspace - specifies how to interpret whitespace which\n"
"        immediately follows a delimiter.  It defaults to False, which\n"
"        means that whitespace immediately following a delimiter is part\n"
"        of the following field.\n"
"    * lineterminator -  specifies the character sequence which should \n"
"        terminate rows.\n"
"    * quoting - controls when quotes should be generated by the writer.\n"
"        It can take on any of the following module constants:\n"
"\n"
"        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
"            field contains either the quotechar or the delimiter\n"
"        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
"        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
"            fields which do not parse as integers or floating point\n"
"            numbers.\n"
"        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
"    * escapechar - specifies a one-character string used to escape \n"
"        the delimiter when quoting is set to QUOTE_NONE.\n"
"    * doublequote - controls the handling of quotes inside fields.  When\n"
"        True, two consecutive quotes are interpreted as one during read,\n"
"        and when writing, each quote character embedded in the data is\n"
"        written as two quotes\n");
1491
/* Docstring of the module-level "reader" function (see csv_methods). */
PyDoc_STRVAR(csv_reader_doc,
"    csv_reader = reader(iterable [, dialect='excel']\n"
"                        [optional keyword args])\n"
"    for row in csv_reader:\n"
"        process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list.  The\n"
"optional \"dialect\" parameter is discussed below.  The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator.  Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines):\n");
1506
/* Docstring of the module-level "writer" function (see csv_methods). */
PyDoc_STRVAR(csv_writer_doc,
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    for row in sequence:\n"
"        csv_writer.writerow(row)\n"
"\n"
"    [or]\n"
"\n"
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1520
1521 PyDoc_STRVAR(csv_list_dialects_doc,
1522 "Return a list of all know dialect names.\n"
1523 "    names = csv.list_dialects()");
1524
/* Docstring of the module-level "get_dialect" function (see
 * csv_methods). */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
"    dialect = csv.get_dialect(name)");
1528
1529 PyDoc_STRVAR(csv_register_dialect_doc,
1530 "Create a mapping from a string name to a dialect class.\n"
1531 "    dialect = csv.register_dialect(name, dialect)");
1532
/* Docstring of the module-level "unregister_dialect" function (see
 * csv_methods). */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
"    csv.unregister_dialect(name)");
1536
/* Docstring of the module-level "field_size_limit" function (see
 * csv_methods). */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
"    csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1543
/* Function table of the _csv module, handed to Py_InitModule3() in
 * init_csv().  The flags column states how each function receives its
 * arguments: positional + keyword, none, exactly one object (METH_O), or
 * positional only. */
static struct PyMethodDef csv_methods[] = {
    { "reader", (PyCFunction)csv_reader,
        METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
    { "writer", (PyCFunction)csv_writer,
        METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
    { "list_dialects", (PyCFunction)csv_list_dialects,
        METH_NOARGS, csv_list_dialects_doc},
    { "register_dialect", (PyCFunction)csv_register_dialect,
        METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
    { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
        METH_O, csv_unregister_dialect_doc},
    { "get_dialect", (PyCFunction)csv_get_dialect,
        METH_O, csv_get_dialect_doc},
    { "field_size_limit", (PyCFunction)csv_field_size_limit,
        METH_VARARGS, csv_field_size_limit_doc},
    { NULL, NULL }      /* sentinel */
};
1561
1562 PyMODINIT_FUNC
1563 init_csv(void)
1564 {
1565     PyObject *module;
1566     StyleDesc *style;
1567
1568     if (PyType_Ready(&Dialect_Type) < 0)
1569         return;
1570
1571     if (PyType_Ready(&Reader_Type) < 0)
1572         return;
1573
1574     if (PyType_Ready(&Writer_Type) < 0)
1575         return;
1576
1577     /* Create the module and add the functions */
1578     module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1579     if (module == NULL)
1580         return;
1581
1582     /* Add version to the module. */
1583     if (PyModule_AddStringConstant(module, "__version__",
1584                                    MODULE_VERSION) == -1)
1585         return;
1586
1587     /* Add _dialects dictionary */
1588     dialects = PyDict_New();
1589     if (dialects == NULL)
1590         return;
1591     if (PyModule_AddObject(module, "_dialects", dialects))
1592         return;
1593
1594     /* Add quote styles into dictionary */
1595     for (style = quote_styles; style->name; style++) {
1596         if (PyModule_AddIntConstant(module, style->name,
1597                                     style->style) == -1)
1598             return;
1599     }
1600
1601     /* Add the Dialect type */
1602     Py_INCREF(&Dialect_Type);
1603     if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1604         return;
1605
1606     /* Add the CSV exception object to the module. */
1607     error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1608     if (error_obj == NULL)
1609         return;
1610     PyModule_AddObject(module, "Error", error_obj);
1611 }