AppPkg/Applications/Python/Python-2.7.2/Objects/stringlib/string_format.h

   1 /*
   2     string_format.h -- implementation of string.format().
   3
   4     It uses the Objects/stringlib conventions, so that it can be
   5     compiled for both unicode and string objects.
   6 */
   7
   8
   9 /* Defines for Python 2.6 compatibility */
  10 #if PY_VERSION_HEX < 0x03000000  /* only for versions before 3.0 */
  11 #define PyLong_FromSsize_t _PyLong_FromSsize_t  /* lets getitem_idx use the un-prefixed name */
  12 #endif
  13
  14 /* Defines for more efficiently reallocating the string buffer */
  15 #define INITIAL_SIZE_INCREMENT 100  /* first value of OutputString.size_increment (see output_initialize) */
  16 #define SIZE_MULTIPLIER 2  /* output_extend multiplies size_increment by this after a successful resize */
  17 #define MAX_SIZE_INCREMENT  3200  /* size_increment stops growing once it is no longer below this */
  18
  19
  20 /************************************************************************/
  21 /***********   Global data structures and forward declarations  *********/
  22 /************************************************************************/
  23
  24 /*
  25    A SubString consists of the characters between two string or
  26    unicode pointers.
  27 */
  28 typedef struct {
  29     STRINGLIB_CHAR *ptr;  /* first character; NULL means "no string" (see SubString_init) */
  30     STRINGLIB_CHAR *end;  /* one past the last character, i.e. ptr + length; NULL when ptr is NULL */
  31 } SubString;
  32
  33
  34 typedef enum {
  35     ANS_INIT,  /* starting state set by AutoNumber_Init; no numbering mode chosen yet */
  36     ANS_AUTO,  /* chosen by field_name_split when the first numeric field has an empty name */
  37     ANS_MANUAL  /* chosen by field_name_split when the first numeric field has an explicit number */
  38 } AutoNumberState;   /* Keep track if we're auto-numbering fields */
  39
  40 /* Keeps track of our auto-numbering state, and which number field we're on */
  41 typedef struct {
  42     AutoNumberState an_state;  /* numbering mode currently in effect */
  43     int an_field_number;  /* index handed to the next empty field name, then incremented */
  44 } AutoNumber;
  45
  46
  47 /* forward declaration for recursion: output_markup calls build_string
        to expand a format_spec that itself contains replacement fields */
  48 static PyObject *
  49 build_string(SubString *input, PyObject *args, PyObject *kwargs,
  50              int recursion_depth, AutoNumber *auto_number);
  51
  52
  53
  54 /************************************************************************/
  55 /**************************  Utility  functions  ************************/
  56 /************************************************************************/
  57
  58 static void
  59 AutoNumber_Init(AutoNumber *auto_number)
  60 {
  61     auto_number->an_state = ANS_INIT;
  62     auto_number->an_field_number = 0;
  63 }
  64
  65 /* fill in a SubString from a pointer and length */
  66 Py_LOCAL_INLINE(void)
  67 SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
  68 {
  69     str->ptr = p;
  70     if (p == NULL)
  71         str->end = NULL;
  72     else
  73         str->end = str->ptr + len;
  74 }
  75
  76 /* return a new string.  if str->ptr is NULL, return None */
  77 Py_LOCAL_INLINE(PyObject *)
  78 SubString_new_object(SubString *str)
  79 {
  80     if (str->ptr == NULL) {
  81         Py_INCREF(Py_None);
  82         return Py_None;
  83     }
  84     return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
  85 }
  86
  87 /* return a new string.  if str->ptr is NULL, return None */
  88 Py_LOCAL_INLINE(PyObject *)
  89 SubString_new_object_or_empty(SubString *str)
  90 {
  91     if (str->ptr == NULL) {
  92         return STRINGLIB_NEW(NULL, 0);
  93     }
  94     return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
  95 }
  96
  97 /* Return 1 if an error has been detected switching between automatic
  98    field numbering and manual field specification, else return 0. Set
  99    ValueError on error. */
 100 static int
 101 autonumber_state_error(AutoNumberState state, int field_name_is_empty)
 102 {
 103     if (state == ANS_MANUAL) {
 104         if (field_name_is_empty) {
 105             PyErr_SetString(PyExc_ValueError, "cannot switch from "
 106                             "manual field specification to "
 107                             "automatic field numbering");
 108             return 1;
 109         }
 110     }
 111     else {
 112         if (!field_name_is_empty) {
 113             PyErr_SetString(PyExc_ValueError, "cannot switch from "
 114                             "automatic field numbering to "
 115                             "manual field specification");
 116             return 1;
 117         }
 118     }
 119     return 0;
 120 }
 121
 122
 123 /************************************************************************/
 124 /***********    Output string management functions       ****************/
 125 /************************************************************************/
 126
 127 typedef struct {
 128     STRINGLIB_CHAR *ptr;  /* next write position inside obj's character buffer */
 129     STRINGLIB_CHAR *end;  /* one past the last character of obj's buffer at its current length */
 130     PyObject *obj;  /* string object created by output_initialize and resized by output_extend */
 131     Py_ssize_t size_increment;  /* extra room output_extend adds on top of what is requested */
 132 } OutputString;
 133
 134 /* initialize an OutputString object, reserving size characters */
 135 static int
 136 output_initialize(OutputString *output, Py_ssize_t size)
 137 {
 138     output->obj = STRINGLIB_NEW(NULL, size);
 139     if (output->obj == NULL)
 140         return 0;
 141
 142     output->ptr = STRINGLIB_STR(output->obj);
 143     output->end = STRINGLIB_LEN(output->obj) + output->ptr;
 144     output->size_increment = INITIAL_SIZE_INCREMENT;
 145
 146     return 1;
 147 }
 148
 149 /*
 150     output_extend reallocates the output string buffer.
 151     It returns a status:  0 for a failed reallocation,
 152     1 for success.
 153 */
 154
 155 static int
 156 output_extend(OutputString *output, Py_ssize_t count)
 157 {
 158     STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
 159     Py_ssize_t curlen = output->ptr - startptr;
 160     Py_ssize_t maxlen = curlen + count + output->size_increment;
 161
 162     if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
 163         return 0;
 164     startptr = STRINGLIB_STR(output->obj);
 165     output->ptr = startptr + curlen;
 166     output->end = startptr + maxlen;
 167     if (output->size_increment < MAX_SIZE_INCREMENT)
 168         output->size_increment *= SIZE_MULTIPLIER;
 169     return 1;
 170 }
 171
 172 /*
 173     output_data dumps characters into our output string
 174     buffer.
 175
 176     In some cases, it has to reallocate the string.
 177
 178     It returns a status:  0 for a failed reallocation,
 179     1 for success.
 180 */
 181 static int
 182 output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
 183 {
 184     if ((count > output->end - output->ptr) && !output_extend(output, count))
 185         return 0;
 186     memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
 187     output->ptr += count;
 188     return 1;
 189 }
 190
 191 /************************************************************************/
 192 /***********  Format string parsing -- integers and identifiers *********/
 193 /************************************************************************/
 194
 195 static Py_ssize_t
 196 get_integer(const SubString *str)
 197 {
 198     Py_ssize_t accumulator = 0;
 199     Py_ssize_t digitval;
 200     Py_ssize_t oldaccumulator;
 201     STRINGLIB_CHAR *p;
 202
 203     /* empty string is an error */
 204     if (str->ptr >= str->end)
 205         return -1;
 206
 207     for (p = str->ptr; p < str->end; p++) {
 208         digitval = STRINGLIB_TODECIMAL(*p);
 209         if (digitval < 0)
 210             return -1;
 211         /*
 212            This trick was copied from old Unicode format code.  It's cute,
 213            but would really suck on an old machine with a slow divide
 214            implementation.  Fortunately, in the normal case we do not
 215            expect too many digits.
 216         */
 217         oldaccumulator = accumulator;
 218         accumulator *= 10;
 219         if ((accumulator+10)/10 != oldaccumulator+1) {
 220             PyErr_Format(PyExc_ValueError,
 221                          "Too many decimal digits in format string");
 222             return -1;
 223         }
 224         accumulator += digitval;
 225     }
 226     return accumulator;
 227 }
 228
 229 /************************************************************************/
 230 /******** Functions to get field objects and specification strings ******/
 231 /************************************************************************/
 232
 233 /* do the equivalent of obj.name */
 234 static PyObject *
 235 getattr(PyObject *obj, SubString *name)
 236 {
 237     PyObject *newobj;
 238     PyObject *str = SubString_new_object(name);
 239     if (str == NULL)
 240         return NULL;
 241     newobj = PyObject_GetAttr(obj, str);
 242     Py_DECREF(str);
 243     return newobj;
 244 }
 245
 246 /* do the equivalent of obj[idx], where obj is a sequence */
 247 static PyObject *
 248 getitem_sequence(PyObject *obj, Py_ssize_t idx)
 249 {
 250     return PySequence_GetItem(obj, idx);
 251 }
 252
 253 /* do the equivalent of obj[idx], where obj is not a sequence */
 254 static PyObject *
 255 getitem_idx(PyObject *obj, Py_ssize_t idx)
 256 {
 257     PyObject *newobj;
 258     PyObject *idx_obj = PyLong_FromSsize_t(idx);
 259     if (idx_obj == NULL)
 260         return NULL;
 261     newobj = PyObject_GetItem(obj, idx_obj);
 262     Py_DECREF(idx_obj);
 263     return newobj;
 264 }
 265
 266 /* do the equivalent of obj[name] */
 267 static PyObject *
 268 getitem_str(PyObject *obj, SubString *name)
 269 {
 270     PyObject *newobj;
 271     PyObject *str = SubString_new_object(name);
 272     if (str == NULL)
 273         return NULL;
 274     newobj = PyObject_GetItem(obj, str);
 275     Py_DECREF(str);
 276     return newobj;
 277 }
 278
 279 typedef struct {
 280     /* the entire string we're parsing.  we assume that someone else
 281        is managing its lifetime, and that it will exist for the
 282        lifetime of the iterator.  can be empty */
 283     SubString str;
 284
 285     /* cursor: the next unread character inside str.  iteration is
            finished once this reaches str.end */
 286     STRINGLIB_CHAR *ptr;
 287 } FieldNameIterator;
 288
 289
 290 static int
 291 FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
 292                        Py_ssize_t len)
 293 {
 294     SubString_init(&self->str, ptr, len);
 295     self->ptr = self->str.ptr;
 296     return 1;
 297 }
 298
 299 static int
 300 _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
 301 {
 302     STRINGLIB_CHAR c;
 303
 304     name->ptr = self->ptr;
 305
 306     /* return everything until '.' or '[' */
 307     while (self->ptr < self->str.end) {
 308         switch (c = *self->ptr++) {
 309         case '[':
 310         case '.':
 311             /* backup so that we this character will be seen next time */
 312             self->ptr--;
 313             break;
 314         default:
 315             continue;
 316         }
 317         break;
 318     }
 319     /* end of string is okay */
 320     name->end = self->ptr;
 321     return 1;
 322 }
 323
 324 static int
 325 _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
 326 {
 327     int bracket_seen = 0;
 328     STRINGLIB_CHAR c;
 329
 330     name->ptr = self->ptr;
 331
 332     /* return everything until ']' */
 333     while (self->ptr < self->str.end) {
 334         switch (c = *self->ptr++) {
 335         case ']':
 336             bracket_seen = 1;
 337             break;
 338         default:
 339             continue;
 340         }
 341         break;
 342     }
 343     /* make sure we ended with a ']' */
 344     if (!bracket_seen) {
 345         PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
 346         return 0;
 347     }
 348
 349     /* end of string is okay */
 350     /* don't include the ']' */
 351     name->end = self->ptr-1;
 352     return 1;
 353 }
 354
 355 /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
 356 static int
 357 FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
 358                        Py_ssize_t *name_idx, SubString *name)
 359 {
 360     /* check at end of input */
 361     if (self->ptr >= self->str.end)
 362         return 1;
 363
 364     switch (*self->ptr++) {
 365     case '.':
 366         *is_attribute = 1;
 367         if (_FieldNameIterator_attr(self, name) == 0)
 368             return 0;
 369         *name_idx = -1;
 370         break;
 371     case '[':
 372         *is_attribute = 0;
 373         if (_FieldNameIterator_item(self, name) == 0)
 374             return 0;
 375         *name_idx = get_integer(name);
 376         if (*name_idx == -1 && PyErr_Occurred())
 377             return 0;
 378         break;
 379     default:
 380         /* Invalid character follows ']' */
 381         PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
 382                         "follow ']' in format field specifier");
 383         return 0;
 384     }
 385
 386     /* empty string is an error */
 387     if (name->ptr == name->end) {
 388         PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
 389         return 0;
 390     }
 391
 392     return 2;
 393 }
 394
 395
 396 /* input: field_name
 397    output: 'first' points to the part before the first '[' or '.'
 398            'first_idx' is -1 if 'first' is not an integer, otherwise
 399                        it's the value of first converted to an integer
 400            'rest' is an iterator to return the rest
 401 */
 402 static int
 403 field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
 404                  Py_ssize_t *first_idx, FieldNameIterator *rest,
 405                  AutoNumber *auto_number)
 406 {
 407     STRINGLIB_CHAR c;
 408     STRINGLIB_CHAR *p = ptr;
 409     STRINGLIB_CHAR *end = ptr + len;
 410     int field_name_is_empty;
 411     int using_numeric_index;
 412
 413     /* find the part up until the first '.' or '[' */
 414     while (p < end) {
 415         switch (c = *p++) {
 416         case '[':
 417         case '.':
 418             /* backup so that we this character is available to the
 419                "rest" iterator */
 420             p--;
 421             break;
 422         default:
 423             continue;
 424         }
 425         break;
 426     }
 427
 428     /* set up the return values */
 429     SubString_init(first, ptr, p - ptr);
 430     FieldNameIterator_init(rest, p, end - p);
 431
 432     /* see if "first" is an integer, in which case it's used as an index */
 433     *first_idx = get_integer(first);
 434     if (*first_idx == -1 && PyErr_Occurred())
 435         return 0;
 436
 437     field_name_is_empty = first->ptr >= first->end;
 438
 439     /* If the field name is omitted or if we have a numeric index
 440        specified, then we're doing numeric indexing into args. */
 441     using_numeric_index = field_name_is_empty || *first_idx != -1;
 442
 443     /* We always get here exactly one time for each field we're
 444        processing. And we get here in field order (counting by left
 445        braces). So this is the perfect place to handle automatic field
 446        numbering if the field name is omitted. */
 447
 448     /* Check if we need to do the auto-numbering. It's not needed if
 449        we're called from string.Format routines, because it's handled
 450        in that class by itself. */
 451     if (auto_number) {
 452         /* Initialize our auto numbering state if this is the first
 453            time we're either auto-numbering or manually numbering. */
 454         if (auto_number->an_state == ANS_INIT && using_numeric_index)
 455             auto_number->an_state = field_name_is_empty ?
 456                 ANS_AUTO : ANS_MANUAL;
 457
 458         /* Make sure our state is consistent with what we're doing
 459            this time through. Only check if we're using a numeric
 460            index. */
 461         if (using_numeric_index)
 462             if (autonumber_state_error(auto_number->an_state,
 463                                        field_name_is_empty))
 464                 return 0;
 465         /* Zero length field means we want to do auto-numbering of the
 466            fields. */
 467         if (field_name_is_empty)
 468             *first_idx = (auto_number->an_field_number)++;
 469     }
 470
 471     return 1;
 472 }
 473
 474
 475 /*
 476     get_field_object returns the object inside {}, before the
 477     format_spec.  It handles getindex and getattr lookups and consumes
 478     the entire input string.
 479 */
 480 static PyObject *
 481 get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
 482                  AutoNumber *auto_number)
 483 {
 484     PyObject *obj = NULL;
 485     int ok;
 486     int is_attribute;
 487     SubString name;
 488     SubString first;
 489     Py_ssize_t index;
 490     FieldNameIterator rest;
 491
 492     if (!field_name_split(input->ptr, input->end - input->ptr, &first,
 493                           &index, &rest, auto_number)) {
 494         goto error;
 495     }
 496
 497     if (index == -1) {
 498         /* look up in kwargs */
 499         PyObject *key = SubString_new_object(&first);
 500         if (key == NULL)
 501             goto error;
 502         if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
 503             PyErr_SetObject(PyExc_KeyError, key);
 504             Py_DECREF(key);
 505             goto error;
 506         }
 507         Py_DECREF(key);
 508         Py_INCREF(obj);
 509     }
 510     else {
 511         /* look up in args */
 512         obj = PySequence_GetItem(args, index);
 513         if (obj == NULL)
 514             goto error;
 515     }
 516
 517     /* iterate over the rest of the field_name */
 518     while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
 519                                         &name)) == 2) {
 520         PyObject *tmp;
 521
 522         if (is_attribute)
 523             /* getattr lookup "." */
 524             tmp = getattr(obj, &name);
 525         else
 526             /* getitem lookup "[]" */
 527             if (index == -1)
 528                 tmp = getitem_str(obj, &name);
 529             else
 530                 if (PySequence_Check(obj))
 531                     tmp = getitem_sequence(obj, index);
 532                 else
 533                     /* not a sequence */
 534                     tmp = getitem_idx(obj, index);
 535         if (tmp == NULL)
 536             goto error;
 537
 538         /* assign to obj */
 539         Py_DECREF(obj);
 540         obj = tmp;
 541     }
 542     /* end of iterator, this is the non-error case */
 543     if (ok == 1)
 544         return obj;
 545 error:
 546     Py_XDECREF(obj);
 547     return NULL;
 548 }
 549
 550 /************************************************************************/
 551 /*****************  Field rendering functions  **************************/
 552 /************************************************************************/
 553
 554 /*
 555     render_field() is the main function in this section.  It takes the
 556     field object and field specification string generated by
 557     get_field_and_spec, and renders the field into the output string.
 558
 559     render_field calls fieldobj.__format__(format_spec) method, and
 560     appends to the output.
 561 */
 562 static int
 563 render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
 564 {
 565     int ok = 0;
 566     PyObject *result = NULL;
 567     PyObject *format_spec_object = NULL;
 568     PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;
 569     STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?
 570             format_spec->ptr : NULL;
 571     Py_ssize_t format_spec_len = format_spec->ptr ?
 572             format_spec->end - format_spec->ptr : 0;
 573
 574     /* If we know the type exactly, skip the lookup of __format__ and just
 575        call the formatter directly. */
 576 #if STRINGLIB_IS_UNICODE
 577     if (PyUnicode_CheckExact(fieldobj))
 578         formatter = _PyUnicode_FormatAdvanced;
 579     /* Unfortunately, there's a problem with checking for int, long,
 580        and float here.  If we're being included as unicode, their
 581        formatters expect string format_spec args.  For now, just skip
 582        this optimization for unicode.  This could be fixed, but it's a
 583        hassle. */
 584 #else
 585     if (PyString_CheckExact(fieldobj))
 586         formatter = _PyBytes_FormatAdvanced;
 587     else if (PyInt_CheckExact(fieldobj))
 588         formatter =_PyInt_FormatAdvanced;
 589     else if (PyLong_CheckExact(fieldobj))
 590         formatter =_PyLong_FormatAdvanced;
 591     else if (PyFloat_CheckExact(fieldobj))
 592         formatter = _PyFloat_FormatAdvanced;
 593 #endif
 594
 595     if (formatter) {
 596         /* we know exactly which formatter will be called when __format__ is
 597            looked up, so call it directly, instead. */
 598         result = formatter(fieldobj, format_spec_start, format_spec_len);
 599     }
 600     else {
 601         /* We need to create an object out of the pointers we have, because
 602            __format__ takes a string/unicode object for format_spec. */
 603         format_spec_object = STRINGLIB_NEW(format_spec_start,
 604                                            format_spec_len);
 605         if (format_spec_object == NULL)
 606             goto done;
 607
 608         result = PyObject_Format(fieldobj, format_spec_object);
 609     }
 610     if (result == NULL)
 611         goto done;
 612
 613 #if PY_VERSION_HEX >= 0x03000000
 614     assert(PyUnicode_Check(result));
 615 #else
 616     assert(PyString_Check(result) || PyUnicode_Check(result));
 617
 618     /* Convert result to our type.  We could be str, and result could
 619        be unicode */
 620     {
 621         PyObject *tmp = STRINGLIB_TOSTR(result);
 622         if (tmp == NULL)
 623             goto done;
 624         Py_DECREF(result);
 625         result = tmp;
 626     }
 627 #endif
 628
 629     ok = output_data(output,
 630                      STRINGLIB_STR(result), STRINGLIB_LEN(result));
 631 done:
 632     Py_XDECREF(format_spec_object);
 633     Py_XDECREF(result);
 634     return ok;
 635 }
 636
 637 static int
 638 parse_field(SubString *str, SubString *field_name, SubString *format_spec,
 639             STRINGLIB_CHAR *conversion)
 640 {
 641     /* Note this function works if the field name is zero length,
 642        which is good.  Zero length field names are handled later, in
 643        field_name_split. */
 644
 645     STRINGLIB_CHAR c = 0;
 646
 647     /* initialize these, as they may be empty */
 648     *conversion = '\0';
 649     SubString_init(format_spec, NULL, 0);
 650
 651     /* Search for the field name.  it's terminated by the end of
 652        the string, or a ':' or '!' */
 653     field_name->ptr = str->ptr;
 654     while (str->ptr < str->end) {
 655         switch (c = *(str->ptr++)) {
 656         case ':':
 657         case '!':
 658             break;
 659         default:
 660             continue;
 661         }
 662         break;
 663     }
 664
 665     if (c == '!' || c == ':') {
 666         /* we have a format specifier and/or a conversion */
 667         /* don't include the last character */
 668         field_name->end = str->ptr-1;
 669
 670         /* the format specifier is the rest of the string */
 671         format_spec->ptr = str->ptr;
 672         format_spec->end = str->end;
 673
 674         /* see if there's a conversion specifier */
 675         if (c == '!') {
 676             /* there must be another character present */
 677             if (format_spec->ptr >= format_spec->end) {
 678                 PyErr_SetString(PyExc_ValueError,
 679                                 "end of format while looking for conversion "
 680                                 "specifier");
 681                 return 0;
 682             }
 683             *conversion = *(format_spec->ptr++);
 684
 685             /* if there is another character, it must be a colon */
 686             if (format_spec->ptr < format_spec->end) {
 687                 c = *(format_spec->ptr++);
 688                 if (c != ':') {
 689                     PyErr_SetString(PyExc_ValueError,
 690                                     "expected ':' after format specifier");
 691                     return 0;
 692                 }
 693             }
 694         }
 695     }
 696     else
 697         /* end of string, there's no format_spec or conversion */
 698         field_name->end = str->ptr;
 699
 700     return 1;
 701 }
 702
 703 /************************************************************************/
 704 /******* Output string allocation and escape-to-markup processing  ******/
 705 /************************************************************************/
 706
 707 /* MarkupIterator breaks the string into pieces of either literal
 708    text, or things inside {} that need to be marked up.  it is
 709    designed to make it easy to wrap a Python iterator around it, for
 710    use with the Formatter class */
 711
 712 typedef struct {
 713     SubString str;  /* input not yet consumed; MarkupIterator_next advances str.ptr */
 714 } MarkupIterator;
 715
 716 static int
 717 MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
 718 {
 719     SubString_init(&self->str, ptr, len);
 720     return 1;
 721 }
 722
 723 /* Advance the iterator by one step: scan a run of literal text and,
        if an unescaped '{' ends that run, the replacement field up to
        its matching '}'.

        Outputs: `literal` is the literal run (it may be empty, and it
        includes one brace when a doubled "{{" or "}}" was seen).
        `*field_present` is set to 1 when a replacement field follows
        the literal; `field_name`, `format_spec` and `*conversion` are
        then filled in by parse_field, and
        `*format_spec_needs_expanding` is set to 1 if a nested '{' was
        seen inside the field.

        returns 0 on error (ValueError set), 1 on non-error termination
        (no input left), and 2 if it got a
 724    string (or something to be expanded) */
 725 static int
 726 MarkupIterator_next(MarkupIterator *self, SubString *literal,
 727                     int *field_present, SubString *field_name,
 728                     SubString *format_spec, STRINGLIB_CHAR *conversion,
 729                     int *format_spec_needs_expanding)
 730 {
 731     int at_end;  /* after the literal scan: no input is left */
 732     STRINGLIB_CHAR c = 0;  /* last character read */
 733     STRINGLIB_CHAR *start;  /* start of the literal, later of the field text */
 734     int count;  /* brace nesting depth while scanning a field */
 735     Py_ssize_t len;  /* length of the literal run */
 736     int markup_follows = 0;  /* the literal scan stopped on a brace */
 737
 738     /* initialize all of the output variables */
 739     SubString_init(literal, NULL, 0);
 740     SubString_init(field_name, NULL, 0);
 741     SubString_init(format_spec, NULL, 0);
 742     *conversion = '\0';
 743     *format_spec_needs_expanding = 0;
 744     *field_present = 0;
 745
 746     /* No more input, end of iterator.  This is the normal exit
 747        path. */
 748     if (self->str.ptr >= self->str.end)
 749         return 1;
 750
 751     start = self->str.ptr;
 752
 753     /* First read any literal text. Read until the end of string, an
 754        escaped '{' or '}', or an unescaped '{'.  In order to never
 755        allocate memory and so I can just pass pointers around, if
 756        there's an escaped '{' or '}' then we'll return the literal
 757        including the brace, but no format object.  The next time
 758        through, we'll return the rest of the literal, skipping past
 759        the second consecutive brace. */
 760     while (self->str.ptr < self->str.end) {
 761         switch (c = *(self->str.ptr++)) {
 762         case '{':
 763         case '}':
 764             markup_follows = 1;
 765             break;
 766         default:
 767             continue;
 768         }
 769         break;
 770     }
 771
 772     at_end = self->str.ptr >= self->str.end;
 773     len = self->str.ptr - start;  /* still counts the brace, if the scan stopped on one */
 774
         /* a '}' in literal text is only legal when doubled as "}}" */
 775     if ((c == '}') && (at_end || (c != *self->str.ptr))) {
 776         PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
 777                         "in format string");
 778         return 0;
 779     }
         /* a '{' as the very last character cannot open a field */
 780     if (at_end && c == '{') {
 781         PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
 782                         "in format string");
 783         return 0;
 784     }
 785     if (!at_end) {
 786         if (c == *self->str.ptr) {
 787             /* escaped } or {, skip it in the input.  there is no
 788                markup object following us, just this literal text */
 789             self->str.ptr++;
 790             markup_follows = 0;
 791         }
 792         else
 793             len--;  /* unescaped '{': it opens a field, so keep it out of the literal */
 794     }
 795
 796     /* record the literal text */
 797     literal->ptr = start;
 798     literal->end = start + len;
 799
 800     if (!markup_follows)
 801         return 2;
 802
 803     /* this is markup, find the end of the string by counting nested
 804        braces.  note that this prohibits escaped braces, so that
 805        format_specs cannot have braces in them. */
 806     *field_present = 1;
 807     count = 1;
 808
 809     start = self->str.ptr;
 810
 811     /* we know we can't have a zero length string, so don't worry
 812        about that case */
 813     while (self->str.ptr < self->str.end) {
 814         switch (c = *(self->str.ptr++)) {
 815         case '{':
 816             /* the format spec needs to be recursively expanded.
 817                this is an optimization, and not strictly needed */
 818             *format_spec_needs_expanding = 1;
 819             count++;
 820             break;
 821         case '}':
 822             count--;
 823             if (count <= 0) {
 824                 /* we're done.  parse and get out */
 825                 SubString s;
 826
                     /* the field text is what lies between the braces;
                        the "- 1" leaves out the '}' just consumed */
 827                 SubString_init(&s, start, self->str.ptr - 1 - start);
 828                 if (parse_field(&s, field_name, format_spec, conversion) == 0)
 829                     return 0;
 830
 831                 /* success */
 832                 return 2;
 833             }
 834             break;
 835         }
 836     }
 837
 838     /* end of string while searching for matching '}' */
 839     PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
 840     return 0;
 841 }
 842
 843
 844 /* do the !r or !s conversion on obj */
 845 static PyObject *
 846 do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
 847 {
 848     /* XXX in pre-3.0, do we need to convert this to unicode, since it
 849        might have returned a string? */
 850     switch (conversion) {
 851     case 'r':
 852         return PyObject_Repr(obj);
 853     case 's':
 854         return STRINGLIB_TOSTR(obj);
 855     default:
 856         if (conversion > 32 && conversion < 127) {
 857                 /* It's the ASCII subrange; casting to char is safe
 858                    (assuming the execution character set is an ASCII
 859                    superset). */
 860                 PyErr_Format(PyExc_ValueError,
 861                      "Unknown conversion specifier %c",
 862                      (char)conversion);
 863         } else
 864                 PyErr_Format(PyExc_ValueError,
 865                      "Unknown conversion specifier \\x%x",
 866                      (unsigned int)conversion);
 867         return NULL;
 868     }
 869 }
 870
 871 /* given:
 872
 873    {field_name!conversion:format_spec}
 874
 875    compute the result and write it to output.
 876    format_spec_needs_expanding is an optimization.  if it's false,
 877    just output the string directly, otherwise recursively expand the
 878    format_spec string.
 879
 880    field_name is allowed to be zero length, in which case we
 881    are doing auto field numbering.
 882 */
 883
 884 static int
 885 output_markup(SubString *field_name, SubString *format_spec,
 886               int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
 887               OutputString *output, PyObject *args, PyObject *kwargs,
 888               int recursion_depth, AutoNumber *auto_number)
 889 {
 890     PyObject *tmp = NULL;
 891     PyObject *fieldobj = NULL;
 892     SubString expanded_format_spec;
 893     SubString *actual_format_spec;
 894     int result = 0;
 895
 896     /* convert field_name to an object */
 897     fieldobj = get_field_object(field_name, args, kwargs, auto_number);
 898     if (fieldobj == NULL)
 899         goto done;
 900
 901     if (conversion != '\0') {
 902         tmp = do_conversion(fieldobj, conversion);
 903         if (tmp == NULL)
 904             goto done;
 905
 906         /* do the assignment, transferring ownership: fieldobj = tmp */
 907         Py_DECREF(fieldobj);
 908         fieldobj = tmp;
 909         tmp = NULL;
 910     }
 911
 912     /* if needed, recurively compute the format_spec */
 913     if (format_spec_needs_expanding) {
 914         tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
 915                            auto_number);
 916         if (tmp == NULL)
 917             goto done;
 918
 919         /* note that in the case we're expanding the format string,
 920            tmp must be kept around until after the call to
 921            render_field. */
 922         SubString_init(&expanded_format_spec,
 923                        STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
 924         actual_format_spec = &expanded_format_spec;
 925     }
 926     else
 927         actual_format_spec = format_spec;
 928
 929     if (render_field(fieldobj, actual_format_spec, output) == 0)
 930         goto done;
 931
 932     result = 1;
 933
 934 done:
 935     Py_XDECREF(fieldobj);
 936     Py_XDECREF(tmp);
 937
 938     return result;
 939 }
 940
 941 /*
 942     do_markup is the top-level loop for the format() method.  It
 943     searches through the format string for escapes to markup codes, and
 944     calls other functions to move non-markup text to the output,
 945     and to perform the markup to the output.
 946 */
 947 static int
 948 do_markup(SubString *input, PyObject *args, PyObject *kwargs,
 949           OutputString *output, int recursion_depth, AutoNumber *auto_number)
 950 {
 951     MarkupIterator iter;
 952     int format_spec_needs_expanding;
 953     int result;
 954     int field_present;
 955     SubString literal;
 956     SubString field_name;
 957     SubString format_spec;
 958     STRINGLIB_CHAR conversion;
 959
 960     MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
 961     while ((result = MarkupIterator_next(&iter, &literal, &field_present,
 962                                          &field_name, &format_spec,
 963                                          &conversion,
 964                                          &format_spec_needs_expanding)) == 2) {
 965         if (!output_data(output, literal.ptr, literal.end - literal.ptr))
 966             return 0;
 967         if (field_present)
 968             if (!output_markup(&field_name, &format_spec,
 969                                format_spec_needs_expanding, conversion, output,
 970                                args, kwargs, recursion_depth, auto_number))
 971                 return 0;
 972     }
 973     return result;
 974 }
 975
 976
 977 /*
 978     build_string allocates the output string and then
 979     calls do_markup to do the heavy lifting.
 980 */
 981 static PyObject *
 982 build_string(SubString *input, PyObject *args, PyObject *kwargs,
 983              int recursion_depth, AutoNumber *auto_number)
 984 {
 985     OutputString output;
 986     PyObject *result = NULL;
 987     Py_ssize_t count;
 988
 989     output.obj = NULL; /* needed so cleanup code always works */
 990
 991     /* check the recursion level */
 992     if (recursion_depth <= 0) {
 993         PyErr_SetString(PyExc_ValueError,
 994                         "Max string recursion exceeded");
 995         goto done;
 996     }
 997
 998     /* initial size is the length of the format string, plus the size
 999        increment.  seems like a reasonable default */
1000     if (!output_initialize(&output,
1001                            input->end - input->ptr +
1002                            INITIAL_SIZE_INCREMENT))
1003         goto done;
1004
1005     if (!do_markup(input, args, kwargs, &output, recursion_depth,
1006                    auto_number)) {
1007         goto done;
1008     }
1009
1010     count = output.ptr - STRINGLIB_STR(output.obj);
1011     if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
1012         goto done;
1013     }
1014
1015     /* transfer ownership to result */
1016     result = output.obj;
1017     output.obj = NULL;
1018
1019 done:
1020     Py_XDECREF(output.obj);
1021     return result;
1022 }
1023
1024 /************************************************************************/
1025 /*********** main routine ***********************************************/
1026 /************************************************************************/
1027
1028 /* this is the main entry point */
1029 static PyObject *
1030 do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
1031 {
1032     SubString input;
1033
1034     /* PEP 3101 says only 2 levels, so that
1035        "{0:{1}}".format('abc', 's')            # works
1036        "{0:{1:{2}}}".format('abc', 's', '')    # fails
1037     */
1038     int recursion_depth = 2;
1039
1040     AutoNumber auto_number;
1041
1042     AutoNumber_Init(&auto_number);
1043     SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
1044     return build_string(&input, args, kwargs, recursion_depth, &auto_number);
1045 }
1046
1047
1048
1049 /************************************************************************/
1050 /*********** formatteriterator ******************************************/
1051 /************************************************************************/
1052
1053 /* This is used to implement string.Formatter.vparse().  It exists so
1054    Formatter can share code with the built in unicode.format() method.
1055    It's really just a wrapper around MarkupIterator that is callable
1056    from Python. */
1057
1058 typedef struct {
1059     PyObject_HEAD
1060
1061     STRINGLIB_OBJECT *str;
1062
1063     MarkupIterator it_markup;
1064 } formatteriterobject;
1065
1066 static void
1067 formatteriter_dealloc(formatteriterobject *it)
1068 {
1069     Py_XDECREF(it->str);
1070     PyObject_FREE(it);
1071 }
1072
1073 /* returns a tuple:
1074    (literal, field_name, format_spec, conversion)
1075
1076    literal is any literal text to output.  might be zero length
1077    field_name is the string before the ':'.  might be None
1078    format_spec is the string after the ':'.  mibht be None
1079    conversion is either None, or the string after the '!'
1080 */
1081 static PyObject *
1082 formatteriter_next(formatteriterobject *it)
1083 {
1084     SubString literal;
1085     SubString field_name;
1086     SubString format_spec;
1087     STRINGLIB_CHAR conversion;
1088     int format_spec_needs_expanding;
1089     int field_present;
1090     int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1091                                      &field_name, &format_spec, &conversion,
1092                                      &format_spec_needs_expanding);
1093
1094     /* all of the SubString objects point into it->str, so no
1095        memory management needs to be done on them */
1096     assert(0 <= result && result <= 2);
1097     if (result == 0 || result == 1)
1098         /* if 0, error has already been set, if 1, iterator is empty */
1099         return NULL;
1100     else {
1101         PyObject *literal_str = NULL;
1102         PyObject *field_name_str = NULL;
1103         PyObject *format_spec_str = NULL;
1104         PyObject *conversion_str = NULL;
1105         PyObject *tuple = NULL;
1106
1107         literal_str = SubString_new_object(&literal);
1108         if (literal_str == NULL)
1109             goto done;
1110
1111         field_name_str = SubString_new_object(&field_name);
1112         if (field_name_str == NULL)
1113             goto done;
1114
1115         /* if field_name is non-zero length, return a string for
1116            format_spec (even if zero length), else return None */
1117         format_spec_str = (field_present ?
1118                            SubString_new_object_or_empty :
1119                            SubString_new_object)(&format_spec);
1120         if (format_spec_str == NULL)
1121             goto done;
1122
1123         /* if the conversion is not specified, return a None,
1124            otherwise create a one length string with the conversion
1125            character */
1126         if (conversion == '\0') {
1127             conversion_str = Py_None;
1128             Py_INCREF(conversion_str);
1129         }
1130         else
1131             conversion_str = STRINGLIB_NEW(&conversion, 1);
1132         if (conversion_str == NULL)
1133             goto done;
1134
1135         tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1136                              conversion_str);
1137     done:
1138         Py_XDECREF(literal_str);
1139         Py_XDECREF(field_name_str);
1140         Py_XDECREF(format_spec_str);
1141         Py_XDECREF(conversion_str);
1142         return tuple;
1143     }
1144 }
1145
1146 static PyMethodDef formatteriter_methods[] = {
1147     {NULL,              NULL}           /* sentinel */
1148 };
1149
1150 static PyTypeObject PyFormatterIter_Type = {
1151     PyVarObject_HEAD_INIT(&PyType_Type, 0)
1152     "formatteriterator",                /* tp_name */
1153     sizeof(formatteriterobject),        /* tp_basicsize */
1154     0,                                  /* tp_itemsize */
1155     /* methods */
1156     (destructor)formatteriter_dealloc,  /* tp_dealloc */
1157     0,                                  /* tp_print */
1158     0,                                  /* tp_getattr */
1159     0,                                  /* tp_setattr */
1160     0,                                  /* tp_compare */
1161     0,                                  /* tp_repr */
1162     0,                                  /* tp_as_number */
1163     0,                                  /* tp_as_sequence */
1164     0,                                  /* tp_as_mapping */
1165     0,                                  /* tp_hash */
1166     0,                                  /* tp_call */
1167     0,                                  /* tp_str */
1168     PyObject_GenericGetAttr,            /* tp_getattro */
1169     0,                                  /* tp_setattro */
1170     0,                                  /* tp_as_buffer */
1171     Py_TPFLAGS_DEFAULT,                 /* tp_flags */
1172     0,                                  /* tp_doc */
1173     0,                                  /* tp_traverse */
1174     0,                                  /* tp_clear */
1175     0,                                  /* tp_richcompare */
1176     0,                                  /* tp_weaklistoffset */
1177     PyObject_SelfIter,                  /* tp_iter */
1178     (iternextfunc)formatteriter_next,   /* tp_iternext */
1179     formatteriter_methods,              /* tp_methods */
1180     0,
1181 };
1182
1183 /* unicode_formatter_parser is used to implement
1184    string.Formatter.vformat.  it parses a string and returns tuples
1185    describing the parsed elements.  It's a wrapper around
1186    stringlib/string_format.h's MarkupIterator */
1187 static PyObject *
1188 formatter_parser(STRINGLIB_OBJECT *self)
1189 {
1190     formatteriterobject *it;
1191
1192     it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1193     if (it == NULL)
1194         return NULL;
1195
1196     /* take ownership, give the object to the iterator */
1197     Py_INCREF(self);
1198     it->str = self;
1199
1200     /* initialize the contained MarkupIterator */
1201     MarkupIterator_init(&it->it_markup,
1202                         STRINGLIB_STR(self),
1203                         STRINGLIB_LEN(self));
1204
1205     return (PyObject *)it;
1206 }
1207
1208
1209 /************************************************************************/
1210 /*********** fieldnameiterator ******************************************/
1211 /************************************************************************/
1212
1213
1214 /* This is used to implement string.Formatter.vparse().  It parses the
1215    field name into attribute and item values.  It's a Python-callable
1216    wrapper around FieldNameIterator */
1217
1218 typedef struct {
1219     PyObject_HEAD
1220
1221     STRINGLIB_OBJECT *str;
1222
1223     FieldNameIterator it_field;
1224 } fieldnameiterobject;
1225
1226 static void
1227 fieldnameiter_dealloc(fieldnameiterobject *it)
1228 {
1229     Py_XDECREF(it->str);
1230     PyObject_FREE(it);
1231 }
1232
1233 /* returns a tuple:
1234    (is_attr, value)
1235    is_attr is true if we used attribute syntax (e.g., '.foo')
1236               false if we used index syntax (e.g., '[foo]')
1237    value is an integer or string
1238 */
1239 static PyObject *
1240 fieldnameiter_next(fieldnameiterobject *it)
1241 {
1242     int result;
1243     int is_attr;
1244     Py_ssize_t idx;
1245     SubString name;
1246
1247     result = FieldNameIterator_next(&it->it_field, &is_attr,
1248                                     &idx, &name);
1249     if (result == 0 || result == 1)
1250         /* if 0, error has already been set, if 1, iterator is empty */
1251         return NULL;
1252     else {
1253         PyObject* result = NULL;
1254         PyObject* is_attr_obj = NULL;
1255         PyObject* obj = NULL;
1256
1257         is_attr_obj = PyBool_FromLong(is_attr);
1258         if (is_attr_obj == NULL)
1259             goto done;
1260
1261         /* either an integer or a string */
1262         if (idx != -1)
1263             obj = PyLong_FromSsize_t(idx);
1264         else
1265             obj = SubString_new_object(&name);
1266         if (obj == NULL)
1267             goto done;
1268
1269         /* return a tuple of values */
1270         result = PyTuple_Pack(2, is_attr_obj, obj);
1271
1272     done:
1273         Py_XDECREF(is_attr_obj);
1274         Py_XDECREF(obj);
1275         return result;
1276     }
1277 }
1278
1279 static PyMethodDef fieldnameiter_methods[] = {
1280     {NULL,              NULL}           /* sentinel */
1281 };
1282
1283 static PyTypeObject PyFieldNameIter_Type = {
1284     PyVarObject_HEAD_INIT(&PyType_Type, 0)
1285     "fieldnameiterator",                /* tp_name */
1286     sizeof(fieldnameiterobject),        /* tp_basicsize */
1287     0,                                  /* tp_itemsize */
1288     /* methods */
1289     (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
1290     0,                                  /* tp_print */
1291     0,                                  /* tp_getattr */
1292     0,                                  /* tp_setattr */
1293     0,                                  /* tp_compare */
1294     0,                                  /* tp_repr */
1295     0,                                  /* tp_as_number */
1296     0,                                  /* tp_as_sequence */
1297     0,                                  /* tp_as_mapping */
1298     0,                                  /* tp_hash */
1299     0,                                  /* tp_call */
1300     0,                                  /* tp_str */
1301     PyObject_GenericGetAttr,            /* tp_getattro */
1302     0,                                  /* tp_setattro */
1303     0,                                  /* tp_as_buffer */
1304     Py_TPFLAGS_DEFAULT,                 /* tp_flags */
1305     0,                                  /* tp_doc */
1306     0,                                  /* tp_traverse */
1307     0,                                  /* tp_clear */
1308     0,                                  /* tp_richcompare */
1309     0,                                  /* tp_weaklistoffset */
1310     PyObject_SelfIter,                  /* tp_iter */
1311     (iternextfunc)fieldnameiter_next,   /* tp_iternext */
1312     fieldnameiter_methods,              /* tp_methods */
1313     0};
1314
1315 /* unicode_formatter_field_name_split is used to implement
1316    string.Formatter.vformat.  it takes an PEP 3101 "field name", and
1317    returns a tuple of (first, rest): "first", the part before the
1318    first '.' or '['; and "rest", an iterator for the rest of the field
1319    name.  it's a wrapper around stringlib/string_format.h's
1320    field_name_split.  The iterator it returns is a
1321    FieldNameIterator */
1322 static PyObject *
1323 formatter_field_name_split(STRINGLIB_OBJECT *self)
1324 {
1325     SubString first;
1326     Py_ssize_t first_idx;
1327     fieldnameiterobject *it;
1328
1329     PyObject *first_obj = NULL;
1330     PyObject *result = NULL;
1331
1332     it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1333     if (it == NULL)
1334         return NULL;
1335
1336     /* take ownership, give the object to the iterator.  this is
1337        just to keep the field_name alive */
1338     Py_INCREF(self);
1339     it->str = self;
1340
1341     /* Pass in auto_number = NULL. We'll return an empty string for
1342        first_obj in that case. */
1343     if (!field_name_split(STRINGLIB_STR(self),
1344                           STRINGLIB_LEN(self),
1345                           &first, &first_idx, &it->it_field, NULL))
1346         goto done;
1347
1348     /* first becomes an integer, if possible; else a string */
1349     if (first_idx != -1)
1350         first_obj = PyLong_FromSsize_t(first_idx);
1351     else
1352         /* convert "first" into a string object */
1353         first_obj = SubString_new_object(&first);
1354     if (first_obj == NULL)
1355         goto done;
1356
1357     /* return a tuple of values */
1358     result = PyTuple_Pack(2, first_obj, it);
1359
1360 done:
1361     Py_XDECREF(it);
1362     Py_XDECREF(first_obj);
1363     return result;
1364 }