AppPkg/Applications/Python/Python-2.7.10/Objects/stringlib/string_format.h

   1 /*
   2     string_format.h -- implementation of string.format().
   3
   4     It uses the Objects/stringlib conventions, so that it can be
   5     compiled for both unicode and string objects.
   6 */
   7
   8
/* Defines for Python 2.6 compatibility */
#if PY_VERSION_HEX < 0x03000000
/* Before 3.0, spell PyLong_FromSsize_t as the underscore-prefixed name. */
#define PyLong_FromSsize_t _PyLong_FromSsize_t
#endif

/* Defines for more efficiently reallocating the string buffer */
/* Starting value of OutputString.size_increment (see output_initialize). */
#define INITIAL_SIZE_INCREMENT 100
/* Factor applied to size_increment after each successful output_extend. */
#define SIZE_MULTIPLIER 2
/* size_increment is no longer multiplied once it has reached this value. */
#define MAX_SIZE_INCREMENT  3200
  18
  19
  20 /************************************************************************/
  21 /***********   Global data structures and forward declarations  *********/
  22 /************************************************************************/
  23
  24 /*
  25    A SubString consists of the characters between two string or
  26    unicode pointers.
  27 */
typedef struct {
    STRINGLIB_CHAR *ptr;   /* first character; NULL means "no substring" */
    STRINGLIB_CHAR *end;   /* one past the last character (NULL when ptr is) */
} SubString;
  32
  33
typedef enum {
    ANS_INIT,     /* state set by AutoNumber_Init: no mode chosen yet */
    ANS_AUTO,     /* chosen when the first numeric field has an empty name */
    ANS_MANUAL    /* chosen when the first numeric field has an explicit index */
} AutoNumberState;   /* Keep track if we're auto-numbering fields */
  39
  40 /* Keeps track of our auto-numbering state, and which number field we're on */
typedef struct {
    AutoNumberState an_state;   /* numbering mode currently in effect */
    int an_field_number;        /* index given to the next empty field name */
} AutoNumber;
  45
  46
/* forward declaration for recursion: output_markup() calls build_string()
   to expand a format_spec that itself contains replacement fields. */
static PyObject *
build_string(SubString *input, PyObject *args, PyObject *kwargs,
             int recursion_depth, AutoNumber *auto_number);
  51
  52
  53
  54 /************************************************************************/
  55 /**************************  Utility  functions  ************************/
  56 /************************************************************************/
  57
  58 static void
  59 AutoNumber_Init(AutoNumber *auto_number)
  60 {
  61     auto_number->an_state = ANS_INIT;
  62     auto_number->an_field_number = 0;
  63 }
  64
  65 /* fill in a SubString from a pointer and length */
  66 Py_LOCAL_INLINE(void)
  67 SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
  68 {
  69     str->ptr = p;
  70     if (p == NULL)
  71         str->end = NULL;
  72     else
  73         str->end = str->ptr + len;
  74 }
  75
  76 /* return a new string.  if str->ptr is NULL, return None */
  77 Py_LOCAL_INLINE(PyObject *)
  78 SubString_new_object(SubString *str)
  79 {
  80     if (str->ptr == NULL) {
  81         Py_INCREF(Py_None);
  82         return Py_None;
  83     }
  84     return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
  85 }
  86
  87 /* return a new string.  if str->ptr is NULL, return None */
  88 Py_LOCAL_INLINE(PyObject *)
  89 SubString_new_object_or_empty(SubString *str)
  90 {
  91     if (str->ptr == NULL) {
  92         return STRINGLIB_NEW(NULL, 0);
  93     }
  94     return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
  95 }
  96
  97 /* Return 1 if an error has been detected switching between automatic
  98    field numbering and manual field specification, else return 0. Set
  99    ValueError on error. */
 100 static int
 101 autonumber_state_error(AutoNumberState state, int field_name_is_empty)
 102 {
 103     if (state == ANS_MANUAL) {
 104         if (field_name_is_empty) {
 105             PyErr_SetString(PyExc_ValueError, "cannot switch from "
 106                             "manual field specification to "
 107                             "automatic field numbering");
 108             return 1;
 109         }
 110     }
 111     else {
 112         if (!field_name_is_empty) {
 113             PyErr_SetString(PyExc_ValueError, "cannot switch from "
 114                             "automatic field numbering to "
 115                             "manual field specification");
 116             return 1;
 117         }
 118     }
 119     return 0;
 120 }
 121
 122
 123 /************************************************************************/
 124 /***********    Output string management functions       ****************/
 125 /************************************************************************/
 126
typedef struct {
    STRINGLIB_CHAR *ptr;         /* next position to write in obj's buffer */
    STRINGLIB_CHAR *end;         /* one past the end of obj's buffer */
    PyObject *obj;               /* string object that holds the output */
    Py_ssize_t size_increment;   /* slack added by the next output_extend */
} OutputString;
 133
 134 /* initialize an OutputString object, reserving size characters */
 135 static int
 136 output_initialize(OutputString *output, Py_ssize_t size)
 137 {
 138     output->obj = STRINGLIB_NEW(NULL, size);
 139     if (output->obj == NULL)
 140         return 0;
 141
 142     output->ptr = STRINGLIB_STR(output->obj);
 143     output->end = STRINGLIB_LEN(output->obj) + output->ptr;
 144     output->size_increment = INITIAL_SIZE_INCREMENT;
 145
 146     return 1;
 147 }
 148
 149 /*
 150     output_extend reallocates the output string buffer.
 151     It returns a status:  0 for a failed reallocation,
 152     1 for success.
 153 */
 154
 155 static int
 156 output_extend(OutputString *output, Py_ssize_t count)
 157 {
 158     STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
 159     Py_ssize_t curlen = output->ptr - startptr;
 160     Py_ssize_t maxlen = curlen + count + output->size_increment;
 161
 162     if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
 163         return 0;
 164     startptr = STRINGLIB_STR(output->obj);
 165     output->ptr = startptr + curlen;
 166     output->end = startptr + maxlen;
 167     if (output->size_increment < MAX_SIZE_INCREMENT)
 168         output->size_increment *= SIZE_MULTIPLIER;
 169     return 1;
 170 }
 171
 172 /*
 173     output_data dumps characters into our output string
 174     buffer.
 175
 176     In some cases, it has to reallocate the string.
 177
 178     It returns a status:  0 for a failed reallocation,
 179     1 for success.
 180 */
 181 static int
 182 output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
 183 {
 184     if ((count > output->end - output->ptr) && !output_extend(output, count))
 185         return 0;
 186     memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
 187     output->ptr += count;
 188     return 1;
 189 }
 190
 191 /************************************************************************/
 192 /***********  Format string parsing -- integers and identifiers *********/
 193 /************************************************************************/
 194
 195 static Py_ssize_t
 196 get_integer(const SubString *str)
 197 {
 198     Py_ssize_t accumulator = 0;
 199     Py_ssize_t digitval;
 200     STRINGLIB_CHAR *p;
 201
 202     /* empty string is an error */
 203     if (str->ptr >= str->end)
 204         return -1;
 205
 206     for (p = str->ptr; p < str->end; p++) {
 207         digitval = STRINGLIB_TODECIMAL(*p);
 208         if (digitval < 0)
 209             return -1;
 210         /*
 211            Detect possible overflow before it happens:
 212
 213               accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
 214               accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
 215         */
 216         if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
 217             PyErr_Format(PyExc_ValueError,
 218                          "Too many decimal digits in format string");
 219             return -1;
 220         }
 221         accumulator = accumulator * 10 + digitval;
 222     }
 223     return accumulator;
 224 }
 225
 226 /************************************************************************/
 227 /******** Functions to get field objects and specification strings ******/
 228 /************************************************************************/
 229
 230 /* do the equivalent of obj.name */
 231 static PyObject *
 232 getattr(PyObject *obj, SubString *name)
 233 {
 234     PyObject *newobj;
 235     PyObject *str = SubString_new_object(name);
 236     if (str == NULL)
 237         return NULL;
 238     newobj = PyObject_GetAttr(obj, str);
 239     Py_DECREF(str);
 240     return newobj;
 241 }
 242
 243 /* do the equivalent of obj[idx], where obj is a sequence */
 244 static PyObject *
 245 getitem_sequence(PyObject *obj, Py_ssize_t idx)
 246 {
 247     return PySequence_GetItem(obj, idx);
 248 }
 249
 250 /* do the equivalent of obj[idx], where obj is not a sequence */
 251 static PyObject *
 252 getitem_idx(PyObject *obj, Py_ssize_t idx)
 253 {
 254     PyObject *newobj;
 255     PyObject *idx_obj = PyLong_FromSsize_t(idx);
 256     if (idx_obj == NULL)
 257         return NULL;
 258     newobj = PyObject_GetItem(obj, idx_obj);
 259     Py_DECREF(idx_obj);
 260     return newobj;
 261 }
 262
 263 /* do the equivalent of obj[name] */
 264 static PyObject *
 265 getitem_str(PyObject *obj, SubString *name)
 266 {
 267     PyObject *newobj;
 268     PyObject *str = SubString_new_object(name);
 269     if (str == NULL)
 270         return NULL;
 271     newobj = PyObject_GetItem(obj, str);
 272     Py_DECREF(str);
 273     return newobj;
 274 }
 275
/* Cursor used by FieldNameIterator_next to walk the ".attr" and "[item]"
   accessors of a field name one at a time. */
typedef struct {
    /* the entire string we're parsing.  we assume that someone else
       is managing its lifetime, and that it will exist for the
       lifetime of the iterator.  can be empty */
    SubString str;

    /* pointer to where we are inside field_name; starts at str.ptr and
       advances as accessors are consumed */
    STRINGLIB_CHAR *ptr;
} FieldNameIterator;
 285
 286
 287 static int
 288 FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
 289                        Py_ssize_t len)
 290 {
 291     SubString_init(&self->str, ptr, len);
 292     self->ptr = self->str.ptr;
 293     return 1;
 294 }
 295
 296 static int
 297 _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
 298 {
 299     STRINGLIB_CHAR c;
 300
 301     name->ptr = self->ptr;
 302
 303     /* return everything until '.' or '[' */
 304     while (self->ptr < self->str.end) {
 305         switch (c = *self->ptr++) {
 306         case '[':
 307         case '.':
 308             /* backup so that we this character will be seen next time */
 309             self->ptr--;
 310             break;
 311         default:
 312             continue;
 313         }
 314         break;
 315     }
 316     /* end of string is okay */
 317     name->end = self->ptr;
 318     return 1;
 319 }
 320
 321 static int
 322 _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
 323 {
 324     int bracket_seen = 0;
 325     STRINGLIB_CHAR c;
 326
 327     name->ptr = self->ptr;
 328
 329     /* return everything until ']' */
 330     while (self->ptr < self->str.end) {
 331         switch (c = *self->ptr++) {
 332         case ']':
 333             bracket_seen = 1;
 334             break;
 335         default:
 336             continue;
 337         }
 338         break;
 339     }
 340     /* make sure we ended with a ']' */
 341     if (!bracket_seen) {
 342         PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
 343         return 0;
 344     }
 345
 346     /* end of string is okay */
 347     /* don't include the ']' */
 348     name->end = self->ptr-1;
 349     return 1;
 350 }
 351
 352 /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
 353 static int
 354 FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
 355                        Py_ssize_t *name_idx, SubString *name)
 356 {
 357     /* check at end of input */
 358     if (self->ptr >= self->str.end)
 359         return 1;
 360
 361     switch (*self->ptr++) {
 362     case '.':
 363         *is_attribute = 1;
 364         if (_FieldNameIterator_attr(self, name) == 0)
 365             return 0;
 366         *name_idx = -1;
 367         break;
 368     case '[':
 369         *is_attribute = 0;
 370         if (_FieldNameIterator_item(self, name) == 0)
 371             return 0;
 372         *name_idx = get_integer(name);
 373         if (*name_idx == -1 && PyErr_Occurred())
 374             return 0;
 375         break;
 376     default:
 377         /* Invalid character follows ']' */
 378         PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
 379                         "follow ']' in format field specifier");
 380         return 0;
 381     }
 382
 383     /* empty string is an error */
 384     if (name->ptr == name->end) {
 385         PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
 386         return 0;
 387     }
 388
 389     return 2;
 390 }
 391
 392
 393 /* input: field_name
 394    output: 'first' points to the part before the first '[' or '.'
 395            'first_idx' is -1 if 'first' is not an integer, otherwise
 396                        it's the value of first converted to an integer
 397            'rest' is an iterator to return the rest
 398 */
 399 static int
 400 field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
 401                  Py_ssize_t *first_idx, FieldNameIterator *rest,
 402                  AutoNumber *auto_number)
 403 {
 404     STRINGLIB_CHAR c;
 405     STRINGLIB_CHAR *p = ptr;
 406     STRINGLIB_CHAR *end = ptr + len;
 407     int field_name_is_empty;
 408     int using_numeric_index;
 409
 410     /* find the part up until the first '.' or '[' */
 411     while (p < end) {
 412         switch (c = *p++) {
 413         case '[':
 414         case '.':
 415             /* backup so that we this character is available to the
 416                "rest" iterator */
 417             p--;
 418             break;
 419         default:
 420             continue;
 421         }
 422         break;
 423     }
 424
 425     /* set up the return values */
 426     SubString_init(first, ptr, p - ptr);
 427     FieldNameIterator_init(rest, p, end - p);
 428
 429     /* see if "first" is an integer, in which case it's used as an index */
 430     *first_idx = get_integer(first);
 431     if (*first_idx == -1 && PyErr_Occurred())
 432         return 0;
 433
 434     field_name_is_empty = first->ptr >= first->end;
 435
 436     /* If the field name is omitted or if we have a numeric index
 437        specified, then we're doing numeric indexing into args. */
 438     using_numeric_index = field_name_is_empty || *first_idx != -1;
 439
 440     /* We always get here exactly one time for each field we're
 441        processing. And we get here in field order (counting by left
 442        braces). So this is the perfect place to handle automatic field
 443        numbering if the field name is omitted. */
 444
 445     /* Check if we need to do the auto-numbering. It's not needed if
 446        we're called from string.Format routines, because it's handled
 447        in that class by itself. */
 448     if (auto_number) {
 449         /* Initialize our auto numbering state if this is the first
 450            time we're either auto-numbering or manually numbering. */
 451         if (auto_number->an_state == ANS_INIT && using_numeric_index)
 452             auto_number->an_state = field_name_is_empty ?
 453                 ANS_AUTO : ANS_MANUAL;
 454
 455         /* Make sure our state is consistent with what we're doing
 456            this time through. Only check if we're using a numeric
 457            index. */
 458         if (using_numeric_index)
 459             if (autonumber_state_error(auto_number->an_state,
 460                                        field_name_is_empty))
 461                 return 0;
 462         /* Zero length field means we want to do auto-numbering of the
 463            fields. */
 464         if (field_name_is_empty)
 465             *first_idx = (auto_number->an_field_number)++;
 466     }
 467
 468     return 1;
 469 }
 470
 471
 472 /*
 473     get_field_object returns the object inside {}, before the
 474     format_spec.  It handles getindex and getattr lookups and consumes
 475     the entire input string.
 476 */
 477 static PyObject *
 478 get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
 479                  AutoNumber *auto_number)
 480 {
 481     PyObject *obj = NULL;
 482     int ok;
 483     int is_attribute;
 484     SubString name;
 485     SubString first;
 486     Py_ssize_t index;
 487     FieldNameIterator rest;
 488
 489     if (!field_name_split(input->ptr, input->end - input->ptr, &first,
 490                           &index, &rest, auto_number)) {
 491         goto error;
 492     }
 493
 494     if (index == -1) {
 495         /* look up in kwargs */
 496         PyObject *key = SubString_new_object(&first);
 497         if (key == NULL)
 498             goto error;
 499         if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
 500             PyErr_SetObject(PyExc_KeyError, key);
 501             Py_DECREF(key);
 502             goto error;
 503         }
 504         Py_DECREF(key);
 505         Py_INCREF(obj);
 506     }
 507     else {
 508         /* look up in args */
 509         obj = PySequence_GetItem(args, index);
 510         if (obj == NULL)
 511             goto error;
 512     }
 513
 514     /* iterate over the rest of the field_name */
 515     while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
 516                                         &name)) == 2) {
 517         PyObject *tmp;
 518
 519         if (is_attribute)
 520             /* getattr lookup "." */
 521             tmp = getattr(obj, &name);
 522         else
 523             /* getitem lookup "[]" */
 524             if (index == -1)
 525                 tmp = getitem_str(obj, &name);
 526             else
 527                 if (PySequence_Check(obj))
 528                     tmp = getitem_sequence(obj, index);
 529                 else
 530                     /* not a sequence */
 531                     tmp = getitem_idx(obj, index);
 532         if (tmp == NULL)
 533             goto error;
 534
 535         /* assign to obj */
 536         Py_DECREF(obj);
 537         obj = tmp;
 538     }
 539     /* end of iterator, this is the non-error case */
 540     if (ok == 1)
 541         return obj;
 542 error:
 543     Py_XDECREF(obj);
 544     return NULL;
 545 }
 546
 547 /************************************************************************/
 548 /*****************  Field rendering functions  **************************/
 549 /************************************************************************/
 550
 551 /*
 552     render_field() is the main function in this section.  It takes the
 553     field object and field specification string generated by
 554     get_field_and_spec, and renders the field into the output string.
 555
 556     render_field calls fieldobj.__format__(format_spec) method, and
 557     appends to the output.
 558 */
 559 static int
 560 render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
 561 {
 562     int ok = 0;
 563     PyObject *result = NULL;
 564     PyObject *format_spec_object = NULL;
 565     PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;
 566     STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?
 567             format_spec->ptr : NULL;
 568     Py_ssize_t format_spec_len = format_spec->ptr ?
 569             format_spec->end - format_spec->ptr : 0;
 570
 571     /* If we know the type exactly, skip the lookup of __format__ and just
 572        call the formatter directly. */
 573 #if STRINGLIB_IS_UNICODE
 574     if (PyUnicode_CheckExact(fieldobj))
 575         formatter = _PyUnicode_FormatAdvanced;
 576     /* Unfortunately, there's a problem with checking for int, long,
 577        and float here.  If we're being included as unicode, their
 578        formatters expect string format_spec args.  For now, just skip
 579        this optimization for unicode.  This could be fixed, but it's a
 580        hassle. */
 581 #else
 582     if (PyString_CheckExact(fieldobj))
 583         formatter = _PyBytes_FormatAdvanced;
 584     else if (PyInt_CheckExact(fieldobj))
 585         formatter =_PyInt_FormatAdvanced;
 586     else if (PyLong_CheckExact(fieldobj))
 587         formatter =_PyLong_FormatAdvanced;
 588     else if (PyFloat_CheckExact(fieldobj))
 589         formatter = _PyFloat_FormatAdvanced;
 590 #endif
 591
 592     if (formatter) {
 593         /* we know exactly which formatter will be called when __format__ is
 594            looked up, so call it directly, instead. */
 595         result = formatter(fieldobj, format_spec_start, format_spec_len);
 596     }
 597     else {
 598         /* We need to create an object out of the pointers we have, because
 599            __format__ takes a string/unicode object for format_spec. */
 600         format_spec_object = STRINGLIB_NEW(format_spec_start,
 601                                            format_spec_len);
 602         if (format_spec_object == NULL)
 603             goto done;
 604
 605         result = PyObject_Format(fieldobj, format_spec_object);
 606     }
 607     if (result == NULL)
 608         goto done;
 609
 610 #if PY_VERSION_HEX >= 0x03000000
 611     assert(PyUnicode_Check(result));
 612 #else
 613     assert(PyString_Check(result) || PyUnicode_Check(result));
 614
 615     /* Convert result to our type.  We could be str, and result could
 616        be unicode */
 617     {
 618         PyObject *tmp = STRINGLIB_TOSTR(result);
 619         if (tmp == NULL)
 620             goto done;
 621         Py_DECREF(result);
 622         result = tmp;
 623     }
 624 #endif
 625
 626     ok = output_data(output,
 627                      STRINGLIB_STR(result), STRINGLIB_LEN(result));
 628 done:
 629     Py_XDECREF(format_spec_object);
 630     Py_XDECREF(result);
 631     return ok;
 632 }
 633
 634 static int
 635 parse_field(SubString *str, SubString *field_name, SubString *format_spec,
 636             STRINGLIB_CHAR *conversion)
 637 {
 638     /* Note this function works if the field name is zero length,
 639        which is good.  Zero length field names are handled later, in
 640        field_name_split. */
 641
 642     STRINGLIB_CHAR c = 0;
 643
 644     /* initialize these, as they may be empty */
 645     *conversion = '\0';
 646     SubString_init(format_spec, NULL, 0);
 647
 648     /* Search for the field name.  it's terminated by the end of
 649        the string, or a ':' or '!' */
 650     field_name->ptr = str->ptr;
 651     while (str->ptr < str->end) {
 652         switch (c = *(str->ptr++)) {
 653         case ':':
 654         case '!':
 655             break;
 656         default:
 657             continue;
 658         }
 659         break;
 660     }
 661
 662     if (c == '!' || c == ':') {
 663         /* we have a format specifier and/or a conversion */
 664         /* don't include the last character */
 665         field_name->end = str->ptr-1;
 666
 667         /* the format specifier is the rest of the string */
 668         format_spec->ptr = str->ptr;
 669         format_spec->end = str->end;
 670
 671         /* see if there's a conversion specifier */
 672         if (c == '!') {
 673             /* there must be another character present */
 674             if (format_spec->ptr >= format_spec->end) {
 675                 PyErr_SetString(PyExc_ValueError,
 676                                 "end of format while looking for conversion "
 677                                 "specifier");
 678                 return 0;
 679             }
 680             *conversion = *(format_spec->ptr++);
 681
 682             /* if there is another character, it must be a colon */
 683             if (format_spec->ptr < format_spec->end) {
 684                 c = *(format_spec->ptr++);
 685                 if (c != ':') {
 686                     PyErr_SetString(PyExc_ValueError,
 687                                     "expected ':' after format specifier");
 688                     return 0;
 689                 }
 690             }
 691         }
 692     }
 693     else
 694         /* end of string, there's no format_spec or conversion */
 695         field_name->end = str->ptr;
 696
 697     return 1;
 698 }
 699
 700 /************************************************************************/
 701 /******* Output string allocation and escape-to-markup processing  ******/
 702 /************************************************************************/
 703
 704 /* MarkupIterator breaks the string into pieces of either literal
 705    text, or things inside {} that need to be marked up.  it is
 706    designed to make it easy to wrap a Python iterator around it, for
 707    use with the Formatter class */
 708
typedef struct {
    /* the input still to be scanned; MarkupIterator_next advances
       str.ptr as it consumes literal text and fields */
    SubString str;
} MarkupIterator;
 712
 713 static int
 714 MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
 715 {
 716     SubString_init(&self->str, ptr, len);
 717     return 1;
 718 }
 719
 720 /* returns 0 on error, 1 on non-error termination, and 2 if it got a
 721    string (or something to be expanded) */
 722 static int
 723 MarkupIterator_next(MarkupIterator *self, SubString *literal,
 724                     int *field_present, SubString *field_name,
 725                     SubString *format_spec, STRINGLIB_CHAR *conversion,
 726                     int *format_spec_needs_expanding)
 727 {
 728     int at_end;
 729     STRINGLIB_CHAR c = 0;
 730     STRINGLIB_CHAR *start;
 731     int count;
 732     Py_ssize_t len;
 733     int markup_follows = 0;
 734
 735     /* initialize all of the output variables */
 736     SubString_init(literal, NULL, 0);
 737     SubString_init(field_name, NULL, 0);
 738     SubString_init(format_spec, NULL, 0);
 739     *conversion = '\0';
 740     *format_spec_needs_expanding = 0;
 741     *field_present = 0;
 742
 743     /* No more input, end of iterator.  This is the normal exit
 744        path. */
 745     if (self->str.ptr >= self->str.end)
 746         return 1;
 747
 748     start = self->str.ptr;
 749
 750     /* First read any literal text. Read until the end of string, an
 751        escaped '{' or '}', or an unescaped '{'.  In order to never
 752        allocate memory and so I can just pass pointers around, if
 753        there's an escaped '{' or '}' then we'll return the literal
 754        including the brace, but no format object.  The next time
 755        through, we'll return the rest of the literal, skipping past
 756        the second consecutive brace. */
 757     while (self->str.ptr < self->str.end) {
 758         switch (c = *(self->str.ptr++)) {
 759         case '{':
 760         case '}':
 761             markup_follows = 1;
 762             break;
 763         default:
 764             continue;
 765         }
 766         break;
 767     }
 768
 769     at_end = self->str.ptr >= self->str.end;
 770     len = self->str.ptr - start;
 771
 772     if ((c == '}') && (at_end || (c != *self->str.ptr))) {
 773         PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
 774                         "in format string");
 775         return 0;
 776     }
 777     if (at_end && c == '{') {
 778         PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
 779                         "in format string");
 780         return 0;
 781     }
 782     if (!at_end) {
 783         if (c == *self->str.ptr) {
 784             /* escaped } or {, skip it in the input.  there is no
 785                markup object following us, just this literal text */
 786             self->str.ptr++;
 787             markup_follows = 0;
 788         }
 789         else
 790             len--;
 791     }
 792
 793     /* record the literal text */
 794     literal->ptr = start;
 795     literal->end = start + len;
 796
 797     if (!markup_follows)
 798         return 2;
 799
 800     /* this is markup, find the end of the string by counting nested
 801        braces.  note that this prohibits escaped braces, so that
 802        format_specs cannot have braces in them. */
 803     *field_present = 1;
 804     count = 1;
 805
 806     start = self->str.ptr;
 807
 808     /* we know we can't have a zero length string, so don't worry
 809        about that case */
 810     while (self->str.ptr < self->str.end) {
 811         switch (c = *(self->str.ptr++)) {
 812         case '{':
 813             /* the format spec needs to be recursively expanded.
 814                this is an optimization, and not strictly needed */
 815             *format_spec_needs_expanding = 1;
 816             count++;
 817             break;
 818         case '}':
 819             count--;
 820             if (count <= 0) {
 821                 /* we're done.  parse and get out */
 822                 SubString s;
 823
 824                 SubString_init(&s, start, self->str.ptr - 1 - start);
 825                 if (parse_field(&s, field_name, format_spec, conversion) == 0)
 826                     return 0;
 827
 828                 /* success */
 829                 return 2;
 830             }
 831             break;
 832         }
 833     }
 834
 835     /* end of string while searching for matching '}' */
 836     PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
 837     return 0;
 838 }
 839
 840
 841 /* do the !r or !s conversion on obj */
 842 static PyObject *
 843 do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
 844 {
 845     /* XXX in pre-3.0, do we need to convert this to unicode, since it
 846        might have returned a string? */
 847     switch (conversion) {
 848     case 'r':
 849         return PyObject_Repr(obj);
 850     case 's':
 851         return STRINGLIB_TOSTR(obj);
 852     default:
 853         if (conversion > 32 && conversion < 127) {
 854                 /* It's the ASCII subrange; casting to char is safe
 855                    (assuming the execution character set is an ASCII
 856                    superset). */
 857                 PyErr_Format(PyExc_ValueError,
 858                      "Unknown conversion specifier %c",
 859                      (char)conversion);
 860         } else
 861                 PyErr_Format(PyExc_ValueError,
 862                      "Unknown conversion specifier \\x%x",
 863                      (unsigned int)conversion);
 864         return NULL;
 865     }
 866 }
 867
 868 /* given:
 869
 870    {field_name!conversion:format_spec}
 871
 872    compute the result and write it to output.
 873    format_spec_needs_expanding is an optimization.  if it's false,
 874    just output the string directly, otherwise recursively expand the
 875    format_spec string.
 876
 877    field_name is allowed to be zero length, in which case we
 878    are doing auto field numbering.
 879 */
 880
 881 static int
 882 output_markup(SubString *field_name, SubString *format_spec,
 883               int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
 884               OutputString *output, PyObject *args, PyObject *kwargs,
 885               int recursion_depth, AutoNumber *auto_number)
 886 {
 887     PyObject *tmp = NULL;
 888     PyObject *fieldobj = NULL;
 889     SubString expanded_format_spec;
 890     SubString *actual_format_spec;
 891     int result = 0;
 892
 893     /* convert field_name to an object */
 894     fieldobj = get_field_object(field_name, args, kwargs, auto_number);
 895     if (fieldobj == NULL)
 896         goto done;
 897
 898     if (conversion != '\0') {
 899         tmp = do_conversion(fieldobj, conversion);
 900         if (tmp == NULL)
 901             goto done;
 902
 903         /* do the assignment, transferring ownership: fieldobj = tmp */
 904         Py_DECREF(fieldobj);
 905         fieldobj = tmp;
 906         tmp = NULL;
 907     }
 908
 909     /* if needed, recurively compute the format_spec */
 910     if (format_spec_needs_expanding) {
 911         tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
 912                            auto_number);
 913         if (tmp == NULL)
 914             goto done;
 915
 916         /* note that in the case we're expanding the format string,
 917            tmp must be kept around until after the call to
 918            render_field. */
 919         SubString_init(&expanded_format_spec,
 920                        STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
 921         actual_format_spec = &expanded_format_spec;
 922     }
 923     else
 924         actual_format_spec = format_spec;
 925
 926     if (render_field(fieldobj, actual_format_spec, output) == 0)
 927         goto done;
 928
 929     result = 1;
 930
 931 done:
 932     Py_XDECREF(fieldobj);
 933     Py_XDECREF(tmp);
 934
 935     return result;
 936 }
 937
 938 /*
 939     do_markup is the top-level loop for the format() method.  It
 940     searches through the format string for escapes to markup codes, and
 941     calls other functions to move non-markup text to the output,
 942     and to perform the markup to the output.
 943 */
 944 static int
 945 do_markup(SubString *input, PyObject *args, PyObject *kwargs,
 946           OutputString *output, int recursion_depth, AutoNumber *auto_number)
 947 {
 948     MarkupIterator iter;
 949     int format_spec_needs_expanding;
 950     int result;
 951     int field_present;
 952     SubString literal;
 953     SubString field_name;
 954     SubString format_spec;
 955     STRINGLIB_CHAR conversion;
 956
 957     MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
 958     while ((result = MarkupIterator_next(&iter, &literal, &field_present,
 959                                          &field_name, &format_spec,
 960                                          &conversion,
 961                                          &format_spec_needs_expanding)) == 2) {
 962         if (!output_data(output, literal.ptr, literal.end - literal.ptr))
 963             return 0;
 964         if (field_present)
 965             if (!output_markup(&field_name, &format_spec,
 966                                format_spec_needs_expanding, conversion, output,
 967                                args, kwargs, recursion_depth, auto_number))
 968                 return 0;
 969     }
 970     return result;
 971 }
 972
 973
 974 /*
 975     build_string allocates the output string and then
 976     calls do_markup to do the heavy lifting.
 977 */
 978 static PyObject *
 979 build_string(SubString *input, PyObject *args, PyObject *kwargs,
 980              int recursion_depth, AutoNumber *auto_number)
 981 {
 982     OutputString output;
 983     PyObject *result = NULL;
 984     Py_ssize_t count;
 985
 986     output.obj = NULL; /* needed so cleanup code always works */
 987
 988     /* check the recursion level */
 989     if (recursion_depth <= 0) {
 990         PyErr_SetString(PyExc_ValueError,
 991                         "Max string recursion exceeded");
 992         goto done;
 993     }
 994
 995     /* initial size is the length of the format string, plus the size
 996        increment.  seems like a reasonable default */
 997     if (!output_initialize(&output,
 998                            input->end - input->ptr +
 999                            INITIAL_SIZE_INCREMENT))
1000         goto done;
1001
1002     if (!do_markup(input, args, kwargs, &output, recursion_depth,
1003                    auto_number)) {
1004         goto done;
1005     }
1006
1007     count = output.ptr - STRINGLIB_STR(output.obj);
1008     if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
1009         goto done;
1010     }
1011
1012     /* transfer ownership to result */
1013     result = output.obj;
1014     output.obj = NULL;
1015
1016 done:
1017     Py_XDECREF(output.obj);
1018     return result;
1019 }
1020
1021 /************************************************************************/
1022 /*********** main routine ***********************************************/
1023 /************************************************************************/
1024
1025 /* this is the main entry point */
1026 static PyObject *
1027 do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
1028 {
1029     SubString input;
1030
1031     /* PEP 3101 says only 2 levels, so that
1032        "{0:{1}}".format('abc', 's')            # works
1033        "{0:{1:{2}}}".format('abc', 's', '')    # fails
1034     */
1035     int recursion_depth = 2;
1036
1037     AutoNumber auto_number;
1038
1039     AutoNumber_Init(&auto_number);
1040     SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
1041     return build_string(&input, args, kwargs, recursion_depth, &auto_number);
1042 }
1043
1044
1045
1046 /************************************************************************/
1047 /*********** formatteriterator ******************************************/
1048 /************************************************************************/
1049
1050 /* This is used to implement string.Formatter.vparse().  It exists so
1051    Formatter can share code with the built in unicode.format() method.
1052    It's really just a wrapper around MarkupIterator that is callable
1053    from Python. */
1054
1055 typedef struct {                /* instance layout of the "formatteriterator" type */
1056     PyObject_HEAD
1057
1058     STRINGLIB_OBJECT *str;      /* string being parsed; INCREF'd in formatter_parser(), released in formatteriter_dealloc() */
1059
1060     MarkupIterator it_markup;   /* parse state, initialised in formatter_parser() from str's buffer and length */
1061 } formatteriterobject;
1062
1063 static void
1064 formatteriter_dealloc(formatteriterobject *it)
1065 {
1066     Py_XDECREF(it->str);
1067     PyObject_FREE(it);
1068 }
1069
1070 /* returns a tuple:
1071    (literal, field_name, format_spec, conversion)
1072
1073    literal is any literal text to output.  might be zero length
1074    field_name is the string before the ':'.  might be None
1075    format_spec is the string after the ':'.  mibht be None
1076    conversion is either None, or the string after the '!'
1077 */
1078 static PyObject *
1079 formatteriter_next(formatteriterobject *it)
1080 {
1081     SubString literal;
1082     SubString field_name;
1083     SubString format_spec;
1084     STRINGLIB_CHAR conversion;
1085     int format_spec_needs_expanding;
1086     int field_present;
1087     int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1088                                      &field_name, &format_spec, &conversion,
1089                                      &format_spec_needs_expanding);
1090
1091     /* all of the SubString objects point into it->str, so no
1092        memory management needs to be done on them */
1093     assert(0 <= result && result <= 2);
1094     if (result == 0 || result == 1)
1095         /* if 0, error has already been set, if 1, iterator is empty */
1096         return NULL;
1097     else {
1098         PyObject *literal_str = NULL;
1099         PyObject *field_name_str = NULL;
1100         PyObject *format_spec_str = NULL;
1101         PyObject *conversion_str = NULL;
1102         PyObject *tuple = NULL;
1103
1104         literal_str = SubString_new_object(&literal);
1105         if (literal_str == NULL)
1106             goto done;
1107
1108         field_name_str = SubString_new_object(&field_name);
1109         if (field_name_str == NULL)
1110             goto done;
1111
1112         /* if field_name is non-zero length, return a string for
1113            format_spec (even if zero length), else return None */
1114         format_spec_str = (field_present ?
1115                            SubString_new_object_or_empty :
1116                            SubString_new_object)(&format_spec);
1117         if (format_spec_str == NULL)
1118             goto done;
1119
1120         /* if the conversion is not specified, return a None,
1121            otherwise create a one length string with the conversion
1122            character */
1123         if (conversion == '\0') {
1124             conversion_str = Py_None;
1125             Py_INCREF(conversion_str);
1126         }
1127         else
1128             conversion_str = STRINGLIB_NEW(&conversion, 1);
1129         if (conversion_str == NULL)
1130             goto done;
1131
1132         tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1133                              conversion_str);
1134     done:
1135         Py_XDECREF(literal_str);
1136         Py_XDECREF(field_name_str);
1137         Py_XDECREF(format_spec_str);
1138         Py_XDECREF(conversion_str);
1139         return tuple;
1140     }
1141 }
1142
1143 static PyMethodDef formatteriter_methods[] = {   /* method table for formatteriterator: contains only the sentinel */
1144     {NULL,              NULL}           /* sentinel */
1145 };
1146
1147 static PyTypeObject PyFormatterIter_Type = {   /* type of the iterator objects created by formatter_parser() */
1148     PyVarObject_HEAD_INIT(&PyType_Type, 0)   /* object header: type is PyType_Type, size 0 */
1149     "formatteriterator",                /* tp_name */
1150     sizeof(formatteriterobject),        /* tp_basicsize */
1151     0,                                  /* tp_itemsize */
1152     /* methods */
1153     (destructor)formatteriter_dealloc,  /* tp_dealloc */
1154     0,                                  /* tp_print */
1155     0,                                  /* tp_getattr */
1156     0,                                  /* tp_setattr */
1157     0,                                  /* tp_compare */
1158     0,                                  /* tp_repr */
1159     0,                                  /* tp_as_number */
1160     0,                                  /* tp_as_sequence */
1161     0,                                  /* tp_as_mapping */
1162     0,                                  /* tp_hash */
1163     0,                                  /* tp_call */
1164     0,                                  /* tp_str */
1165     PyObject_GenericGetAttr,            /* tp_getattro */
1166     0,                                  /* tp_setattro */
1167     0,                                  /* tp_as_buffer */
1168     Py_TPFLAGS_DEFAULT,                 /* tp_flags */
1169     0,                                  /* tp_doc */
1170     0,                                  /* tp_traverse */
1171     0,                                  /* tp_clear */
1172     0,                                  /* tp_richcompare */
1173     0,                                  /* tp_weaklistoffset */
1174     PyObject_SelfIter,                  /* tp_iter */
1175     (iternextfunc)formatteriter_next,   /* tp_iternext */
1176     formatteriter_methods,              /* tp_methods */
1177     0,                                  /* tp_members (per the Python 2.7 slot order); later slots are left to default zero-initialisation */
1178 };
1179
1180 /* unicode_formatter_parser is used to implement
1181    string.Formatter.vformat.  it parses a string and returns tuples
1182    describing the parsed elements.  It's a wrapper around
1183    stringlib/string_format.h's MarkupIterator */
1184 static PyObject *
1185 formatter_parser(STRINGLIB_OBJECT *self)
1186 {
1187     formatteriterobject *it;
1188
1189     it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1190     if (it == NULL)
1191         return NULL;
1192
1193     /* take ownership, give the object to the iterator */
1194     Py_INCREF(self);
1195     it->str = self;
1196
1197     /* initialize the contained MarkupIterator */
1198     MarkupIterator_init(&it->it_markup,
1199                         STRINGLIB_STR(self),
1200                         STRINGLIB_LEN(self));
1201
1202     return (PyObject *)it;
1203 }
1204
1205
1206 /************************************************************************/
1207 /*********** fieldnameiterator ******************************************/
1208 /************************************************************************/
1209
1210
1211 /* This is used to implement string.Formatter.vparse().  It parses the
1212    field name into attribute and item values.  It's a Python-callable
1213    wrapper around FieldNameIterator */
1214
1215 typedef struct {                /* instance layout of the "fieldnameiterator" type */
1216     PyObject_HEAD
1217
1218     STRINGLIB_OBJECT *str;      /* field-name string; INCREF'd in formatter_field_name_split(), released in fieldnameiter_dealloc() */
1219
1220     FieldNameIterator it_field; /* iteration state, filled in by field_name_split() and advanced by fieldnameiter_next() */
1221 } fieldnameiterobject;
1222
1223 static void
1224 fieldnameiter_dealloc(fieldnameiterobject *it)
1225 {
1226     Py_XDECREF(it->str);
1227     PyObject_FREE(it);
1228 }
1229
1230 /* returns a tuple:
1231    (is_attr, value)
1232    is_attr is true if we used attribute syntax (e.g., '.foo')
1233               false if we used index syntax (e.g., '[foo]')
1234    value is an integer or string
1235 */
1236 static PyObject *
1237 fieldnameiter_next(fieldnameiterobject *it)
1238 {
1239     int result;
1240     int is_attr;
1241     Py_ssize_t idx;
1242     SubString name;
1243
1244     result = FieldNameIterator_next(&it->it_field, &is_attr,
1245                                     &idx, &name);
1246     if (result == 0 || result == 1)
1247         /* if 0, error has already been set, if 1, iterator is empty */
1248         return NULL;
1249     else {
1250         PyObject* result = NULL;
1251         PyObject* is_attr_obj = NULL;
1252         PyObject* obj = NULL;
1253
1254         is_attr_obj = PyBool_FromLong(is_attr);
1255         if (is_attr_obj == NULL)
1256             goto done;
1257
1258         /* either an integer or a string */
1259         if (idx != -1)
1260             obj = PyLong_FromSsize_t(idx);
1261         else
1262             obj = SubString_new_object(&name);
1263         if (obj == NULL)
1264             goto done;
1265
1266         /* return a tuple of values */
1267         result = PyTuple_Pack(2, is_attr_obj, obj);
1268
1269     done:
1270         Py_XDECREF(is_attr_obj);
1271         Py_XDECREF(obj);
1272         return result;
1273     }
1274 }
1275
1276 static PyMethodDef fieldnameiter_methods[] = {   /* method table for fieldnameiterator: contains only the sentinel */
1277     {NULL,              NULL}           /* sentinel */
1278 };
1279
1280 static PyTypeObject PyFieldNameIter_Type = {   /* type of the iterator objects created by formatter_field_name_split() */
1281     PyVarObject_HEAD_INIT(&PyType_Type, 0)   /* object header: type is PyType_Type, size 0 */
1282     "fieldnameiterator",                /* tp_name */
1283     sizeof(fieldnameiterobject),        /* tp_basicsize */
1284     0,                                  /* tp_itemsize */
1285     /* methods */
1286     (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
1287     0,                                  /* tp_print */
1288     0,                                  /* tp_getattr */
1289     0,                                  /* tp_setattr */
1290     0,                                  /* tp_compare */
1291     0,                                  /* tp_repr */
1292     0,                                  /* tp_as_number */
1293     0,                                  /* tp_as_sequence */
1294     0,                                  /* tp_as_mapping */
1295     0,                                  /* tp_hash */
1296     0,                                  /* tp_call */
1297     0,                                  /* tp_str */
1298     PyObject_GenericGetAttr,            /* tp_getattro */
1299     0,                                  /* tp_setattro */
1300     0,                                  /* tp_as_buffer */
1301     Py_TPFLAGS_DEFAULT,                 /* tp_flags */
1302     0,                                  /* tp_doc */
1303     0,                                  /* tp_traverse */
1304     0,                                  /* tp_clear */
1305     0,                                  /* tp_richcompare */
1306     0,                                  /* tp_weaklistoffset */
1307     PyObject_SelfIter,                  /* tp_iter */
1308     (iternextfunc)fieldnameiter_next,   /* tp_iternext */
1309     fieldnameiter_methods,              /* tp_methods */
1310     0};                                 /* tp_members (per the Python 2.7 slot order); later slots are left to default zero-initialisation */
1311
1312 /* unicode_formatter_field_name_split is used to implement
1313    string.Formatter.vformat.  it takes an PEP 3101 "field name", and
1314    returns a tuple of (first, rest): "first", the part before the
1315    first '.' or '['; and "rest", an iterator for the rest of the field
1316    name.  it's a wrapper around stringlib/string_format.h's
1317    field_name_split.  The iterator it returns is a
1318    FieldNameIterator */
1319 static PyObject *
1320 formatter_field_name_split(STRINGLIB_OBJECT *self)
1321 {
1322     SubString first;
1323     Py_ssize_t first_idx;
1324     fieldnameiterobject *it;
1325
1326     PyObject *first_obj = NULL;
1327     PyObject *result = NULL;
1328
1329     it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1330     if (it == NULL)
1331         return NULL;
1332
1333     /* take ownership, give the object to the iterator.  this is
1334        just to keep the field_name alive */
1335     Py_INCREF(self);
1336     it->str = self;
1337
1338     /* Pass in auto_number = NULL. We'll return an empty string for
1339        first_obj in that case. */
1340     if (!field_name_split(STRINGLIB_STR(self),
1341                           STRINGLIB_LEN(self),
1342                           &first, &first_idx, &it->it_field, NULL))
1343         goto done;
1344
1345     /* first becomes an integer, if possible; else a string */
1346     if (first_idx != -1)
1347         first_obj = PyLong_FromSsize_t(first_idx);
1348     else
1349         /* convert "first" into a string object */
1350         first_obj = SubString_new_object(&first);
1351     if (first_obj == NULL)
1352         goto done;
1353
1354     /* return a tuple of values */
1355     result = PyTuple_Pack(2, first_obj, it);
1356
1357 done:
1358     Py_XDECREF(it);
1359     Py_XDECREF(first_obj);
1360     return result;
1361 }