2 string_format.h -- implementation of string.format().
4 It uses the Objects/stringlib conventions, so that it can be
5 compiled for both unicode and string objects.
9 /* Defines for Python 2.6 compatibility */
10 #if PY_VERSION_HEX < 0x03000000
11 #define PyLong_FromSsize_t _PyLong_FromSsize_t
14 /* Defines for more efficiently reallocating the string buffer */
15 #define INITIAL_SIZE_INCREMENT 100
16 #define SIZE_MULTIPLIER 2
17 #define MAX_SIZE_INCREMENT 3200
20 /************************************************************************/
21 /*********** Global data structures and forward declarations *********/
22 /************************************************************************/
25 A SubString consists of the characters between two string or
38 } AutoNumberState
; /* Keep track if we're auto-numbering fields */
40 /* Keeps track of our auto-numbering state, and which number field we're on */
42 AutoNumberState an_state
;
47 /* forward declaration for recursion */
49 build_string(SubString
*input
, PyObject
*args
, PyObject
*kwargs
,
50 int recursion_depth
, AutoNumber
*auto_number
);
54 /************************************************************************/
55 /************************** Utility functions ************************/
56 /************************************************************************/
59 AutoNumber_Init(AutoNumber
*auto_number
)
61 auto_number
->an_state
= ANS_INIT
;
62 auto_number
->an_field_number
= 0;
65 /* fill in a SubString from a pointer and length */
67 SubString_init(SubString
*str
, STRINGLIB_CHAR
*p
, Py_ssize_t len
)
73 str
->end
= str
->ptr
+ len
;
76 /* return a new string. if str->ptr is NULL, return None */
77 Py_LOCAL_INLINE(PyObject
*)
78 SubString_new_object(SubString
*str
)
80 if (str
->ptr
== NULL
) {
84 return STRINGLIB_NEW(str
->ptr
, str
->end
- str
->ptr
);
87 /* return a new string. if str->ptr is NULL, return an empty string */
88 Py_LOCAL_INLINE(PyObject
*)
89 SubString_new_object_or_empty(SubString
*str
)
91 if (str
->ptr
== NULL
) {
92 return STRINGLIB_NEW(NULL
, 0);
94 return STRINGLIB_NEW(str
->ptr
, str
->end
- str
->ptr
);
97 /* Return 1 if an error has been detected switching between automatic
98 field numbering and manual field specification, else return 0. Set
99 ValueError on error. */
101 autonumber_state_error(AutoNumberState state
, int field_name_is_empty
)
103 if (state
== ANS_MANUAL
) {
104 if (field_name_is_empty
) {
105 PyErr_SetString(PyExc_ValueError
, "cannot switch from "
106 "manual field specification to "
107 "automatic field numbering");
112 if (!field_name_is_empty
) {
113 PyErr_SetString(PyExc_ValueError
, "cannot switch from "
114 "automatic field numbering to "
115 "manual field specification");
123 /************************************************************************/
124 /*********** Output string management functions ****************/
125 /************************************************************************/
131 Py_ssize_t size_increment
;
134 /* initialize an OutputString object, reserving size characters */
136 output_initialize(OutputString
*output
, Py_ssize_t size
)
138 output
->obj
= STRINGLIB_NEW(NULL
, size
);
139 if (output
->obj
== NULL
)
142 output
->ptr
= STRINGLIB_STR(output
->obj
);
143 output
->end
= STRINGLIB_LEN(output
->obj
) + output
->ptr
;
144 output
->size_increment
= INITIAL_SIZE_INCREMENT
;
150 output_extend reallocates the output string buffer.
151 It returns a status: 0 for a failed reallocation,
156 output_extend(OutputString
*output
, Py_ssize_t count
)
158 STRINGLIB_CHAR
*startptr
= STRINGLIB_STR(output
->obj
);
159 Py_ssize_t curlen
= output
->ptr
- startptr
;
160 Py_ssize_t maxlen
= curlen
+ count
+ output
->size_increment
;
162 if (STRINGLIB_RESIZE(&output
->obj
, maxlen
) < 0)
164 startptr
= STRINGLIB_STR(output
->obj
);
165 output
->ptr
= startptr
+ curlen
;
166 output
->end
= startptr
+ maxlen
;
167 if (output
->size_increment
< MAX_SIZE_INCREMENT
)
168 output
->size_increment
*= SIZE_MULTIPLIER
;
173 output_data dumps characters into our output string
176 In some cases, it has to reallocate the string.
178 It returns a status: 0 for a failed reallocation,
182 output_data(OutputString
*output
, const STRINGLIB_CHAR
*s
, Py_ssize_t count
)
184 if ((count
> output
->end
- output
->ptr
) && !output_extend(output
, count
))
186 memcpy(output
->ptr
, s
, count
* sizeof(STRINGLIB_CHAR
));
187 output
->ptr
+= count
;
191 /************************************************************************/
192 /*********** Format string parsing -- integers and identifiers *********/
193 /************************************************************************/
196 get_integer(const SubString
*str
)
198 Py_ssize_t accumulator
= 0;
200 Py_ssize_t oldaccumulator
;
203 /* empty string is an error */
204 if (str
->ptr
>= str
->end
)
207 for (p
= str
->ptr
; p
< str
->end
; p
++) {
208 digitval
= STRINGLIB_TODECIMAL(*p
);
212 This trick was copied from old Unicode format code. It's cute,
213 but would really suck on an old machine with a slow divide
214 implementation. Fortunately, in the normal case we do not
215 expect too many digits.
217 oldaccumulator
= accumulator
;
219 if ((accumulator
+10)/10 != oldaccumulator
+1) {
220 PyErr_Format(PyExc_ValueError
,
221 "Too many decimal digits in format string");
224 accumulator
+= digitval
;
229 /************************************************************************/
230 /******** Functions to get field objects and specification strings ******/
231 /************************************************************************/
233 /* do the equivalent of obj.name */
235 getattr(PyObject
*obj
, SubString
*name
)
238 PyObject
*str
= SubString_new_object(name
);
241 newobj
= PyObject_GetAttr(obj
, str
);
246 /* do the equivalent of obj[idx], where obj is a sequence */
248 getitem_sequence(PyObject
*obj
, Py_ssize_t idx
)
250 return PySequence_GetItem(obj
, idx
);
253 /* do the equivalent of obj[idx], where obj is not a sequence */
255 getitem_idx(PyObject
*obj
, Py_ssize_t idx
)
258 PyObject
*idx_obj
= PyLong_FromSsize_t(idx
);
261 newobj
= PyObject_GetItem(obj
, idx_obj
);
266 /* do the equivalent of obj[name] */
268 getitem_str(PyObject
*obj
, SubString
*name
)
271 PyObject
*str
= SubString_new_object(name
);
274 newobj
= PyObject_GetItem(obj
, str
);
280 /* the entire string we're parsing. we assume that someone else
281 is managing its lifetime, and that it will exist for the
282 lifetime of the iterator. can be empty */
285 /* pointer to where we are inside field_name */
291 FieldNameIterator_init(FieldNameIterator
*self
, STRINGLIB_CHAR
*ptr
,
294 SubString_init(&self
->str
, ptr
, len
);
295 self
->ptr
= self
->str
.ptr
;
300 _FieldNameIterator_attr(FieldNameIterator
*self
, SubString
*name
)
304 name
->ptr
= self
->ptr
;
306 /* return everything until '.' or '[' */
307 while (self
->ptr
< self
->str
.end
) {
308 switch (c
= *self
->ptr
++) {
311 /* back up so that this character will be seen next time */
319 /* end of string is okay */
320 name
->end
= self
->ptr
;
325 _FieldNameIterator_item(FieldNameIterator
*self
, SubString
*name
)
327 int bracket_seen
= 0;
330 name
->ptr
= self
->ptr
;
332 /* return everything until ']' */
333 while (self
->ptr
< self
->str
.end
) {
334 switch (c
= *self
->ptr
++) {
343 /* make sure we ended with a ']' */
345 PyErr_SetString(PyExc_ValueError
, "Missing ']' in format string");
349 /* end of string is okay */
350 /* don't include the ']' */
351 name
->end
= self
->ptr
-1;
355 /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
357 FieldNameIterator_next(FieldNameIterator
*self
, int *is_attribute
,
358 Py_ssize_t
*name_idx
, SubString
*name
)
360 /* check at end of input */
361 if (self
->ptr
>= self
->str
.end
)
364 switch (*self
->ptr
++) {
367 if (_FieldNameIterator_attr(self
, name
) == 0)
373 if (_FieldNameIterator_item(self
, name
) == 0)
375 *name_idx
= get_integer(name
);
376 if (*name_idx
== -1 && PyErr_Occurred())
380 /* Invalid character follows ']' */
381 PyErr_SetString(PyExc_ValueError
, "Only '.' or '[' may "
382 "follow ']' in format field specifier");
386 /* empty string is an error */
387 if (name
->ptr
== name
->end
) {
388 PyErr_SetString(PyExc_ValueError
, "Empty attribute in format string");
397 output: 'first' points to the part before the first '[' or '.'
398 'first_idx' is -1 if 'first' is not an integer, otherwise
399 it's the value of first converted to an integer
400 'rest' is an iterator to return the rest
403 field_name_split(STRINGLIB_CHAR
*ptr
, Py_ssize_t len
, SubString
*first
,
404 Py_ssize_t
*first_idx
, FieldNameIterator
*rest
,
405 AutoNumber
*auto_number
)
408 STRINGLIB_CHAR
*p
= ptr
;
409 STRINGLIB_CHAR
*end
= ptr
+ len
;
410 int field_name_is_empty
;
411 int using_numeric_index
;
413 /* find the part up until the first '.' or '[' */
418 /* back up so that this character is available to the
428 /* set up the return values */
429 SubString_init(first
, ptr
, p
- ptr
);
430 FieldNameIterator_init(rest
, p
, end
- p
);
432 /* see if "first" is an integer, in which case it's used as an index */
433 *first_idx
= get_integer(first
);
434 if (*first_idx
== -1 && PyErr_Occurred())
437 field_name_is_empty
= first
->ptr
>= first
->end
;
439 /* If the field name is omitted or if we have a numeric index
440 specified, then we're doing numeric indexing into args. */
441 using_numeric_index
= field_name_is_empty
|| *first_idx
!= -1;
443 /* We always get here exactly one time for each field we're
444 processing. And we get here in field order (counting by left
445 braces). So this is the perfect place to handle automatic field
446 numbering if the field name is omitted. */
448 /* Check if we need to do the auto-numbering. It's not needed if
449 we're called from string.Format routines, because it's handled
450 in that class by itself. */
452 /* Initialize our auto numbering state if this is the first
453 time we're either auto-numbering or manually numbering. */
454 if (auto_number
->an_state
== ANS_INIT
&& using_numeric_index
)
455 auto_number
->an_state
= field_name_is_empty
?
456 ANS_AUTO
: ANS_MANUAL
;
458 /* Make sure our state is consistent with what we're doing
459 this time through. Only check if we're using a numeric
461 if (using_numeric_index
)
462 if (autonumber_state_error(auto_number
->an_state
,
463 field_name_is_empty
))
465 /* Zero length field means we want to do auto-numbering of the
467 if (field_name_is_empty
)
468 *first_idx
= (auto_number
->an_field_number
)++;
476 get_field_object returns the object inside {}, before the
477 format_spec. It handles getindex and getattr lookups and consumes
478 the entire input string.
481 get_field_object(SubString
*input
, PyObject
*args
, PyObject
*kwargs
,
482 AutoNumber
*auto_number
)
484 PyObject
*obj
= NULL
;
490 FieldNameIterator rest
;
492 if (!field_name_split(input
->ptr
, input
->end
- input
->ptr
, &first
,
493 &index
, &rest
, auto_number
)) {
498 /* look up in kwargs */
499 PyObject
*key
= SubString_new_object(&first
);
502 if ((kwargs
== NULL
) || (obj
= PyDict_GetItem(kwargs
, key
)) == NULL
) {
503 PyErr_SetObject(PyExc_KeyError
, key
);
511 /* look up in args */
512 obj
= PySequence_GetItem(args
, index
);
517 /* iterate over the rest of the field_name */
518 while ((ok
= FieldNameIterator_next(&rest
, &is_attribute
, &index
,
523 /* getattr lookup "." */
524 tmp
= getattr(obj
, &name
);
526 /* getitem lookup "[]" */
528 tmp
= getitem_str(obj
, &name
);
530 if (PySequence_Check(obj
))
531 tmp
= getitem_sequence(obj
, index
);
534 tmp
= getitem_idx(obj
, index
);
542 /* end of iterator, this is the non-error case */
550 /************************************************************************/
551 /***************** Field rendering functions **************************/
552 /************************************************************************/
555 render_field() is the main function in this section. It takes the
556 field object and field specification string generated by
557 get_field_and_spec, and renders the field into the output string.
559 render_field calls fieldobj.__format__(format_spec) method, and
560 appends to the output.
563 render_field(PyObject
*fieldobj
, SubString
*format_spec
, OutputString
*output
)
566 PyObject
*result
= NULL
;
567 PyObject
*format_spec_object
= NULL
;
568 PyObject
*(*formatter
)(PyObject
*, STRINGLIB_CHAR
*, Py_ssize_t
) = NULL
;
569 STRINGLIB_CHAR
* format_spec_start
= format_spec
->ptr
?
570 format_spec
->ptr
: NULL
;
571 Py_ssize_t format_spec_len
= format_spec
->ptr
?
572 format_spec
->end
- format_spec
->ptr
: 0;
574 /* If we know the type exactly, skip the lookup of __format__ and just
575 call the formatter directly. */
576 #if STRINGLIB_IS_UNICODE
577 if (PyUnicode_CheckExact(fieldobj
))
578 formatter
= _PyUnicode_FormatAdvanced
;
579 /* Unfortunately, there's a problem with checking for int, long,
580 and float here. If we're being included as unicode, their
581 formatters expect string format_spec args. For now, just skip
582 this optimization for unicode. This could be fixed, but it's a
585 if (PyString_CheckExact(fieldobj
))
586 formatter
= _PyBytes_FormatAdvanced
;
587 else if (PyInt_CheckExact(fieldobj
))
588 formatter
=_PyInt_FormatAdvanced
;
589 else if (PyLong_CheckExact(fieldobj
))
590 formatter
=_PyLong_FormatAdvanced
;
591 else if (PyFloat_CheckExact(fieldobj
))
592 formatter
= _PyFloat_FormatAdvanced
;
596 /* we know exactly which formatter will be called when __format__ is
597 looked up, so call it directly, instead. */
598 result
= formatter(fieldobj
, format_spec_start
, format_spec_len
);
601 /* We need to create an object out of the pointers we have, because
602 __format__ takes a string/unicode object for format_spec. */
603 format_spec_object
= STRINGLIB_NEW(format_spec_start
,
605 if (format_spec_object
== NULL
)
608 result
= PyObject_Format(fieldobj
, format_spec_object
);
613 #if PY_VERSION_HEX >= 0x03000000
614 assert(PyUnicode_Check(result
));
616 assert(PyString_Check(result
) || PyUnicode_Check(result
));
618 /* Convert result to our type. We could be str, and result could
621 PyObject
*tmp
= STRINGLIB_TOSTR(result
);
629 ok
= output_data(output
,
630 STRINGLIB_STR(result
), STRINGLIB_LEN(result
));
632 Py_XDECREF(format_spec_object
);
638 parse_field(SubString
*str
, SubString
*field_name
, SubString
*format_spec
,
639 STRINGLIB_CHAR
*conversion
)
641 /* Note this function works if the field name is zero length,
642 which is good. Zero length field names are handled later, in
645 STRINGLIB_CHAR c
= 0;
647 /* initialize these, as they may be empty */
649 SubString_init(format_spec
, NULL
, 0);
651 /* Search for the field name. it's terminated by the end of
652 the string, or a ':' or '!' */
653 field_name
->ptr
= str
->ptr
;
654 while (str
->ptr
< str
->end
) {
655 switch (c
= *(str
->ptr
++)) {
665 if (c
== '!' || c
== ':') {
666 /* we have a format specifier and/or a conversion */
667 /* don't include the last character */
668 field_name
->end
= str
->ptr
-1;
670 /* the format specifier is the rest of the string */
671 format_spec
->ptr
= str
->ptr
;
672 format_spec
->end
= str
->end
;
674 /* see if there's a conversion specifier */
676 /* there must be another character present */
677 if (format_spec
->ptr
>= format_spec
->end
) {
678 PyErr_SetString(PyExc_ValueError
,
679 "end of format while looking for conversion "
683 *conversion
= *(format_spec
->ptr
++);
685 /* if there is another character, it must be a colon */
686 if (format_spec
->ptr
< format_spec
->end
) {
687 c
= *(format_spec
->ptr
++);
689 PyErr_SetString(PyExc_ValueError
,
690 "expected ':' after format specifier");
697 /* end of string, there's no format_spec or conversion */
698 field_name
->end
= str
->ptr
;
703 /************************************************************************/
704 /******* Output string allocation and escape-to-markup processing ******/
705 /************************************************************************/
707 /* MarkupIterator breaks the string into pieces of either literal
708 text, or things inside {} that need to be marked up. it is
709 designed to make it easy to wrap a Python iterator around it, for
710 use with the Formatter class */
717 MarkupIterator_init(MarkupIterator
*self
, STRINGLIB_CHAR
*ptr
, Py_ssize_t len
)
719 SubString_init(&self
->str
, ptr
, len
);
723 /* returns 0 on error, 1 on non-error termination, and 2 if it got a
724 string (or something to be expanded) */
726 MarkupIterator_next(MarkupIterator
*self
, SubString
*literal
,
727 int *field_present
, SubString
*field_name
,
728 SubString
*format_spec
, STRINGLIB_CHAR
*conversion
,
729 int *format_spec_needs_expanding
)
732 STRINGLIB_CHAR c
= 0;
733 STRINGLIB_CHAR
*start
;
736 int markup_follows
= 0;
738 /* initialize all of the output variables */
739 SubString_init(literal
, NULL
, 0);
740 SubString_init(field_name
, NULL
, 0);
741 SubString_init(format_spec
, NULL
, 0);
743 *format_spec_needs_expanding
= 0;
746 /* No more input, end of iterator. This is the normal exit
748 if (self
->str
.ptr
>= self
->str
.end
)
751 start
= self
->str
.ptr
;
753 /* First read any literal text. Read until the end of string, an
754 escaped '{' or '}', or an unescaped '{'. In order to never
755 allocate memory and so I can just pass pointers around, if
756 there's an escaped '{' or '}' then we'll return the literal
757 including the brace, but no format object. The next time
758 through, we'll return the rest of the literal, skipping past
759 the second consecutive brace. */
760 while (self
->str
.ptr
< self
->str
.end
) {
761 switch (c
= *(self
->str
.ptr
++)) {
772 at_end
= self
->str
.ptr
>= self
->str
.end
;
773 len
= self
->str
.ptr
- start
;
775 if ((c
== '}') && (at_end
|| (c
!= *self
->str
.ptr
))) {
776 PyErr_SetString(PyExc_ValueError
, "Single '}' encountered "
780 if (at_end
&& c
== '{') {
781 PyErr_SetString(PyExc_ValueError
, "Single '{' encountered "
786 if (c
== *self
->str
.ptr
) {
787 /* escaped } or {, skip it in the input. there is no
788 markup object following us, just this literal text */
796 /* record the literal text */
797 literal
->ptr
= start
;
798 literal
->end
= start
+ len
;
803 /* this is markup, find the end of the string by counting nested
804 braces. note that this prohibits escaped braces, so that
805 format_specs cannot have braces in them. */
809 start
= self
->str
.ptr
;
811 /* we know we can't have a zero length string, so don't worry
813 while (self
->str
.ptr
< self
->str
.end
) {
814 switch (c
= *(self
->str
.ptr
++)) {
816 /* the format spec needs to be recursively expanded.
817 this is an optimization, and not strictly needed */
818 *format_spec_needs_expanding
= 1;
824 /* we're done. parse and get out */
827 SubString_init(&s
, start
, self
->str
.ptr
- 1 - start
);
828 if (parse_field(&s
, field_name
, format_spec
, conversion
) == 0)
838 /* end of string while searching for matching '}' */
839 PyErr_SetString(PyExc_ValueError
, "unmatched '{' in format");
844 /* do the !r or !s conversion on obj */
846 do_conversion(PyObject
*obj
, STRINGLIB_CHAR conversion
)
848 /* XXX in pre-3.0, do we need to convert this to unicode, since it
849 might have returned a string? */
850 switch (conversion
) {
852 return PyObject_Repr(obj
);
854 return STRINGLIB_TOSTR(obj
);
856 if (conversion
> 32 && conversion
< 127) {
857 /* It's the ASCII subrange; casting to char is safe
858 (assuming the execution character set is an ASCII
860 PyErr_Format(PyExc_ValueError
,
861 "Unknown conversion specifier %c",
864 PyErr_Format(PyExc_ValueError
,
865 "Unknown conversion specifier \\x%x",
866 (unsigned int)conversion
);
873 {field_name!conversion:format_spec}
875 compute the result and write it to output.
876 format_spec_needs_expanding is an optimization. if it's false,
877 just output the string directly, otherwise recursively expand the
880 field_name is allowed to be zero length, in which case we
881 are doing auto field numbering.
885 output_markup(SubString
*field_name
, SubString
*format_spec
,
886 int format_spec_needs_expanding
, STRINGLIB_CHAR conversion
,
887 OutputString
*output
, PyObject
*args
, PyObject
*kwargs
,
888 int recursion_depth
, AutoNumber
*auto_number
)
890 PyObject
*tmp
= NULL
;
891 PyObject
*fieldobj
= NULL
;
892 SubString expanded_format_spec
;
893 SubString
*actual_format_spec
;
896 /* convert field_name to an object */
897 fieldobj
= get_field_object(field_name
, args
, kwargs
, auto_number
);
898 if (fieldobj
== NULL
)
901 if (conversion
!= '\0') {
902 tmp
= do_conversion(fieldobj
, conversion
);
906 /* do the assignment, transferring ownership: fieldobj = tmp */
912 /* if needed, recursively compute the format_spec */
913 if (format_spec_needs_expanding
) {
914 tmp
= build_string(format_spec
, args
, kwargs
, recursion_depth
-1,
919 /* note that in the case we're expanding the format string,
920 tmp must be kept around until after the call to
922 SubString_init(&expanded_format_spec
,
923 STRINGLIB_STR(tmp
), STRINGLIB_LEN(tmp
));
924 actual_format_spec
= &expanded_format_spec
;
927 actual_format_spec
= format_spec
;
929 if (render_field(fieldobj
, actual_format_spec
, output
) == 0)
935 Py_XDECREF(fieldobj
);
942 do_markup is the top-level loop for the format() method. It
943 searches through the format string for escapes to markup codes, and
944 calls other functions to move non-markup text to the output,
945 and to perform the markup to the output.
948 do_markup(SubString
*input
, PyObject
*args
, PyObject
*kwargs
,
949 OutputString
*output
, int recursion_depth
, AutoNumber
*auto_number
)
952 int format_spec_needs_expanding
;
956 SubString field_name
;
957 SubString format_spec
;
958 STRINGLIB_CHAR conversion
;
960 MarkupIterator_init(&iter
, input
->ptr
, input
->end
- input
->ptr
);
961 while ((result
= MarkupIterator_next(&iter
, &literal
, &field_present
,
962 &field_name
, &format_spec
,
964 &format_spec_needs_expanding
)) == 2) {
965 if (!output_data(output
, literal
.ptr
, literal
.end
- literal
.ptr
))
968 if (!output_markup(&field_name
, &format_spec
,
969 format_spec_needs_expanding
, conversion
, output
,
970 args
, kwargs
, recursion_depth
, auto_number
))
978 build_string allocates the output string and then
979 calls do_markup to do the heavy lifting.
982 build_string(SubString
*input
, PyObject
*args
, PyObject
*kwargs
,
983 int recursion_depth
, AutoNumber
*auto_number
)
986 PyObject
*result
= NULL
;
989 output
.obj
= NULL
; /* needed so cleanup code always works */
991 /* check the recursion level */
992 if (recursion_depth
<= 0) {
993 PyErr_SetString(PyExc_ValueError
,
994 "Max string recursion exceeded");
998 /* initial size is the length of the format string, plus the size
999 increment. seems like a reasonable default */
1000 if (!output_initialize(&output
,
1001 input
->end
- input
->ptr
+
1002 INITIAL_SIZE_INCREMENT
))
1005 if (!do_markup(input
, args
, kwargs
, &output
, recursion_depth
,
1010 count
= output
.ptr
- STRINGLIB_STR(output
.obj
);
1011 if (STRINGLIB_RESIZE(&output
.obj
, count
) < 0) {
1015 /* transfer ownership to result */
1016 result
= output
.obj
;
1020 Py_XDECREF(output
.obj
);
1024 /************************************************************************/
1025 /*********** main routine ***********************************************/
1026 /************************************************************************/
1028 /* this is the main entry point */
1030 do_string_format(PyObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1034 /* PEP 3101 says only 2 levels, so that
1035 "{0:{1}}".format('abc', 's') # works
1036 "{0:{1:{2}}}".format('abc', 's', '') # fails
1038 int recursion_depth
= 2;
1040 AutoNumber auto_number
;
1042 AutoNumber_Init(&auto_number
);
1043 SubString_init(&input
, STRINGLIB_STR(self
), STRINGLIB_LEN(self
));
1044 return build_string(&input
, args
, kwargs
, recursion_depth
, &auto_number
);
1049 /************************************************************************/
1050 /*********** formatteriterator ******************************************/
1051 /************************************************************************/
1053 /* This is used to implement string.Formatter.vparse(). It exists so
1054 Formatter can share code with the built in unicode.format() method.
1055 It's really just a wrapper around MarkupIterator that is callable
1061 STRINGLIB_OBJECT
*str
;
1063 MarkupIterator it_markup
;
1064 } formatteriterobject
;
1067 formatteriter_dealloc(formatteriterobject
*it
)
1069 Py_XDECREF(it
->str
);
1074 (literal, field_name, format_spec, conversion)
1076 literal is any literal text to output. might be zero length
1077 field_name is the string before the ':'. might be None
1078 format_spec is the string after the ':'. might be None
1079 conversion is either None, or the string after the '!'
1082 formatteriter_next(formatteriterobject
*it
)
1085 SubString field_name
;
1086 SubString format_spec
;
1087 STRINGLIB_CHAR conversion
;
1088 int format_spec_needs_expanding
;
1090 int result
= MarkupIterator_next(&it
->it_markup
, &literal
, &field_present
,
1091 &field_name
, &format_spec
, &conversion
,
1092 &format_spec_needs_expanding
);
1094 /* all of the SubString objects point into it->str, so no
1095 memory management needs to be done on them */
1096 assert(0 <= result
&& result
<= 2);
1097 if (result
== 0 || result
== 1)
1098 /* if 0, error has already been set, if 1, iterator is empty */
1101 PyObject
*literal_str
= NULL
;
1102 PyObject
*field_name_str
= NULL
;
1103 PyObject
*format_spec_str
= NULL
;
1104 PyObject
*conversion_str
= NULL
;
1105 PyObject
*tuple
= NULL
;
1107 literal_str
= SubString_new_object(&literal
);
1108 if (literal_str
== NULL
)
1111 field_name_str
= SubString_new_object(&field_name
);
1112 if (field_name_str
== NULL
)
1115 /* if a field is present (its name may be zero length), return a string for
1116 format_spec (even if zero length), else return None */
1117 format_spec_str
= (field_present
?
1118 SubString_new_object_or_empty
:
1119 SubString_new_object
)(&format_spec
);
1120 if (format_spec_str
== NULL
)
1123 /* if the conversion is not specified, return a None,
1124 otherwise create a one length string with the conversion
1126 if (conversion
== '\0') {
1127 conversion_str
= Py_None
;
1128 Py_INCREF(conversion_str
);
1131 conversion_str
= STRINGLIB_NEW(&conversion
, 1);
1132 if (conversion_str
== NULL
)
1135 tuple
= PyTuple_Pack(4, literal_str
, field_name_str
, format_spec_str
,
1138 Py_XDECREF(literal_str
);
1139 Py_XDECREF(field_name_str
);
1140 Py_XDECREF(format_spec_str
);
1141 Py_XDECREF(conversion_str
);
1146 static PyMethodDef formatteriter_methods
[] = {
1147 {NULL
, NULL
} /* sentinel */
1150 static PyTypeObject PyFormatterIter_Type
= {
1151 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
1152 "formatteriterator", /* tp_name */
1153 sizeof(formatteriterobject
), /* tp_basicsize */
1154 0, /* tp_itemsize */
1156 (destructor
)formatteriter_dealloc
, /* tp_dealloc */
1162 0, /* tp_as_number */
1163 0, /* tp_as_sequence */
1164 0, /* tp_as_mapping */
1168 PyObject_GenericGetAttr
, /* tp_getattro */
1169 0, /* tp_setattro */
1170 0, /* tp_as_buffer */
1171 Py_TPFLAGS_DEFAULT
, /* tp_flags */
1173 0, /* tp_traverse */
1175 0, /* tp_richcompare */
1176 0, /* tp_weaklistoffset */
1177 PyObject_SelfIter
, /* tp_iter */
1178 (iternextfunc
)formatteriter_next
, /* tp_iternext */
1179 formatteriter_methods
, /* tp_methods */
1183 /* formatter_parser is used to implement
1184 string.Formatter.vformat. it parses a string and returns tuples
1185 describing the parsed elements. It's a wrapper around
1186 stringlib/string_format.h's MarkupIterator */
1188 formatter_parser(STRINGLIB_OBJECT
*self
)
1190 formatteriterobject
*it
;
1192 it
= PyObject_New(formatteriterobject
, &PyFormatterIter_Type
);
1196 /* take ownership, give the object to the iterator */
1200 /* initialize the contained MarkupIterator */
1201 MarkupIterator_init(&it
->it_markup
,
1202 STRINGLIB_STR(self
),
1203 STRINGLIB_LEN(self
));
1205 return (PyObject
*)it
;
1209 /************************************************************************/
1210 /*********** fieldnameiterator ******************************************/
1211 /************************************************************************/
1214 /* This is used to implement string.Formatter.vparse(). It parses the
1215 field name into attribute and item values. It's a Python-callable
1216 wrapper around FieldNameIterator */
1221 STRINGLIB_OBJECT
*str
;
1223 FieldNameIterator it_field
;
1224 } fieldnameiterobject
;
1227 fieldnameiter_dealloc(fieldnameiterobject
*it
)
1229 Py_XDECREF(it
->str
);
1235 is_attr is true if we used attribute syntax (e.g., '.foo')
1236 false if we used index syntax (e.g., '[foo]')
1237 value is an integer or string
1240 fieldnameiter_next(fieldnameiterobject
*it
)
1247 result
= FieldNameIterator_next(&it
->it_field
, &is_attr
,
1249 if (result
== 0 || result
== 1)
1250 /* if 0, error has already been set, if 1, iterator is empty */
1253 PyObject
* result
= NULL
;
1254 PyObject
* is_attr_obj
= NULL
;
1255 PyObject
* obj
= NULL
;
1257 is_attr_obj
= PyBool_FromLong(is_attr
);
1258 if (is_attr_obj
== NULL
)
1261 /* either an integer or a string */
1263 obj
= PyLong_FromSsize_t(idx
);
1265 obj
= SubString_new_object(&name
);
1269 /* return a tuple of values */
1270 result
= PyTuple_Pack(2, is_attr_obj
, obj
);
1273 Py_XDECREF(is_attr_obj
);
1279 static PyMethodDef fieldnameiter_methods
[] = {
1280 {NULL
, NULL
} /* sentinel */
1283 static PyTypeObject PyFieldNameIter_Type
= {
1284 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
1285 "fieldnameiterator", /* tp_name */
1286 sizeof(fieldnameiterobject
), /* tp_basicsize */
1287 0, /* tp_itemsize */
1289 (destructor
)fieldnameiter_dealloc
, /* tp_dealloc */
1295 0, /* tp_as_number */
1296 0, /* tp_as_sequence */
1297 0, /* tp_as_mapping */
1301 PyObject_GenericGetAttr
, /* tp_getattro */
1302 0, /* tp_setattro */
1303 0, /* tp_as_buffer */
1304 Py_TPFLAGS_DEFAULT
, /* tp_flags */
1306 0, /* tp_traverse */
1308 0, /* tp_richcompare */
1309 0, /* tp_weaklistoffset */
1310 PyObject_SelfIter
, /* tp_iter */
1311 (iternextfunc
)fieldnameiter_next
, /* tp_iternext */
1312 fieldnameiter_methods
, /* tp_methods */
1315 /* formatter_field_name_split is used to implement
1316 string.Formatter.vformat. it takes a PEP 3101 "field name", and
1317 returns a tuple of (first, rest): "first", the part before the
1318 first '.' or '['; and "rest", an iterator for the rest of the field
1319 name. it's a wrapper around stringlib/string_format.h's
1320 field_name_split. The iterator it returns is a
1321 FieldNameIterator */
1323 formatter_field_name_split(STRINGLIB_OBJECT
*self
)
1326 Py_ssize_t first_idx
;
1327 fieldnameiterobject
*it
;
1329 PyObject
*first_obj
= NULL
;
1330 PyObject
*result
= NULL
;
1332 it
= PyObject_New(fieldnameiterobject
, &PyFieldNameIter_Type
);
1336 /* take ownership, give the object to the iterator. this is
1337 just to keep the field_name alive */
1341 /* Pass in auto_number = NULL. We'll return an empty string for
1342 first_obj in that case. */
1343 if (!field_name_split(STRINGLIB_STR(self
),
1344 STRINGLIB_LEN(self
),
1345 &first
, &first_idx
, &it
->it_field
, NULL
))
1348 /* first becomes an integer, if possible; else a string */
1349 if (first_idx
!= -1)
1350 first_obj
= PyLong_FromSsize_t(first_idx
);
1352 /* convert "first" into a string object */
1353 first_obj
= SubString_new_object(&first
);
1354 if (first_obj
== NULL
)
1357 /* return a tuple of values */
1358 result
= PyTuple_Pack(2, first_obj
, it
);
1362 Py_XDECREF(first_obj
);