2 string_format.h -- implementation of string.format().
4 It uses the Objects/stringlib conventions, so that it can be
5 compiled for both unicode and string objects.
9 /* Defines for Python 2.6 compatibility */
10 #if PY_VERSION_HEX < 0x03000000
11 #define PyLong_FromSsize_t _PyLong_FromSsize_t
14 /* Defines for more efficiently reallocating the string buffer */
15 #define INITIAL_SIZE_INCREMENT 100
16 #define SIZE_MULTIPLIER 2
17 #define MAX_SIZE_INCREMENT 3200
20 /************************************************************************/
21 /*********** Global data structures and forward declarations *********/
22 /************************************************************************/
25 A SubString consists of the characters between two string or
38 } AutoNumberState
; /* Keep track if we're auto-numbering fields */
40 /* Keeps track of our auto-numbering state, and which number field we're on */
42 AutoNumberState an_state
;
47 /* forward declaration for recursion */
49 build_string(SubString
*input
, PyObject
*args
, PyObject
*kwargs
,
50 int recursion_depth
, AutoNumber
*auto_number
);
54 /************************************************************************/
55 /************************** Utility functions ************************/
56 /************************************************************************/
59 AutoNumber_Init(AutoNumber
*auto_number
)
61 auto_number
->an_state
= ANS_INIT
;
62 auto_number
->an_field_number
= 0;
65 /* fill in a SubString from a pointer and length */
67 SubString_init(SubString
*str
, STRINGLIB_CHAR
*p
, Py_ssize_t len
)
73 str
->end
= str
->ptr
+ len
;
76 /* return a new string. if str->ptr is NULL, return None */
77 Py_LOCAL_INLINE(PyObject
*)
78 SubString_new_object(SubString
*str
)
80 if (str
->ptr
== NULL
) {
84 return STRINGLIB_NEW(str
->ptr
, str
->end
- str
->ptr
);
87 /* return a new string. if str->ptr is NULL, return None */
88 Py_LOCAL_INLINE(PyObject
*)
89 SubString_new_object_or_empty(SubString
*str
)
91 if (str
->ptr
== NULL
) {
92 return STRINGLIB_NEW(NULL
, 0);
94 return STRINGLIB_NEW(str
->ptr
, str
->end
- str
->ptr
);
97 /* Return 1 if an error has been detected switching between automatic
98 field numbering and manual field specification, else return 0. Set
99 ValueError on error. */
101 autonumber_state_error(AutoNumberState state
, int field_name_is_empty
)
103 if (state
== ANS_MANUAL
) {
104 if (field_name_is_empty
) {
105 PyErr_SetString(PyExc_ValueError
, "cannot switch from "
106 "manual field specification to "
107 "automatic field numbering");
112 if (!field_name_is_empty
) {
113 PyErr_SetString(PyExc_ValueError
, "cannot switch from "
114 "automatic field numbering to "
115 "manual field specification");
123 /************************************************************************/
124 /*********** Output string management functions ****************/
125 /************************************************************************/
131 Py_ssize_t size_increment
;
134 /* initialize an OutputString object, reserving size characters */
136 output_initialize(OutputString
*output
, Py_ssize_t size
)
138 output
->obj
= STRINGLIB_NEW(NULL
, size
);
139 if (output
->obj
== NULL
)
142 output
->ptr
= STRINGLIB_STR(output
->obj
);
143 output
->end
= STRINGLIB_LEN(output
->obj
) + output
->ptr
;
144 output
->size_increment
= INITIAL_SIZE_INCREMENT
;
150 output_extend reallocates the output string buffer.
151 It returns a status: 0 for a failed reallocation,
156 output_extend(OutputString
*output
, Py_ssize_t count
)
158 STRINGLIB_CHAR
*startptr
= STRINGLIB_STR(output
->obj
);
159 Py_ssize_t curlen
= output
->ptr
- startptr
;
160 Py_ssize_t maxlen
= curlen
+ count
+ output
->size_increment
;
162 if (STRINGLIB_RESIZE(&output
->obj
, maxlen
) < 0)
164 startptr
= STRINGLIB_STR(output
->obj
);
165 output
->ptr
= startptr
+ curlen
;
166 output
->end
= startptr
+ maxlen
;
167 if (output
->size_increment
< MAX_SIZE_INCREMENT
)
168 output
->size_increment
*= SIZE_MULTIPLIER
;
173 output_data dumps characters into our output string
176 In some cases, it has to reallocate the string.
178 It returns a status: 0 for a failed reallocation,
182 output_data(OutputString
*output
, const STRINGLIB_CHAR
*s
, Py_ssize_t count
)
184 if ((count
> output
->end
- output
->ptr
) && !output_extend(output
, count
))
186 memcpy(output
->ptr
, s
, count
* sizeof(STRINGLIB_CHAR
));
187 output
->ptr
+= count
;
191 /************************************************************************/
192 /*********** Format string parsing -- integers and identifiers *********/
193 /************************************************************************/
196 get_integer(const SubString
*str
)
198 Py_ssize_t accumulator
= 0;
202 /* empty string is an error */
203 if (str
->ptr
>= str
->end
)
206 for (p
= str
->ptr
; p
< str
->end
; p
++) {
207 digitval
= STRINGLIB_TODECIMAL(*p
);
211 Detect possible overflow before it happens:
213 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
214 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
216 if (accumulator
> (PY_SSIZE_T_MAX
- digitval
) / 10) {
217 PyErr_Format(PyExc_ValueError
,
218 "Too many decimal digits in format string");
221 accumulator
= accumulator
* 10 + digitval
;
226 /************************************************************************/
227 /******** Functions to get field objects and specification strings ******/
228 /************************************************************************/
230 /* do the equivalent of obj.name */
232 getattr(PyObject
*obj
, SubString
*name
)
235 PyObject
*str
= SubString_new_object(name
);
238 newobj
= PyObject_GetAttr(obj
, str
);
243 /* do the equivalent of obj[idx], where obj is a sequence */
245 getitem_sequence(PyObject
*obj
, Py_ssize_t idx
)
247 return PySequence_GetItem(obj
, idx
);
250 /* do the equivalent of obj[idx], where obj is not a sequence */
252 getitem_idx(PyObject
*obj
, Py_ssize_t idx
)
255 PyObject
*idx_obj
= PyLong_FromSsize_t(idx
);
258 newobj
= PyObject_GetItem(obj
, idx_obj
);
263 /* do the equivalent of obj[name] */
265 getitem_str(PyObject
*obj
, SubString
*name
)
268 PyObject
*str
= SubString_new_object(name
);
271 newobj
= PyObject_GetItem(obj
, str
);
277 /* the entire string we're parsing. we assume that someone else
278 is managing its lifetime, and that it will exist for the
279 lifetime of the iterator. can be empty */
282 /* pointer to where we are inside field_name */
288 FieldNameIterator_init(FieldNameIterator
*self
, STRINGLIB_CHAR
*ptr
,
291 SubString_init(&self
->str
, ptr
, len
);
292 self
->ptr
= self
->str
.ptr
;
297 _FieldNameIterator_attr(FieldNameIterator
*self
, SubString
*name
)
301 name
->ptr
= self
->ptr
;
303 /* return everything until '.' or '[' */
304 while (self
->ptr
< self
->str
.end
) {
305 switch (c
= *self
->ptr
++) {
308 /* back up so that this character will be seen next time */
316 /* end of string is okay */
317 name
->end
= self
->ptr
;
322 _FieldNameIterator_item(FieldNameIterator
*self
, SubString
*name
)
324 int bracket_seen
= 0;
327 name
->ptr
= self
->ptr
;
329 /* return everything until ']' */
330 while (self
->ptr
< self
->str
.end
) {
331 switch (c
= *self
->ptr
++) {
340 /* make sure we ended with a ']' */
342 PyErr_SetString(PyExc_ValueError
, "Missing ']' in format string");
346 /* end of string is okay */
347 /* don't include the ']' */
348 name
->end
= self
->ptr
-1;
352 /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
354 FieldNameIterator_next(FieldNameIterator
*self
, int *is_attribute
,
355 Py_ssize_t
*name_idx
, SubString
*name
)
357 /* check at end of input */
358 if (self
->ptr
>= self
->str
.end
)
361 switch (*self
->ptr
++) {
364 if (_FieldNameIterator_attr(self
, name
) == 0)
370 if (_FieldNameIterator_item(self
, name
) == 0)
372 *name_idx
= get_integer(name
);
373 if (*name_idx
== -1 && PyErr_Occurred())
377 /* Invalid character follows ']' */
378 PyErr_SetString(PyExc_ValueError
, "Only '.' or '[' may "
379 "follow ']' in format field specifier");
383 /* empty string is an error */
384 if (name
->ptr
== name
->end
) {
385 PyErr_SetString(PyExc_ValueError
, "Empty attribute in format string");
394 output: 'first' points to the part before the first '[' or '.'
395 'first_idx' is -1 if 'first' is not an integer, otherwise
396 it's the value of first converted to an integer
397 'rest' is an iterator to return the rest
400 field_name_split(STRINGLIB_CHAR
*ptr
, Py_ssize_t len
, SubString
*first
,
401 Py_ssize_t
*first_idx
, FieldNameIterator
*rest
,
402 AutoNumber
*auto_number
)
405 STRINGLIB_CHAR
*p
= ptr
;
406 STRINGLIB_CHAR
*end
= ptr
+ len
;
407 int field_name_is_empty
;
408 int using_numeric_index
;
410 /* find the part up until the first '.' or '[' */
415 /* back up so that this character is available to the
425 /* set up the return values */
426 SubString_init(first
, ptr
, p
- ptr
);
427 FieldNameIterator_init(rest
, p
, end
- p
);
429 /* see if "first" is an integer, in which case it's used as an index */
430 *first_idx
= get_integer(first
);
431 if (*first_idx
== -1 && PyErr_Occurred())
434 field_name_is_empty
= first
->ptr
>= first
->end
;
436 /* If the field name is omitted or if we have a numeric index
437 specified, then we're doing numeric indexing into args. */
438 using_numeric_index
= field_name_is_empty
|| *first_idx
!= -1;
440 /* We always get here exactly one time for each field we're
441 processing. And we get here in field order (counting by left
442 braces). So this is the perfect place to handle automatic field
443 numbering if the field name is omitted. */
445 /* Check if we need to do the auto-numbering. It's not needed if
446 we're called from string.Format routines, because it's handled
447 in that class by itself. */
449 /* Initialize our auto numbering state if this is the first
450 time we're either auto-numbering or manually numbering. */
451 if (auto_number
->an_state
== ANS_INIT
&& using_numeric_index
)
452 auto_number
->an_state
= field_name_is_empty
?
453 ANS_AUTO
: ANS_MANUAL
;
455 /* Make sure our state is consistent with what we're doing
456 this time through. Only check if we're using a numeric
458 if (using_numeric_index
)
459 if (autonumber_state_error(auto_number
->an_state
,
460 field_name_is_empty
))
462 /* Zero length field means we want to do auto-numbering of the
464 if (field_name_is_empty
)
465 *first_idx
= (auto_number
->an_field_number
)++;
473 get_field_object returns the object inside {}, before the
474 format_spec. It handles getindex and getattr lookups and consumes
475 the entire input string.
478 get_field_object(SubString
*input
, PyObject
*args
, PyObject
*kwargs
,
479 AutoNumber
*auto_number
)
481 PyObject
*obj
= NULL
;
487 FieldNameIterator rest
;
489 if (!field_name_split(input
->ptr
, input
->end
- input
->ptr
, &first
,
490 &index
, &rest
, auto_number
)) {
495 /* look up in kwargs */
496 PyObject
*key
= SubString_new_object(&first
);
499 if ((kwargs
== NULL
) || (obj
= PyDict_GetItem(kwargs
, key
)) == NULL
) {
500 PyErr_SetObject(PyExc_KeyError
, key
);
508 /* look up in args */
509 obj
= PySequence_GetItem(args
, index
);
514 /* iterate over the rest of the field_name */
515 while ((ok
= FieldNameIterator_next(&rest
, &is_attribute
, &index
,
520 /* getattr lookup "." */
521 tmp
= getattr(obj
, &name
);
523 /* getitem lookup "[]" */
525 tmp
= getitem_str(obj
, &name
);
527 if (PySequence_Check(obj
))
528 tmp
= getitem_sequence(obj
, index
);
531 tmp
= getitem_idx(obj
, index
);
539 /* end of iterator, this is the non-error case */
547 /************************************************************************/
548 /***************** Field rendering functions **************************/
549 /************************************************************************/
552 render_field() is the main function in this section. It takes the
553 field object and field specification string generated by
554 get_field_and_spec, and renders the field into the output string.
556 render_field calls fieldobj.__format__(format_spec) method, and
557 appends to the output.
560 render_field(PyObject
*fieldobj
, SubString
*format_spec
, OutputString
*output
)
563 PyObject
*result
= NULL
;
564 PyObject
*format_spec_object
= NULL
;
565 PyObject
*(*formatter
)(PyObject
*, STRINGLIB_CHAR
*, Py_ssize_t
) = NULL
;
566 STRINGLIB_CHAR
* format_spec_start
= format_spec
->ptr
?
567 format_spec
->ptr
: NULL
;
568 Py_ssize_t format_spec_len
= format_spec
->ptr
?
569 format_spec
->end
- format_spec
->ptr
: 0;
571 /* If we know the type exactly, skip the lookup of __format__ and just
572 call the formatter directly. */
573 #if STRINGLIB_IS_UNICODE
574 if (PyUnicode_CheckExact(fieldobj
))
575 formatter
= _PyUnicode_FormatAdvanced
;
576 /* Unfortunately, there's a problem with checking for int, long,
577 and float here. If we're being included as unicode, their
578 formatters expect string format_spec args. For now, just skip
579 this optimization for unicode. This could be fixed, but it's a
582 if (PyString_CheckExact(fieldobj
))
583 formatter
= _PyBytes_FormatAdvanced
;
584 else if (PyInt_CheckExact(fieldobj
))
585 formatter
=_PyInt_FormatAdvanced
;
586 else if (PyLong_CheckExact(fieldobj
))
587 formatter
=_PyLong_FormatAdvanced
;
588 else if (PyFloat_CheckExact(fieldobj
))
589 formatter
= _PyFloat_FormatAdvanced
;
593 /* we know exactly which formatter will be called when __format__ is
594 looked up, so call it directly, instead. */
595 result
= formatter(fieldobj
, format_spec_start
, format_spec_len
);
598 /* We need to create an object out of the pointers we have, because
599 __format__ takes a string/unicode object for format_spec. */
600 format_spec_object
= STRINGLIB_NEW(format_spec_start
,
602 if (format_spec_object
== NULL
)
605 result
= PyObject_Format(fieldobj
, format_spec_object
);
610 #if PY_VERSION_HEX >= 0x03000000
611 assert(PyUnicode_Check(result
));
613 assert(PyString_Check(result
) || PyUnicode_Check(result
));
615 /* Convert result to our type. We could be str, and result could
618 PyObject
*tmp
= STRINGLIB_TOSTR(result
);
626 ok
= output_data(output
,
627 STRINGLIB_STR(result
), STRINGLIB_LEN(result
));
629 Py_XDECREF(format_spec_object
);
635 parse_field(SubString
*str
, SubString
*field_name
, SubString
*format_spec
,
636 STRINGLIB_CHAR
*conversion
)
638 /* Note this function works if the field name is zero length,
639 which is good. Zero length field names are handled later, in
642 STRINGLIB_CHAR c
= 0;
644 /* initialize these, as they may be empty */
646 SubString_init(format_spec
, NULL
, 0);
648 /* Search for the field name. it's terminated by the end of
649 the string, or a ':' or '!' */
650 field_name
->ptr
= str
->ptr
;
651 while (str
->ptr
< str
->end
) {
652 switch (c
= *(str
->ptr
++)) {
662 if (c
== '!' || c
== ':') {
663 /* we have a format specifier and/or a conversion */
664 /* don't include the last character */
665 field_name
->end
= str
->ptr
-1;
667 /* the format specifier is the rest of the string */
668 format_spec
->ptr
= str
->ptr
;
669 format_spec
->end
= str
->end
;
671 /* see if there's a conversion specifier */
673 /* there must be another character present */
674 if (format_spec
->ptr
>= format_spec
->end
) {
675 PyErr_SetString(PyExc_ValueError
,
676 "end of format while looking for conversion "
680 *conversion
= *(format_spec
->ptr
++);
682 /* if there is another character, it must be a colon */
683 if (format_spec
->ptr
< format_spec
->end
) {
684 c
= *(format_spec
->ptr
++);
686 PyErr_SetString(PyExc_ValueError
,
687 "expected ':' after format specifier");
694 /* end of string, there's no format_spec or conversion */
695 field_name
->end
= str
->ptr
;
700 /************************************************************************/
701 /******* Output string allocation and escape-to-markup processing ******/
702 /************************************************************************/
704 /* MarkupIterator breaks the string into pieces of either literal
705 text, or things inside {} that need to be marked up. it is
706 designed to make it easy to wrap a Python iterator around it, for
707 use with the Formatter class */
714 MarkupIterator_init(MarkupIterator
*self
, STRINGLIB_CHAR
*ptr
, Py_ssize_t len
)
716 SubString_init(&self
->str
, ptr
, len
);
720 /* returns 0 on error, 1 on non-error termination, and 2 if it got a
721 string (or something to be expanded) */
723 MarkupIterator_next(MarkupIterator
*self
, SubString
*literal
,
724 int *field_present
, SubString
*field_name
,
725 SubString
*format_spec
, STRINGLIB_CHAR
*conversion
,
726 int *format_spec_needs_expanding
)
729 STRINGLIB_CHAR c
= 0;
730 STRINGLIB_CHAR
*start
;
733 int markup_follows
= 0;
735 /* initialize all of the output variables */
736 SubString_init(literal
, NULL
, 0);
737 SubString_init(field_name
, NULL
, 0);
738 SubString_init(format_spec
, NULL
, 0);
740 *format_spec_needs_expanding
= 0;
743 /* No more input, end of iterator. This is the normal exit
745 if (self
->str
.ptr
>= self
->str
.end
)
748 start
= self
->str
.ptr
;
750 /* First read any literal text. Read until the end of string, an
751 escaped '{' or '}', or an unescaped '{'. In order to never
752 allocate memory and so I can just pass pointers around, if
753 there's an escaped '{' or '}' then we'll return the literal
754 including the brace, but no format object. The next time
755 through, we'll return the rest of the literal, skipping past
756 the second consecutive brace. */
757 while (self
->str
.ptr
< self
->str
.end
) {
758 switch (c
= *(self
->str
.ptr
++)) {
769 at_end
= self
->str
.ptr
>= self
->str
.end
;
770 len
= self
->str
.ptr
- start
;
772 if ((c
== '}') && (at_end
|| (c
!= *self
->str
.ptr
))) {
773 PyErr_SetString(PyExc_ValueError
, "Single '}' encountered "
777 if (at_end
&& c
== '{') {
778 PyErr_SetString(PyExc_ValueError
, "Single '{' encountered "
783 if (c
== *self
->str
.ptr
) {
784 /* escaped } or {, skip it in the input. there is no
785 markup object following us, just this literal text */
793 /* record the literal text */
794 literal
->ptr
= start
;
795 literal
->end
= start
+ len
;
800 /* this is markup, find the end of the string by counting nested
801 braces. note that this prohibits escaped braces, so that
802 format_specs cannot have braces in them. */
806 start
= self
->str
.ptr
;
808 /* we know we can't have a zero length string, so don't worry
810 while (self
->str
.ptr
< self
->str
.end
) {
811 switch (c
= *(self
->str
.ptr
++)) {
813 /* the format spec needs to be recursively expanded.
814 this is an optimization, and not strictly needed */
815 *format_spec_needs_expanding
= 1;
821 /* we're done. parse and get out */
824 SubString_init(&s
, start
, self
->str
.ptr
- 1 - start
);
825 if (parse_field(&s
, field_name
, format_spec
, conversion
) == 0)
835 /* end of string while searching for matching '}' */
836 PyErr_SetString(PyExc_ValueError
, "unmatched '{' in format");
841 /* do the !r or !s conversion on obj */
843 do_conversion(PyObject
*obj
, STRINGLIB_CHAR conversion
)
845 /* XXX in pre-3.0, do we need to convert this to unicode, since it
846 might have returned a string? */
847 switch (conversion
) {
849 return PyObject_Repr(obj
);
851 return STRINGLIB_TOSTR(obj
);
853 if (conversion
> 32 && conversion
< 127) {
854 /* It's the ASCII subrange; casting to char is safe
855 (assuming the execution character set is an ASCII
857 PyErr_Format(PyExc_ValueError
,
858 "Unknown conversion specifier %c",
861 PyErr_Format(PyExc_ValueError
,
862 "Unknown conversion specifier \\x%x",
863 (unsigned int)conversion
);
870 {field_name!conversion:format_spec}
872 compute the result and write it to output.
873 format_spec_needs_expanding is an optimization. if it's false,
874 just output the string directly, otherwise recursively expand the
877 field_name is allowed to be zero length, in which case we
878 are doing auto field numbering.
882 output_markup(SubString
*field_name
, SubString
*format_spec
,
883 int format_spec_needs_expanding
, STRINGLIB_CHAR conversion
,
884 OutputString
*output
, PyObject
*args
, PyObject
*kwargs
,
885 int recursion_depth
, AutoNumber
*auto_number
)
887 PyObject
*tmp
= NULL
;
888 PyObject
*fieldobj
= NULL
;
889 SubString expanded_format_spec
;
890 SubString
*actual_format_spec
;
893 /* convert field_name to an object */
894 fieldobj
= get_field_object(field_name
, args
, kwargs
, auto_number
);
895 if (fieldobj
== NULL
)
898 if (conversion
!= '\0') {
899 tmp
= do_conversion(fieldobj
, conversion
);
903 /* do the assignment, transferring ownership: fieldobj = tmp */
909 /* if needed, recursively compute the format_spec */
910 if (format_spec_needs_expanding
) {
911 tmp
= build_string(format_spec
, args
, kwargs
, recursion_depth
-1,
916 /* note that in the case we're expanding the format string,
917 tmp must be kept around until after the call to
919 SubString_init(&expanded_format_spec
,
920 STRINGLIB_STR(tmp
), STRINGLIB_LEN(tmp
));
921 actual_format_spec
= &expanded_format_spec
;
924 actual_format_spec
= format_spec
;
926 if (render_field(fieldobj
, actual_format_spec
, output
) == 0)
932 Py_XDECREF(fieldobj
);
939 do_markup is the top-level loop for the format() method. It
940 searches through the format string for escapes to markup codes, and
941 calls other functions to move non-markup text to the output,
942 and to perform the markup to the output.
945 do_markup(SubString
*input
, PyObject
*args
, PyObject
*kwargs
,
946 OutputString
*output
, int recursion_depth
, AutoNumber
*auto_number
)
949 int format_spec_needs_expanding
;
953 SubString field_name
;
954 SubString format_spec
;
955 STRINGLIB_CHAR conversion
;
957 MarkupIterator_init(&iter
, input
->ptr
, input
->end
- input
->ptr
);
958 while ((result
= MarkupIterator_next(&iter
, &literal
, &field_present
,
959 &field_name
, &format_spec
,
961 &format_spec_needs_expanding
)) == 2) {
962 if (!output_data(output
, literal
.ptr
, literal
.end
- literal
.ptr
))
965 if (!output_markup(&field_name
, &format_spec
,
966 format_spec_needs_expanding
, conversion
, output
,
967 args
, kwargs
, recursion_depth
, auto_number
))
975 build_string allocates the output string and then
976 calls do_markup to do the heavy lifting.
979 build_string(SubString
*input
, PyObject
*args
, PyObject
*kwargs
,
980 int recursion_depth
, AutoNumber
*auto_number
)
983 PyObject
*result
= NULL
;
986 output
.obj
= NULL
; /* needed so cleanup code always works */
988 /* check the recursion level */
989 if (recursion_depth
<= 0) {
990 PyErr_SetString(PyExc_ValueError
,
991 "Max string recursion exceeded");
995 /* initial size is the length of the format string, plus the size
996 increment. seems like a reasonable default */
997 if (!output_initialize(&output
,
998 input
->end
- input
->ptr
+
999 INITIAL_SIZE_INCREMENT
))
1002 if (!do_markup(input
, args
, kwargs
, &output
, recursion_depth
,
1007 count
= output
.ptr
- STRINGLIB_STR(output
.obj
);
1008 if (STRINGLIB_RESIZE(&output
.obj
, count
) < 0) {
1012 /* transfer ownership to result */
1013 result
= output
.obj
;
1017 Py_XDECREF(output
.obj
);
1021 /************************************************************************/
1022 /*********** main routine ***********************************************/
1023 /************************************************************************/
1025 /* this is the main entry point */
1027 do_string_format(PyObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1031 /* PEP 3101 says only 2 levels, so that
1032 "{0:{1}}".format('abc', 's') # works
1033 "{0:{1:{2}}}".format('abc', 's', '') # fails
1035 int recursion_depth
= 2;
1037 AutoNumber auto_number
;
1039 AutoNumber_Init(&auto_number
);
1040 SubString_init(&input
, STRINGLIB_STR(self
), STRINGLIB_LEN(self
));
1041 return build_string(&input
, args
, kwargs
, recursion_depth
, &auto_number
);
1046 /************************************************************************/
1047 /*********** formatteriterator ******************************************/
1048 /************************************************************************/
1050 /* This is used to implement string.Formatter.vparse(). It exists so
1051 Formatter can share code with the built in unicode.format() method.
1052 It's really just a wrapper around MarkupIterator that is callable
1058 STRINGLIB_OBJECT
*str
;
1060 MarkupIterator it_markup
;
1061 } formatteriterobject
;
1064 formatteriter_dealloc(formatteriterobject
*it
)
1066 Py_XDECREF(it
->str
);
1071 (literal, field_name, format_spec, conversion)
1073 literal is any literal text to output. might be zero length
1074 field_name is the string before the ':'. might be None
1075 format_spec is the string after the ':'. might be None
1076 conversion is either None, or the string after the '!'
1079 formatteriter_next(formatteriterobject
*it
)
1082 SubString field_name
;
1083 SubString format_spec
;
1084 STRINGLIB_CHAR conversion
;
1085 int format_spec_needs_expanding
;
1087 int result
= MarkupIterator_next(&it
->it_markup
, &literal
, &field_present
,
1088 &field_name
, &format_spec
, &conversion
,
1089 &format_spec_needs_expanding
);
1091 /* all of the SubString objects point into it->str, so no
1092 memory management needs to be done on them */
1093 assert(0 <= result
&& result
<= 2);
1094 if (result
== 0 || result
== 1)
1095 /* if 0, error has already been set, if 1, iterator is empty */
1098 PyObject
*literal_str
= NULL
;
1099 PyObject
*field_name_str
= NULL
;
1100 PyObject
*format_spec_str
= NULL
;
1101 PyObject
*conversion_str
= NULL
;
1102 PyObject
*tuple
= NULL
;
1104 literal_str
= SubString_new_object(&literal
);
1105 if (literal_str
== NULL
)
1108 field_name_str
= SubString_new_object(&field_name
);
1109 if (field_name_str
== NULL
)
1112 /* if field_name is non-zero length, return a string for
1113 format_spec (even if zero length), else return None */
1114 format_spec_str
= (field_present
?
1115 SubString_new_object_or_empty
:
1116 SubString_new_object
)(&format_spec
);
1117 if (format_spec_str
== NULL
)
1120 /* if the conversion is not specified, return a None,
1121 otherwise create a one length string with the conversion
1123 if (conversion
== '\0') {
1124 conversion_str
= Py_None
;
1125 Py_INCREF(conversion_str
);
1128 conversion_str
= STRINGLIB_NEW(&conversion
, 1);
1129 if (conversion_str
== NULL
)
1132 tuple
= PyTuple_Pack(4, literal_str
, field_name_str
, format_spec_str
,
1135 Py_XDECREF(literal_str
);
1136 Py_XDECREF(field_name_str
);
1137 Py_XDECREF(format_spec_str
);
1138 Py_XDECREF(conversion_str
);
1143 static PyMethodDef formatteriter_methods
[] = {
1144 {NULL
, NULL
} /* sentinel */
1147 static PyTypeObject PyFormatterIter_Type
= {
1148 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
1149 "formatteriterator", /* tp_name */
1150 sizeof(formatteriterobject
), /* tp_basicsize */
1151 0, /* tp_itemsize */
1153 (destructor
)formatteriter_dealloc
, /* tp_dealloc */
1159 0, /* tp_as_number */
1160 0, /* tp_as_sequence */
1161 0, /* tp_as_mapping */
1165 PyObject_GenericGetAttr
, /* tp_getattro */
1166 0, /* tp_setattro */
1167 0, /* tp_as_buffer */
1168 Py_TPFLAGS_DEFAULT
, /* tp_flags */
1170 0, /* tp_traverse */
1172 0, /* tp_richcompare */
1173 0, /* tp_weaklistoffset */
1174 PyObject_SelfIter
, /* tp_iter */
1175 (iternextfunc
)formatteriter_next
, /* tp_iternext */
1176 formatteriter_methods
, /* tp_methods */
1180 /* unicode_formatter_parser is used to implement
1181 string.Formatter.vformat. it parses a string and returns tuples
1182 describing the parsed elements. It's a wrapper around
1183 stringlib/string_format.h's MarkupIterator */
1185 formatter_parser(STRINGLIB_OBJECT
*self
)
1187 formatteriterobject
*it
;
1189 it
= PyObject_New(formatteriterobject
, &PyFormatterIter_Type
);
1193 /* take ownership, give the object to the iterator */
1197 /* initialize the contained MarkupIterator */
1198 MarkupIterator_init(&it
->it_markup
,
1199 STRINGLIB_STR(self
),
1200 STRINGLIB_LEN(self
));
1202 return (PyObject
*)it
;
1206 /************************************************************************/
1207 /*********** fieldnameiterator ******************************************/
1208 /************************************************************************/
1211 /* This is used to implement string.Formatter.vparse(). It parses the
1212 field name into attribute and item values. It's a Python-callable
1213 wrapper around FieldNameIterator */
1218 STRINGLIB_OBJECT
*str
;
1220 FieldNameIterator it_field
;
1221 } fieldnameiterobject
;
1224 fieldnameiter_dealloc(fieldnameiterobject
*it
)
1226 Py_XDECREF(it
->str
);
1232 is_attr is true if we used attribute syntax (e.g., '.foo')
1233 false if we used index syntax (e.g., '[foo]')
1234 value is an integer or string
1237 fieldnameiter_next(fieldnameiterobject
*it
)
1244 result
= FieldNameIterator_next(&it
->it_field
, &is_attr
,
1246 if (result
== 0 || result
== 1)
1247 /* if 0, error has already been set, if 1, iterator is empty */
1250 PyObject
* result
= NULL
;
1251 PyObject
* is_attr_obj
= NULL
;
1252 PyObject
* obj
= NULL
;
1254 is_attr_obj
= PyBool_FromLong(is_attr
);
1255 if (is_attr_obj
== NULL
)
1258 /* either an integer or a string */
1260 obj
= PyLong_FromSsize_t(idx
);
1262 obj
= SubString_new_object(&name
);
1266 /* return a tuple of values */
1267 result
= PyTuple_Pack(2, is_attr_obj
, obj
);
1270 Py_XDECREF(is_attr_obj
);
1276 static PyMethodDef fieldnameiter_methods
[] = {
1277 {NULL
, NULL
} /* sentinel */
1280 static PyTypeObject PyFieldNameIter_Type
= {
1281 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
1282 "fieldnameiterator", /* tp_name */
1283 sizeof(fieldnameiterobject
), /* tp_basicsize */
1284 0, /* tp_itemsize */
1286 (destructor
)fieldnameiter_dealloc
, /* tp_dealloc */
1292 0, /* tp_as_number */
1293 0, /* tp_as_sequence */
1294 0, /* tp_as_mapping */
1298 PyObject_GenericGetAttr
, /* tp_getattro */
1299 0, /* tp_setattro */
1300 0, /* tp_as_buffer */
1301 Py_TPFLAGS_DEFAULT
, /* tp_flags */
1303 0, /* tp_traverse */
1305 0, /* tp_richcompare */
1306 0, /* tp_weaklistoffset */
1307 PyObject_SelfIter
, /* tp_iter */
1308 (iternextfunc
)fieldnameiter_next
, /* tp_iternext */
1309 fieldnameiter_methods
, /* tp_methods */
1312 /* unicode_formatter_field_name_split is used to implement
1313 string.Formatter.vformat. it takes a PEP 3101 "field name", and
1314 returns a tuple of (first, rest): "first", the part before the
1315 first '.' or '['; and "rest", an iterator for the rest of the field
1316 name. it's a wrapper around stringlib/string_format.h's
1317 field_name_split. The iterator it returns is a
1318 FieldNameIterator */
1320 formatter_field_name_split(STRINGLIB_OBJECT
*self
)
1323 Py_ssize_t first_idx
;
1324 fieldnameiterobject
*it
;
1326 PyObject
*first_obj
= NULL
;
1327 PyObject
*result
= NULL
;
1329 it
= PyObject_New(fieldnameiterobject
, &PyFieldNameIter_Type
);
1333 /* take ownership, give the object to the iterator. this is
1334 just to keep the field_name alive */
1338 /* Pass in auto_number = NULL. We'll return an empty string for
1339 first_obj in that case. */
1340 if (!field_name_split(STRINGLIB_STR(self
),
1341 STRINGLIB_LEN(self
),
1342 &first
, &first_idx
, &it
->it_field
, NULL
))
1345 /* first becomes an integer, if possible; else a string */
1346 if (first_idx
!= -1)
1347 first_obj
= PyLong_FromSsize_t(first_idx
);
1349 /* convert "first" into a string object */
1350 first_obj
= SubString_new_object(&first
);
1351 if (first_obj
== NULL
)
1354 /* return a tuple of values */
1355 result
= PyTuple_Pack(2, first_obj
, it
);
1359 Py_XDECREF(first_obj
);