]> git.proxmox.com Git - mirror_edk2.git/blame - AppPkg/Applications/Python/Python-2.7.10/Objects/stringlib/string_format.h
AppPkg/Applications/Python/Python-2.7.10: Initial Checkin part 3/5.
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.10 / Objects / stringlib / string_format.h
CommitLineData
53b2ba57
DM
/*
    string_format.h -- implementation of string.format().

    It uses the Objects/stringlib conventions, so that it can be
    compiled for both unicode and string objects.
*/
7\r
8\r
/* Python 2.6 compatibility: when PY_VERSION_HEX is below 0x03000000,
   PyLong_FromSsize_t is aliased to _PyLong_FromSsize_t. */
#if PY_VERSION_HEX < 0x03000000
#define PyLong_FromSsize_t _PyLong_FromSsize_t
#endif
13\r
/* Tuning knobs for growing the output string buffer efficiently */
#define INITIAL_SIZE_INCREMENT 100   /* slack reserved on the first growth */
#define SIZE_MULTIPLIER 2            /* factor applied to the slack after each growth */
#define MAX_SIZE_INCREMENT 3200      /* the slack stops growing once it reaches this */
18\r
19\r
20/************************************************************************/\r
21/*********** Global data structures and forward declarations *********/\r
22/************************************************************************/\r
23\r
24/*\r
25 A SubString consists of the characters between two string or\r
26 unicode pointers.\r
27*/\r
28typedef struct {\r
29 STRINGLIB_CHAR *ptr;\r
30 STRINGLIB_CHAR *end;\r
31} SubString;\r
32\r
33\r
/* Keep track if we're auto-numbering fields */
typedef enum {
    ANS_INIT,       /* no numeric field seen yet */
    ANS_AUTO,       /* fields are numbered automatically ("{}") */
    ANS_MANUAL      /* fields carry explicit indexes ("{0}") */
} AutoNumberState;
39\r
40/* Keeps track of our auto-numbering state, and which number field we're on */\r
41typedef struct {\r
42 AutoNumberState an_state;\r
43 int an_field_number;\r
44} AutoNumber;\r
45\r
46\r
47/* forward declaration for recursion */\r
48static PyObject *\r
49build_string(SubString *input, PyObject *args, PyObject *kwargs,\r
50 int recursion_depth, AutoNumber *auto_number);\r
51\r
52\r
53\r
54/************************************************************************/\r
55/************************** Utility functions ************************/\r
56/************************************************************************/\r
57\r
58static void\r
59AutoNumber_Init(AutoNumber *auto_number)\r
60{\r
61 auto_number->an_state = ANS_INIT;\r
62 auto_number->an_field_number = 0;\r
63}\r
64\r
65/* fill in a SubString from a pointer and length */\r
66Py_LOCAL_INLINE(void)\r
67SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)\r
68{\r
69 str->ptr = p;\r
70 if (p == NULL)\r
71 str->end = NULL;\r
72 else\r
73 str->end = str->ptr + len;\r
74}\r
75\r
76/* return a new string. if str->ptr is NULL, return None */\r
77Py_LOCAL_INLINE(PyObject *)\r
78SubString_new_object(SubString *str)\r
79{\r
80 if (str->ptr == NULL) {\r
81 Py_INCREF(Py_None);\r
82 return Py_None;\r
83 }\r
84 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);\r
85}\r
86\r
87/* return a new string. if str->ptr is NULL, return None */\r
88Py_LOCAL_INLINE(PyObject *)\r
89SubString_new_object_or_empty(SubString *str)\r
90{\r
91 if (str->ptr == NULL) {\r
92 return STRINGLIB_NEW(NULL, 0);\r
93 }\r
94 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);\r
95}\r
96\r
97/* Return 1 if an error has been detected switching between automatic\r
98 field numbering and manual field specification, else return 0. Set\r
99 ValueError on error. */\r
100static int\r
101autonumber_state_error(AutoNumberState state, int field_name_is_empty)\r
102{\r
103 if (state == ANS_MANUAL) {\r
104 if (field_name_is_empty) {\r
105 PyErr_SetString(PyExc_ValueError, "cannot switch from "\r
106 "manual field specification to "\r
107 "automatic field numbering");\r
108 return 1;\r
109 }\r
110 }\r
111 else {\r
112 if (!field_name_is_empty) {\r
113 PyErr_SetString(PyExc_ValueError, "cannot switch from "\r
114 "automatic field numbering to "\r
115 "manual field specification");\r
116 return 1;\r
117 }\r
118 }\r
119 return 0;\r
120}\r
121\r
122\r
123/************************************************************************/\r
124/*********** Output string management functions ****************/\r
125/************************************************************************/\r
126\r
127typedef struct {\r
128 STRINGLIB_CHAR *ptr;\r
129 STRINGLIB_CHAR *end;\r
130 PyObject *obj;\r
131 Py_ssize_t size_increment;\r
132} OutputString;\r
133\r
134/* initialize an OutputString object, reserving size characters */\r
135static int\r
136output_initialize(OutputString *output, Py_ssize_t size)\r
137{\r
138 output->obj = STRINGLIB_NEW(NULL, size);\r
139 if (output->obj == NULL)\r
140 return 0;\r
141\r
142 output->ptr = STRINGLIB_STR(output->obj);\r
143 output->end = STRINGLIB_LEN(output->obj) + output->ptr;\r
144 output->size_increment = INITIAL_SIZE_INCREMENT;\r
145\r
146 return 1;\r
147}\r
148\r
149/*\r
150 output_extend reallocates the output string buffer.\r
151 It returns a status: 0 for a failed reallocation,\r
152 1 for success.\r
153*/\r
154\r
155static int\r
156output_extend(OutputString *output, Py_ssize_t count)\r
157{\r
158 STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);\r
159 Py_ssize_t curlen = output->ptr - startptr;\r
160 Py_ssize_t maxlen = curlen + count + output->size_increment;\r
161\r
162 if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)\r
163 return 0;\r
164 startptr = STRINGLIB_STR(output->obj);\r
165 output->ptr = startptr + curlen;\r
166 output->end = startptr + maxlen;\r
167 if (output->size_increment < MAX_SIZE_INCREMENT)\r
168 output->size_increment *= SIZE_MULTIPLIER;\r
169 return 1;\r
170}\r
171\r
172/*\r
173 output_data dumps characters into our output string\r
174 buffer.\r
175\r
176 In some cases, it has to reallocate the string.\r
177\r
178 It returns a status: 0 for a failed reallocation,\r
179 1 for success.\r
180*/\r
181static int\r
182output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)\r
183{\r
184 if ((count > output->end - output->ptr) && !output_extend(output, count))\r
185 return 0;\r
186 memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));\r
187 output->ptr += count;\r
188 return 1;\r
189}\r
190\r
191/************************************************************************/\r
192/*********** Format string parsing -- integers and identifiers *********/\r
193/************************************************************************/\r
194\r
195static Py_ssize_t\r
196get_integer(const SubString *str)\r
197{\r
198 Py_ssize_t accumulator = 0;\r
199 Py_ssize_t digitval;\r
200 STRINGLIB_CHAR *p;\r
201\r
202 /* empty string is an error */\r
203 if (str->ptr >= str->end)\r
204 return -1;\r
205\r
206 for (p = str->ptr; p < str->end; p++) {\r
207 digitval = STRINGLIB_TODECIMAL(*p);\r
208 if (digitval < 0)\r
209 return -1;\r
210 /*\r
211 Detect possible overflow before it happens:\r
212\r
213 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if\r
214 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.\r
215 */\r
216 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {\r
217 PyErr_Format(PyExc_ValueError,\r
218 "Too many decimal digits in format string");\r
219 return -1;\r
220 }\r
221 accumulator = accumulator * 10 + digitval;\r
222 }\r
223 return accumulator;\r
224}\r
225\r
226/************************************************************************/\r
227/******** Functions to get field objects and specification strings ******/\r
228/************************************************************************/\r
229\r
230/* do the equivalent of obj.name */\r
231static PyObject *\r
232getattr(PyObject *obj, SubString *name)\r
233{\r
234 PyObject *newobj;\r
235 PyObject *str = SubString_new_object(name);\r
236 if (str == NULL)\r
237 return NULL;\r
238 newobj = PyObject_GetAttr(obj, str);\r
239 Py_DECREF(str);\r
240 return newobj;\r
241}\r
242\r
243/* do the equivalent of obj[idx], where obj is a sequence */\r
244static PyObject *\r
245getitem_sequence(PyObject *obj, Py_ssize_t idx)\r
246{\r
247 return PySequence_GetItem(obj, idx);\r
248}\r
249\r
250/* do the equivalent of obj[idx], where obj is not a sequence */\r
251static PyObject *\r
252getitem_idx(PyObject *obj, Py_ssize_t idx)\r
253{\r
254 PyObject *newobj;\r
255 PyObject *idx_obj = PyLong_FromSsize_t(idx);\r
256 if (idx_obj == NULL)\r
257 return NULL;\r
258 newobj = PyObject_GetItem(obj, idx_obj);\r
259 Py_DECREF(idx_obj);\r
260 return newobj;\r
261}\r
262\r
263/* do the equivalent of obj[name] */\r
264static PyObject *\r
265getitem_str(PyObject *obj, SubString *name)\r
266{\r
267 PyObject *newobj;\r
268 PyObject *str = SubString_new_object(name);\r
269 if (str == NULL)\r
270 return NULL;\r
271 newobj = PyObject_GetItem(obj, str);\r
272 Py_DECREF(str);\r
273 return newobj;\r
274}\r
275\r
276typedef struct {\r
277 /* the entire string we're parsing. we assume that someone else\r
278 is managing its lifetime, and that it will exist for the\r
279 lifetime of the iterator. can be empty */\r
280 SubString str;\r
281\r
282 /* pointer to where we are inside field_name */\r
283 STRINGLIB_CHAR *ptr;\r
284} FieldNameIterator;\r
285\r
286\r
287static int\r
288FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,\r
289 Py_ssize_t len)\r
290{\r
291 SubString_init(&self->str, ptr, len);\r
292 self->ptr = self->str.ptr;\r
293 return 1;\r
294}\r
295\r
296static int\r
297_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)\r
298{\r
299 STRINGLIB_CHAR c;\r
300\r
301 name->ptr = self->ptr;\r
302\r
303 /* return everything until '.' or '[' */\r
304 while (self->ptr < self->str.end) {\r
305 switch (c = *self->ptr++) {\r
306 case '[':\r
307 case '.':\r
308 /* backup so that we this character will be seen next time */\r
309 self->ptr--;\r
310 break;\r
311 default:\r
312 continue;\r
313 }\r
314 break;\r
315 }\r
316 /* end of string is okay */\r
317 name->end = self->ptr;\r
318 return 1;\r
319}\r
320\r
321static int\r
322_FieldNameIterator_item(FieldNameIterator *self, SubString *name)\r
323{\r
324 int bracket_seen = 0;\r
325 STRINGLIB_CHAR c;\r
326\r
327 name->ptr = self->ptr;\r
328\r
329 /* return everything until ']' */\r
330 while (self->ptr < self->str.end) {\r
331 switch (c = *self->ptr++) {\r
332 case ']':\r
333 bracket_seen = 1;\r
334 break;\r
335 default:\r
336 continue;\r
337 }\r
338 break;\r
339 }\r
340 /* make sure we ended with a ']' */\r
341 if (!bracket_seen) {\r
342 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");\r
343 return 0;\r
344 }\r
345\r
346 /* end of string is okay */\r
347 /* don't include the ']' */\r
348 name->end = self->ptr-1;\r
349 return 1;\r
350}\r
351\r
352/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */\r
353static int\r
354FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,\r
355 Py_ssize_t *name_idx, SubString *name)\r
356{\r
357 /* check at end of input */\r
358 if (self->ptr >= self->str.end)\r
359 return 1;\r
360\r
361 switch (*self->ptr++) {\r
362 case '.':\r
363 *is_attribute = 1;\r
364 if (_FieldNameIterator_attr(self, name) == 0)\r
365 return 0;\r
366 *name_idx = -1;\r
367 break;\r
368 case '[':\r
369 *is_attribute = 0;\r
370 if (_FieldNameIterator_item(self, name) == 0)\r
371 return 0;\r
372 *name_idx = get_integer(name);\r
373 if (*name_idx == -1 && PyErr_Occurred())\r
374 return 0;\r
375 break;\r
376 default:\r
377 /* Invalid character follows ']' */\r
378 PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "\r
379 "follow ']' in format field specifier");\r
380 return 0;\r
381 }\r
382\r
383 /* empty string is an error */\r
384 if (name->ptr == name->end) {\r
385 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");\r
386 return 0;\r
387 }\r
388\r
389 return 2;\r
390}\r
391\r
392\r
393/* input: field_name\r
394 output: 'first' points to the part before the first '[' or '.'\r
395 'first_idx' is -1 if 'first' is not an integer, otherwise\r
396 it's the value of first converted to an integer\r
397 'rest' is an iterator to return the rest\r
398*/\r
399static int\r
400field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,\r
401 Py_ssize_t *first_idx, FieldNameIterator *rest,\r
402 AutoNumber *auto_number)\r
403{\r
404 STRINGLIB_CHAR c;\r
405 STRINGLIB_CHAR *p = ptr;\r
406 STRINGLIB_CHAR *end = ptr + len;\r
407 int field_name_is_empty;\r
408 int using_numeric_index;\r
409\r
410 /* find the part up until the first '.' or '[' */\r
411 while (p < end) {\r
412 switch (c = *p++) {\r
413 case '[':\r
414 case '.':\r
415 /* backup so that we this character is available to the\r
416 "rest" iterator */\r
417 p--;\r
418 break;\r
419 default:\r
420 continue;\r
421 }\r
422 break;\r
423 }\r
424\r
425 /* set up the return values */\r
426 SubString_init(first, ptr, p - ptr);\r
427 FieldNameIterator_init(rest, p, end - p);\r
428\r
429 /* see if "first" is an integer, in which case it's used as an index */\r
430 *first_idx = get_integer(first);\r
431 if (*first_idx == -1 && PyErr_Occurred())\r
432 return 0;\r
433\r
434 field_name_is_empty = first->ptr >= first->end;\r
435\r
436 /* If the field name is omitted or if we have a numeric index\r
437 specified, then we're doing numeric indexing into args. */\r
438 using_numeric_index = field_name_is_empty || *first_idx != -1;\r
439\r
440 /* We always get here exactly one time for each field we're\r
441 processing. And we get here in field order (counting by left\r
442 braces). So this is the perfect place to handle automatic field\r
443 numbering if the field name is omitted. */\r
444\r
445 /* Check if we need to do the auto-numbering. It's not needed if\r
446 we're called from string.Format routines, because it's handled\r
447 in that class by itself. */\r
448 if (auto_number) {\r
449 /* Initialize our auto numbering state if this is the first\r
450 time we're either auto-numbering or manually numbering. */\r
451 if (auto_number->an_state == ANS_INIT && using_numeric_index)\r
452 auto_number->an_state = field_name_is_empty ?\r
453 ANS_AUTO : ANS_MANUAL;\r
454\r
455 /* Make sure our state is consistent with what we're doing\r
456 this time through. Only check if we're using a numeric\r
457 index. */\r
458 if (using_numeric_index)\r
459 if (autonumber_state_error(auto_number->an_state,\r
460 field_name_is_empty))\r
461 return 0;\r
462 /* Zero length field means we want to do auto-numbering of the\r
463 fields. */\r
464 if (field_name_is_empty)\r
465 *first_idx = (auto_number->an_field_number)++;\r
466 }\r
467\r
468 return 1;\r
469}\r
470\r
471\r
472/*\r
473 get_field_object returns the object inside {}, before the\r
474 format_spec. It handles getindex and getattr lookups and consumes\r
475 the entire input string.\r
476*/\r
477static PyObject *\r
478get_field_object(SubString *input, PyObject *args, PyObject *kwargs,\r
479 AutoNumber *auto_number)\r
480{\r
481 PyObject *obj = NULL;\r
482 int ok;\r
483 int is_attribute;\r
484 SubString name;\r
485 SubString first;\r
486 Py_ssize_t index;\r
487 FieldNameIterator rest;\r
488\r
489 if (!field_name_split(input->ptr, input->end - input->ptr, &first,\r
490 &index, &rest, auto_number)) {\r
491 goto error;\r
492 }\r
493\r
494 if (index == -1) {\r
495 /* look up in kwargs */\r
496 PyObject *key = SubString_new_object(&first);\r
497 if (key == NULL)\r
498 goto error;\r
499 if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {\r
500 PyErr_SetObject(PyExc_KeyError, key);\r
501 Py_DECREF(key);\r
502 goto error;\r
503 }\r
504 Py_DECREF(key);\r
505 Py_INCREF(obj);\r
506 }\r
507 else {\r
508 /* look up in args */\r
509 obj = PySequence_GetItem(args, index);\r
510 if (obj == NULL)\r
511 goto error;\r
512 }\r
513\r
514 /* iterate over the rest of the field_name */\r
515 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,\r
516 &name)) == 2) {\r
517 PyObject *tmp;\r
518\r
519 if (is_attribute)\r
520 /* getattr lookup "." */\r
521 tmp = getattr(obj, &name);\r
522 else\r
523 /* getitem lookup "[]" */\r
524 if (index == -1)\r
525 tmp = getitem_str(obj, &name);\r
526 else\r
527 if (PySequence_Check(obj))\r
528 tmp = getitem_sequence(obj, index);\r
529 else\r
530 /* not a sequence */\r
531 tmp = getitem_idx(obj, index);\r
532 if (tmp == NULL)\r
533 goto error;\r
534\r
535 /* assign to obj */\r
536 Py_DECREF(obj);\r
537 obj = tmp;\r
538 }\r
539 /* end of iterator, this is the non-error case */\r
540 if (ok == 1)\r
541 return obj;\r
542error:\r
543 Py_XDECREF(obj);\r
544 return NULL;\r
545}\r
546\r
547/************************************************************************/\r
548/***************** Field rendering functions **************************/\r
549/************************************************************************/\r
550\r
551/*\r
552 render_field() is the main function in this section. It takes the\r
553 field object and field specification string generated by\r
554 get_field_and_spec, and renders the field into the output string.\r
555\r
556 render_field calls fieldobj.__format__(format_spec) method, and\r
557 appends to the output.\r
558*/\r
559static int\r
560render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)\r
561{\r
562 int ok = 0;\r
563 PyObject *result = NULL;\r
564 PyObject *format_spec_object = NULL;\r
565 PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;\r
566 STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?\r
567 format_spec->ptr : NULL;\r
568 Py_ssize_t format_spec_len = format_spec->ptr ?\r
569 format_spec->end - format_spec->ptr : 0;\r
570\r
571 /* If we know the type exactly, skip the lookup of __format__ and just\r
572 call the formatter directly. */\r
573#if STRINGLIB_IS_UNICODE\r
574 if (PyUnicode_CheckExact(fieldobj))\r
575 formatter = _PyUnicode_FormatAdvanced;\r
576 /* Unfortunately, there's a problem with checking for int, long,\r
577 and float here. If we're being included as unicode, their\r
578 formatters expect string format_spec args. For now, just skip\r
579 this optimization for unicode. This could be fixed, but it's a\r
580 hassle. */\r
581#else\r
582 if (PyString_CheckExact(fieldobj))\r
583 formatter = _PyBytes_FormatAdvanced;\r
584 else if (PyInt_CheckExact(fieldobj))\r
585 formatter =_PyInt_FormatAdvanced;\r
586 else if (PyLong_CheckExact(fieldobj))\r
587 formatter =_PyLong_FormatAdvanced;\r
588 else if (PyFloat_CheckExact(fieldobj))\r
589 formatter = _PyFloat_FormatAdvanced;\r
590#endif\r
591\r
592 if (formatter) {\r
593 /* we know exactly which formatter will be called when __format__ is\r
594 looked up, so call it directly, instead. */\r
595 result = formatter(fieldobj, format_spec_start, format_spec_len);\r
596 }\r
597 else {\r
598 /* We need to create an object out of the pointers we have, because\r
599 __format__ takes a string/unicode object for format_spec. */\r
600 format_spec_object = STRINGLIB_NEW(format_spec_start,\r
601 format_spec_len);\r
602 if (format_spec_object == NULL)\r
603 goto done;\r
604\r
605 result = PyObject_Format(fieldobj, format_spec_object);\r
606 }\r
607 if (result == NULL)\r
608 goto done;\r
609\r
610#if PY_VERSION_HEX >= 0x03000000\r
611 assert(PyUnicode_Check(result));\r
612#else\r
613 assert(PyString_Check(result) || PyUnicode_Check(result));\r
614\r
615 /* Convert result to our type. We could be str, and result could\r
616 be unicode */\r
617 {\r
618 PyObject *tmp = STRINGLIB_TOSTR(result);\r
619 if (tmp == NULL)\r
620 goto done;\r
621 Py_DECREF(result);\r
622 result = tmp;\r
623 }\r
624#endif\r
625\r
626 ok = output_data(output,\r
627 STRINGLIB_STR(result), STRINGLIB_LEN(result));\r
628done:\r
629 Py_XDECREF(format_spec_object);\r
630 Py_XDECREF(result);\r
631 return ok;\r
632}\r
633\r
634static int\r
635parse_field(SubString *str, SubString *field_name, SubString *format_spec,\r
636 STRINGLIB_CHAR *conversion)\r
637{\r
638 /* Note this function works if the field name is zero length,\r
639 which is good. Zero length field names are handled later, in\r
640 field_name_split. */\r
641\r
642 STRINGLIB_CHAR c = 0;\r
643\r
644 /* initialize these, as they may be empty */\r
645 *conversion = '\0';\r
646 SubString_init(format_spec, NULL, 0);\r
647\r
648 /* Search for the field name. it's terminated by the end of\r
649 the string, or a ':' or '!' */\r
650 field_name->ptr = str->ptr;\r
651 while (str->ptr < str->end) {\r
652 switch (c = *(str->ptr++)) {\r
653 case ':':\r
654 case '!':\r
655 break;\r
656 default:\r
657 continue;\r
658 }\r
659 break;\r
660 }\r
661\r
662 if (c == '!' || c == ':') {\r
663 /* we have a format specifier and/or a conversion */\r
664 /* don't include the last character */\r
665 field_name->end = str->ptr-1;\r
666\r
667 /* the format specifier is the rest of the string */\r
668 format_spec->ptr = str->ptr;\r
669 format_spec->end = str->end;\r
670\r
671 /* see if there's a conversion specifier */\r
672 if (c == '!') {\r
673 /* there must be another character present */\r
674 if (format_spec->ptr >= format_spec->end) {\r
675 PyErr_SetString(PyExc_ValueError,\r
676 "end of format while looking for conversion "\r
677 "specifier");\r
678 return 0;\r
679 }\r
680 *conversion = *(format_spec->ptr++);\r
681\r
682 /* if there is another character, it must be a colon */\r
683 if (format_spec->ptr < format_spec->end) {\r
684 c = *(format_spec->ptr++);\r
685 if (c != ':') {\r
686 PyErr_SetString(PyExc_ValueError,\r
687 "expected ':' after format specifier");\r
688 return 0;\r
689 }\r
690 }\r
691 }\r
692 }\r
693 else\r
694 /* end of string, there's no format_spec or conversion */\r
695 field_name->end = str->ptr;\r
696\r
697 return 1;\r
698}\r
699\r
700/************************************************************************/\r
701/******* Output string allocation and escape-to-markup processing ******/\r
702/************************************************************************/\r
703\r
704/* MarkupIterator breaks the string into pieces of either literal\r
705 text, or things inside {} that need to be marked up. it is\r
706 designed to make it easy to wrap a Python iterator around it, for\r
707 use with the Formatter class */\r
708\r
709typedef struct {\r
710 SubString str;\r
711} MarkupIterator;\r
712\r
713static int\r
714MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)\r
715{\r
716 SubString_init(&self->str, ptr, len);\r
717 return 1;\r
718}\r
719\r
720/* returns 0 on error, 1 on non-error termination, and 2 if it got a\r
721 string (or something to be expanded) */\r
722static int\r
723MarkupIterator_next(MarkupIterator *self, SubString *literal,\r
724 int *field_present, SubString *field_name,\r
725 SubString *format_spec, STRINGLIB_CHAR *conversion,\r
726 int *format_spec_needs_expanding)\r
727{\r
728 int at_end;\r
729 STRINGLIB_CHAR c = 0;\r
730 STRINGLIB_CHAR *start;\r
731 int count;\r
732 Py_ssize_t len;\r
733 int markup_follows = 0;\r
734\r
735 /* initialize all of the output variables */\r
736 SubString_init(literal, NULL, 0);\r
737 SubString_init(field_name, NULL, 0);\r
738 SubString_init(format_spec, NULL, 0);\r
739 *conversion = '\0';\r
740 *format_spec_needs_expanding = 0;\r
741 *field_present = 0;\r
742\r
743 /* No more input, end of iterator. This is the normal exit\r
744 path. */\r
745 if (self->str.ptr >= self->str.end)\r
746 return 1;\r
747\r
748 start = self->str.ptr;\r
749\r
750 /* First read any literal text. Read until the end of string, an\r
751 escaped '{' or '}', or an unescaped '{'. In order to never\r
752 allocate memory and so I can just pass pointers around, if\r
753 there's an escaped '{' or '}' then we'll return the literal\r
754 including the brace, but no format object. The next time\r
755 through, we'll return the rest of the literal, skipping past\r
756 the second consecutive brace. */\r
757 while (self->str.ptr < self->str.end) {\r
758 switch (c = *(self->str.ptr++)) {\r
759 case '{':\r
760 case '}':\r
761 markup_follows = 1;\r
762 break;\r
763 default:\r
764 continue;\r
765 }\r
766 break;\r
767 }\r
768\r
769 at_end = self->str.ptr >= self->str.end;\r
770 len = self->str.ptr - start;\r
771\r
772 if ((c == '}') && (at_end || (c != *self->str.ptr))) {\r
773 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "\r
774 "in format string");\r
775 return 0;\r
776 }\r
777 if (at_end && c == '{') {\r
778 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "\r
779 "in format string");\r
780 return 0;\r
781 }\r
782 if (!at_end) {\r
783 if (c == *self->str.ptr) {\r
784 /* escaped } or {, skip it in the input. there is no\r
785 markup object following us, just this literal text */\r
786 self->str.ptr++;\r
787 markup_follows = 0;\r
788 }\r
789 else\r
790 len--;\r
791 }\r
792\r
793 /* record the literal text */\r
794 literal->ptr = start;\r
795 literal->end = start + len;\r
796\r
797 if (!markup_follows)\r
798 return 2;\r
799\r
800 /* this is markup, find the end of the string by counting nested\r
801 braces. note that this prohibits escaped braces, so that\r
802 format_specs cannot have braces in them. */\r
803 *field_present = 1;\r
804 count = 1;\r
805\r
806 start = self->str.ptr;\r
807\r
808 /* we know we can't have a zero length string, so don't worry\r
809 about that case */\r
810 while (self->str.ptr < self->str.end) {\r
811 switch (c = *(self->str.ptr++)) {\r
812 case '{':\r
813 /* the format spec needs to be recursively expanded.\r
814 this is an optimization, and not strictly needed */\r
815 *format_spec_needs_expanding = 1;\r
816 count++;\r
817 break;\r
818 case '}':\r
819 count--;\r
820 if (count <= 0) {\r
821 /* we're done. parse and get out */\r
822 SubString s;\r
823\r
824 SubString_init(&s, start, self->str.ptr - 1 - start);\r
825 if (parse_field(&s, field_name, format_spec, conversion) == 0)\r
826 return 0;\r
827\r
828 /* success */\r
829 return 2;\r
830 }\r
831 break;\r
832 }\r
833 }\r
834\r
835 /* end of string while searching for matching '}' */\r
836 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");\r
837 return 0;\r
838}\r
839\r
840\r
841/* do the !r or !s conversion on obj */\r
842static PyObject *\r
843do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)\r
844{\r
845 /* XXX in pre-3.0, do we need to convert this to unicode, since it\r
846 might have returned a string? */\r
847 switch (conversion) {\r
848 case 'r':\r
849 return PyObject_Repr(obj);\r
850 case 's':\r
851 return STRINGLIB_TOSTR(obj);\r
852 default:\r
853 if (conversion > 32 && conversion < 127) {\r
854 /* It's the ASCII subrange; casting to char is safe\r
855 (assuming the execution character set is an ASCII\r
856 superset). */\r
857 PyErr_Format(PyExc_ValueError,\r
858 "Unknown conversion specifier %c",\r
859 (char)conversion);\r
860 } else\r
861 PyErr_Format(PyExc_ValueError,\r
862 "Unknown conversion specifier \\x%x",\r
863 (unsigned int)conversion);\r
864 return NULL;\r
865 }\r
866}\r
867\r
868/* given:\r
869\r
870 {field_name!conversion:format_spec}\r
871\r
872 compute the result and write it to output.\r
873 format_spec_needs_expanding is an optimization. if it's false,\r
874 just output the string directly, otherwise recursively expand the\r
875 format_spec string.\r
876\r
877 field_name is allowed to be zero length, in which case we\r
878 are doing auto field numbering.\r
879*/\r
880\r
881static int\r
882output_markup(SubString *field_name, SubString *format_spec,\r
883 int format_spec_needs_expanding, STRINGLIB_CHAR conversion,\r
884 OutputString *output, PyObject *args, PyObject *kwargs,\r
885 int recursion_depth, AutoNumber *auto_number)\r
886{\r
887 PyObject *tmp = NULL;\r
888 PyObject *fieldobj = NULL;\r
889 SubString expanded_format_spec;\r
890 SubString *actual_format_spec;\r
891 int result = 0;\r
892\r
893 /* convert field_name to an object */\r
894 fieldobj = get_field_object(field_name, args, kwargs, auto_number);\r
895 if (fieldobj == NULL)\r
896 goto done;\r
897\r
898 if (conversion != '\0') {\r
899 tmp = do_conversion(fieldobj, conversion);\r
900 if (tmp == NULL)\r
901 goto done;\r
902\r
903 /* do the assignment, transferring ownership: fieldobj = tmp */\r
904 Py_DECREF(fieldobj);\r
905 fieldobj = tmp;\r
906 tmp = NULL;\r
907 }\r
908\r
909 /* if needed, recurively compute the format_spec */\r
910 if (format_spec_needs_expanding) {\r
911 tmp = build_string(format_spec, args, kwargs, recursion_depth-1,\r
912 auto_number);\r
913 if (tmp == NULL)\r
914 goto done;\r
915\r
916 /* note that in the case we're expanding the format string,\r
917 tmp must be kept around until after the call to\r
918 render_field. */\r
919 SubString_init(&expanded_format_spec,\r
920 STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));\r
921 actual_format_spec = &expanded_format_spec;\r
922 }\r
923 else\r
924 actual_format_spec = format_spec;\r
925\r
926 if (render_field(fieldobj, actual_format_spec, output) == 0)\r
927 goto done;\r
928\r
929 result = 1;\r
930\r
931done:\r
932 Py_XDECREF(fieldobj);\r
933 Py_XDECREF(tmp);\r
934\r
935 return result;\r
936}\r
937\r
938/*\r
939 do_markup is the top-level loop for the format() method. It\r
940 searches through the format string for escapes to markup codes, and\r
941 calls other functions to move non-markup text to the output,\r
942 and to perform the markup to the output.\r
943*/\r
944static int\r
945do_markup(SubString *input, PyObject *args, PyObject *kwargs,\r
946 OutputString *output, int recursion_depth, AutoNumber *auto_number)\r
947{\r
948 MarkupIterator iter;\r
949 int format_spec_needs_expanding;\r
950 int result;\r
951 int field_present;\r
952 SubString literal;\r
953 SubString field_name;\r
954 SubString format_spec;\r
955 STRINGLIB_CHAR conversion;\r
956\r
957 MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);\r
958 while ((result = MarkupIterator_next(&iter, &literal, &field_present,\r
959 &field_name, &format_spec,\r
960 &conversion,\r
961 &format_spec_needs_expanding)) == 2) {\r
962 if (!output_data(output, literal.ptr, literal.end - literal.ptr))\r
963 return 0;\r
964 if (field_present)\r
965 if (!output_markup(&field_name, &format_spec,\r
966 format_spec_needs_expanding, conversion, output,\r
967 args, kwargs, recursion_depth, auto_number))\r
968 return 0;\r
969 }\r
970 return result;\r
971}\r
972\r
973\r
974/*\r
975 build_string allocates the output string and then\r
976 calls do_markup to do the heavy lifting.\r
977*/\r
978static PyObject *\r
979build_string(SubString *input, PyObject *args, PyObject *kwargs,\r
980 int recursion_depth, AutoNumber *auto_number)\r
981{\r
982 OutputString output;\r
983 PyObject *result = NULL;\r
984 Py_ssize_t count;\r
985\r
986 output.obj = NULL; /* needed so cleanup code always works */\r
987\r
988 /* check the recursion level */\r
989 if (recursion_depth <= 0) {\r
990 PyErr_SetString(PyExc_ValueError,\r
991 "Max string recursion exceeded");\r
992 goto done;\r
993 }\r
994\r
995 /* initial size is the length of the format string, plus the size\r
996 increment. seems like a reasonable default */\r
997 if (!output_initialize(&output,\r
998 input->end - input->ptr +\r
999 INITIAL_SIZE_INCREMENT))\r
1000 goto done;\r
1001\r
1002 if (!do_markup(input, args, kwargs, &output, recursion_depth,\r
1003 auto_number)) {\r
1004 goto done;\r
1005 }\r
1006\r
1007 count = output.ptr - STRINGLIB_STR(output.obj);\r
1008 if (STRINGLIB_RESIZE(&output.obj, count) < 0) {\r
1009 goto done;\r
1010 }\r
1011\r
1012 /* transfer ownership to result */\r
1013 result = output.obj;\r
1014 output.obj = NULL;\r
1015\r
1016done:\r
1017 Py_XDECREF(output.obj);\r
1018 return result;\r
1019}\r
1020\r
1021/************************************************************************/\r
1022/*********** main routine ***********************************************/\r
1023/************************************************************************/\r
1024\r
1025/* this is the main entry point */\r
1026static PyObject *\r
1027do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)\r
1028{\r
1029 SubString input;\r
1030\r
1031 /* PEP 3101 says only 2 levels, so that\r
1032 "{0:{1}}".format('abc', 's') # works\r
1033 "{0:{1:{2}}}".format('abc', 's', '') # fails\r
1034 */\r
1035 int recursion_depth = 2;\r
1036\r
1037 AutoNumber auto_number;\r
1038\r
1039 AutoNumber_Init(&auto_number);\r
1040 SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));\r
1041 return build_string(&input, args, kwargs, recursion_depth, &auto_number);\r
1042}\r
1043\r
1044\r
1045\r
1046/************************************************************************/\r
1047/*********** formatteriterator ******************************************/\r
1048/************************************************************************/\r
1049\r
1050/* This is used to implement string.Formatter.vparse(). It exists so\r
1051 Formatter can share code with the built in unicode.format() method.\r
1052 It's really just a wrapper around MarkupIterator that is callable\r
1053 from Python. */\r
1054\r
1055typedef struct {\r
1056 PyObject_HEAD\r
1057\r
1058 STRINGLIB_OBJECT *str;\r
1059\r
1060 MarkupIterator it_markup;\r
1061} formatteriterobject;\r
1062\r
1063static void\r
1064formatteriter_dealloc(formatteriterobject *it)\r
1065{\r
1066 Py_XDECREF(it->str);\r
1067 PyObject_FREE(it);\r
1068}\r
1069\r
1070/* returns a tuple:\r
1071 (literal, field_name, format_spec, conversion)\r
1072\r
1073 literal is any literal text to output. might be zero length\r
1074 field_name is the string before the ':'. might be None\r
1075 format_spec is the string after the ':'. mibht be None\r
1076 conversion is either None, or the string after the '!'\r
1077*/\r
1078static PyObject *\r
1079formatteriter_next(formatteriterobject *it)\r
1080{\r
1081 SubString literal;\r
1082 SubString field_name;\r
1083 SubString format_spec;\r
1084 STRINGLIB_CHAR conversion;\r
1085 int format_spec_needs_expanding;\r
1086 int field_present;\r
1087 int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,\r
1088 &field_name, &format_spec, &conversion,\r
1089 &format_spec_needs_expanding);\r
1090\r
1091 /* all of the SubString objects point into it->str, so no\r
1092 memory management needs to be done on them */\r
1093 assert(0 <= result && result <= 2);\r
1094 if (result == 0 || result == 1)\r
1095 /* if 0, error has already been set, if 1, iterator is empty */\r
1096 return NULL;\r
1097 else {\r
1098 PyObject *literal_str = NULL;\r
1099 PyObject *field_name_str = NULL;\r
1100 PyObject *format_spec_str = NULL;\r
1101 PyObject *conversion_str = NULL;\r
1102 PyObject *tuple = NULL;\r
1103\r
1104 literal_str = SubString_new_object(&literal);\r
1105 if (literal_str == NULL)\r
1106 goto done;\r
1107\r
1108 field_name_str = SubString_new_object(&field_name);\r
1109 if (field_name_str == NULL)\r
1110 goto done;\r
1111\r
1112 /* if field_name is non-zero length, return a string for\r
1113 format_spec (even if zero length), else return None */\r
1114 format_spec_str = (field_present ?\r
1115 SubString_new_object_or_empty :\r
1116 SubString_new_object)(&format_spec);\r
1117 if (format_spec_str == NULL)\r
1118 goto done;\r
1119\r
1120 /* if the conversion is not specified, return a None,\r
1121 otherwise create a one length string with the conversion\r
1122 character */\r
1123 if (conversion == '\0') {\r
1124 conversion_str = Py_None;\r
1125 Py_INCREF(conversion_str);\r
1126 }\r
1127 else\r
1128 conversion_str = STRINGLIB_NEW(&conversion, 1);\r
1129 if (conversion_str == NULL)\r
1130 goto done;\r
1131\r
1132 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,\r
1133 conversion_str);\r
1134 done:\r
1135 Py_XDECREF(literal_str);\r
1136 Py_XDECREF(field_name_str);\r
1137 Py_XDECREF(format_spec_str);\r
1138 Py_XDECREF(conversion_str);\r
1139 return tuple;\r
1140 }\r
1141}\r
1142\r
1143static PyMethodDef formatteriter_methods[] = {\r
1144 {NULL, NULL} /* sentinel */\r
1145};\r
1146\r
1147static PyTypeObject PyFormatterIter_Type = {\r
1148 PyVarObject_HEAD_INIT(&PyType_Type, 0)\r
1149 "formatteriterator", /* tp_name */\r
1150 sizeof(formatteriterobject), /* tp_basicsize */\r
1151 0, /* tp_itemsize */\r
1152 /* methods */\r
1153 (destructor)formatteriter_dealloc, /* tp_dealloc */\r
1154 0, /* tp_print */\r
1155 0, /* tp_getattr */\r
1156 0, /* tp_setattr */\r
1157 0, /* tp_compare */\r
1158 0, /* tp_repr */\r
1159 0, /* tp_as_number */\r
1160 0, /* tp_as_sequence */\r
1161 0, /* tp_as_mapping */\r
1162 0, /* tp_hash */\r
1163 0, /* tp_call */\r
1164 0, /* tp_str */\r
1165 PyObject_GenericGetAttr, /* tp_getattro */\r
1166 0, /* tp_setattro */\r
1167 0, /* tp_as_buffer */\r
1168 Py_TPFLAGS_DEFAULT, /* tp_flags */\r
1169 0, /* tp_doc */\r
1170 0, /* tp_traverse */\r
1171 0, /* tp_clear */\r
1172 0, /* tp_richcompare */\r
1173 0, /* tp_weaklistoffset */\r
1174 PyObject_SelfIter, /* tp_iter */\r
1175 (iternextfunc)formatteriter_next, /* tp_iternext */\r
1176 formatteriter_methods, /* tp_methods */\r
1177 0,\r
1178};\r
1179\r
1180/* unicode_formatter_parser is used to implement\r
1181 string.Formatter.vformat. it parses a string and returns tuples\r
1182 describing the parsed elements. It's a wrapper around\r
1183 stringlib/string_format.h's MarkupIterator */\r
1184static PyObject *\r
1185formatter_parser(STRINGLIB_OBJECT *self)\r
1186{\r
1187 formatteriterobject *it;\r
1188\r
1189 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);\r
1190 if (it == NULL)\r
1191 return NULL;\r
1192\r
1193 /* take ownership, give the object to the iterator */\r
1194 Py_INCREF(self);\r
1195 it->str = self;\r
1196\r
1197 /* initialize the contained MarkupIterator */\r
1198 MarkupIterator_init(&it->it_markup,\r
1199 STRINGLIB_STR(self),\r
1200 STRINGLIB_LEN(self));\r
1201\r
1202 return (PyObject *)it;\r
1203}\r
1204\r
1205\r
1206/************************************************************************/\r
1207/*********** fieldnameiterator ******************************************/\r
1208/************************************************************************/\r
1209\r
1210\r
1211/* This is used to implement string.Formatter.vparse(). It parses the\r
1212 field name into attribute and item values. It's a Python-callable\r
1213 wrapper around FieldNameIterator */\r
1214\r
1215typedef struct {\r
1216 PyObject_HEAD\r
1217\r
1218 STRINGLIB_OBJECT *str;\r
1219\r
1220 FieldNameIterator it_field;\r
1221} fieldnameiterobject;\r
1222\r
1223static void\r
1224fieldnameiter_dealloc(fieldnameiterobject *it)\r
1225{\r
1226 Py_XDECREF(it->str);\r
1227 PyObject_FREE(it);\r
1228}\r
1229\r
1230/* returns a tuple:\r
1231 (is_attr, value)\r
1232 is_attr is true if we used attribute syntax (e.g., '.foo')\r
1233 false if we used index syntax (e.g., '[foo]')\r
1234 value is an integer or string\r
1235*/\r
1236static PyObject *\r
1237fieldnameiter_next(fieldnameiterobject *it)\r
1238{\r
1239 int result;\r
1240 int is_attr;\r
1241 Py_ssize_t idx;\r
1242 SubString name;\r
1243\r
1244 result = FieldNameIterator_next(&it->it_field, &is_attr,\r
1245 &idx, &name);\r
1246 if (result == 0 || result == 1)\r
1247 /* if 0, error has already been set, if 1, iterator is empty */\r
1248 return NULL;\r
1249 else {\r
1250 PyObject* result = NULL;\r
1251 PyObject* is_attr_obj = NULL;\r
1252 PyObject* obj = NULL;\r
1253\r
1254 is_attr_obj = PyBool_FromLong(is_attr);\r
1255 if (is_attr_obj == NULL)\r
1256 goto done;\r
1257\r
1258 /* either an integer or a string */\r
1259 if (idx != -1)\r
1260 obj = PyLong_FromSsize_t(idx);\r
1261 else\r
1262 obj = SubString_new_object(&name);\r
1263 if (obj == NULL)\r
1264 goto done;\r
1265\r
1266 /* return a tuple of values */\r
1267 result = PyTuple_Pack(2, is_attr_obj, obj);\r
1268\r
1269 done:\r
1270 Py_XDECREF(is_attr_obj);\r
1271 Py_XDECREF(obj);\r
1272 return result;\r
1273 }\r
1274}\r
1275\r
1276static PyMethodDef fieldnameiter_methods[] = {\r
1277 {NULL, NULL} /* sentinel */\r
1278};\r
1279\r
1280static PyTypeObject PyFieldNameIter_Type = {\r
1281 PyVarObject_HEAD_INIT(&PyType_Type, 0)\r
1282 "fieldnameiterator", /* tp_name */\r
1283 sizeof(fieldnameiterobject), /* tp_basicsize */\r
1284 0, /* tp_itemsize */\r
1285 /* methods */\r
1286 (destructor)fieldnameiter_dealloc, /* tp_dealloc */\r
1287 0, /* tp_print */\r
1288 0, /* tp_getattr */\r
1289 0, /* tp_setattr */\r
1290 0, /* tp_compare */\r
1291 0, /* tp_repr */\r
1292 0, /* tp_as_number */\r
1293 0, /* tp_as_sequence */\r
1294 0, /* tp_as_mapping */\r
1295 0, /* tp_hash */\r
1296 0, /* tp_call */\r
1297 0, /* tp_str */\r
1298 PyObject_GenericGetAttr, /* tp_getattro */\r
1299 0, /* tp_setattro */\r
1300 0, /* tp_as_buffer */\r
1301 Py_TPFLAGS_DEFAULT, /* tp_flags */\r
1302 0, /* tp_doc */\r
1303 0, /* tp_traverse */\r
1304 0, /* tp_clear */\r
1305 0, /* tp_richcompare */\r
1306 0, /* tp_weaklistoffset */\r
1307 PyObject_SelfIter, /* tp_iter */\r
1308 (iternextfunc)fieldnameiter_next, /* tp_iternext */\r
1309 fieldnameiter_methods, /* tp_methods */\r
1310 0};\r
1311\r
1312/* unicode_formatter_field_name_split is used to implement\r
1313 string.Formatter.vformat. it takes an PEP 3101 "field name", and\r
1314 returns a tuple of (first, rest): "first", the part before the\r
1315 first '.' or '['; and "rest", an iterator for the rest of the field\r
1316 name. it's a wrapper around stringlib/string_format.h's\r
1317 field_name_split. The iterator it returns is a\r
1318 FieldNameIterator */\r
1319static PyObject *\r
1320formatter_field_name_split(STRINGLIB_OBJECT *self)\r
1321{\r
1322 SubString first;\r
1323 Py_ssize_t first_idx;\r
1324 fieldnameiterobject *it;\r
1325\r
1326 PyObject *first_obj = NULL;\r
1327 PyObject *result = NULL;\r
1328\r
1329 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);\r
1330 if (it == NULL)\r
1331 return NULL;\r
1332\r
1333 /* take ownership, give the object to the iterator. this is\r
1334 just to keep the field_name alive */\r
1335 Py_INCREF(self);\r
1336 it->str = self;\r
1337\r
1338 /* Pass in auto_number = NULL. We'll return an empty string for\r
1339 first_obj in that case. */\r
1340 if (!field_name_split(STRINGLIB_STR(self),\r
1341 STRINGLIB_LEN(self),\r
1342 &first, &first_idx, &it->it_field, NULL))\r
1343 goto done;\r
1344\r
1345 /* first becomes an integer, if possible; else a string */\r
1346 if (first_idx != -1)\r
1347 first_obj = PyLong_FromSsize_t(first_idx);\r
1348 else\r
1349 /* convert "first" into a string object */\r
1350 first_obj = SubString_new_object(&first);\r
1351 if (first_obj == NULL)\r
1352 goto done;\r
1353\r
1354 /* return a tuple of values */\r
1355 result = PyTuple_Pack(2, first_obj, it);\r
1356\r
1357done:\r
1358 Py_XDECREF(it);\r
1359 Py_XDECREF(first_obj);\r
1360 return result;\r
1361}\r