]>
Commit | Line | Data |
---|---|---|
53b2ba57 DM |
1 | /*\r |
2 | string_format.h -- implementation of string.format().\r | |
3 | \r | |
4 | It uses the Objects/stringlib conventions, so that it can be\r | |
5 | compiled for both unicode and string objects.\r | |
6 | */\r | |
7 | \r | |
8 | \r | |
/* Python 2.6 compatibility: before 3.0, PyLong_FromSsize_t is mapped onto
   the underscore-prefixed name. */
#if PY_VERSION_HEX < 0x03000000
#  define PyLong_FromSsize_t _PyLong_FromSsize_t
#endif

/* Tuning knobs for growing the output string buffer.  The extra slack
   requested on a reallocation starts at INITIAL_SIZE_INCREMENT and is
   multiplied by SIZE_MULTIPLIER for as long as it is still below
   MAX_SIZE_INCREMENT. */
#define INITIAL_SIZE_INCREMENT  100
#define SIZE_MULTIPLIER         2
#define MAX_SIZE_INCREMENT      3200
18 | \r | |
19 | \r | |
20 | /************************************************************************/\r | |
21 | /*********** Global data structures and forward declarations *********/\r | |
22 | /************************************************************************/\r | |
23 | \r | |
24 | /*\r | |
25 | A SubString consists of the characters between two string or\r | |
26 | unicode pointers.\r | |
27 | */\r | |
28 | typedef struct {\r | |
29 | STRINGLIB_CHAR *ptr;\r | |
30 | STRINGLIB_CHAR *end;\r | |
31 | } SubString;\r | |
32 | \r | |
33 | \r | |
/* Keep track if we're auto-numbering fields. */
typedef enum {
    ANS_INIT = 0,     /* starting state, nothing decided yet */
    ANS_AUTO = 1,     /* automatic field numbering */
    ANS_MANUAL = 2    /* manual field specification */
} AutoNumberState;
39 | \r | |
40 | /* Keeps track of our auto-numbering state, and which number field we're on */\r | |
41 | typedef struct {\r | |
42 | AutoNumberState an_state;\r | |
43 | int an_field_number;\r | |
44 | } AutoNumber;\r | |
45 | \r | |
46 | \r | |
47 | /* forward declaration for recursion */\r | |
48 | static PyObject *\r | |
49 | build_string(SubString *input, PyObject *args, PyObject *kwargs,\r | |
50 | int recursion_depth, AutoNumber *auto_number);\r | |
51 | \r | |
52 | \r | |
53 | \r | |
54 | /************************************************************************/\r | |
55 | /************************** Utility functions ************************/\r | |
56 | /************************************************************************/\r | |
57 | \r | |
58 | static void\r | |
59 | AutoNumber_Init(AutoNumber *auto_number)\r | |
60 | {\r | |
61 | auto_number->an_state = ANS_INIT;\r | |
62 | auto_number->an_field_number = 0;\r | |
63 | }\r | |
64 | \r | |
65 | /* fill in a SubString from a pointer and length */\r | |
66 | Py_LOCAL_INLINE(void)\r | |
67 | SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)\r | |
68 | {\r | |
69 | str->ptr = p;\r | |
70 | if (p == NULL)\r | |
71 | str->end = NULL;\r | |
72 | else\r | |
73 | str->end = str->ptr + len;\r | |
74 | }\r | |
75 | \r | |
76 | /* return a new string. if str->ptr is NULL, return None */\r | |
77 | Py_LOCAL_INLINE(PyObject *)\r | |
78 | SubString_new_object(SubString *str)\r | |
79 | {\r | |
80 | if (str->ptr == NULL) {\r | |
81 | Py_INCREF(Py_None);\r | |
82 | return Py_None;\r | |
83 | }\r | |
84 | return STRINGLIB_NEW(str->ptr, str->end - str->ptr);\r | |
85 | }\r | |
86 | \r | |
87 | /* return a new string. if str->ptr is NULL, return None */\r | |
88 | Py_LOCAL_INLINE(PyObject *)\r | |
89 | SubString_new_object_or_empty(SubString *str)\r | |
90 | {\r | |
91 | if (str->ptr == NULL) {\r | |
92 | return STRINGLIB_NEW(NULL, 0);\r | |
93 | }\r | |
94 | return STRINGLIB_NEW(str->ptr, str->end - str->ptr);\r | |
95 | }\r | |
96 | \r | |
97 | /* Return 1 if an error has been detected switching between automatic\r | |
98 | field numbering and manual field specification, else return 0. Set\r | |
99 | ValueError on error. */\r | |
100 | static int\r | |
101 | autonumber_state_error(AutoNumberState state, int field_name_is_empty)\r | |
102 | {\r | |
103 | if (state == ANS_MANUAL) {\r | |
104 | if (field_name_is_empty) {\r | |
105 | PyErr_SetString(PyExc_ValueError, "cannot switch from "\r | |
106 | "manual field specification to "\r | |
107 | "automatic field numbering");\r | |
108 | return 1;\r | |
109 | }\r | |
110 | }\r | |
111 | else {\r | |
112 | if (!field_name_is_empty) {\r | |
113 | PyErr_SetString(PyExc_ValueError, "cannot switch from "\r | |
114 | "automatic field numbering to "\r | |
115 | "manual field specification");\r | |
116 | return 1;\r | |
117 | }\r | |
118 | }\r | |
119 | return 0;\r | |
120 | }\r | |
121 | \r | |
122 | \r | |
123 | /************************************************************************/\r | |
124 | /*********** Output string management functions ****************/\r | |
125 | /************************************************************************/\r | |
126 | \r | |
127 | typedef struct {\r | |
128 | STRINGLIB_CHAR *ptr;\r | |
129 | STRINGLIB_CHAR *end;\r | |
130 | PyObject *obj;\r | |
131 | Py_ssize_t size_increment;\r | |
132 | } OutputString;\r | |
133 | \r | |
134 | /* initialize an OutputString object, reserving size characters */\r | |
135 | static int\r | |
136 | output_initialize(OutputString *output, Py_ssize_t size)\r | |
137 | {\r | |
138 | output->obj = STRINGLIB_NEW(NULL, size);\r | |
139 | if (output->obj == NULL)\r | |
140 | return 0;\r | |
141 | \r | |
142 | output->ptr = STRINGLIB_STR(output->obj);\r | |
143 | output->end = STRINGLIB_LEN(output->obj) + output->ptr;\r | |
144 | output->size_increment = INITIAL_SIZE_INCREMENT;\r | |
145 | \r | |
146 | return 1;\r | |
147 | }\r | |
148 | \r | |
149 | /*\r | |
150 | output_extend reallocates the output string buffer.\r | |
151 | It returns a status: 0 for a failed reallocation,\r | |
152 | 1 for success.\r | |
153 | */\r | |
154 | \r | |
155 | static int\r | |
156 | output_extend(OutputString *output, Py_ssize_t count)\r | |
157 | {\r | |
158 | STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);\r | |
159 | Py_ssize_t curlen = output->ptr - startptr;\r | |
160 | Py_ssize_t maxlen = curlen + count + output->size_increment;\r | |
161 | \r | |
162 | if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)\r | |
163 | return 0;\r | |
164 | startptr = STRINGLIB_STR(output->obj);\r | |
165 | output->ptr = startptr + curlen;\r | |
166 | output->end = startptr + maxlen;\r | |
167 | if (output->size_increment < MAX_SIZE_INCREMENT)\r | |
168 | output->size_increment *= SIZE_MULTIPLIER;\r | |
169 | return 1;\r | |
170 | }\r | |
171 | \r | |
172 | /*\r | |
173 | output_data dumps characters into our output string\r | |
174 | buffer.\r | |
175 | \r | |
176 | In some cases, it has to reallocate the string.\r | |
177 | \r | |
178 | It returns a status: 0 for a failed reallocation,\r | |
179 | 1 for success.\r | |
180 | */\r | |
181 | static int\r | |
182 | output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)\r | |
183 | {\r | |
184 | if ((count > output->end - output->ptr) && !output_extend(output, count))\r | |
185 | return 0;\r | |
186 | memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));\r | |
187 | output->ptr += count;\r | |
188 | return 1;\r | |
189 | }\r | |
190 | \r | |
191 | /************************************************************************/\r | |
192 | /*********** Format string parsing -- integers and identifiers *********/\r | |
193 | /************************************************************************/\r | |
194 | \r | |
195 | static Py_ssize_t\r | |
196 | get_integer(const SubString *str)\r | |
197 | {\r | |
198 | Py_ssize_t accumulator = 0;\r | |
199 | Py_ssize_t digitval;\r | |
200 | STRINGLIB_CHAR *p;\r | |
201 | \r | |
202 | /* empty string is an error */\r | |
203 | if (str->ptr >= str->end)\r | |
204 | return -1;\r | |
205 | \r | |
206 | for (p = str->ptr; p < str->end; p++) {\r | |
207 | digitval = STRINGLIB_TODECIMAL(*p);\r | |
208 | if (digitval < 0)\r | |
209 | return -1;\r | |
210 | /*\r | |
211 | Detect possible overflow before it happens:\r | |
212 | \r | |
213 | accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if\r | |
214 | accumulator > (PY_SSIZE_T_MAX - digitval) / 10.\r | |
215 | */\r | |
216 | if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {\r | |
217 | PyErr_Format(PyExc_ValueError,\r | |
218 | "Too many decimal digits in format string");\r | |
219 | return -1;\r | |
220 | }\r | |
221 | accumulator = accumulator * 10 + digitval;\r | |
222 | }\r | |
223 | return accumulator;\r | |
224 | }\r | |
225 | \r | |
226 | /************************************************************************/\r | |
227 | /******** Functions to get field objects and specification strings ******/\r | |
228 | /************************************************************************/\r | |
229 | \r | |
230 | /* do the equivalent of obj.name */\r | |
231 | static PyObject *\r | |
232 | getattr(PyObject *obj, SubString *name)\r | |
233 | {\r | |
234 | PyObject *newobj;\r | |
235 | PyObject *str = SubString_new_object(name);\r | |
236 | if (str == NULL)\r | |
237 | return NULL;\r | |
238 | newobj = PyObject_GetAttr(obj, str);\r | |
239 | Py_DECREF(str);\r | |
240 | return newobj;\r | |
241 | }\r | |
242 | \r | |
243 | /* do the equivalent of obj[idx], where obj is a sequence */\r | |
244 | static PyObject *\r | |
245 | getitem_sequence(PyObject *obj, Py_ssize_t idx)\r | |
246 | {\r | |
247 | return PySequence_GetItem(obj, idx);\r | |
248 | }\r | |
249 | \r | |
250 | /* do the equivalent of obj[idx], where obj is not a sequence */\r | |
251 | static PyObject *\r | |
252 | getitem_idx(PyObject *obj, Py_ssize_t idx)\r | |
253 | {\r | |
254 | PyObject *newobj;\r | |
255 | PyObject *idx_obj = PyLong_FromSsize_t(idx);\r | |
256 | if (idx_obj == NULL)\r | |
257 | return NULL;\r | |
258 | newobj = PyObject_GetItem(obj, idx_obj);\r | |
259 | Py_DECREF(idx_obj);\r | |
260 | return newobj;\r | |
261 | }\r | |
262 | \r | |
263 | /* do the equivalent of obj[name] */\r | |
264 | static PyObject *\r | |
265 | getitem_str(PyObject *obj, SubString *name)\r | |
266 | {\r | |
267 | PyObject *newobj;\r | |
268 | PyObject *str = SubString_new_object(name);\r | |
269 | if (str == NULL)\r | |
270 | return NULL;\r | |
271 | newobj = PyObject_GetItem(obj, str);\r | |
272 | Py_DECREF(str);\r | |
273 | return newobj;\r | |
274 | }\r | |
275 | \r | |
276 | typedef struct {\r | |
277 | /* the entire string we're parsing. we assume that someone else\r | |
278 | is managing its lifetime, and that it will exist for the\r | |
279 | lifetime of the iterator. can be empty */\r | |
280 | SubString str;\r | |
281 | \r | |
282 | /* pointer to where we are inside field_name */\r | |
283 | STRINGLIB_CHAR *ptr;\r | |
284 | } FieldNameIterator;\r | |
285 | \r | |
286 | \r | |
287 | static int\r | |
288 | FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,\r | |
289 | Py_ssize_t len)\r | |
290 | {\r | |
291 | SubString_init(&self->str, ptr, len);\r | |
292 | self->ptr = self->str.ptr;\r | |
293 | return 1;\r | |
294 | }\r | |
295 | \r | |
296 | static int\r | |
297 | _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)\r | |
298 | {\r | |
299 | STRINGLIB_CHAR c;\r | |
300 | \r | |
301 | name->ptr = self->ptr;\r | |
302 | \r | |
303 | /* return everything until '.' or '[' */\r | |
304 | while (self->ptr < self->str.end) {\r | |
305 | switch (c = *self->ptr++) {\r | |
306 | case '[':\r | |
307 | case '.':\r | |
308 | /* backup so that we this character will be seen next time */\r | |
309 | self->ptr--;\r | |
310 | break;\r | |
311 | default:\r | |
312 | continue;\r | |
313 | }\r | |
314 | break;\r | |
315 | }\r | |
316 | /* end of string is okay */\r | |
317 | name->end = self->ptr;\r | |
318 | return 1;\r | |
319 | }\r | |
320 | \r | |
321 | static int\r | |
322 | _FieldNameIterator_item(FieldNameIterator *self, SubString *name)\r | |
323 | {\r | |
324 | int bracket_seen = 0;\r | |
325 | STRINGLIB_CHAR c;\r | |
326 | \r | |
327 | name->ptr = self->ptr;\r | |
328 | \r | |
329 | /* return everything until ']' */\r | |
330 | while (self->ptr < self->str.end) {\r | |
331 | switch (c = *self->ptr++) {\r | |
332 | case ']':\r | |
333 | bracket_seen = 1;\r | |
334 | break;\r | |
335 | default:\r | |
336 | continue;\r | |
337 | }\r | |
338 | break;\r | |
339 | }\r | |
340 | /* make sure we ended with a ']' */\r | |
341 | if (!bracket_seen) {\r | |
342 | PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");\r | |
343 | return 0;\r | |
344 | }\r | |
345 | \r | |
346 | /* end of string is okay */\r | |
347 | /* don't include the ']' */\r | |
348 | name->end = self->ptr-1;\r | |
349 | return 1;\r | |
350 | }\r | |
351 | \r | |
352 | /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */\r | |
353 | static int\r | |
354 | FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,\r | |
355 | Py_ssize_t *name_idx, SubString *name)\r | |
356 | {\r | |
357 | /* check at end of input */\r | |
358 | if (self->ptr >= self->str.end)\r | |
359 | return 1;\r | |
360 | \r | |
361 | switch (*self->ptr++) {\r | |
362 | case '.':\r | |
363 | *is_attribute = 1;\r | |
364 | if (_FieldNameIterator_attr(self, name) == 0)\r | |
365 | return 0;\r | |
366 | *name_idx = -1;\r | |
367 | break;\r | |
368 | case '[':\r | |
369 | *is_attribute = 0;\r | |
370 | if (_FieldNameIterator_item(self, name) == 0)\r | |
371 | return 0;\r | |
372 | *name_idx = get_integer(name);\r | |
373 | if (*name_idx == -1 && PyErr_Occurred())\r | |
374 | return 0;\r | |
375 | break;\r | |
376 | default:\r | |
377 | /* Invalid character follows ']' */\r | |
378 | PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "\r | |
379 | "follow ']' in format field specifier");\r | |
380 | return 0;\r | |
381 | }\r | |
382 | \r | |
383 | /* empty string is an error */\r | |
384 | if (name->ptr == name->end) {\r | |
385 | PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");\r | |
386 | return 0;\r | |
387 | }\r | |
388 | \r | |
389 | return 2;\r | |
390 | }\r | |
391 | \r | |
392 | \r | |
393 | /* input: field_name\r | |
394 | output: 'first' points to the part before the first '[' or '.'\r | |
395 | 'first_idx' is -1 if 'first' is not an integer, otherwise\r | |
396 | it's the value of first converted to an integer\r | |
397 | 'rest' is an iterator to return the rest\r | |
398 | */\r | |
399 | static int\r | |
400 | field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,\r | |
401 | Py_ssize_t *first_idx, FieldNameIterator *rest,\r | |
402 | AutoNumber *auto_number)\r | |
403 | {\r | |
404 | STRINGLIB_CHAR c;\r | |
405 | STRINGLIB_CHAR *p = ptr;\r | |
406 | STRINGLIB_CHAR *end = ptr + len;\r | |
407 | int field_name_is_empty;\r | |
408 | int using_numeric_index;\r | |
409 | \r | |
410 | /* find the part up until the first '.' or '[' */\r | |
411 | while (p < end) {\r | |
412 | switch (c = *p++) {\r | |
413 | case '[':\r | |
414 | case '.':\r | |
415 | /* backup so that we this character is available to the\r | |
416 | "rest" iterator */\r | |
417 | p--;\r | |
418 | break;\r | |
419 | default:\r | |
420 | continue;\r | |
421 | }\r | |
422 | break;\r | |
423 | }\r | |
424 | \r | |
425 | /* set up the return values */\r | |
426 | SubString_init(first, ptr, p - ptr);\r | |
427 | FieldNameIterator_init(rest, p, end - p);\r | |
428 | \r | |
429 | /* see if "first" is an integer, in which case it's used as an index */\r | |
430 | *first_idx = get_integer(first);\r | |
431 | if (*first_idx == -1 && PyErr_Occurred())\r | |
432 | return 0;\r | |
433 | \r | |
434 | field_name_is_empty = first->ptr >= first->end;\r | |
435 | \r | |
436 | /* If the field name is omitted or if we have a numeric index\r | |
437 | specified, then we're doing numeric indexing into args. */\r | |
438 | using_numeric_index = field_name_is_empty || *first_idx != -1;\r | |
439 | \r | |
440 | /* We always get here exactly one time for each field we're\r | |
441 | processing. And we get here in field order (counting by left\r | |
442 | braces). So this is the perfect place to handle automatic field\r | |
443 | numbering if the field name is omitted. */\r | |
444 | \r | |
445 | /* Check if we need to do the auto-numbering. It's not needed if\r | |
446 | we're called from string.Format routines, because it's handled\r | |
447 | in that class by itself. */\r | |
448 | if (auto_number) {\r | |
449 | /* Initialize our auto numbering state if this is the first\r | |
450 | time we're either auto-numbering or manually numbering. */\r | |
451 | if (auto_number->an_state == ANS_INIT && using_numeric_index)\r | |
452 | auto_number->an_state = field_name_is_empty ?\r | |
453 | ANS_AUTO : ANS_MANUAL;\r | |
454 | \r | |
455 | /* Make sure our state is consistent with what we're doing\r | |
456 | this time through. Only check if we're using a numeric\r | |
457 | index. */\r | |
458 | if (using_numeric_index)\r | |
459 | if (autonumber_state_error(auto_number->an_state,\r | |
460 | field_name_is_empty))\r | |
461 | return 0;\r | |
462 | /* Zero length field means we want to do auto-numbering of the\r | |
463 | fields. */\r | |
464 | if (field_name_is_empty)\r | |
465 | *first_idx = (auto_number->an_field_number)++;\r | |
466 | }\r | |
467 | \r | |
468 | return 1;\r | |
469 | }\r | |
470 | \r | |
471 | \r | |
472 | /*\r | |
473 | get_field_object returns the object inside {}, before the\r | |
474 | format_spec. It handles getindex and getattr lookups and consumes\r | |
475 | the entire input string.\r | |
476 | */\r | |
477 | static PyObject *\r | |
478 | get_field_object(SubString *input, PyObject *args, PyObject *kwargs,\r | |
479 | AutoNumber *auto_number)\r | |
480 | {\r | |
481 | PyObject *obj = NULL;\r | |
482 | int ok;\r | |
483 | int is_attribute;\r | |
484 | SubString name;\r | |
485 | SubString first;\r | |
486 | Py_ssize_t index;\r | |
487 | FieldNameIterator rest;\r | |
488 | \r | |
489 | if (!field_name_split(input->ptr, input->end - input->ptr, &first,\r | |
490 | &index, &rest, auto_number)) {\r | |
491 | goto error;\r | |
492 | }\r | |
493 | \r | |
494 | if (index == -1) {\r | |
495 | /* look up in kwargs */\r | |
496 | PyObject *key = SubString_new_object(&first);\r | |
497 | if (key == NULL)\r | |
498 | goto error;\r | |
499 | if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {\r | |
500 | PyErr_SetObject(PyExc_KeyError, key);\r | |
501 | Py_DECREF(key);\r | |
502 | goto error;\r | |
503 | }\r | |
504 | Py_DECREF(key);\r | |
505 | Py_INCREF(obj);\r | |
506 | }\r | |
507 | else {\r | |
508 | /* look up in args */\r | |
509 | obj = PySequence_GetItem(args, index);\r | |
510 | if (obj == NULL)\r | |
511 | goto error;\r | |
512 | }\r | |
513 | \r | |
514 | /* iterate over the rest of the field_name */\r | |
515 | while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,\r | |
516 | &name)) == 2) {\r | |
517 | PyObject *tmp;\r | |
518 | \r | |
519 | if (is_attribute)\r | |
520 | /* getattr lookup "." */\r | |
521 | tmp = getattr(obj, &name);\r | |
522 | else\r | |
523 | /* getitem lookup "[]" */\r | |
524 | if (index == -1)\r | |
525 | tmp = getitem_str(obj, &name);\r | |
526 | else\r | |
527 | if (PySequence_Check(obj))\r | |
528 | tmp = getitem_sequence(obj, index);\r | |
529 | else\r | |
530 | /* not a sequence */\r | |
531 | tmp = getitem_idx(obj, index);\r | |
532 | if (tmp == NULL)\r | |
533 | goto error;\r | |
534 | \r | |
535 | /* assign to obj */\r | |
536 | Py_DECREF(obj);\r | |
537 | obj = tmp;\r | |
538 | }\r | |
539 | /* end of iterator, this is the non-error case */\r | |
540 | if (ok == 1)\r | |
541 | return obj;\r | |
542 | error:\r | |
543 | Py_XDECREF(obj);\r | |
544 | return NULL;\r | |
545 | }\r | |
546 | \r | |
547 | /************************************************************************/\r | |
548 | /***************** Field rendering functions **************************/\r | |
549 | /************************************************************************/\r | |
550 | \r | |
551 | /*\r | |
552 | render_field() is the main function in this section. It takes the\r | |
553 | field object and field specification string generated by\r | |
554 | get_field_and_spec, and renders the field into the output string.\r | |
555 | \r | |
556 | render_field calls fieldobj.__format__(format_spec) method, and\r | |
557 | appends to the output.\r | |
558 | */\r | |
559 | static int\r | |
560 | render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)\r | |
561 | {\r | |
562 | int ok = 0;\r | |
563 | PyObject *result = NULL;\r | |
564 | PyObject *format_spec_object = NULL;\r | |
565 | PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;\r | |
566 | STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?\r | |
567 | format_spec->ptr : NULL;\r | |
568 | Py_ssize_t format_spec_len = format_spec->ptr ?\r | |
569 | format_spec->end - format_spec->ptr : 0;\r | |
570 | \r | |
571 | /* If we know the type exactly, skip the lookup of __format__ and just\r | |
572 | call the formatter directly. */\r | |
573 | #if STRINGLIB_IS_UNICODE\r | |
574 | if (PyUnicode_CheckExact(fieldobj))\r | |
575 | formatter = _PyUnicode_FormatAdvanced;\r | |
576 | /* Unfortunately, there's a problem with checking for int, long,\r | |
577 | and float here. If we're being included as unicode, their\r | |
578 | formatters expect string format_spec args. For now, just skip\r | |
579 | this optimization for unicode. This could be fixed, but it's a\r | |
580 | hassle. */\r | |
581 | #else\r | |
582 | if (PyString_CheckExact(fieldobj))\r | |
583 | formatter = _PyBytes_FormatAdvanced;\r | |
584 | else if (PyInt_CheckExact(fieldobj))\r | |
585 | formatter =_PyInt_FormatAdvanced;\r | |
586 | else if (PyLong_CheckExact(fieldobj))\r | |
587 | formatter =_PyLong_FormatAdvanced;\r | |
588 | else if (PyFloat_CheckExact(fieldobj))\r | |
589 | formatter = _PyFloat_FormatAdvanced;\r | |
590 | #endif\r | |
591 | \r | |
592 | if (formatter) {\r | |
593 | /* we know exactly which formatter will be called when __format__ is\r | |
594 | looked up, so call it directly, instead. */\r | |
595 | result = formatter(fieldobj, format_spec_start, format_spec_len);\r | |
596 | }\r | |
597 | else {\r | |
598 | /* We need to create an object out of the pointers we have, because\r | |
599 | __format__ takes a string/unicode object for format_spec. */\r | |
600 | format_spec_object = STRINGLIB_NEW(format_spec_start,\r | |
601 | format_spec_len);\r | |
602 | if (format_spec_object == NULL)\r | |
603 | goto done;\r | |
604 | \r | |
605 | result = PyObject_Format(fieldobj, format_spec_object);\r | |
606 | }\r | |
607 | if (result == NULL)\r | |
608 | goto done;\r | |
609 | \r | |
610 | #if PY_VERSION_HEX >= 0x03000000\r | |
611 | assert(PyUnicode_Check(result));\r | |
612 | #else\r | |
613 | assert(PyString_Check(result) || PyUnicode_Check(result));\r | |
614 | \r | |
615 | /* Convert result to our type. We could be str, and result could\r | |
616 | be unicode */\r | |
617 | {\r | |
618 | PyObject *tmp = STRINGLIB_TOSTR(result);\r | |
619 | if (tmp == NULL)\r | |
620 | goto done;\r | |
621 | Py_DECREF(result);\r | |
622 | result = tmp;\r | |
623 | }\r | |
624 | #endif\r | |
625 | \r | |
626 | ok = output_data(output,\r | |
627 | STRINGLIB_STR(result), STRINGLIB_LEN(result));\r | |
628 | done:\r | |
629 | Py_XDECREF(format_spec_object);\r | |
630 | Py_XDECREF(result);\r | |
631 | return ok;\r | |
632 | }\r | |
633 | \r | |
634 | static int\r | |
635 | parse_field(SubString *str, SubString *field_name, SubString *format_spec,\r | |
636 | STRINGLIB_CHAR *conversion)\r | |
637 | {\r | |
638 | /* Note this function works if the field name is zero length,\r | |
639 | which is good. Zero length field names are handled later, in\r | |
640 | field_name_split. */\r | |
641 | \r | |
642 | STRINGLIB_CHAR c = 0;\r | |
643 | \r | |
644 | /* initialize these, as they may be empty */\r | |
645 | *conversion = '\0';\r | |
646 | SubString_init(format_spec, NULL, 0);\r | |
647 | \r | |
648 | /* Search for the field name. it's terminated by the end of\r | |
649 | the string, or a ':' or '!' */\r | |
650 | field_name->ptr = str->ptr;\r | |
651 | while (str->ptr < str->end) {\r | |
652 | switch (c = *(str->ptr++)) {\r | |
653 | case ':':\r | |
654 | case '!':\r | |
655 | break;\r | |
656 | default:\r | |
657 | continue;\r | |
658 | }\r | |
659 | break;\r | |
660 | }\r | |
661 | \r | |
662 | if (c == '!' || c == ':') {\r | |
663 | /* we have a format specifier and/or a conversion */\r | |
664 | /* don't include the last character */\r | |
665 | field_name->end = str->ptr-1;\r | |
666 | \r | |
667 | /* the format specifier is the rest of the string */\r | |
668 | format_spec->ptr = str->ptr;\r | |
669 | format_spec->end = str->end;\r | |
670 | \r | |
671 | /* see if there's a conversion specifier */\r | |
672 | if (c == '!') {\r | |
673 | /* there must be another character present */\r | |
674 | if (format_spec->ptr >= format_spec->end) {\r | |
675 | PyErr_SetString(PyExc_ValueError,\r | |
676 | "end of format while looking for conversion "\r | |
677 | "specifier");\r | |
678 | return 0;\r | |
679 | }\r | |
680 | *conversion = *(format_spec->ptr++);\r | |
681 | \r | |
682 | /* if there is another character, it must be a colon */\r | |
683 | if (format_spec->ptr < format_spec->end) {\r | |
684 | c = *(format_spec->ptr++);\r | |
685 | if (c != ':') {\r | |
686 | PyErr_SetString(PyExc_ValueError,\r | |
687 | "expected ':' after format specifier");\r | |
688 | return 0;\r | |
689 | }\r | |
690 | }\r | |
691 | }\r | |
692 | }\r | |
693 | else\r | |
694 | /* end of string, there's no format_spec or conversion */\r | |
695 | field_name->end = str->ptr;\r | |
696 | \r | |
697 | return 1;\r | |
698 | }\r | |
699 | \r | |
700 | /************************************************************************/\r | |
701 | /******* Output string allocation and escape-to-markup processing ******/\r | |
702 | /************************************************************************/\r | |
703 | \r | |
704 | /* MarkupIterator breaks the string into pieces of either literal\r | |
705 | text, or things inside {} that need to be marked up. it is\r | |
706 | designed to make it easy to wrap a Python iterator around it, for\r | |
707 | use with the Formatter class */\r | |
708 | \r | |
709 | typedef struct {\r | |
710 | SubString str;\r | |
711 | } MarkupIterator;\r | |
712 | \r | |
713 | static int\r | |
714 | MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)\r | |
715 | {\r | |
716 | SubString_init(&self->str, ptr, len);\r | |
717 | return 1;\r | |
718 | }\r | |
719 | \r | |
720 | /* returns 0 on error, 1 on non-error termination, and 2 if it got a\r | |
721 | string (or something to be expanded) */\r | |
722 | static int\r | |
723 | MarkupIterator_next(MarkupIterator *self, SubString *literal,\r | |
724 | int *field_present, SubString *field_name,\r | |
725 | SubString *format_spec, STRINGLIB_CHAR *conversion,\r | |
726 | int *format_spec_needs_expanding)\r | |
727 | {\r | |
728 | int at_end;\r | |
729 | STRINGLIB_CHAR c = 0;\r | |
730 | STRINGLIB_CHAR *start;\r | |
731 | int count;\r | |
732 | Py_ssize_t len;\r | |
733 | int markup_follows = 0;\r | |
734 | \r | |
735 | /* initialize all of the output variables */\r | |
736 | SubString_init(literal, NULL, 0);\r | |
737 | SubString_init(field_name, NULL, 0);\r | |
738 | SubString_init(format_spec, NULL, 0);\r | |
739 | *conversion = '\0';\r | |
740 | *format_spec_needs_expanding = 0;\r | |
741 | *field_present = 0;\r | |
742 | \r | |
743 | /* No more input, end of iterator. This is the normal exit\r | |
744 | path. */\r | |
745 | if (self->str.ptr >= self->str.end)\r | |
746 | return 1;\r | |
747 | \r | |
748 | start = self->str.ptr;\r | |
749 | \r | |
750 | /* First read any literal text. Read until the end of string, an\r | |
751 | escaped '{' or '}', or an unescaped '{'. In order to never\r | |
752 | allocate memory and so I can just pass pointers around, if\r | |
753 | there's an escaped '{' or '}' then we'll return the literal\r | |
754 | including the brace, but no format object. The next time\r | |
755 | through, we'll return the rest of the literal, skipping past\r | |
756 | the second consecutive brace. */\r | |
757 | while (self->str.ptr < self->str.end) {\r | |
758 | switch (c = *(self->str.ptr++)) {\r | |
759 | case '{':\r | |
760 | case '}':\r | |
761 | markup_follows = 1;\r | |
762 | break;\r | |
763 | default:\r | |
764 | continue;\r | |
765 | }\r | |
766 | break;\r | |
767 | }\r | |
768 | \r | |
769 | at_end = self->str.ptr >= self->str.end;\r | |
770 | len = self->str.ptr - start;\r | |
771 | \r | |
772 | if ((c == '}') && (at_end || (c != *self->str.ptr))) {\r | |
773 | PyErr_SetString(PyExc_ValueError, "Single '}' encountered "\r | |
774 | "in format string");\r | |
775 | return 0;\r | |
776 | }\r | |
777 | if (at_end && c == '{') {\r | |
778 | PyErr_SetString(PyExc_ValueError, "Single '{' encountered "\r | |
779 | "in format string");\r | |
780 | return 0;\r | |
781 | }\r | |
782 | if (!at_end) {\r | |
783 | if (c == *self->str.ptr) {\r | |
784 | /* escaped } or {, skip it in the input. there is no\r | |
785 | markup object following us, just this literal text */\r | |
786 | self->str.ptr++;\r | |
787 | markup_follows = 0;\r | |
788 | }\r | |
789 | else\r | |
790 | len--;\r | |
791 | }\r | |
792 | \r | |
793 | /* record the literal text */\r | |
794 | literal->ptr = start;\r | |
795 | literal->end = start + len;\r | |
796 | \r | |
797 | if (!markup_follows)\r | |
798 | return 2;\r | |
799 | \r | |
800 | /* this is markup, find the end of the string by counting nested\r | |
801 | braces. note that this prohibits escaped braces, so that\r | |
802 | format_specs cannot have braces in them. */\r | |
803 | *field_present = 1;\r | |
804 | count = 1;\r | |
805 | \r | |
806 | start = self->str.ptr;\r | |
807 | \r | |
808 | /* we know we can't have a zero length string, so don't worry\r | |
809 | about that case */\r | |
810 | while (self->str.ptr < self->str.end) {\r | |
811 | switch (c = *(self->str.ptr++)) {\r | |
812 | case '{':\r | |
813 | /* the format spec needs to be recursively expanded.\r | |
814 | this is an optimization, and not strictly needed */\r | |
815 | *format_spec_needs_expanding = 1;\r | |
816 | count++;\r | |
817 | break;\r | |
818 | case '}':\r | |
819 | count--;\r | |
820 | if (count <= 0) {\r | |
821 | /* we're done. parse and get out */\r | |
822 | SubString s;\r | |
823 | \r | |
824 | SubString_init(&s, start, self->str.ptr - 1 - start);\r | |
825 | if (parse_field(&s, field_name, format_spec, conversion) == 0)\r | |
826 | return 0;\r | |
827 | \r | |
828 | /* success */\r | |
829 | return 2;\r | |
830 | }\r | |
831 | break;\r | |
832 | }\r | |
833 | }\r | |
834 | \r | |
835 | /* end of string while searching for matching '}' */\r | |
836 | PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");\r | |
837 | return 0;\r | |
838 | }\r | |
839 | \r | |
840 | \r | |
841 | /* do the !r or !s conversion on obj */\r | |
842 | static PyObject *\r | |
843 | do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)\r | |
844 | {\r | |
845 | /* XXX in pre-3.0, do we need to convert this to unicode, since it\r | |
846 | might have returned a string? */\r | |
847 | switch (conversion) {\r | |
848 | case 'r':\r | |
849 | return PyObject_Repr(obj);\r | |
850 | case 's':\r | |
851 | return STRINGLIB_TOSTR(obj);\r | |
852 | default:\r | |
853 | if (conversion > 32 && conversion < 127) {\r | |
854 | /* It's the ASCII subrange; casting to char is safe\r | |
855 | (assuming the execution character set is an ASCII\r | |
856 | superset). */\r | |
857 | PyErr_Format(PyExc_ValueError,\r | |
858 | "Unknown conversion specifier %c",\r | |
859 | (char)conversion);\r | |
860 | } else\r | |
861 | PyErr_Format(PyExc_ValueError,\r | |
862 | "Unknown conversion specifier \\x%x",\r | |
863 | (unsigned int)conversion);\r | |
864 | return NULL;\r | |
865 | }\r | |
866 | }\r | |
867 | \r | |
868 | /* given:\r | |
869 | \r | |
870 | {field_name!conversion:format_spec}\r | |
871 | \r | |
872 | compute the result and write it to output.\r | |
873 | format_spec_needs_expanding is an optimization. if it's false,\r | |
874 | just output the string directly, otherwise recursively expand the\r | |
875 | format_spec string.\r | |
876 | \r | |
877 | field_name is allowed to be zero length, in which case we\r | |
878 | are doing auto field numbering.\r | |
879 | */\r | |
880 | \r | |
881 | static int\r | |
882 | output_markup(SubString *field_name, SubString *format_spec,\r | |
883 | int format_spec_needs_expanding, STRINGLIB_CHAR conversion,\r | |
884 | OutputString *output, PyObject *args, PyObject *kwargs,\r | |
885 | int recursion_depth, AutoNumber *auto_number)\r | |
886 | {\r | |
887 | PyObject *tmp = NULL;\r | |
888 | PyObject *fieldobj = NULL;\r | |
889 | SubString expanded_format_spec;\r | |
890 | SubString *actual_format_spec;\r | |
891 | int result = 0;\r | |
892 | \r | |
893 | /* convert field_name to an object */\r | |
894 | fieldobj = get_field_object(field_name, args, kwargs, auto_number);\r | |
895 | if (fieldobj == NULL)\r | |
896 | goto done;\r | |
897 | \r | |
898 | if (conversion != '\0') {\r | |
899 | tmp = do_conversion(fieldobj, conversion);\r | |
900 | if (tmp == NULL)\r | |
901 | goto done;\r | |
902 | \r | |
903 | /* do the assignment, transferring ownership: fieldobj = tmp */\r | |
904 | Py_DECREF(fieldobj);\r | |
905 | fieldobj = tmp;\r | |
906 | tmp = NULL;\r | |
907 | }\r | |
908 | \r | |
909 | /* if needed, recurively compute the format_spec */\r | |
910 | if (format_spec_needs_expanding) {\r | |
911 | tmp = build_string(format_spec, args, kwargs, recursion_depth-1,\r | |
912 | auto_number);\r | |
913 | if (tmp == NULL)\r | |
914 | goto done;\r | |
915 | \r | |
916 | /* note that in the case we're expanding the format string,\r | |
917 | tmp must be kept around until after the call to\r | |
918 | render_field. */\r | |
919 | SubString_init(&expanded_format_spec,\r | |
920 | STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));\r | |
921 | actual_format_spec = &expanded_format_spec;\r | |
922 | }\r | |
923 | else\r | |
924 | actual_format_spec = format_spec;\r | |
925 | \r | |
926 | if (render_field(fieldobj, actual_format_spec, output) == 0)\r | |
927 | goto done;\r | |
928 | \r | |
929 | result = 1;\r | |
930 | \r | |
931 | done:\r | |
932 | Py_XDECREF(fieldobj);\r | |
933 | Py_XDECREF(tmp);\r | |
934 | \r | |
935 | return result;\r | |
936 | }\r | |
937 | \r | |
938 | /*\r | |
939 | do_markup is the top-level loop for the format() method. It\r | |
940 | searches through the format string for escapes to markup codes, and\r | |
941 | calls other functions to move non-markup text to the output,\r | |
942 | and to perform the markup to the output.\r | |
943 | */\r | |
944 | static int\r | |
945 | do_markup(SubString *input, PyObject *args, PyObject *kwargs,\r | |
946 | OutputString *output, int recursion_depth, AutoNumber *auto_number)\r | |
947 | {\r | |
948 | MarkupIterator iter;\r | |
949 | int format_spec_needs_expanding;\r | |
950 | int result;\r | |
951 | int field_present;\r | |
952 | SubString literal;\r | |
953 | SubString field_name;\r | |
954 | SubString format_spec;\r | |
955 | STRINGLIB_CHAR conversion;\r | |
956 | \r | |
957 | MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);\r | |
958 | while ((result = MarkupIterator_next(&iter, &literal, &field_present,\r | |
959 | &field_name, &format_spec,\r | |
960 | &conversion,\r | |
961 | &format_spec_needs_expanding)) == 2) {\r | |
962 | if (!output_data(output, literal.ptr, literal.end - literal.ptr))\r | |
963 | return 0;\r | |
964 | if (field_present)\r | |
965 | if (!output_markup(&field_name, &format_spec,\r | |
966 | format_spec_needs_expanding, conversion, output,\r | |
967 | args, kwargs, recursion_depth, auto_number))\r | |
968 | return 0;\r | |
969 | }\r | |
970 | return result;\r | |
971 | }\r | |
972 | \r | |
973 | \r | |
974 | /*\r | |
975 | build_string allocates the output string and then\r | |
976 | calls do_markup to do the heavy lifting.\r | |
977 | */\r | |
978 | static PyObject *\r | |
979 | build_string(SubString *input, PyObject *args, PyObject *kwargs,\r | |
980 | int recursion_depth, AutoNumber *auto_number)\r | |
981 | {\r | |
982 | OutputString output;\r | |
983 | PyObject *result = NULL;\r | |
984 | Py_ssize_t count;\r | |
985 | \r | |
986 | output.obj = NULL; /* needed so cleanup code always works */\r | |
987 | \r | |
988 | /* check the recursion level */\r | |
989 | if (recursion_depth <= 0) {\r | |
990 | PyErr_SetString(PyExc_ValueError,\r | |
991 | "Max string recursion exceeded");\r | |
992 | goto done;\r | |
993 | }\r | |
994 | \r | |
995 | /* initial size is the length of the format string, plus the size\r | |
996 | increment. seems like a reasonable default */\r | |
997 | if (!output_initialize(&output,\r | |
998 | input->end - input->ptr +\r | |
999 | INITIAL_SIZE_INCREMENT))\r | |
1000 | goto done;\r | |
1001 | \r | |
1002 | if (!do_markup(input, args, kwargs, &output, recursion_depth,\r | |
1003 | auto_number)) {\r | |
1004 | goto done;\r | |
1005 | }\r | |
1006 | \r | |
1007 | count = output.ptr - STRINGLIB_STR(output.obj);\r | |
1008 | if (STRINGLIB_RESIZE(&output.obj, count) < 0) {\r | |
1009 | goto done;\r | |
1010 | }\r | |
1011 | \r | |
1012 | /* transfer ownership to result */\r | |
1013 | result = output.obj;\r | |
1014 | output.obj = NULL;\r | |
1015 | \r | |
1016 | done:\r | |
1017 | Py_XDECREF(output.obj);\r | |
1018 | return result;\r | |
1019 | }\r | |
1020 | \r | |
1021 | /************************************************************************/\r | |
1022 | /*********** main routine ***********************************************/\r | |
1023 | /************************************************************************/\r | |
1024 | \r | |
1025 | /* this is the main entry point */\r | |
1026 | static PyObject *\r | |
1027 | do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)\r | |
1028 | {\r | |
1029 | SubString input;\r | |
1030 | \r | |
1031 | /* PEP 3101 says only 2 levels, so that\r | |
1032 | "{0:{1}}".format('abc', 's') # works\r | |
1033 | "{0:{1:{2}}}".format('abc', 's', '') # fails\r | |
1034 | */\r | |
1035 | int recursion_depth = 2;\r | |
1036 | \r | |
1037 | AutoNumber auto_number;\r | |
1038 | \r | |
1039 | AutoNumber_Init(&auto_number);\r | |
1040 | SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));\r | |
1041 | return build_string(&input, args, kwargs, recursion_depth, &auto_number);\r | |
1042 | }\r | |
1043 | \r | |
1044 | \r | |
1045 | \r | |
1046 | /************************************************************************/\r | |
1047 | /*********** formatteriterator ******************************************/\r | |
1048 | /************************************************************************/\r | |
1049 | \r | |
/* This is used to implement string.Formatter.vparse().  It exists so
   Formatter can share code with the built in unicode.format() method.
   It's really just a wrapper around MarkupIterator that is callable
   from Python.

   Instances are created by formatter_parser() and released by
   formatteriter_dealloc(). */

typedef struct {
    PyObject_HEAD

    /* the string being parsed; formatter_parser() stores a new
       reference here and formatteriter_dealloc() releases it */
    STRINGLIB_OBJECT *str;

    /* iteration state; initialized over the characters of str */
    MarkupIterator it_markup;
} formatteriterobject;
1062 | \r | |
1063 | static void\r | |
1064 | formatteriter_dealloc(formatteriterobject *it)\r | |
1065 | {\r | |
1066 | Py_XDECREF(it->str);\r | |
1067 | PyObject_FREE(it);\r | |
1068 | }\r | |
1069 | \r | |
1070 | /* returns a tuple:\r | |
1071 | (literal, field_name, format_spec, conversion)\r | |
1072 | \r | |
1073 | literal is any literal text to output. might be zero length\r | |
1074 | field_name is the string before the ':'. might be None\r | |
1075 | format_spec is the string after the ':'. mibht be None\r | |
1076 | conversion is either None, or the string after the '!'\r | |
1077 | */\r | |
1078 | static PyObject *\r | |
1079 | formatteriter_next(formatteriterobject *it)\r | |
1080 | {\r | |
1081 | SubString literal;\r | |
1082 | SubString field_name;\r | |
1083 | SubString format_spec;\r | |
1084 | STRINGLIB_CHAR conversion;\r | |
1085 | int format_spec_needs_expanding;\r | |
1086 | int field_present;\r | |
1087 | int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,\r | |
1088 | &field_name, &format_spec, &conversion,\r | |
1089 | &format_spec_needs_expanding);\r | |
1090 | \r | |
1091 | /* all of the SubString objects point into it->str, so no\r | |
1092 | memory management needs to be done on them */\r | |
1093 | assert(0 <= result && result <= 2);\r | |
1094 | if (result == 0 || result == 1)\r | |
1095 | /* if 0, error has already been set, if 1, iterator is empty */\r | |
1096 | return NULL;\r | |
1097 | else {\r | |
1098 | PyObject *literal_str = NULL;\r | |
1099 | PyObject *field_name_str = NULL;\r | |
1100 | PyObject *format_spec_str = NULL;\r | |
1101 | PyObject *conversion_str = NULL;\r | |
1102 | PyObject *tuple = NULL;\r | |
1103 | \r | |
1104 | literal_str = SubString_new_object(&literal);\r | |
1105 | if (literal_str == NULL)\r | |
1106 | goto done;\r | |
1107 | \r | |
1108 | field_name_str = SubString_new_object(&field_name);\r | |
1109 | if (field_name_str == NULL)\r | |
1110 | goto done;\r | |
1111 | \r | |
1112 | /* if field_name is non-zero length, return a string for\r | |
1113 | format_spec (even if zero length), else return None */\r | |
1114 | format_spec_str = (field_present ?\r | |
1115 | SubString_new_object_or_empty :\r | |
1116 | SubString_new_object)(&format_spec);\r | |
1117 | if (format_spec_str == NULL)\r | |
1118 | goto done;\r | |
1119 | \r | |
1120 | /* if the conversion is not specified, return a None,\r | |
1121 | otherwise create a one length string with the conversion\r | |
1122 | character */\r | |
1123 | if (conversion == '\0') {\r | |
1124 | conversion_str = Py_None;\r | |
1125 | Py_INCREF(conversion_str);\r | |
1126 | }\r | |
1127 | else\r | |
1128 | conversion_str = STRINGLIB_NEW(&conversion, 1);\r | |
1129 | if (conversion_str == NULL)\r | |
1130 | goto done;\r | |
1131 | \r | |
1132 | tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,\r | |
1133 | conversion_str);\r | |
1134 | done:\r | |
1135 | Py_XDECREF(literal_str);\r | |
1136 | Py_XDECREF(field_name_str);\r | |
1137 | Py_XDECREF(format_spec_str);\r | |
1138 | Py_XDECREF(conversion_str);\r | |
1139 | return tuple;\r | |
1140 | }\r | |
1141 | }\r | |
1142 | \r | |
/* Method table for formatteriterator.  It defines no methods and
   contains only the terminating sentinel entry. */
static PyMethodDef formatteriter_methods[] = {
    {NULL,              NULL}           /* sentinel */
};
1146 | \r | |
/* Type object for formatteriterator.  The slots are filled in
   positionally; the only non-trivial ones are the deallocator, the
   iterator protocol (tp_iter / tp_iternext) and the (empty) method
   table. */
static PyTypeObject PyFormatterIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "formatteriterator",                /* tp_name */
    sizeof(formatteriterobject),        /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)formatteriter_dealloc,  /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    0,                                  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter */
    (iternextfunc)formatteriter_next,   /* tp_iternext */
    formatteriter_methods,              /* tp_methods */
    0,                                  /* tp_members (slot following tp_methods) */
};
1179 | \r | |
1180 | /* unicode_formatter_parser is used to implement\r | |
1181 | string.Formatter.vformat. it parses a string and returns tuples\r | |
1182 | describing the parsed elements. It's a wrapper around\r | |
1183 | stringlib/string_format.h's MarkupIterator */\r | |
1184 | static PyObject *\r | |
1185 | formatter_parser(STRINGLIB_OBJECT *self)\r | |
1186 | {\r | |
1187 | formatteriterobject *it;\r | |
1188 | \r | |
1189 | it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);\r | |
1190 | if (it == NULL)\r | |
1191 | return NULL;\r | |
1192 | \r | |
1193 | /* take ownership, give the object to the iterator */\r | |
1194 | Py_INCREF(self);\r | |
1195 | it->str = self;\r | |
1196 | \r | |
1197 | /* initialize the contained MarkupIterator */\r | |
1198 | MarkupIterator_init(&it->it_markup,\r | |
1199 | STRINGLIB_STR(self),\r | |
1200 | STRINGLIB_LEN(self));\r | |
1201 | \r | |
1202 | return (PyObject *)it;\r | |
1203 | }\r | |
1204 | \r | |
1205 | \r | |
1206 | /************************************************************************/\r | |
1207 | /*********** fieldnameiterator ******************************************/\r | |
1208 | /************************************************************************/\r | |
1209 | \r | |
1210 | \r | |
/* This is used to implement string.Formatter.vparse().  It parses the
   field name into attribute and item values.  It's a Python-callable
   wrapper around FieldNameIterator.

   Instances are created by formatter_field_name_split() and released
   by fieldnameiter_dealloc(). */

typedef struct {
    PyObject_HEAD

    /* the field-name string; formatter_field_name_split() stores a new
       reference here just to keep the string alive, and
       fieldnameiter_dealloc() releases it */
    STRINGLIB_OBJECT *str;

    /* iteration state, filled in by field_name_split() */
    FieldNameIterator it_field;
} fieldnameiterobject;
1222 | \r | |
1223 | static void\r | |
1224 | fieldnameiter_dealloc(fieldnameiterobject *it)\r | |
1225 | {\r | |
1226 | Py_XDECREF(it->str);\r | |
1227 | PyObject_FREE(it);\r | |
1228 | }\r | |
1229 | \r | |
1230 | /* returns a tuple:\r | |
1231 | (is_attr, value)\r | |
1232 | is_attr is true if we used attribute syntax (e.g., '.foo')\r | |
1233 | false if we used index syntax (e.g., '[foo]')\r | |
1234 | value is an integer or string\r | |
1235 | */\r | |
1236 | static PyObject *\r | |
1237 | fieldnameiter_next(fieldnameiterobject *it)\r | |
1238 | {\r | |
1239 | int result;\r | |
1240 | int is_attr;\r | |
1241 | Py_ssize_t idx;\r | |
1242 | SubString name;\r | |
1243 | \r | |
1244 | result = FieldNameIterator_next(&it->it_field, &is_attr,\r | |
1245 | &idx, &name);\r | |
1246 | if (result == 0 || result == 1)\r | |
1247 | /* if 0, error has already been set, if 1, iterator is empty */\r | |
1248 | return NULL;\r | |
1249 | else {\r | |
1250 | PyObject* result = NULL;\r | |
1251 | PyObject* is_attr_obj = NULL;\r | |
1252 | PyObject* obj = NULL;\r | |
1253 | \r | |
1254 | is_attr_obj = PyBool_FromLong(is_attr);\r | |
1255 | if (is_attr_obj == NULL)\r | |
1256 | goto done;\r | |
1257 | \r | |
1258 | /* either an integer or a string */\r | |
1259 | if (idx != -1)\r | |
1260 | obj = PyLong_FromSsize_t(idx);\r | |
1261 | else\r | |
1262 | obj = SubString_new_object(&name);\r | |
1263 | if (obj == NULL)\r | |
1264 | goto done;\r | |
1265 | \r | |
1266 | /* return a tuple of values */\r | |
1267 | result = PyTuple_Pack(2, is_attr_obj, obj);\r | |
1268 | \r | |
1269 | done:\r | |
1270 | Py_XDECREF(is_attr_obj);\r | |
1271 | Py_XDECREF(obj);\r | |
1272 | return result;\r | |
1273 | }\r | |
1274 | }\r | |
1275 | \r | |
/* Method table for fieldnameiterator.  It defines no methods and
   contains only the terminating sentinel entry. */
static PyMethodDef fieldnameiter_methods[] = {
    {NULL,              NULL}           /* sentinel */
};
1279 | \r | |
/* Type object for fieldnameiterator.  The slots are filled in
   positionally; the only non-trivial ones are the deallocator, the
   iterator protocol (tp_iter / tp_iternext) and the (empty) method
   table. */
static PyTypeObject PyFieldNameIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "fieldnameiterator",                /* tp_name */
    sizeof(fieldnameiterobject),        /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    0,                                  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter */
    (iternextfunc)fieldnameiter_next,   /* tp_iternext */
    fieldnameiter_methods,              /* tp_methods */
    0};                                 /* tp_members (slot following tp_methods) */
1311 | \r | |
1312 | /* unicode_formatter_field_name_split is used to implement\r | |
1313 | string.Formatter.vformat. it takes an PEP 3101 "field name", and\r | |
1314 | returns a tuple of (first, rest): "first", the part before the\r | |
1315 | first '.' or '['; and "rest", an iterator for the rest of the field\r | |
1316 | name. it's a wrapper around stringlib/string_format.h's\r | |
1317 | field_name_split. The iterator it returns is a\r | |
1318 | FieldNameIterator */\r | |
1319 | static PyObject *\r | |
1320 | formatter_field_name_split(STRINGLIB_OBJECT *self)\r | |
1321 | {\r | |
1322 | SubString first;\r | |
1323 | Py_ssize_t first_idx;\r | |
1324 | fieldnameiterobject *it;\r | |
1325 | \r | |
1326 | PyObject *first_obj = NULL;\r | |
1327 | PyObject *result = NULL;\r | |
1328 | \r | |
1329 | it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);\r | |
1330 | if (it == NULL)\r | |
1331 | return NULL;\r | |
1332 | \r | |
1333 | /* take ownership, give the object to the iterator. this is\r | |
1334 | just to keep the field_name alive */\r | |
1335 | Py_INCREF(self);\r | |
1336 | it->str = self;\r | |
1337 | \r | |
1338 | /* Pass in auto_number = NULL. We'll return an empty string for\r | |
1339 | first_obj in that case. */\r | |
1340 | if (!field_name_split(STRINGLIB_STR(self),\r | |
1341 | STRINGLIB_LEN(self),\r | |
1342 | &first, &first_idx, &it->it_field, NULL))\r | |
1343 | goto done;\r | |
1344 | \r | |
1345 | /* first becomes an integer, if possible; else a string */\r | |
1346 | if (first_idx != -1)\r | |
1347 | first_obj = PyLong_FromSsize_t(first_idx);\r | |
1348 | else\r | |
1349 | /* convert "first" into a string object */\r | |
1350 | first_obj = SubString_new_object(&first);\r | |
1351 | if (first_obj == NULL)\r | |
1352 | goto done;\r | |
1353 | \r | |
1354 | /* return a tuple of values */\r | |
1355 | result = PyTuple_Pack(2, first_obj, it);\r | |
1356 | \r | |
1357 | done:\r | |
1358 | Py_XDECREF(it);\r | |
1359 | Py_XDECREF(first_obj);\r | |
1360 | return result;\r | |
1361 | }\r |