]> git.proxmox.com Git - mirror_edk2.git/blame - AppPkg/Applications/Python/Python-2.7.2/Objects/stringobject.c
EmbeddedPkg: Extend NvVarStoreFormattedLib LIBRARY_CLASS
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.2 / Objects / stringobject.c
CommitLineData
4710c53d 1/* String (str/bytes) object implementation */\r
2\r
3#define PY_SSIZE_T_CLEAN\r
4\r
5#include "Python.h"\r
6#include <ctype.h>\r
7#include <stddef.h>\r
8\r
9#ifdef COUNT_ALLOCS\r
10Py_ssize_t null_strings, one_strings;\r
11#endif\r
12\r
13static PyStringObject *characters[UCHAR_MAX + 1];\r
14static PyStringObject *nullstring;\r
15\r
16/* This dictionary holds all interned strings. Note that references to\r
17 strings in this dictionary are *not* counted in the string's ob_refcnt.\r
18 When the interned string reaches a refcnt of 0 the string deallocation\r
19 function will delete the reference from this dictionary.\r
20\r
21 Another way to look at this is to say that the actual reference\r
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)\r
23*/\r
24static PyObject *interned;\r
25\r
26/* PyStringObject_SIZE gives the basic size of a string; any memory allocation\r
27 for a string of length n should request PyStringObject_SIZE + n bytes.\r
28\r
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves\r
30 3 bytes per string allocation on a typical system.\r
31*/\r
32#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)\r
33\r
34/*\r
35 For PyString_FromString(), the parameter `str' points to a null-terminated\r
36 string containing exactly `size' bytes.\r
37\r
38 For PyString_FromStringAndSize(), the parameter `str' is\r
39 either NULL or else points to a string containing at least `size' bytes.\r
40 For PyString_FromStringAndSize(), the string in the `str' parameter does\r
41 not have to be null-terminated. (Therefore it is safe to construct a\r
42 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)\r
43 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'\r
44 bytes (setting the last byte to the null terminating character) and you can\r
45 fill in the data yourself. If `str' is non-NULL then the resulting\r
46 PyString object must be treated as immutable and you must not fill in nor\r
47 alter the data yourself, since the strings may be shared.\r
48\r
49 The PyObject member `op->ob_size', which denotes the number of "extra\r
50 items" in a variable-size object, will contain the number of bytes\r
51 allocated for string data, not counting the null terminating character.\r
52 It is therefore equal to the `size' parameter (for\r
53 PyString_FromStringAndSize()) or the length of the string in the `str'\r
54 parameter (for PyString_FromString()).\r
55*/\r
56PyObject *\r
57PyString_FromStringAndSize(const char *str, Py_ssize_t size)\r
58{\r
59 register PyStringObject *op;\r
60 if (size < 0) {\r
61 PyErr_SetString(PyExc_SystemError,\r
62 "Negative size passed to PyString_FromStringAndSize");\r
63 return NULL;\r
64 }\r
65 if (size == 0 && (op = nullstring) != NULL) {\r
66#ifdef COUNT_ALLOCS\r
67 null_strings++;\r
68#endif\r
69 Py_INCREF(op);\r
70 return (PyObject *)op;\r
71 }\r
72 if (size == 1 && str != NULL &&\r
73 (op = characters[*str & UCHAR_MAX]) != NULL)\r
74 {\r
75#ifdef COUNT_ALLOCS\r
76 one_strings++;\r
77#endif\r
78 Py_INCREF(op);\r
79 return (PyObject *)op;\r
80 }\r
81\r
82 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {\r
83 PyErr_SetString(PyExc_OverflowError, "string is too large");\r
84 return NULL;\r
85 }\r
86\r
87 /* Inline PyObject_NewVar */\r
88 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);\r
89 if (op == NULL)\r
90 return PyErr_NoMemory();\r
91 PyObject_INIT_VAR(op, &PyString_Type, size);\r
92 op->ob_shash = -1;\r
93 op->ob_sstate = SSTATE_NOT_INTERNED;\r
94 if (str != NULL)\r
95 Py_MEMCPY(op->ob_sval, str, size);\r
96 op->ob_sval[size] = '\0';\r
97 /* share short strings */\r
98 if (size == 0) {\r
99 PyObject *t = (PyObject *)op;\r
100 PyString_InternInPlace(&t);\r
101 op = (PyStringObject *)t;\r
102 nullstring = op;\r
103 Py_INCREF(op);\r
104 } else if (size == 1 && str != NULL) {\r
105 PyObject *t = (PyObject *)op;\r
106 PyString_InternInPlace(&t);\r
107 op = (PyStringObject *)t;\r
108 characters[*str & UCHAR_MAX] = op;\r
109 Py_INCREF(op);\r
110 }\r
111 return (PyObject *) op;\r
112}\r
113\r
114PyObject *\r
115PyString_FromString(const char *str)\r
116{\r
117 register size_t size;\r
118 register PyStringObject *op;\r
119\r
120 assert(str != NULL);\r
121 size = strlen(str);\r
122 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {\r
123 PyErr_SetString(PyExc_OverflowError,\r
124 "string is too long for a Python string");\r
125 return NULL;\r
126 }\r
127 if (size == 0 && (op = nullstring) != NULL) {\r
128#ifdef COUNT_ALLOCS\r
129 null_strings++;\r
130#endif\r
131 Py_INCREF(op);\r
132 return (PyObject *)op;\r
133 }\r
134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {\r
135#ifdef COUNT_ALLOCS\r
136 one_strings++;\r
137#endif\r
138 Py_INCREF(op);\r
139 return (PyObject *)op;\r
140 }\r
141\r
142 /* Inline PyObject_NewVar */\r
143 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);\r
144 if (op == NULL)\r
145 return PyErr_NoMemory();\r
146 PyObject_INIT_VAR(op, &PyString_Type, size);\r
147 op->ob_shash = -1;\r
148 op->ob_sstate = SSTATE_NOT_INTERNED;\r
149 Py_MEMCPY(op->ob_sval, str, size+1);\r
150 /* share short strings */\r
151 if (size == 0) {\r
152 PyObject *t = (PyObject *)op;\r
153 PyString_InternInPlace(&t);\r
154 op = (PyStringObject *)t;\r
155 nullstring = op;\r
156 Py_INCREF(op);\r
157 } else if (size == 1) {\r
158 PyObject *t = (PyObject *)op;\r
159 PyString_InternInPlace(&t);\r
160 op = (PyStringObject *)t;\r
161 characters[*str & UCHAR_MAX] = op;\r
162 Py_INCREF(op);\r
163 }\r
164 return (PyObject *) op;\r
165}\r
166\r
167PyObject *\r
168PyString_FromFormatV(const char *format, va_list vargs)\r
169{\r
170 va_list count;\r
171 Py_ssize_t n = 0;\r
172 const char* f;\r
173 char *s;\r
174 PyObject* string;\r
175\r
176#ifdef VA_LIST_IS_ARRAY\r
177 Py_MEMCPY(count, vargs, sizeof(va_list));\r
178#else\r
179#ifdef __va_copy\r
180 __va_copy(count, vargs);\r
181#else\r
182 count = vargs;\r
183#endif\r
184#endif\r
185 /* step 1: figure out how large a buffer we need */\r
186 for (f = format; *f; f++) {\r
187 if (*f == '%') {\r
188#ifdef HAVE_LONG_LONG\r
189 int longlongflag = 0;\r
190#endif\r
191 const char* p = f;\r
192 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))\r
193 ;\r
194\r
195 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since\r
196 * they don't affect the amount of space we reserve.\r
197 */\r
198 if (*f == 'l') {\r
199 if (f[1] == 'd' || f[1] == 'u') {\r
200 ++f;\r
201 }\r
202#ifdef HAVE_LONG_LONG\r
203 else if (f[1] == 'l' &&\r
204 (f[2] == 'd' || f[2] == 'u')) {\r
205 longlongflag = 1;\r
206 f += 2;\r
207 }\r
208#endif\r
209 }\r
210 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {\r
211 ++f;\r
212 }\r
213\r
214 switch (*f) {\r
215 case 'c':\r
216 (void)va_arg(count, int);\r
217 /* fall through... */\r
218 case '%':\r
219 n++;\r
220 break;\r
221 case 'd': case 'u': case 'i': case 'x':\r
222 (void) va_arg(count, int);\r
223#ifdef HAVE_LONG_LONG\r
224 /* Need at most\r
225 ceil(log10(256)*SIZEOF_LONG_LONG) digits,\r
226 plus 1 for the sign. 53/22 is an upper\r
227 bound for log10(256). */\r
228 if (longlongflag)\r
229 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;\r
230 else\r
231#endif\r
232 /* 20 bytes is enough to hold a 64-bit\r
233 integer. Decimal takes the most\r
234 space. This isn't enough for\r
235 octal. */\r
236 n += 20;\r
237\r
238 break;\r
239 case 's':\r
240 s = va_arg(count, char*);\r
241 n += strlen(s);\r
242 break;\r
243 case 'p':\r
244 (void) va_arg(count, int);\r
245 /* maximum 64-bit pointer representation:\r
246 * 0xffffffffffffffff\r
247 * so 19 characters is enough.\r
248 * XXX I count 18 -- what's the extra for?\r
249 */\r
250 n += 19;\r
251 break;\r
252 default:\r
253 /* if we stumble upon an unknown\r
254 formatting code, copy the rest of\r
255 the format string to the output\r
256 string. (we cannot just skip the\r
257 code, since there's no way to know\r
258 what's in the argument list) */\r
259 n += strlen(p);\r
260 goto expand;\r
261 }\r
262 } else\r
263 n++;\r
264 }\r
265 expand:\r
266 /* step 2: fill the buffer */\r
267 /* Since we've analyzed how much space we need for the worst case,\r
268 use sprintf directly instead of the slower PyOS_snprintf. */\r
269 string = PyString_FromStringAndSize(NULL, n);\r
270 if (!string)\r
271 return NULL;\r
272\r
273 s = PyString_AsString(string);\r
274\r
275 for (f = format; *f; f++) {\r
276 if (*f == '%') {\r
277 const char* p = f++;\r
278 Py_ssize_t i;\r
279 int longflag = 0;\r
280#ifdef HAVE_LONG_LONG\r
281 int longlongflag = 0;\r
282#endif\r
283 int size_tflag = 0;\r
284 /* parse the width.precision part (we're only\r
285 interested in the precision value, if any) */\r
286 n = 0;\r
287 while (isdigit(Py_CHARMASK(*f)))\r
288 n = (n*10) + *f++ - '0';\r
289 if (*f == '.') {\r
290 f++;\r
291 n = 0;\r
292 while (isdigit(Py_CHARMASK(*f)))\r
293 n = (n*10) + *f++ - '0';\r
294 }\r
295 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))\r
296 f++;\r
297 /* Handle %ld, %lu, %lld and %llu. */\r
298 if (*f == 'l') {\r
299 if (f[1] == 'd' || f[1] == 'u') {\r
300 longflag = 1;\r
301 ++f;\r
302 }\r
303#ifdef HAVE_LONG_LONG\r
304 else if (f[1] == 'l' &&\r
305 (f[2] == 'd' || f[2] == 'u')) {\r
306 longlongflag = 1;\r
307 f += 2;\r
308 }\r
309#endif\r
310 }\r
311 /* handle the size_t flag. */\r
312 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {\r
313 size_tflag = 1;\r
314 ++f;\r
315 }\r
316\r
317 switch (*f) {\r
318 case 'c':\r
319 *s++ = va_arg(vargs, int);\r
320 break;\r
321 case 'd':\r
322 if (longflag)\r
323 sprintf(s, "%ld", va_arg(vargs, long));\r
324#ifdef HAVE_LONG_LONG\r
325 else if (longlongflag)\r
326 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",\r
327 va_arg(vargs, PY_LONG_LONG));\r
328#endif\r
329 else if (size_tflag)\r
330 sprintf(s, "%" PY_FORMAT_SIZE_T "d",\r
331 va_arg(vargs, Py_ssize_t));\r
332 else\r
333 sprintf(s, "%d", va_arg(vargs, int));\r
334 s += strlen(s);\r
335 break;\r
336 case 'u':\r
337 if (longflag)\r
338 sprintf(s, "%lu",\r
339 va_arg(vargs, unsigned long));\r
340#ifdef HAVE_LONG_LONG\r
341 else if (longlongflag)\r
342 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",\r
343 va_arg(vargs, PY_LONG_LONG));\r
344#endif\r
345 else if (size_tflag)\r
346 sprintf(s, "%" PY_FORMAT_SIZE_T "u",\r
347 va_arg(vargs, size_t));\r
348 else\r
349 sprintf(s, "%u",\r
350 va_arg(vargs, unsigned int));\r
351 s += strlen(s);\r
352 break;\r
353 case 'i':\r
354 sprintf(s, "%i", va_arg(vargs, int));\r
355 s += strlen(s);\r
356 break;\r
357 case 'x':\r
358 sprintf(s, "%x", va_arg(vargs, int));\r
359 s += strlen(s);\r
360 break;\r
361 case 's':\r
362 p = va_arg(vargs, char*);\r
363 i = strlen(p);\r
364 if (n > 0 && i > n)\r
365 i = n;\r
366 Py_MEMCPY(s, p, i);\r
367 s += i;\r
368 break;\r
369 case 'p':\r
370 sprintf(s, "%p", va_arg(vargs, void*));\r
371 /* %p is ill-defined: ensure leading 0x. */\r
372 if (s[1] == 'X')\r
373 s[1] = 'x';\r
374 else if (s[1] != 'x') {\r
375 memmove(s+2, s, strlen(s)+1);\r
376 s[0] = '0';\r
377 s[1] = 'x';\r
378 }\r
379 s += strlen(s);\r
380 break;\r
381 case '%':\r
382 *s++ = '%';\r
383 break;\r
384 default:\r
385 strcpy(s, p);\r
386 s += strlen(s);\r
387 goto end;\r
388 }\r
389 } else\r
390 *s++ = *f;\r
391 }\r
392\r
393 end:\r
394 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))\r
395 return NULL;\r
396 return string;\r
397}\r
398\r
399PyObject *\r
400PyString_FromFormat(const char *format, ...)\r
401{\r
402 PyObject* ret;\r
403 va_list vargs;\r
404\r
405#ifdef HAVE_STDARG_PROTOTYPES\r
406 va_start(vargs, format);\r
407#else\r
408 va_start(vargs);\r
409#endif\r
410 ret = PyString_FromFormatV(format, vargs);\r
411 va_end(vargs);\r
412 return ret;\r
413}\r
414\r
415\r
416PyObject *PyString_Decode(const char *s,\r
417 Py_ssize_t size,\r
418 const char *encoding,\r
419 const char *errors)\r
420{\r
421 PyObject *v, *str;\r
422\r
423 str = PyString_FromStringAndSize(s, size);\r
424 if (str == NULL)\r
425 return NULL;\r
426 v = PyString_AsDecodedString(str, encoding, errors);\r
427 Py_DECREF(str);\r
428 return v;\r
429}\r
430\r
431PyObject *PyString_AsDecodedObject(PyObject *str,\r
432 const char *encoding,\r
433 const char *errors)\r
434{\r
435 PyObject *v;\r
436\r
437 if (!PyString_Check(str)) {\r
438 PyErr_BadArgument();\r
439 goto onError;\r
440 }\r
441\r
442 if (encoding == NULL) {\r
443#ifdef Py_USING_UNICODE\r
444 encoding = PyUnicode_GetDefaultEncoding();\r
445#else\r
446 PyErr_SetString(PyExc_ValueError, "no encoding specified");\r
447 goto onError;\r
448#endif\r
449 }\r
450\r
451 /* Decode via the codec registry */\r
452 v = PyCodec_Decode(str, encoding, errors);\r
453 if (v == NULL)\r
454 goto onError;\r
455\r
456 return v;\r
457\r
458 onError:\r
459 return NULL;\r
460}\r
461\r
462PyObject *PyString_AsDecodedString(PyObject *str,\r
463 const char *encoding,\r
464 const char *errors)\r
465{\r
466 PyObject *v;\r
467\r
468 v = PyString_AsDecodedObject(str, encoding, errors);\r
469 if (v == NULL)\r
470 goto onError;\r
471\r
472#ifdef Py_USING_UNICODE\r
473 /* Convert Unicode to a string using the default encoding */\r
474 if (PyUnicode_Check(v)) {\r
475 PyObject *temp = v;\r
476 v = PyUnicode_AsEncodedString(v, NULL, NULL);\r
477 Py_DECREF(temp);\r
478 if (v == NULL)\r
479 goto onError;\r
480 }\r
481#endif\r
482 if (!PyString_Check(v)) {\r
483 PyErr_Format(PyExc_TypeError,\r
484 "decoder did not return a string object (type=%.400s)",\r
485 Py_TYPE(v)->tp_name);\r
486 Py_DECREF(v);\r
487 goto onError;\r
488 }\r
489\r
490 return v;\r
491\r
492 onError:\r
493 return NULL;\r
494}\r
495\r
496PyObject *PyString_Encode(const char *s,\r
497 Py_ssize_t size,\r
498 const char *encoding,\r
499 const char *errors)\r
500{\r
501 PyObject *v, *str;\r
502\r
503 str = PyString_FromStringAndSize(s, size);\r
504 if (str == NULL)\r
505 return NULL;\r
506 v = PyString_AsEncodedString(str, encoding, errors);\r
507 Py_DECREF(str);\r
508 return v;\r
509}\r
510\r
511PyObject *PyString_AsEncodedObject(PyObject *str,\r
512 const char *encoding,\r
513 const char *errors)\r
514{\r
515 PyObject *v;\r
516\r
517 if (!PyString_Check(str)) {\r
518 PyErr_BadArgument();\r
519 goto onError;\r
520 }\r
521\r
522 if (encoding == NULL) {\r
523#ifdef Py_USING_UNICODE\r
524 encoding = PyUnicode_GetDefaultEncoding();\r
525#else\r
526 PyErr_SetString(PyExc_ValueError, "no encoding specified");\r
527 goto onError;\r
528#endif\r
529 }\r
530\r
531 /* Encode via the codec registry */\r
532 v = PyCodec_Encode(str, encoding, errors);\r
533 if (v == NULL)\r
534 goto onError;\r
535\r
536 return v;\r
537\r
538 onError:\r
539 return NULL;\r
540}\r
541\r
542PyObject *PyString_AsEncodedString(PyObject *str,\r
543 const char *encoding,\r
544 const char *errors)\r
545{\r
546 PyObject *v;\r
547\r
548 v = PyString_AsEncodedObject(str, encoding, errors);\r
549 if (v == NULL)\r
550 goto onError;\r
551\r
552#ifdef Py_USING_UNICODE\r
553 /* Convert Unicode to a string using the default encoding */\r
554 if (PyUnicode_Check(v)) {\r
555 PyObject *temp = v;\r
556 v = PyUnicode_AsEncodedString(v, NULL, NULL);\r
557 Py_DECREF(temp);\r
558 if (v == NULL)\r
559 goto onError;\r
560 }\r
561#endif\r
562 if (!PyString_Check(v)) {\r
563 PyErr_Format(PyExc_TypeError,\r
564 "encoder did not return a string object (type=%.400s)",\r
565 Py_TYPE(v)->tp_name);\r
566 Py_DECREF(v);\r
567 goto onError;\r
568 }\r
569\r
570 return v;\r
571\r
572 onError:\r
573 return NULL;\r
574}\r
575\r
576static void\r
577string_dealloc(PyObject *op)\r
578{\r
579 switch (PyString_CHECK_INTERNED(op)) {\r
580 case SSTATE_NOT_INTERNED:\r
581 break;\r
582\r
583 case SSTATE_INTERNED_MORTAL:\r
584 /* revive dead object temporarily for DelItem */\r
585 Py_REFCNT(op) = 3;\r
586 if (PyDict_DelItem(interned, op) != 0)\r
587 Py_FatalError(\r
588 "deletion of interned string failed");\r
589 break;\r
590\r
591 case SSTATE_INTERNED_IMMORTAL:\r
592 Py_FatalError("Immortal interned string died.");\r
593\r
594 default:\r
595 Py_FatalError("Inconsistent interned string state.");\r
596 }\r
597 Py_TYPE(op)->tp_free(op);\r
598}\r
599\r
600/* Unescape a backslash-escaped string. If unicode is non-zero,\r
601 the string is a u-literal. If recode_encoding is non-zero,\r
602 the string is UTF-8 encoded and should be re-encoded in the\r
603 specified encoding. */\r
604\r
605PyObject *PyString_DecodeEscape(const char *s,\r
606 Py_ssize_t len,\r
607 const char *errors,\r
608 Py_ssize_t unicode,\r
609 const char *recode_encoding)\r
610{\r
611 int c;\r
612 char *p, *buf;\r
613 const char *end;\r
614 PyObject *v;\r
615 Py_ssize_t newlen = recode_encoding ? 4*len:len;\r
616 v = PyString_FromStringAndSize((char *)NULL, newlen);\r
617 if (v == NULL)\r
618 return NULL;\r
619 p = buf = PyString_AsString(v);\r
620 end = s + len;\r
621 while (s < end) {\r
622 if (*s != '\\') {\r
623 non_esc:\r
624#ifdef Py_USING_UNICODE\r
625 if (recode_encoding && (*s & 0x80)) {\r
626 PyObject *u, *w;\r
627 char *r;\r
628 const char* t;\r
629 Py_ssize_t rn;\r
630 t = s;\r
631 /* Decode non-ASCII bytes as UTF-8. */\r
632 while (t < end && (*t & 0x80)) t++;\r
633 u = PyUnicode_DecodeUTF8(s, t - s, errors);\r
634 if(!u) goto failed;\r
635\r
636 /* Recode them in target encoding. */\r
637 w = PyUnicode_AsEncodedString(\r
638 u, recode_encoding, errors);\r
639 Py_DECREF(u);\r
640 if (!w) goto failed;\r
641\r
642 /* Append bytes to output buffer. */\r
643 assert(PyString_Check(w));\r
644 r = PyString_AS_STRING(w);\r
645 rn = PyString_GET_SIZE(w);\r
646 Py_MEMCPY(p, r, rn);\r
647 p += rn;\r
648 Py_DECREF(w);\r
649 s = t;\r
650 } else {\r
651 *p++ = *s++;\r
652 }\r
653#else\r
654 *p++ = *s++;\r
655#endif\r
656 continue;\r
657 }\r
658 s++;\r
659 if (s==end) {\r
660 PyErr_SetString(PyExc_ValueError,\r
661 "Trailing \\ in string");\r
662 goto failed;\r
663 }\r
664 switch (*s++) {\r
665 /* XXX This assumes ASCII! */\r
666 case '\n': break;\r
667 case '\\': *p++ = '\\'; break;\r
668 case '\'': *p++ = '\''; break;\r
669 case '\"': *p++ = '\"'; break;\r
670 case 'b': *p++ = '\b'; break;\r
671 case 'f': *p++ = '\014'; break; /* FF */\r
672 case 't': *p++ = '\t'; break;\r
673 case 'n': *p++ = '\n'; break;\r
674 case 'r': *p++ = '\r'; break;\r
675 case 'v': *p++ = '\013'; break; /* VT */\r
676 case 'a': *p++ = '\007'; break; /* BEL, not classic C */\r
677 case '0': case '1': case '2': case '3':\r
678 case '4': case '5': case '6': case '7':\r
679 c = s[-1] - '0';\r
680 if (s < end && '0' <= *s && *s <= '7') {\r
681 c = (c<<3) + *s++ - '0';\r
682 if (s < end && '0' <= *s && *s <= '7')\r
683 c = (c<<3) + *s++ - '0';\r
684 }\r
685 *p++ = c;\r
686 break;\r
687 case 'x':\r
688 if (s+1 < end &&\r
689 isxdigit(Py_CHARMASK(s[0])) &&\r
690 isxdigit(Py_CHARMASK(s[1])))\r
691 {\r
692 unsigned int x = 0;\r
693 c = Py_CHARMASK(*s);\r
694 s++;\r
695 if (isdigit(c))\r
696 x = c - '0';\r
697 else if (islower(c))\r
698 x = 10 + c - 'a';\r
699 else\r
700 x = 10 + c - 'A';\r
701 x = x << 4;\r
702 c = Py_CHARMASK(*s);\r
703 s++;\r
704 if (isdigit(c))\r
705 x += c - '0';\r
706 else if (islower(c))\r
707 x += 10 + c - 'a';\r
708 else\r
709 x += 10 + c - 'A';\r
710 *p++ = x;\r
711 break;\r
712 }\r
713 if (!errors || strcmp(errors, "strict") == 0) {\r
714 PyErr_SetString(PyExc_ValueError,\r
715 "invalid \\x escape");\r
716 goto failed;\r
717 }\r
718 if (strcmp(errors, "replace") == 0) {\r
719 *p++ = '?';\r
720 } else if (strcmp(errors, "ignore") == 0)\r
721 /* do nothing */;\r
722 else {\r
723 PyErr_Format(PyExc_ValueError,\r
724 "decoding error; "\r
725 "unknown error handling code: %.400s",\r
726 errors);\r
727 goto failed;\r
728 }\r
729#ifndef Py_USING_UNICODE\r
730 case 'u':\r
731 case 'U':\r
732 case 'N':\r
733 if (unicode) {\r
734 PyErr_SetString(PyExc_ValueError,\r
735 "Unicode escapes not legal "\r
736 "when Unicode disabled");\r
737 goto failed;\r
738 }\r
739#endif\r
740 default:\r
741 *p++ = '\\';\r
742 s--;\r
743 goto non_esc; /* an arbitrary number of unescaped\r
744 UTF-8 bytes may follow. */\r
745 }\r
746 }\r
747 if (p-buf < newlen && _PyString_Resize(&v, p - buf))\r
748 goto failed;\r
749 return v;\r
750 failed:\r
751 Py_DECREF(v);\r
752 return NULL;\r
753}\r
754\r
755/* -------------------------------------------------------------------- */\r
756/* object api */\r
757\r
758static Py_ssize_t\r
759string_getsize(register PyObject *op)\r
760{\r
761 char *s;\r
762 Py_ssize_t len;\r
763 if (PyString_AsStringAndSize(op, &s, &len))\r
764 return -1;\r
765 return len;\r
766}\r
767\r
768static /*const*/ char *\r
769string_getbuffer(register PyObject *op)\r
770{\r
771 char *s;\r
772 Py_ssize_t len;\r
773 if (PyString_AsStringAndSize(op, &s, &len))\r
774 return NULL;\r
775 return s;\r
776}\r
777\r
778Py_ssize_t\r
779PyString_Size(register PyObject *op)\r
780{\r
781 if (!PyString_Check(op))\r
782 return string_getsize(op);\r
783 return Py_SIZE(op);\r
784}\r
785\r
786/*const*/ char *\r
787PyString_AsString(register PyObject *op)\r
788{\r
789 if (!PyString_Check(op))\r
790 return string_getbuffer(op);\r
791 return ((PyStringObject *)op) -> ob_sval;\r
792}\r
793\r
794int\r
795PyString_AsStringAndSize(register PyObject *obj,\r
796 register char **s,\r
797 register Py_ssize_t *len)\r
798{\r
799 if (s == NULL) {\r
800 PyErr_BadInternalCall();\r
801 return -1;\r
802 }\r
803\r
804 if (!PyString_Check(obj)) {\r
805#ifdef Py_USING_UNICODE\r
806 if (PyUnicode_Check(obj)) {\r
807 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);\r
808 if (obj == NULL)\r
809 return -1;\r
810 }\r
811 else\r
812#endif\r
813 {\r
814 PyErr_Format(PyExc_TypeError,\r
815 "expected string or Unicode object, "\r
816 "%.200s found", Py_TYPE(obj)->tp_name);\r
817 return -1;\r
818 }\r
819 }\r
820\r
821 *s = PyString_AS_STRING(obj);\r
822 if (len != NULL)\r
823 *len = PyString_GET_SIZE(obj);\r
824 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {\r
825 PyErr_SetString(PyExc_TypeError,\r
826 "expected string without null bytes");\r
827 return -1;\r
828 }\r
829 return 0;\r
830}\r
831\r
832/* -------------------------------------------------------------------- */\r
833/* Methods */\r
834\r
835#include "stringlib/stringdefs.h"\r
836#include "stringlib/fastsearch.h"\r
837\r
838#include "stringlib/count.h"\r
839#include "stringlib/find.h"\r
840#include "stringlib/partition.h"\r
841#include "stringlib/split.h"\r
842\r
843#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping\r
844#include "stringlib/localeutil.h"\r
845\r
846\r
847\r
848static int\r
849string_print(PyStringObject *op, FILE *fp, int flags)\r
850{\r
851 Py_ssize_t i, str_len;\r
852 char c;\r
853 int quote;\r
854\r
855 /* XXX Ought to check for interrupts when writing long strings */\r
856 if (! PyString_CheckExact(op)) {\r
857 int ret;\r
858 /* A str subclass may have its own __str__ method. */\r
859 op = (PyStringObject *) PyObject_Str((PyObject *)op);\r
860 if (op == NULL)\r
861 return -1;\r
862 ret = string_print(op, fp, flags);\r
863 Py_DECREF(op);\r
864 return ret;\r
865 }\r
866 if (flags & Py_PRINT_RAW) {\r
867 char *data = op->ob_sval;\r
868 Py_ssize_t size = Py_SIZE(op);\r
869 Py_BEGIN_ALLOW_THREADS\r
870 while (size > INT_MAX) {\r
871 /* Very long strings cannot be written atomically.\r
872 * But don't write exactly INT_MAX bytes at a time\r
873 * to avoid memory aligment issues.\r
874 */\r
875 const int chunk_size = INT_MAX & ~0x3FFF;\r
876 fwrite(data, 1, chunk_size, fp);\r
877 data += chunk_size;\r
878 size -= chunk_size;\r
879 }\r
880#ifdef __VMS\r
881 if (size) fwrite(data, (int)size, 1, fp);\r
882#else\r
883 fwrite(data, 1, (int)size, fp);\r
884#endif\r
885 Py_END_ALLOW_THREADS\r
886 return 0;\r
887 }\r
888\r
889 /* figure out which quote to use; single is preferred */\r
890 quote = '\'';\r
891 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&\r
892 !memchr(op->ob_sval, '"', Py_SIZE(op)))\r
893 quote = '"';\r
894\r
895 str_len = Py_SIZE(op);\r
896 Py_BEGIN_ALLOW_THREADS\r
897 fputc(quote, fp);\r
898 for (i = 0; i < str_len; i++) {\r
899 /* Since strings are immutable and the caller should have a\r
900 reference, accessing the interal buffer should not be an issue\r
901 with the GIL released. */\r
902 c = op->ob_sval[i];\r
903 if (c == quote || c == '\\')\r
904 fprintf(fp, "\\%c", c);\r
905 else if (c == '\t')\r
906 fprintf(fp, "\\t");\r
907 else if (c == '\n')\r
908 fprintf(fp, "\\n");\r
909 else if (c == '\r')\r
910 fprintf(fp, "\\r");\r
911 else if (c < ' ' || c >= 0x7f)\r
912 fprintf(fp, "\\x%02x", c & 0xff);\r
913 else\r
914 fputc(c, fp);\r
915 }\r
916 fputc(quote, fp);\r
917 Py_END_ALLOW_THREADS\r
918 return 0;\r
919}\r
920\r
921PyObject *\r
922PyString_Repr(PyObject *obj, int smartquotes)\r
923{\r
924 register PyStringObject* op = (PyStringObject*) obj;\r
925 size_t newsize = 2 + 4 * Py_SIZE(op);\r
926 PyObject *v;\r
927 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {\r
928 PyErr_SetString(PyExc_OverflowError,\r
929 "string is too large to make repr");\r
930 return NULL;\r
931 }\r
932 v = PyString_FromStringAndSize((char *)NULL, newsize);\r
933 if (v == NULL) {\r
934 return NULL;\r
935 }\r
936 else {\r
937 register Py_ssize_t i;\r
938 register char c;\r
939 register char *p;\r
940 int quote;\r
941\r
942 /* figure out which quote to use; single is preferred */\r
943 quote = '\'';\r
944 if (smartquotes &&\r
945 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&\r
946 !memchr(op->ob_sval, '"', Py_SIZE(op)))\r
947 quote = '"';\r
948\r
949 p = PyString_AS_STRING(v);\r
950 *p++ = quote;\r
951 for (i = 0; i < Py_SIZE(op); i++) {\r
952 /* There's at least enough room for a hex escape\r
953 and a closing quote. */\r
954 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);\r
955 c = op->ob_sval[i];\r
956 if (c == quote || c == '\\')\r
957 *p++ = '\\', *p++ = c;\r
958 else if (c == '\t')\r
959 *p++ = '\\', *p++ = 't';\r
960 else if (c == '\n')\r
961 *p++ = '\\', *p++ = 'n';\r
962 else if (c == '\r')\r
963 *p++ = '\\', *p++ = 'r';\r
964 else if (c < ' ' || c >= 0x7f) {\r
965 /* For performance, we don't want to call\r
966 PyOS_snprintf here (extra layers of\r
967 function call). */\r
968 sprintf(p, "\\x%02x", c & 0xff);\r
969 p += 4;\r
970 }\r
971 else\r
972 *p++ = c;\r
973 }\r
974 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);\r
975 *p++ = quote;\r
976 *p = '\0';\r
977 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))\r
978 return NULL;\r
979 return v;\r
980 }\r
981}\r
982\r
983static PyObject *\r
984string_repr(PyObject *op)\r
985{\r
986 return PyString_Repr(op, 1);\r
987}\r
988\r
989static PyObject *\r
990string_str(PyObject *s)\r
991{\r
992 assert(PyString_Check(s));\r
993 if (PyString_CheckExact(s)) {\r
994 Py_INCREF(s);\r
995 return s;\r
996 }\r
997 else {\r
998 /* Subtype -- return genuine string with the same value. */\r
999 PyStringObject *t = (PyStringObject *) s;\r
1000 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));\r
1001 }\r
1002}\r
1003\r
1004static Py_ssize_t\r
1005string_length(PyStringObject *a)\r
1006{\r
1007 return Py_SIZE(a);\r
1008}\r
1009\r
1010static PyObject *\r
1011string_concat(register PyStringObject *a, register PyObject *bb)\r
1012{\r
1013 register Py_ssize_t size;\r
1014 register PyStringObject *op;\r
1015 if (!PyString_Check(bb)) {\r
1016#ifdef Py_USING_UNICODE\r
1017 if (PyUnicode_Check(bb))\r
1018 return PyUnicode_Concat((PyObject *)a, bb);\r
1019#endif\r
1020 if (PyByteArray_Check(bb))\r
1021 return PyByteArray_Concat((PyObject *)a, bb);\r
1022 PyErr_Format(PyExc_TypeError,\r
1023 "cannot concatenate 'str' and '%.200s' objects",\r
1024 Py_TYPE(bb)->tp_name);\r
1025 return NULL;\r
1026 }\r
1027#define b ((PyStringObject *)bb)\r
1028 /* Optimize cases with empty left or right operand */\r
1029 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&\r
1030 PyString_CheckExact(a) && PyString_CheckExact(b)) {\r
1031 if (Py_SIZE(a) == 0) {\r
1032 Py_INCREF(bb);\r
1033 return bb;\r
1034 }\r
1035 Py_INCREF(a);\r
1036 return (PyObject *)a;\r
1037 }\r
1038 size = Py_SIZE(a) + Py_SIZE(b);\r
1039 /* Check that string sizes are not negative, to prevent an\r
1040 overflow in cases where we are passed incorrectly-created\r
1041 strings with negative lengths (due to a bug in other code).\r
1042 */\r
1043 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||\r
1044 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {\r
1045 PyErr_SetString(PyExc_OverflowError,\r
1046 "strings are too large to concat");\r
1047 return NULL;\r
1048 }\r
1049\r
1050 /* Inline PyObject_NewVar */\r
1051 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {\r
1052 PyErr_SetString(PyExc_OverflowError,\r
1053 "strings are too large to concat");\r
1054 return NULL;\r
1055 }\r
1056 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);\r
1057 if (op == NULL)\r
1058 return PyErr_NoMemory();\r
1059 PyObject_INIT_VAR(op, &PyString_Type, size);\r
1060 op->ob_shash = -1;\r
1061 op->ob_sstate = SSTATE_NOT_INTERNED;\r
1062 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));\r
1063 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));\r
1064 op->ob_sval[size] = '\0';\r
1065 return (PyObject *) op;\r
1066#undef b\r
1067}\r
1068\r
1069static PyObject *\r
1070string_repeat(register PyStringObject *a, register Py_ssize_t n)\r
1071{\r
1072 register Py_ssize_t i;\r
1073 register Py_ssize_t j;\r
1074 register Py_ssize_t size;\r
1075 register PyStringObject *op;\r
1076 size_t nbytes;\r
1077 if (n < 0)\r
1078 n = 0;\r
1079 /* watch out for overflows: the size can overflow int,\r
1080 * and the # of bytes needed can overflow size_t\r
1081 */\r
1082 size = Py_SIZE(a) * n;\r
1083 if (n && size / n != Py_SIZE(a)) {\r
1084 PyErr_SetString(PyExc_OverflowError,\r
1085 "repeated string is too long");\r
1086 return NULL;\r
1087 }\r
1088 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {\r
1089 Py_INCREF(a);\r
1090 return (PyObject *)a;\r
1091 }\r
1092 nbytes = (size_t)size;\r
1093 if (nbytes + PyStringObject_SIZE <= nbytes) {\r
1094 PyErr_SetString(PyExc_OverflowError,\r
1095 "repeated string is too long");\r
1096 return NULL;\r
1097 }\r
1098 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);\r
1099 if (op == NULL)\r
1100 return PyErr_NoMemory();\r
1101 PyObject_INIT_VAR(op, &PyString_Type, size);\r
1102 op->ob_shash = -1;\r
1103 op->ob_sstate = SSTATE_NOT_INTERNED;\r
1104 op->ob_sval[size] = '\0';\r
1105 if (Py_SIZE(a) == 1 && n > 0) {\r
1106 memset(op->ob_sval, a->ob_sval[0] , n);\r
1107 return (PyObject *) op;\r
1108 }\r
1109 i = 0;\r
1110 if (i < size) {\r
1111 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));\r
1112 i = Py_SIZE(a);\r
1113 }\r
1114 while (i < size) {\r
1115 j = (i <= size-i) ? i : size-i;\r
1116 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);\r
1117 i += j;\r
1118 }\r
1119 return (PyObject *) op;\r
1120}\r
1121\r
1122/* String slice a[i:j] consists of characters a[i] ... a[j-1] */\r
1123\r
1124static PyObject *\r
1125string_slice(register PyStringObject *a, register Py_ssize_t i,\r
1126 register Py_ssize_t j)\r
1127 /* j -- may be negative! */\r
1128{\r
1129 if (i < 0)\r
1130 i = 0;\r
1131 if (j < 0)\r
1132 j = 0; /* Avoid signed/unsigned bug in next line */\r
1133 if (j > Py_SIZE(a))\r
1134 j = Py_SIZE(a);\r
1135 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {\r
1136 /* It's the same as a */\r
1137 Py_INCREF(a);\r
1138 return (PyObject *)a;\r
1139 }\r
1140 if (j < i)\r
1141 j = i;\r
1142 return PyString_FromStringAndSize(a->ob_sval + i, j-i);\r
1143}\r
1144\r
1145static int\r
1146string_contains(PyObject *str_obj, PyObject *sub_obj)\r
1147{\r
1148 if (!PyString_CheckExact(sub_obj)) {\r
1149#ifdef Py_USING_UNICODE\r
1150 if (PyUnicode_Check(sub_obj))\r
1151 return PyUnicode_Contains(str_obj, sub_obj);\r
1152#endif\r
1153 if (!PyString_Check(sub_obj)) {\r
1154 PyErr_Format(PyExc_TypeError,\r
1155 "'in <string>' requires string as left operand, "\r
1156 "not %.200s", Py_TYPE(sub_obj)->tp_name);\r
1157 return -1;\r
1158 }\r
1159 }\r
1160\r
1161 return stringlib_contains_obj(str_obj, sub_obj);\r
1162}\r
1163\r
1164static PyObject *\r
1165string_item(PyStringObject *a, register Py_ssize_t i)\r
1166{\r
1167 char pchar;\r
1168 PyObject *v;\r
1169 if (i < 0 || i >= Py_SIZE(a)) {\r
1170 PyErr_SetString(PyExc_IndexError, "string index out of range");\r
1171 return NULL;\r
1172 }\r
1173 pchar = a->ob_sval[i];\r
1174 v = (PyObject *)characters[pchar & UCHAR_MAX];\r
1175 if (v == NULL)\r
1176 v = PyString_FromStringAndSize(&pchar, 1);\r
1177 else {\r
1178#ifdef COUNT_ALLOCS\r
1179 one_strings++;\r
1180#endif\r
1181 Py_INCREF(v);\r
1182 }\r
1183 return v;\r
1184}\r
1185\r
1186static PyObject*\r
1187string_richcompare(PyStringObject *a, PyStringObject *b, int op)\r
1188{\r
1189 int c;\r
1190 Py_ssize_t len_a, len_b;\r
1191 Py_ssize_t min_len;\r
1192 PyObject *result;\r
1193\r
1194 /* Make sure both arguments are strings. */\r
1195 if (!(PyString_Check(a) && PyString_Check(b))) {\r
1196 result = Py_NotImplemented;\r
1197 goto out;\r
1198 }\r
1199 if (a == b) {\r
1200 switch (op) {\r
1201 case Py_EQ:case Py_LE:case Py_GE:\r
1202 result = Py_True;\r
1203 goto out;\r
1204 case Py_NE:case Py_LT:case Py_GT:\r
1205 result = Py_False;\r
1206 goto out;\r
1207 }\r
1208 }\r
1209 if (op == Py_EQ) {\r
1210 /* Supporting Py_NE here as well does not save\r
1211 much time, since Py_NE is rarely used. */\r
1212 if (Py_SIZE(a) == Py_SIZE(b)\r
1213 && (a->ob_sval[0] == b->ob_sval[0]\r
1214 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {\r
1215 result = Py_True;\r
1216 } else {\r
1217 result = Py_False;\r
1218 }\r
1219 goto out;\r
1220 }\r
1221 len_a = Py_SIZE(a); len_b = Py_SIZE(b);\r
1222 min_len = (len_a < len_b) ? len_a : len_b;\r
1223 if (min_len > 0) {\r
1224 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);\r
1225 if (c==0)\r
1226 c = memcmp(a->ob_sval, b->ob_sval, min_len);\r
1227 } else\r
1228 c = 0;\r
1229 if (c == 0)\r
1230 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;\r
1231 switch (op) {\r
1232 case Py_LT: c = c < 0; break;\r
1233 case Py_LE: c = c <= 0; break;\r
1234 case Py_EQ: assert(0); break; /* unreachable */\r
1235 case Py_NE: c = c != 0; break;\r
1236 case Py_GT: c = c > 0; break;\r
1237 case Py_GE: c = c >= 0; break;\r
1238 default:\r
1239 result = Py_NotImplemented;\r
1240 goto out;\r
1241 }\r
1242 result = c ? Py_True : Py_False;\r
1243 out:\r
1244 Py_INCREF(result);\r
1245 return result;\r
1246}\r
1247\r
1248int\r
1249_PyString_Eq(PyObject *o1, PyObject *o2)\r
1250{\r
1251 PyStringObject *a = (PyStringObject*) o1;\r
1252 PyStringObject *b = (PyStringObject*) o2;\r
1253 return Py_SIZE(a) == Py_SIZE(b)\r
1254 && *a->ob_sval == *b->ob_sval\r
1255 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;\r
1256}\r
1257\r
1258static long\r
1259string_hash(PyStringObject *a)\r
1260{\r
1261 register Py_ssize_t len;\r
1262 register unsigned char *p;\r
1263 register long x;\r
1264\r
1265 if (a->ob_shash != -1)\r
1266 return a->ob_shash;\r
1267 len = Py_SIZE(a);\r
1268 p = (unsigned char *) a->ob_sval;\r
1269 x = *p << 7;\r
1270 while (--len >= 0)\r
1271 x = (1000003*x) ^ *p++;\r
1272 x ^= Py_SIZE(a);\r
1273 if (x == -1)\r
1274 x = -2;\r
1275 a->ob_shash = x;\r
1276 return x;\r
1277}\r
1278\r
1279static PyObject*\r
1280string_subscript(PyStringObject* self, PyObject* item)\r
1281{\r
1282 if (PyIndex_Check(item)) {\r
1283 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);\r
1284 if (i == -1 && PyErr_Occurred())\r
1285 return NULL;\r
1286 if (i < 0)\r
1287 i += PyString_GET_SIZE(self);\r
1288 return string_item(self, i);\r
1289 }\r
1290 else if (PySlice_Check(item)) {\r
1291 Py_ssize_t start, stop, step, slicelength, cur, i;\r
1292 char* source_buf;\r
1293 char* result_buf;\r
1294 PyObject* result;\r
1295\r
1296 if (PySlice_GetIndicesEx((PySliceObject*)item,\r
1297 PyString_GET_SIZE(self),\r
1298 &start, &stop, &step, &slicelength) < 0) {\r
1299 return NULL;\r
1300 }\r
1301\r
1302 if (slicelength <= 0) {\r
1303 return PyString_FromStringAndSize("", 0);\r
1304 }\r
1305 else if (start == 0 && step == 1 &&\r
1306 slicelength == PyString_GET_SIZE(self) &&\r
1307 PyString_CheckExact(self)) {\r
1308 Py_INCREF(self);\r
1309 return (PyObject *)self;\r
1310 }\r
1311 else if (step == 1) {\r
1312 return PyString_FromStringAndSize(\r
1313 PyString_AS_STRING(self) + start,\r
1314 slicelength);\r
1315 }\r
1316 else {\r
1317 source_buf = PyString_AsString((PyObject*)self);\r
1318 result_buf = (char *)PyMem_Malloc(slicelength);\r
1319 if (result_buf == NULL)\r
1320 return PyErr_NoMemory();\r
1321\r
1322 for (cur = start, i = 0; i < slicelength;\r
1323 cur += step, i++) {\r
1324 result_buf[i] = source_buf[cur];\r
1325 }\r
1326\r
1327 result = PyString_FromStringAndSize(result_buf,\r
1328 slicelength);\r
1329 PyMem_Free(result_buf);\r
1330 return result;\r
1331 }\r
1332 }\r
1333 else {\r
1334 PyErr_Format(PyExc_TypeError,\r
1335 "string indices must be integers, not %.200s",\r
1336 Py_TYPE(item)->tp_name);\r
1337 return NULL;\r
1338 }\r
1339}\r
1340\r
1341static Py_ssize_t\r
1342string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)\r
1343{\r
1344 if ( index != 0 ) {\r
1345 PyErr_SetString(PyExc_SystemError,\r
1346 "accessing non-existent string segment");\r
1347 return -1;\r
1348 }\r
1349 *ptr = (void *)self->ob_sval;\r
1350 return Py_SIZE(self);\r
1351}\r
1352\r
1353static Py_ssize_t\r
1354string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)\r
1355{\r
1356 PyErr_SetString(PyExc_TypeError,\r
1357 "Cannot use string as modifiable buffer");\r
1358 return -1;\r
1359}\r
1360\r
1361static Py_ssize_t\r
1362string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)\r
1363{\r
1364 if ( lenp )\r
1365 *lenp = Py_SIZE(self);\r
1366 return 1;\r
1367}\r
1368\r
1369static Py_ssize_t\r
1370string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)\r
1371{\r
1372 if ( index != 0 ) {\r
1373 PyErr_SetString(PyExc_SystemError,\r
1374 "accessing non-existent string segment");\r
1375 return -1;\r
1376 }\r
1377 *ptr = self->ob_sval;\r
1378 return Py_SIZE(self);\r
1379}\r
1380\r
1381static int\r
1382string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)\r
1383{\r
1384 return PyBuffer_FillInfo(view, (PyObject*)self,\r
1385 (void *)self->ob_sval, Py_SIZE(self),\r
1386 1, flags);\r
1387}\r
1388\r
/* Sequence slot table for the string type.  Each entry is labelled with
   the slot it fills; the two assignment slots are 0, i.e. no handler is
   installed for item or slice assignment. */
1389 static PySequenceMethods string_as_sequence = {\r
1390 (lenfunc)string_length, /*sq_length*/\r
1391 (binaryfunc)string_concat, /*sq_concat*/\r
1392 (ssizeargfunc)string_repeat, /*sq_repeat*/\r
1393 (ssizeargfunc)string_item, /*sq_item*/\r
1394 (ssizessizeargfunc)string_slice, /*sq_slice*/\r
1395 0, /*sq_ass_item*/\r
1396 0, /*sq_ass_slice*/\r
1397 (objobjproc)string_contains /*sq_contains*/\r
1398 };\r
1399\r
/* Mapping slot table for the string type: a length function, then
   string_subscript for self[item]; the third entry is 0 (presumably the
   subscript-assignment slot -- confirm against PyMappingMethods). */
1400 static PyMappingMethods string_as_mapping = {\r
1401 (lenfunc)string_length,\r
1402 (binaryfunc)string_subscript,\r
1403 0,\r
1404 };\r
1405\r
/* Buffer slot table for the string type, wiring in the
   string_buffer_* accessors in the order read, write, segment count,
   char buffer, get-buffer.  The final entry is 0 (presumably the
   release-buffer slot -- confirm against PyBufferProcs). */
1406 static PyBufferProcs string_as_buffer = {\r
1407 (readbufferproc)string_buffer_getreadbuf,\r
1408 (writebufferproc)string_buffer_getwritebuf,\r
1409 (segcountproc)string_buffer_getsegcount,\r
1410 (charbufferproc)string_buffer_getcharbuf,\r
1411 (getbufferproc)string_buffer_getbuffer,\r
1412 0, /* XXX */\r
1413 };\r
1414\r
1415\r
1416\r
/* Strip-direction selectors.  Their values double as indices into
   stripformat[] below, so the two must stay in the same order. */
1417 #define LEFTSTRIP 0\r
1418 #define RIGHTSTRIP 1\r
1419 #define BOTHSTRIP 2\r
1420\r
1421 /* Arrays indexed by above */\r
/* Argument-parsing format strings, one per strip direction. */
1422 static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};\r
1423\r
/* Method name for strip direction i: the format string with its
   3-character "|O:" prefix skipped. */
1424 #define STRIPNAME(i) (stripformat[i]+3)\r
1425\r
1426PyDoc_STRVAR(split__doc__,\r
1427"S.split([sep [,maxsplit]]) -> list of strings\n\\r
1428\n\\r
1429Return a list of the words in the string S, using sep as the\n\\r
1430delimiter string. If maxsplit is given, at most maxsplit\n\\r
1431splits are done. If sep is not specified or is None, any\n\\r
1432whitespace string is a separator and empty strings are removed\n\\r
1433from the result.");\r
1434\r
1435static PyObject *\r
1436string_split(PyStringObject *self, PyObject *args)\r
1437{\r
1438 Py_ssize_t len = PyString_GET_SIZE(self), n;\r
1439 Py_ssize_t maxsplit = -1;\r
1440 const char *s = PyString_AS_STRING(self), *sub;\r
1441 PyObject *subobj = Py_None;\r
1442\r
1443 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))\r
1444 return NULL;\r
1445 if (maxsplit < 0)\r
1446 maxsplit = PY_SSIZE_T_MAX;\r
1447 if (subobj == Py_None)\r
1448 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);\r
1449 if (PyString_Check(subobj)) {\r
1450 sub = PyString_AS_STRING(subobj);\r
1451 n = PyString_GET_SIZE(subobj);\r
1452 }\r
1453#ifdef Py_USING_UNICODE\r
1454 else if (PyUnicode_Check(subobj))\r
1455 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);\r
1456#endif\r
1457 else if (PyObject_AsCharBuffer(subobj, &sub, &n))\r
1458 return NULL;\r
1459\r
1460 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);\r
1461}\r
1462\r
1463PyDoc_STRVAR(partition__doc__,\r
1464"S.partition(sep) -> (head, sep, tail)\n\\r
1465\n\\r
1466Search for the separator sep in S, and return the part before it,\n\\r
1467the separator itself, and the part after it. If the separator is not\n\\r
1468found, return S and two empty strings.");\r
1469\r
1470static PyObject *\r
1471string_partition(PyStringObject *self, PyObject *sep_obj)\r
1472{\r
1473 const char *sep;\r
1474 Py_ssize_t sep_len;\r
1475\r
1476 if (PyString_Check(sep_obj)) {\r
1477 sep = PyString_AS_STRING(sep_obj);\r
1478 sep_len = PyString_GET_SIZE(sep_obj);\r
1479 }\r
1480#ifdef Py_USING_UNICODE\r
1481 else if (PyUnicode_Check(sep_obj))\r
1482 return PyUnicode_Partition((PyObject *) self, sep_obj);\r
1483#endif\r
1484 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))\r
1485 return NULL;\r
1486\r
1487 return stringlib_partition(\r
1488 (PyObject*) self,\r
1489 PyString_AS_STRING(self), PyString_GET_SIZE(self),\r
1490 sep_obj, sep, sep_len\r
1491 );\r
1492}\r
1493\r
1494PyDoc_STRVAR(rpartition__doc__,\r
1495"S.rpartition(sep) -> (head, sep, tail)\n\\r
1496\n\\r
1497Search for the separator sep in S, starting at the end of S, and return\n\\r
1498the part before it, the separator itself, and the part after it. If the\n\\r
1499separator is not found, return two empty strings and S.");\r
1500\r
1501static PyObject *\r
1502string_rpartition(PyStringObject *self, PyObject *sep_obj)\r
1503{\r
1504 const char *sep;\r
1505 Py_ssize_t sep_len;\r
1506\r
1507 if (PyString_Check(sep_obj)) {\r
1508 sep = PyString_AS_STRING(sep_obj);\r
1509 sep_len = PyString_GET_SIZE(sep_obj);\r
1510 }\r
1511#ifdef Py_USING_UNICODE\r
1512 else if (PyUnicode_Check(sep_obj))\r
1513 return PyUnicode_RPartition((PyObject *) self, sep_obj);\r
1514#endif\r
1515 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))\r
1516 return NULL;\r
1517\r
1518 return stringlib_rpartition(\r
1519 (PyObject*) self,\r
1520 PyString_AS_STRING(self), PyString_GET_SIZE(self),\r
1521 sep_obj, sep, sep_len\r
1522 );\r
1523}\r
1524\r
1525PyDoc_STRVAR(rsplit__doc__,\r
1526"S.rsplit([sep [,maxsplit]]) -> list of strings\n\\r
1527\n\\r
1528Return a list of the words in the string S, using sep as the\n\\r
1529delimiter string, starting at the end of the string and working\n\\r
1530to the front. If maxsplit is given, at most maxsplit splits are\n\\r
1531done. If sep is not specified or is None, any whitespace string\n\\r
1532is a separator.");\r
1533\r
1534static PyObject *\r
1535string_rsplit(PyStringObject *self, PyObject *args)\r
1536{\r
1537 Py_ssize_t len = PyString_GET_SIZE(self), n;\r
1538 Py_ssize_t maxsplit = -1;\r
1539 const char *s = PyString_AS_STRING(self), *sub;\r
1540 PyObject *subobj = Py_None;\r
1541\r
1542 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))\r
1543 return NULL;\r
1544 if (maxsplit < 0)\r
1545 maxsplit = PY_SSIZE_T_MAX;\r
1546 if (subobj == Py_None)\r
1547 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);\r
1548 if (PyString_Check(subobj)) {\r
1549 sub = PyString_AS_STRING(subobj);\r
1550 n = PyString_GET_SIZE(subobj);\r
1551 }\r
1552#ifdef Py_USING_UNICODE\r
1553 else if (PyUnicode_Check(subobj))\r
1554 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);\r
1555#endif\r
1556 else if (PyObject_AsCharBuffer(subobj, &sub, &n))\r
1557 return NULL;\r
1558\r
1559 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);\r
1560}\r
1561\r
1562\r
1563PyDoc_STRVAR(join__doc__,\r
1564"S.join(iterable) -> string\n\\r
1565\n\\r
1566Return a string which is the concatenation of the strings in the\n\\r
1567iterable. The separator between elements is S.");\r
1568\r
1569static PyObject *\r
1570string_join(PyStringObject *self, PyObject *orig)\r
1571{\r
1572 char *sep = PyString_AS_STRING(self);\r
1573 const Py_ssize_t seplen = PyString_GET_SIZE(self);\r
1574 PyObject *res = NULL;\r
1575 char *p;\r
1576 Py_ssize_t seqlen = 0;\r
1577 size_t sz = 0;\r
1578 Py_ssize_t i;\r
1579 PyObject *seq, *item;\r
1580\r
1581 seq = PySequence_Fast(orig, "");\r
1582 if (seq == NULL) {\r
1583 return NULL;\r
1584 }\r
1585\r
1586 seqlen = PySequence_Size(seq);\r
1587 if (seqlen == 0) {\r
1588 Py_DECREF(seq);\r
1589 return PyString_FromString("");\r
1590 }\r
1591 if (seqlen == 1) {\r
1592 item = PySequence_Fast_GET_ITEM(seq, 0);\r
1593 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {\r
1594 Py_INCREF(item);\r
1595 Py_DECREF(seq);\r
1596 return item;\r
1597 }\r
1598 }\r
1599\r
1600 /* There are at least two things to join, or else we have a subclass\r
1601 * of the builtin types in the sequence.\r
1602 * Do a pre-pass to figure out the total amount of space we'll\r
1603 * need (sz), see whether any argument is absurd, and defer to\r
1604 * the Unicode join if appropriate.\r
1605 */\r
1606 for (i = 0; i < seqlen; i++) {\r
1607 const size_t old_sz = sz;\r
1608 item = PySequence_Fast_GET_ITEM(seq, i);\r
1609 if (!PyString_Check(item)){\r
1610#ifdef Py_USING_UNICODE\r
1611 if (PyUnicode_Check(item)) {\r
1612 /* Defer to Unicode join.\r
1613 * CAUTION: There's no gurantee that the\r
1614 * original sequence can be iterated over\r
1615 * again, so we must pass seq here.\r
1616 */\r
1617 PyObject *result;\r
1618 result = PyUnicode_Join((PyObject *)self, seq);\r
1619 Py_DECREF(seq);\r
1620 return result;\r
1621 }\r
1622#endif\r
1623 PyErr_Format(PyExc_TypeError,\r
1624 "sequence item %zd: expected string,"\r
1625 " %.80s found",\r
1626 i, Py_TYPE(item)->tp_name);\r
1627 Py_DECREF(seq);\r
1628 return NULL;\r
1629 }\r
1630 sz += PyString_GET_SIZE(item);\r
1631 if (i != 0)\r
1632 sz += seplen;\r
1633 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {\r
1634 PyErr_SetString(PyExc_OverflowError,\r
1635 "join() result is too long for a Python string");\r
1636 Py_DECREF(seq);\r
1637 return NULL;\r
1638 }\r
1639 }\r
1640\r
1641 /* Allocate result space. */\r
1642 res = PyString_FromStringAndSize((char*)NULL, sz);\r
1643 if (res == NULL) {\r
1644 Py_DECREF(seq);\r
1645 return NULL;\r
1646 }\r
1647\r
1648 /* Catenate everything. */\r
1649 p = PyString_AS_STRING(res);\r
1650 for (i = 0; i < seqlen; ++i) {\r
1651 size_t n;\r
1652 item = PySequence_Fast_GET_ITEM(seq, i);\r
1653 n = PyString_GET_SIZE(item);\r
1654 Py_MEMCPY(p, PyString_AS_STRING(item), n);\r
1655 p += n;\r
1656 if (i < seqlen - 1) {\r
1657 Py_MEMCPY(p, sep, seplen);\r
1658 p += seplen;\r
1659 }\r
1660 }\r
1661\r
1662 Py_DECREF(seq);\r
1663 return res;\r
1664}\r
1665\r
1666PyObject *\r
1667_PyString_Join(PyObject *sep, PyObject *x)\r
1668{\r
1669 assert(sep != NULL && PyString_Check(sep));\r
1670 assert(x != NULL);\r
1671 return string_join((PyStringObject *)sep, x);\r
1672}\r
1673\r
/* Helper macro to fix up start/end slice values in place.
 *
 *   end:   capped at len; if negative, len is added and the result is
 *          clamped at 0.
 *   start: if negative, len is added and the result is clamped at 0.
 *
 * start and end must be modifiable lvalues.  All three arguments are
 * evaluated more than once, so they must be free of side effects.
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement, which keeps it safe under an unbraced
 * if/else; invoke it with a trailing semicolon.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
1688\r
1689Py_LOCAL_INLINE(Py_ssize_t)\r
1690string_find_internal(PyStringObject *self, PyObject *args, int dir)\r
1691{\r
1692 PyObject *subobj;\r
1693 const char *sub;\r
1694 Py_ssize_t sub_len;\r
1695 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;\r
1696\r
1697 if (!stringlib_parse_args_finds("find/rfind/index/rindex",\r
1698 args, &subobj, &start, &end))\r
1699 return -2;\r
1700\r
1701 if (PyString_Check(subobj)) {\r
1702 sub = PyString_AS_STRING(subobj);\r
1703 sub_len = PyString_GET_SIZE(subobj);\r
1704 }\r
1705#ifdef Py_USING_UNICODE\r
1706 else if (PyUnicode_Check(subobj))\r
1707 return PyUnicode_Find(\r
1708 (PyObject *)self, subobj, start, end, dir);\r
1709#endif\r
1710 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))\r
1711 /* XXX - the "expected a character buffer object" is pretty\r
1712 confusing for a non-expert. remap to something else ? */\r
1713 return -2;\r
1714\r
1715 if (dir > 0)\r
1716 return stringlib_find_slice(\r
1717 PyString_AS_STRING(self), PyString_GET_SIZE(self),\r
1718 sub, sub_len, start, end);\r
1719 else\r
1720 return stringlib_rfind_slice(\r
1721 PyString_AS_STRING(self), PyString_GET_SIZE(self),\r
1722 sub, sub_len, start, end);\r
1723}\r
1724\r
1725\r
1726PyDoc_STRVAR(find__doc__,\r
1727"S.find(sub [,start [,end]]) -> int\n\\r
1728\n\\r
1729Return the lowest index in S where substring sub is found,\n\\r
1730such that sub is contained within s[start:end]. Optional\n\\r
1731arguments start and end are interpreted as in slice notation.\n\\r
1732\n\\r
1733Return -1 on failure.");\r
1734\r
1735static PyObject *\r
1736string_find(PyStringObject *self, PyObject *args)\r
1737{\r
1738 Py_ssize_t result = string_find_internal(self, args, +1);\r
1739 if (result == -2)\r
1740 return NULL;\r
1741 return PyInt_FromSsize_t(result);\r
1742}\r
1743\r
1744\r
1745PyDoc_STRVAR(index__doc__,\r
1746"S.index(sub [,start [,end]]) -> int\n\\r
1747\n\\r
1748Like S.find() but raise ValueError when the substring is not found.");\r
1749\r
1750static PyObject *\r
1751string_index(PyStringObject *self, PyObject *args)\r
1752{\r
1753 Py_ssize_t result = string_find_internal(self, args, +1);\r
1754 if (result == -2)\r
1755 return NULL;\r
1756 if (result == -1) {\r
1757 PyErr_SetString(PyExc_ValueError,\r
1758 "substring not found");\r
1759 return NULL;\r
1760 }\r
1761 return PyInt_FromSsize_t(result);\r
1762}\r
1763\r
1764\r
1765PyDoc_STRVAR(rfind__doc__,\r
1766"S.rfind(sub [,start [,end]]) -> int\n\\r
1767\n\\r
1768Return the highest index in S where substring sub is found,\n\\r
1769such that sub is contained within s[start:end]. Optional\n\\r
1770arguments start and end are interpreted as in slice notation.\n\\r
1771\n\\r
1772Return -1 on failure.");\r
1773\r
1774static PyObject *\r
1775string_rfind(PyStringObject *self, PyObject *args)\r
1776{\r
1777 Py_ssize_t result = string_find_internal(self, args, -1);\r
1778 if (result == -2)\r
1779 return NULL;\r
1780 return PyInt_FromSsize_t(result);\r
1781}\r
1782\r
1783\r
1784PyDoc_STRVAR(rindex__doc__,\r
1785"S.rindex(sub [,start [,end]]) -> int\n\\r
1786\n\\r
1787Like S.rfind() but raise ValueError when the substring is not found.");\r
1788\r
1789static PyObject *\r
1790string_rindex(PyStringObject *self, PyObject *args)\r
1791{\r
1792 Py_ssize_t result = string_find_internal(self, args, -1);\r
1793 if (result == -2)\r
1794 return NULL;\r
1795 if (result == -1) {\r
1796 PyErr_SetString(PyExc_ValueError,\r
1797 "substring not found");\r
1798 return NULL;\r
1799 }\r
1800 return PyInt_FromSsize_t(result);\r
1801}\r
1802\r
1803\r
1804Py_LOCAL_INLINE(PyObject *)\r
1805do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)\r
1806{\r
1807 char *s = PyString_AS_STRING(self);\r
1808 Py_ssize_t len = PyString_GET_SIZE(self);\r
1809 char *sep = PyString_AS_STRING(sepobj);\r
1810 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);\r
1811 Py_ssize_t i, j;\r
1812\r
1813 i = 0;\r
1814 if (striptype != RIGHTSTRIP) {\r
1815 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {\r
1816 i++;\r
1817 }\r
1818 }\r
1819\r
1820 j = len;\r
1821 if (striptype != LEFTSTRIP) {\r
1822 do {\r
1823 j--;\r
1824 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));\r
1825 j++;\r
1826 }\r
1827\r
1828 if (i == 0 && j == len && PyString_CheckExact(self)) {\r
1829 Py_INCREF(self);\r
1830 return (PyObject*)self;\r
1831 }\r
1832 else\r
1833 return PyString_FromStringAndSize(s+i, j-i);\r
1834}\r
1835\r
1836\r
1837Py_LOCAL_INLINE(PyObject *)\r
1838do_strip(PyStringObject *self, int striptype)\r
1839{\r
1840 char *s = PyString_AS_STRING(self);\r
1841 Py_ssize_t len = PyString_GET_SIZE(self), i, j;\r
1842\r
1843 i = 0;\r
1844 if (striptype != RIGHTSTRIP) {\r
1845 while (i < len && isspace(Py_CHARMASK(s[i]))) {\r
1846 i++;\r
1847 }\r
1848 }\r
1849\r
1850 j = len;\r
1851 if (striptype != LEFTSTRIP) {\r
1852 do {\r
1853 j--;\r
1854 } while (j >= i && isspace(Py_CHARMASK(s[j])));\r
1855 j++;\r
1856 }\r
1857\r
1858 if (i == 0 && j == len && PyString_CheckExact(self)) {\r
1859 Py_INCREF(self);\r
1860 return (PyObject*)self;\r
1861 }\r
1862 else\r
1863 return PyString_FromStringAndSize(s+i, j-i);\r
1864}\r
1865\r
1866\r
1867Py_LOCAL_INLINE(PyObject *)\r
1868do_argstrip(PyStringObject *self, int striptype, PyObject *args)\r
1869{\r
1870 PyObject *sep = NULL;\r
1871\r
1872 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))\r
1873 return NULL;\r
1874\r
1875 if (sep != NULL && sep != Py_None) {\r
1876 if (PyString_Check(sep))\r
1877 return do_xstrip(self, striptype, sep);\r
1878#ifdef Py_USING_UNICODE\r
1879 else if (PyUnicode_Check(sep)) {\r
1880 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);\r
1881 PyObject *res;\r
1882 if (uniself==NULL)\r
1883 return NULL;\r
1884 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,\r
1885 striptype, sep);\r
1886 Py_DECREF(uniself);\r
1887 return res;\r
1888 }\r
1889#endif\r
1890 PyErr_Format(PyExc_TypeError,\r
1891#ifdef Py_USING_UNICODE\r
1892 "%s arg must be None, str or unicode",\r
1893#else\r
1894 "%s arg must be None or str",\r
1895#endif\r
1896 STRIPNAME(striptype));\r
1897 return NULL;\r
1898 }\r
1899\r
1900 return do_strip(self, striptype);\r
1901}\r
1902\r
1903\r
1904PyDoc_STRVAR(strip__doc__,\r
1905"S.strip([chars]) -> string or unicode\n\\r
1906\n\\r
1907Return a copy of the string S with leading and trailing\n\\r
1908whitespace removed.\n\\r
1909If chars is given and not None, remove characters in chars instead.\n\\r
1910If chars is unicode, S will be converted to unicode before stripping");\r
1911\r
1912static PyObject *\r
1913string_strip(PyStringObject *self, PyObject *args)\r
1914{\r
1915 if (PyTuple_GET_SIZE(args) == 0)\r
1916 return do_strip(self, BOTHSTRIP); /* Common case */\r
1917 else\r
1918 return do_argstrip(self, BOTHSTRIP, args);\r
1919}\r
1920\r
1921\r
1922PyDoc_STRVAR(lstrip__doc__,\r
1923"S.lstrip([chars]) -> string or unicode\n\\r
1924\n\\r
1925Return a copy of the string S with leading whitespace removed.\n\\r
1926If chars is given and not None, remove characters in chars instead.\n\\r
1927If chars is unicode, S will be converted to unicode before stripping");\r
1928\r
1929static PyObject *\r
1930string_lstrip(PyStringObject *self, PyObject *args)\r
1931{\r
1932 if (PyTuple_GET_SIZE(args) == 0)\r
1933 return do_strip(self, LEFTSTRIP); /* Common case */\r
1934 else\r
1935 return do_argstrip(self, LEFTSTRIP, args);\r
1936}\r
1937\r
1938\r
1939PyDoc_STRVAR(rstrip__doc__,\r
1940"S.rstrip([chars]) -> string or unicode\n\\r
1941\n\\r
1942Return a copy of the string S with trailing whitespace removed.\n\\r
1943If chars is given and not None, remove characters in chars instead.\n\\r
1944If chars is unicode, S will be converted to unicode before stripping");\r
1945\r
1946static PyObject *\r
1947string_rstrip(PyStringObject *self, PyObject *args)\r
1948{\r
1949 if (PyTuple_GET_SIZE(args) == 0)\r
1950 return do_strip(self, RIGHTSTRIP); /* Common case */\r
1951 else\r
1952 return do_argstrip(self, RIGHTSTRIP, args);\r
1953}\r
1954\r
1955\r
1956PyDoc_STRVAR(lower__doc__,\r
1957"S.lower() -> string\n\\r
1958\n\\r
1959Return a copy of the string S converted to lowercase.");\r
1960\r
1961/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */\r
1962#ifndef _tolower\r
1963#define _tolower tolower\r
1964#endif\r
1965\r
1966static PyObject *\r
1967string_lower(PyStringObject *self)\r
1968{\r
1969 char *s;\r
1970 Py_ssize_t i, n = PyString_GET_SIZE(self);\r
1971 PyObject *newobj;\r
1972\r
1973 newobj = PyString_FromStringAndSize(NULL, n);\r
1974 if (!newobj)\r
1975 return NULL;\r
1976\r
1977 s = PyString_AS_STRING(newobj);\r
1978\r
1979 Py_MEMCPY(s, PyString_AS_STRING(self), n);\r
1980\r
1981 for (i = 0; i < n; i++) {\r
1982 int c = Py_CHARMASK(s[i]);\r
1983 if (isupper(c))\r
1984 s[i] = _tolower(c);\r
1985 }\r
1986\r
1987 return newobj;\r
1988}\r
1989\r
1990PyDoc_STRVAR(upper__doc__,\r
1991"S.upper() -> string\n\\r
1992\n\\r
1993Return a copy of the string S converted to uppercase.");\r
1994\r
1995#ifndef _toupper\r
1996#define _toupper toupper\r
1997#endif\r
1998\r
1999static PyObject *\r
2000string_upper(PyStringObject *self)\r
2001{\r
2002 char *s;\r
2003 Py_ssize_t i, n = PyString_GET_SIZE(self);\r
2004 PyObject *newobj;\r
2005\r
2006 newobj = PyString_FromStringAndSize(NULL, n);\r
2007 if (!newobj)\r
2008 return NULL;\r
2009\r
2010 s = PyString_AS_STRING(newobj);\r
2011\r
2012 Py_MEMCPY(s, PyString_AS_STRING(self), n);\r
2013\r
2014 for (i = 0; i < n; i++) {\r
2015 int c = Py_CHARMASK(s[i]);\r
2016 if (islower(c))\r
2017 s[i] = _toupper(c);\r
2018 }\r
2019\r
2020 return newobj;\r
2021}\r
2022\r
2023PyDoc_STRVAR(title__doc__,\r
2024"S.title() -> string\n\\r
2025\n\\r
2026Return a titlecased version of S, i.e. words start with uppercase\n\\r
2027characters, all remaining cased characters have lowercase.");\r
2028\r
2029static PyObject*\r
2030string_title(PyStringObject *self)\r
2031{\r
2032 char *s = PyString_AS_STRING(self), *s_new;\r
2033 Py_ssize_t i, n = PyString_GET_SIZE(self);\r
2034 int previous_is_cased = 0;\r
2035 PyObject *newobj;\r
2036\r
2037 newobj = PyString_FromStringAndSize(NULL, n);\r
2038 if (newobj == NULL)\r
2039 return NULL;\r
2040 s_new = PyString_AsString(newobj);\r
2041 for (i = 0; i < n; i++) {\r
2042 int c = Py_CHARMASK(*s++);\r
2043 if (islower(c)) {\r
2044 if (!previous_is_cased)\r
2045 c = toupper(c);\r
2046 previous_is_cased = 1;\r
2047 } else if (isupper(c)) {\r
2048 if (previous_is_cased)\r
2049 c = tolower(c);\r
2050 previous_is_cased = 1;\r
2051 } else\r
2052 previous_is_cased = 0;\r
2053 *s_new++ = c;\r
2054 }\r
2055 return newobj;\r
2056}\r
2057\r
2058PyDoc_STRVAR(capitalize__doc__,\r
2059"S.capitalize() -> string\n\\r
2060\n\\r
2061Return a copy of the string S with only its first character\n\\r
2062capitalized.");\r
2063\r
2064static PyObject *\r
2065string_capitalize(PyStringObject *self)\r
2066{\r
2067 char *s = PyString_AS_STRING(self), *s_new;\r
2068 Py_ssize_t i, n = PyString_GET_SIZE(self);\r
2069 PyObject *newobj;\r
2070\r
2071 newobj = PyString_FromStringAndSize(NULL, n);\r
2072 if (newobj == NULL)\r
2073 return NULL;\r
2074 s_new = PyString_AsString(newobj);\r
2075 if (0 < n) {\r
2076 int c = Py_CHARMASK(*s++);\r
2077 if (islower(c))\r
2078 *s_new = toupper(c);\r
2079 else\r
2080 *s_new = c;\r
2081 s_new++;\r
2082 }\r
2083 for (i = 1; i < n; i++) {\r
2084 int c = Py_CHARMASK(*s++);\r
2085 if (isupper(c))\r
2086 *s_new = tolower(c);\r
2087 else\r
2088 *s_new = c;\r
2089 s_new++;\r
2090 }\r
2091 return newobj;\r
2092}\r
2093\r
2094\r
2095PyDoc_STRVAR(count__doc__,\r
2096"S.count(sub[, start[, end]]) -> int\n\\r
2097\n\\r
2098Return the number of non-overlapping occurrences of substring sub in\n\\r
2099string S[start:end]. Optional arguments start and end are interpreted\n\\r
2100as in slice notation.");\r
2101\r
2102static PyObject *\r
2103string_count(PyStringObject *self, PyObject *args)\r
2104{\r
2105 PyObject *sub_obj;\r
2106 const char *str = PyString_AS_STRING(self), *sub;\r
2107 Py_ssize_t sub_len;\r
2108 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;\r
2109\r
2110 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))\r
2111 return NULL;\r
2112\r
2113 if (PyString_Check(sub_obj)) {\r
2114 sub = PyString_AS_STRING(sub_obj);\r
2115 sub_len = PyString_GET_SIZE(sub_obj);\r
2116 }\r
2117#ifdef Py_USING_UNICODE\r
2118 else if (PyUnicode_Check(sub_obj)) {\r
2119 Py_ssize_t count;\r
2120 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);\r
2121 if (count == -1)\r
2122 return NULL;\r
2123 else\r
2124 return PyInt_FromSsize_t(count);\r
2125 }\r
2126#endif\r
2127 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))\r
2128 return NULL;\r
2129\r
2130 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));\r
2131\r
2132 return PyInt_FromSsize_t(\r
2133 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)\r
2134 );\r
2135}\r
2136\r
2137PyDoc_STRVAR(swapcase__doc__,\r
2138"S.swapcase() -> string\n\\r
2139\n\\r
2140Return a copy of the string S with uppercase characters\n\\r
2141converted to lowercase and vice versa.");\r
2142\r
2143static PyObject *\r
2144string_swapcase(PyStringObject *self)\r
2145{\r
2146 char *s = PyString_AS_STRING(self), *s_new;\r
2147 Py_ssize_t i, n = PyString_GET_SIZE(self);\r
2148 PyObject *newobj;\r
2149\r
2150 newobj = PyString_FromStringAndSize(NULL, n);\r
2151 if (newobj == NULL)\r
2152 return NULL;\r
2153 s_new = PyString_AsString(newobj);\r
2154 for (i = 0; i < n; i++) {\r
2155 int c = Py_CHARMASK(*s++);\r
2156 if (islower(c)) {\r
2157 *s_new = toupper(c);\r
2158 }\r
2159 else if (isupper(c)) {\r
2160 *s_new = tolower(c);\r
2161 }\r
2162 else\r
2163 *s_new = c;\r
2164 s_new++;\r
2165 }\r
2166 return newobj;\r
2167}\r
2168\r
2169\r
2170PyDoc_STRVAR(translate__doc__,\r
2171"S.translate(table [,deletechars]) -> string\n\\r
2172\n\\r
2173Return a copy of the string S, where all characters occurring\n\\r
2174in the optional argument deletechars are removed, and the\n\\r
2175remaining characters have been mapped through the given\n\\r
2176translation table, which must be a string of length 256.");\r
2177\r
2178static PyObject *\r
2179string_translate(PyStringObject *self, PyObject *args)\r
2180{\r
2181 register char *input, *output;\r
2182 const char *table;\r
2183 register Py_ssize_t i, c, changed = 0;\r
2184 PyObject *input_obj = (PyObject*)self;\r
2185 const char *output_start, *del_table=NULL;\r
2186 Py_ssize_t inlen, tablen, dellen = 0;\r
2187 PyObject *result;\r
2188 int trans_table[256];\r
2189 PyObject *tableobj, *delobj = NULL;\r
2190\r
2191 if (!PyArg_UnpackTuple(args, "translate", 1, 2,\r
2192 &tableobj, &delobj))\r
2193 return NULL;\r
2194\r
2195 if (PyString_Check(tableobj)) {\r
2196 table = PyString_AS_STRING(tableobj);\r
2197 tablen = PyString_GET_SIZE(tableobj);\r
2198 }\r
2199 else if (tableobj == Py_None) {\r
2200 table = NULL;\r
2201 tablen = 256;\r
2202 }\r
2203#ifdef Py_USING_UNICODE\r
2204 else if (PyUnicode_Check(tableobj)) {\r
2205 /* Unicode .translate() does not support the deletechars\r
2206 parameter; instead a mapping to None will cause characters\r
2207 to be deleted. */\r
2208 if (delobj != NULL) {\r
2209 PyErr_SetString(PyExc_TypeError,\r
2210 "deletions are implemented differently for unicode");\r
2211 return NULL;\r
2212 }\r
2213 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);\r
2214 }\r
2215#endif\r
2216 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))\r
2217 return NULL;\r
2218\r
2219 if (tablen != 256) {\r
2220 PyErr_SetString(PyExc_ValueError,\r
2221 "translation table must be 256 characters long");\r
2222 return NULL;\r
2223 }\r
2224\r
2225 if (delobj != NULL) {\r
2226 if (PyString_Check(delobj)) {\r
2227 del_table = PyString_AS_STRING(delobj);\r
2228 dellen = PyString_GET_SIZE(delobj);\r
2229 }\r
2230#ifdef Py_USING_UNICODE\r
2231 else if (PyUnicode_Check(delobj)) {\r
2232 PyErr_SetString(PyExc_TypeError,\r
2233 "deletions are implemented differently for unicode");\r
2234 return NULL;\r
2235 }\r
2236#endif\r
2237 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))\r
2238 return NULL;\r
2239 }\r
2240 else {\r
2241 del_table = NULL;\r
2242 dellen = 0;\r
2243 }\r
2244\r
2245 inlen = PyString_GET_SIZE(input_obj);\r
2246 result = PyString_FromStringAndSize((char *)NULL, inlen);\r
2247 if (result == NULL)\r
2248 return NULL;\r
2249 output_start = output = PyString_AsString(result);\r
2250 input = PyString_AS_STRING(input_obj);\r
2251\r
2252 if (dellen == 0 && table != NULL) {\r
2253 /* If no deletions are required, use faster code */\r
2254 for (i = inlen; --i >= 0; ) {\r
2255 c = Py_CHARMASK(*input++);\r
2256 if (Py_CHARMASK((*output++ = table[c])) != c)\r
2257 changed = 1;\r
2258 }\r
2259 if (changed || !PyString_CheckExact(input_obj))\r
2260 return result;\r
2261 Py_DECREF(result);\r
2262 Py_INCREF(input_obj);\r
2263 return input_obj;\r
2264 }\r
2265\r
2266 if (table == NULL) {\r
2267 for (i = 0; i < 256; i++)\r
2268 trans_table[i] = Py_CHARMASK(i);\r
2269 } else {\r
2270 for (i = 0; i < 256; i++)\r
2271 trans_table[i] = Py_CHARMASK(table[i]);\r
2272 }\r
2273\r
2274 for (i = 0; i < dellen; i++)\r
2275 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;\r
2276\r
2277 for (i = inlen; --i >= 0; ) {\r
2278 c = Py_CHARMASK(*input++);\r
2279 if (trans_table[c] != -1)\r
2280 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)\r
2281 continue;\r
2282 changed = 1;\r
2283 }\r
2284 if (!changed && PyString_CheckExact(input_obj)) {\r
2285 Py_DECREF(result);\r
2286 Py_INCREF(input_obj);\r
2287 return input_obj;\r
2288 }\r
2289 /* Fix the size of the resulting string */\r
2290 if (inlen > 0 && _PyString_Resize(&result, output - output_start))\r
2291 return NULL;\r
2292 return result;\r
2293}\r
2294\r
2295\r
2296/* find and count characters and substrings */\r
2297\r
/* Locate the first byte equal to c within the first target_len bytes of
   target.  Evaluates to a char * pointing at the match, or NULL when the
   byte does not occur.  Every argument is parenthesized so that arbitrary
   expressions can be passed without changing how the call parses. */
#define findchar(target, target_len, c)                             \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2300\r
2301/* String ops must return a string. */\r
2302/* If the object is subclass of string, create a copy */\r
2303Py_LOCAL(PyStringObject *)\r
2304return_self(PyStringObject *self)\r
2305{\r
2306 if (PyString_CheckExact(self)) {\r
2307 Py_INCREF(self);\r
2308 return self;\r
2309 }\r
2310 return (PyStringObject *)PyString_FromStringAndSize(\r
2311 PyString_AS_STRING(self),\r
2312 PyString_GET_SIZE(self));\r
2313}\r
2314\r
2315Py_LOCAL_INLINE(Py_ssize_t)\r
2316countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)\r
2317{\r
2318 Py_ssize_t count=0;\r
2319 const char *start=target;\r
2320 const char *end=target+target_len;\r
2321\r
2322 while ( (start=findchar(start, end-start, c)) != NULL ) {\r
2323 count++;\r
2324 if (count >= maxcount)\r
2325 break;\r
2326 start += 1;\r
2327 }\r
2328 return count;\r
2329}\r
2330\r
2331\r
2332/* Algorithms for different cases of string replacement */\r
2333\r
2334/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */\r
2335Py_LOCAL(PyStringObject *)\r
2336replace_interleave(PyStringObject *self,\r
2337 const char *to_s, Py_ssize_t to_len,\r
2338 Py_ssize_t maxcount)\r
2339{\r
2340 char *self_s, *result_s;\r
2341 Py_ssize_t self_len, result_len;\r
2342 Py_ssize_t count, i, product;\r
2343 PyStringObject *result;\r
2344\r
2345 self_len = PyString_GET_SIZE(self);\r
2346\r
2347 /* 1 at the end plus 1 after every character */\r
2348 count = self_len+1;\r
2349 if (maxcount < count)\r
2350 count = maxcount;\r
2351\r
2352 /* Check for overflow */\r
2353 /* result_len = count * to_len + self_len; */\r
2354 product = count * to_len;\r
2355 if (product / to_len != count) {\r
2356 PyErr_SetString(PyExc_OverflowError,\r
2357 "replace string is too long");\r
2358 return NULL;\r
2359 }\r
2360 result_len = product + self_len;\r
2361 if (result_len < 0) {\r
2362 PyErr_SetString(PyExc_OverflowError,\r
2363 "replace string is too long");\r
2364 return NULL;\r
2365 }\r
2366\r
2367 if (! (result = (PyStringObject *)\r
2368 PyString_FromStringAndSize(NULL, result_len)) )\r
2369 return NULL;\r
2370\r
2371 self_s = PyString_AS_STRING(self);\r
2372 result_s = PyString_AS_STRING(result);\r
2373\r
2374 /* TODO: special case single character, which doesn't need memcpy */\r
2375\r
2376 /* Lay the first one down (guaranteed this will occur) */\r
2377 Py_MEMCPY(result_s, to_s, to_len);\r
2378 result_s += to_len;\r
2379 count -= 1;\r
2380\r
2381 for (i=0; i<count; i++) {\r
2382 *result_s++ = *self_s++;\r
2383 Py_MEMCPY(result_s, to_s, to_len);\r
2384 result_s += to_len;\r
2385 }\r
2386\r
2387 /* Copy the rest of the original string */\r
2388 Py_MEMCPY(result_s, self_s, self_len-i);\r
2389\r
2390 return result;\r
2391}\r
2392\r
2393/* Special case for deleting a single character */\r
2394/* len(self)>=1, len(from)==1, to="", maxcount>=1 */\r
2395Py_LOCAL(PyStringObject *)\r
2396replace_delete_single_character(PyStringObject *self,\r
2397 char from_c, Py_ssize_t maxcount)\r
2398{\r
2399 char *self_s, *result_s;\r
2400 char *start, *next, *end;\r
2401 Py_ssize_t self_len, result_len;\r
2402 Py_ssize_t count;\r
2403 PyStringObject *result;\r
2404\r
2405 self_len = PyString_GET_SIZE(self);\r
2406 self_s = PyString_AS_STRING(self);\r
2407\r
2408 count = countchar(self_s, self_len, from_c, maxcount);\r
2409 if (count == 0) {\r
2410 return return_self(self);\r
2411 }\r
2412\r
2413 result_len = self_len - count; /* from_len == 1 */\r
2414 assert(result_len>=0);\r
2415\r
2416 if ( (result = (PyStringObject *)\r
2417 PyString_FromStringAndSize(NULL, result_len)) == NULL)\r
2418 return NULL;\r
2419 result_s = PyString_AS_STRING(result);\r
2420\r
2421 start = self_s;\r
2422 end = self_s + self_len;\r
2423 while (count-- > 0) {\r
2424 next = findchar(start, end-start, from_c);\r
2425 if (next == NULL)\r
2426 break;\r
2427 Py_MEMCPY(result_s, start, next-start);\r
2428 result_s += (next-start);\r
2429 start = next+1;\r
2430 }\r
2431 Py_MEMCPY(result_s, start, end-start);\r
2432\r
2433 return result;\r
2434}\r
2435\r
2436/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */\r
2437\r
2438Py_LOCAL(PyStringObject *)\r
2439replace_delete_substring(PyStringObject *self,\r
2440 const char *from_s, Py_ssize_t from_len,\r
2441 Py_ssize_t maxcount) {\r
2442 char *self_s, *result_s;\r
2443 char *start, *next, *end;\r
2444 Py_ssize_t self_len, result_len;\r
2445 Py_ssize_t count, offset;\r
2446 PyStringObject *result;\r
2447\r
2448 self_len = PyString_GET_SIZE(self);\r
2449 self_s = PyString_AS_STRING(self);\r
2450\r
2451 count = stringlib_count(self_s, self_len,\r
2452 from_s, from_len,\r
2453 maxcount);\r
2454\r
2455 if (count == 0) {\r
2456 /* no matches */\r
2457 return return_self(self);\r
2458 }\r
2459\r
2460 result_len = self_len - (count * from_len);\r
2461 assert (result_len>=0);\r
2462\r
2463 if ( (result = (PyStringObject *)\r
2464 PyString_FromStringAndSize(NULL, result_len)) == NULL )\r
2465 return NULL;\r
2466\r
2467 result_s = PyString_AS_STRING(result);\r
2468\r
2469 start = self_s;\r
2470 end = self_s + self_len;\r
2471 while (count-- > 0) {\r
2472 offset = stringlib_find(start, end-start,\r
2473 from_s, from_len,\r
2474 0);\r
2475 if (offset == -1)\r
2476 break;\r
2477 next = start + offset;\r
2478\r
2479 Py_MEMCPY(result_s, start, next-start);\r
2480\r
2481 result_s += (next-start);\r
2482 start = next+from_len;\r
2483 }\r
2484 Py_MEMCPY(result_s, start, end-start);\r
2485 return result;\r
2486}\r
2487\r
2488/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */\r
2489Py_LOCAL(PyStringObject *)\r
2490replace_single_character_in_place(PyStringObject *self,\r
2491 char from_c, char to_c,\r
2492 Py_ssize_t maxcount)\r
2493{\r
2494 char *self_s, *result_s, *start, *end, *next;\r
2495 Py_ssize_t self_len;\r
2496 PyStringObject *result;\r
2497\r
2498 /* The result string will be the same size */\r
2499 self_s = PyString_AS_STRING(self);\r
2500 self_len = PyString_GET_SIZE(self);\r
2501\r
2502 next = findchar(self_s, self_len, from_c);\r
2503\r
2504 if (next == NULL) {\r
2505 /* No matches; return the original string */\r
2506 return return_self(self);\r
2507 }\r
2508\r
2509 /* Need to make a new string */\r
2510 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);\r
2511 if (result == NULL)\r
2512 return NULL;\r
2513 result_s = PyString_AS_STRING(result);\r
2514 Py_MEMCPY(result_s, self_s, self_len);\r
2515\r
2516 /* change everything in-place, starting with this one */\r
2517 start = result_s + (next-self_s);\r
2518 *start = to_c;\r
2519 start++;\r
2520 end = result_s + self_len;\r
2521\r
2522 while (--maxcount > 0) {\r
2523 next = findchar(start, end-start, from_c);\r
2524 if (next == NULL)\r
2525 break;\r
2526 *next = to_c;\r
2527 start = next+1;\r
2528 }\r
2529\r
2530 return result;\r
2531}\r
2532\r
2533/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */\r
2534Py_LOCAL(PyStringObject *)\r
2535replace_substring_in_place(PyStringObject *self,\r
2536 const char *from_s, Py_ssize_t from_len,\r
2537 const char *to_s, Py_ssize_t to_len,\r
2538 Py_ssize_t maxcount)\r
2539{\r
2540 char *result_s, *start, *end;\r
2541 char *self_s;\r
2542 Py_ssize_t self_len, offset;\r
2543 PyStringObject *result;\r
2544\r
2545 /* The result string will be the same size */\r
2546\r
2547 self_s = PyString_AS_STRING(self);\r
2548 self_len = PyString_GET_SIZE(self);\r
2549\r
2550 offset = stringlib_find(self_s, self_len,\r
2551 from_s, from_len,\r
2552 0);\r
2553 if (offset == -1) {\r
2554 /* No matches; return the original string */\r
2555 return return_self(self);\r
2556 }\r
2557\r
2558 /* Need to make a new string */\r
2559 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);\r
2560 if (result == NULL)\r
2561 return NULL;\r
2562 result_s = PyString_AS_STRING(result);\r
2563 Py_MEMCPY(result_s, self_s, self_len);\r
2564\r
2565 /* change everything in-place, starting with this one */\r
2566 start = result_s + offset;\r
2567 Py_MEMCPY(start, to_s, from_len);\r
2568 start += from_len;\r
2569 end = result_s + self_len;\r
2570\r
2571 while ( --maxcount > 0) {\r
2572 offset = stringlib_find(start, end-start,\r
2573 from_s, from_len,\r
2574 0);\r
2575 if (offset==-1)\r
2576 break;\r
2577 Py_MEMCPY(start+offset, to_s, from_len);\r
2578 start += offset+from_len;\r
2579 }\r
2580\r
2581 return result;\r
2582}\r
2583\r
2584/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */\r
2585Py_LOCAL(PyStringObject *)\r
2586replace_single_character(PyStringObject *self,\r
2587 char from_c,\r
2588 const char *to_s, Py_ssize_t to_len,\r
2589 Py_ssize_t maxcount)\r
2590{\r
2591 char *self_s, *result_s;\r
2592 char *start, *next, *end;\r
2593 Py_ssize_t self_len, result_len;\r
2594 Py_ssize_t count, product;\r
2595 PyStringObject *result;\r
2596\r
2597 self_s = PyString_AS_STRING(self);\r
2598 self_len = PyString_GET_SIZE(self);\r
2599\r
2600 count = countchar(self_s, self_len, from_c, maxcount);\r
2601 if (count == 0) {\r
2602 /* no matches, return unchanged */\r
2603 return return_self(self);\r
2604 }\r
2605\r
2606 /* use the difference between current and new, hence the "-1" */\r
2607 /* result_len = self_len + count * (to_len-1) */\r
2608 product = count * (to_len-1);\r
2609 if (product / (to_len-1) != count) {\r
2610 PyErr_SetString(PyExc_OverflowError, "replace string is too long");\r
2611 return NULL;\r
2612 }\r
2613 result_len = self_len + product;\r
2614 if (result_len < 0) {\r
2615 PyErr_SetString(PyExc_OverflowError, "replace string is too long");\r
2616 return NULL;\r
2617 }\r
2618\r
2619 if ( (result = (PyStringObject *)\r
2620 PyString_FromStringAndSize(NULL, result_len)) == NULL)\r
2621 return NULL;\r
2622 result_s = PyString_AS_STRING(result);\r
2623\r
2624 start = self_s;\r
2625 end = self_s + self_len;\r
2626 while (count-- > 0) {\r
2627 next = findchar(start, end-start, from_c);\r
2628 if (next == NULL)\r
2629 break;\r
2630\r
2631 if (next == start) {\r
2632 /* replace with the 'to' */\r
2633 Py_MEMCPY(result_s, to_s, to_len);\r
2634 result_s += to_len;\r
2635 start += 1;\r
2636 } else {\r
2637 /* copy the unchanged old then the 'to' */\r
2638 Py_MEMCPY(result_s, start, next-start);\r
2639 result_s += (next-start);\r
2640 Py_MEMCPY(result_s, to_s, to_len);\r
2641 result_s += to_len;\r
2642 start = next+1;\r
2643 }\r
2644 }\r
2645 /* Copy the remainder of the remaining string */\r
2646 Py_MEMCPY(result_s, start, end-start);\r
2647\r
2648 return result;\r
2649}\r
2650\r
2651/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */\r
2652Py_LOCAL(PyStringObject *)\r
2653replace_substring(PyStringObject *self,\r
2654 const char *from_s, Py_ssize_t from_len,\r
2655 const char *to_s, Py_ssize_t to_len,\r
2656 Py_ssize_t maxcount) {\r
2657 char *self_s, *result_s;\r
2658 char *start, *next, *end;\r
2659 Py_ssize_t self_len, result_len;\r
2660 Py_ssize_t count, offset, product;\r
2661 PyStringObject *result;\r
2662\r
2663 self_s = PyString_AS_STRING(self);\r
2664 self_len = PyString_GET_SIZE(self);\r
2665\r
2666 count = stringlib_count(self_s, self_len,\r
2667 from_s, from_len,\r
2668 maxcount);\r
2669\r
2670 if (count == 0) {\r
2671 /* no matches, return unchanged */\r
2672 return return_self(self);\r
2673 }\r
2674\r
2675 /* Check for overflow */\r
2676 /* result_len = self_len + count * (to_len-from_len) */\r
2677 product = count * (to_len-from_len);\r
2678 if (product / (to_len-from_len) != count) {\r
2679 PyErr_SetString(PyExc_OverflowError, "replace string is too long");\r
2680 return NULL;\r
2681 }\r
2682 result_len = self_len + product;\r
2683 if (result_len < 0) {\r
2684 PyErr_SetString(PyExc_OverflowError, "replace string is too long");\r
2685 return NULL;\r
2686 }\r
2687\r
2688 if ( (result = (PyStringObject *)\r
2689 PyString_FromStringAndSize(NULL, result_len)) == NULL)\r
2690 return NULL;\r
2691 result_s = PyString_AS_STRING(result);\r
2692\r
2693 start = self_s;\r
2694 end = self_s + self_len;\r
2695 while (count-- > 0) {\r
2696 offset = stringlib_find(start, end-start,\r
2697 from_s, from_len,\r
2698 0);\r
2699 if (offset == -1)\r
2700 break;\r
2701 next = start+offset;\r
2702 if (next == start) {\r
2703 /* replace with the 'to' */\r
2704 Py_MEMCPY(result_s, to_s, to_len);\r
2705 result_s += to_len;\r
2706 start += from_len;\r
2707 } else {\r
2708 /* copy the unchanged old then the 'to' */\r
2709 Py_MEMCPY(result_s, start, next-start);\r
2710 result_s += (next-start);\r
2711 Py_MEMCPY(result_s, to_s, to_len);\r
2712 result_s += to_len;\r
2713 start = next+from_len;\r
2714 }\r
2715 }\r
2716 /* Copy the remainder of the remaining string */\r
2717 Py_MEMCPY(result_s, start, end-start);\r
2718\r
2719 return result;\r
2720}\r
2721\r
2722\r
2723Py_LOCAL(PyStringObject *)\r
2724replace(PyStringObject *self,\r
2725 const char *from_s, Py_ssize_t from_len,\r
2726 const char *to_s, Py_ssize_t to_len,\r
2727 Py_ssize_t maxcount)\r
2728{\r
2729 if (maxcount < 0) {\r
2730 maxcount = PY_SSIZE_T_MAX;\r
2731 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {\r
2732 /* nothing to do; return the original string */\r
2733 return return_self(self);\r
2734 }\r
2735\r
2736 if (maxcount == 0 ||\r
2737 (from_len == 0 && to_len == 0)) {\r
2738 /* nothing to do; return the original string */\r
2739 return return_self(self);\r
2740 }\r
2741\r
2742 /* Handle zero-length special cases */\r
2743\r
2744 if (from_len == 0) {\r
2745 /* insert the 'to' string everywhere. */\r
2746 /* >>> "Python".replace("", ".") */\r
2747 /* '.P.y.t.h.o.n.' */\r
2748 return replace_interleave(self, to_s, to_len, maxcount);\r
2749 }\r
2750\r
2751 /* Except for "".replace("", "A") == "A" there is no way beyond this */\r
2752 /* point for an empty self string to generate a non-empty string */\r
2753 /* Special case so the remaining code always gets a non-empty string */\r
2754 if (PyString_GET_SIZE(self) == 0) {\r
2755 return return_self(self);\r
2756 }\r
2757\r
2758 if (to_len == 0) {\r
2759 /* delete all occurances of 'from' string */\r
2760 if (from_len == 1) {\r
2761 return replace_delete_single_character(\r
2762 self, from_s[0], maxcount);\r
2763 } else {\r
2764 return replace_delete_substring(self, from_s, from_len, maxcount);\r
2765 }\r
2766 }\r
2767\r
2768 /* Handle special case where both strings have the same length */\r
2769\r
2770 if (from_len == to_len) {\r
2771 if (from_len == 1) {\r
2772 return replace_single_character_in_place(\r
2773 self,\r
2774 from_s[0],\r
2775 to_s[0],\r
2776 maxcount);\r
2777 } else {\r
2778 return replace_substring_in_place(\r
2779 self, from_s, from_len, to_s, to_len, maxcount);\r
2780 }\r
2781 }\r
2782\r
2783 /* Otherwise use the more generic algorithms */\r
2784 if (from_len == 1) {\r
2785 return replace_single_character(self, from_s[0],\r
2786 to_s, to_len, maxcount);\r
2787 } else {\r
2788 /* len('from')>=2, len('to')>=1 */\r
2789 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);\r
2790 }\r
2791}\r
2792\r
2793PyDoc_STRVAR(replace__doc__,\r
2794"S.replace(old, new[, count]) -> string\n\\r
2795\n\\r
2796Return a copy of string S with all occurrences of substring\n\\r
2797old replaced by new. If the optional argument count is\n\\r
2798given, only the first count occurrences are replaced.");\r
2799\r
2800static PyObject *\r
2801string_replace(PyStringObject *self, PyObject *args)\r
2802{\r
2803 Py_ssize_t count = -1;\r
2804 PyObject *from, *to;\r
2805 const char *from_s, *to_s;\r
2806 Py_ssize_t from_len, to_len;\r
2807\r
2808 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))\r
2809 return NULL;\r
2810\r
2811 if (PyString_Check(from)) {\r
2812 from_s = PyString_AS_STRING(from);\r
2813 from_len = PyString_GET_SIZE(from);\r
2814 }\r
2815#ifdef Py_USING_UNICODE\r
2816 if (PyUnicode_Check(from))\r
2817 return PyUnicode_Replace((PyObject *)self,\r
2818 from, to, count);\r
2819#endif\r
2820 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))\r
2821 return NULL;\r
2822\r
2823 if (PyString_Check(to)) {\r
2824 to_s = PyString_AS_STRING(to);\r
2825 to_len = PyString_GET_SIZE(to);\r
2826 }\r
2827#ifdef Py_USING_UNICODE\r
2828 else if (PyUnicode_Check(to))\r
2829 return PyUnicode_Replace((PyObject *)self,\r
2830 from, to, count);\r
2831#endif\r
2832 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))\r
2833 return NULL;\r
2834\r
2835 return (PyObject *)replace((PyStringObject *) self,\r
2836 from_s, from_len,\r
2837 to_s, to_len, count);\r
2838}\r
2839\r
2840/** End DALKE **/\r
2841\r
2842/* Matches the end (direction >= 0) or start (direction < 0) of self\r
2843 * against substr, using the start and end arguments. Returns\r
2844 * -1 on error, 0 if not found and 1 if found.\r
2845 */\r
2846Py_LOCAL(int)\r
2847_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,\r
2848 Py_ssize_t end, int direction)\r
2849{\r
2850 Py_ssize_t len = PyString_GET_SIZE(self);\r
2851 Py_ssize_t slen;\r
2852 const char* sub;\r
2853 const char* str;\r
2854\r
2855 if (PyString_Check(substr)) {\r
2856 sub = PyString_AS_STRING(substr);\r
2857 slen = PyString_GET_SIZE(substr);\r
2858 }\r
2859#ifdef Py_USING_UNICODE\r
2860 else if (PyUnicode_Check(substr))\r
2861 return PyUnicode_Tailmatch((PyObject *)self,\r
2862 substr, start, end, direction);\r
2863#endif\r
2864 else if (PyObject_AsCharBuffer(substr, &sub, &slen))\r
2865 return -1;\r
2866 str = PyString_AS_STRING(self);\r
2867\r
2868 ADJUST_INDICES(start, end, len);\r
2869\r
2870 if (direction < 0) {\r
2871 /* startswith */\r
2872 if (start+slen > len)\r
2873 return 0;\r
2874 } else {\r
2875 /* endswith */\r
2876 if (end-start < slen || start > len)\r
2877 return 0;\r
2878\r
2879 if (end-slen > start)\r
2880 start = end - slen;\r
2881 }\r
2882 if (end-start >= slen)\r
2883 return ! memcmp(str+start, sub, slen);\r
2884 return 0;\r
2885}\r
2886\r
2887\r
2888PyDoc_STRVAR(startswith__doc__,\r
2889"S.startswith(prefix[, start[, end]]) -> bool\n\\r
2890\n\\r
2891Return True if S starts with the specified prefix, False otherwise.\n\\r
2892With optional start, test S beginning at that position.\n\\r
2893With optional end, stop comparing S at that position.\n\\r
2894prefix can also be a tuple of strings to try.");\r
2895\r
2896static PyObject *\r
2897string_startswith(PyStringObject *self, PyObject *args)\r
2898{\r
2899 Py_ssize_t start = 0;\r
2900 Py_ssize_t end = PY_SSIZE_T_MAX;\r
2901 PyObject *subobj;\r
2902 int result;\r
2903\r
2904 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))\r
2905 return NULL;\r
2906 if (PyTuple_Check(subobj)) {\r
2907 Py_ssize_t i;\r
2908 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {\r
2909 result = _string_tailmatch(self,\r
2910 PyTuple_GET_ITEM(subobj, i),\r
2911 start, end, -1);\r
2912 if (result == -1)\r
2913 return NULL;\r
2914 else if (result) {\r
2915 Py_RETURN_TRUE;\r
2916 }\r
2917 }\r
2918 Py_RETURN_FALSE;\r
2919 }\r
2920 result = _string_tailmatch(self, subobj, start, end, -1);\r
2921 if (result == -1) {\r
2922 if (PyErr_ExceptionMatches(PyExc_TypeError))\r
2923 PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "\r
2924 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);\r
2925 return NULL;\r
2926 }\r
2927 else\r
2928 return PyBool_FromLong(result);\r
2929}\r
2930\r
2931\r
2932PyDoc_STRVAR(endswith__doc__,\r
2933"S.endswith(suffix[, start[, end]]) -> bool\n\\r
2934\n\\r
2935Return True if S ends with the specified suffix, False otherwise.\n\\r
2936With optional start, test S beginning at that position.\n\\r
2937With optional end, stop comparing S at that position.\n\\r
2938suffix can also be a tuple of strings to try.");\r
2939\r
2940static PyObject *\r
2941string_endswith(PyStringObject *self, PyObject *args)\r
2942{\r
2943 Py_ssize_t start = 0;\r
2944 Py_ssize_t end = PY_SSIZE_T_MAX;\r
2945 PyObject *subobj;\r
2946 int result;\r
2947\r
2948 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))\r
2949 return NULL;\r
2950 if (PyTuple_Check(subobj)) {\r
2951 Py_ssize_t i;\r
2952 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {\r
2953 result = _string_tailmatch(self,\r
2954 PyTuple_GET_ITEM(subobj, i),\r
2955 start, end, +1);\r
2956 if (result == -1)\r
2957 return NULL;\r
2958 else if (result) {\r
2959 Py_RETURN_TRUE;\r
2960 }\r
2961 }\r
2962 Py_RETURN_FALSE;\r
2963 }\r
2964 result = _string_tailmatch(self, subobj, start, end, +1);\r
2965 if (result == -1) {\r
2966 if (PyErr_ExceptionMatches(PyExc_TypeError))\r
2967 PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "\r
2968 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);\r
2969 return NULL;\r
2970 }\r
2971 else\r
2972 return PyBool_FromLong(result);\r
2973}\r
2974\r
2975\r
2976PyDoc_STRVAR(encode__doc__,\r
2977"S.encode([encoding[,errors]]) -> object\n\\r
2978\n\\r
2979Encodes S using the codec registered for encoding. encoding defaults\n\\r
2980to the default encoding. errors may be given to set a different error\n\\r
2981handling scheme. Default is 'strict' meaning that encoding errors raise\n\\r
2982a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\\r
2983'xmlcharrefreplace' as well as any other name registered with\n\\r
2984codecs.register_error that is able to handle UnicodeEncodeErrors.");\r
2985\r
2986static PyObject *\r
2987string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)\r
2988{\r
2989 static char *kwlist[] = {"encoding", "errors", 0};\r
2990 char *encoding = NULL;\r
2991 char *errors = NULL;\r
2992 PyObject *v;\r
2993\r
2994 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",\r
2995 kwlist, &encoding, &errors))\r
2996 return NULL;\r
2997 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);\r
2998 if (v == NULL)\r
2999 goto onError;\r
3000 if (!PyString_Check(v) && !PyUnicode_Check(v)) {\r
3001 PyErr_Format(PyExc_TypeError,\r
3002 "encoder did not return a string/unicode object "\r
3003 "(type=%.400s)",\r
3004 Py_TYPE(v)->tp_name);\r
3005 Py_DECREF(v);\r
3006 return NULL;\r
3007 }\r
3008 return v;\r
3009\r
3010 onError:\r
3011 return NULL;\r
3012}\r
3013\r
3014\r
3015PyDoc_STRVAR(decode__doc__,\r
3016"S.decode([encoding[,errors]]) -> object\n\\r
3017\n\\r
3018Decodes S using the codec registered for encoding. encoding defaults\n\\r
3019to the default encoding. errors may be given to set a different error\n\\r
3020handling scheme. Default is 'strict' meaning that encoding errors raise\n\\r
3021a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\\r
3022as well as any other name registered with codecs.register_error that is\n\\r
3023able to handle UnicodeDecodeErrors.");\r
3024\r
3025static PyObject *\r
3026string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)\r
3027{\r
3028 static char *kwlist[] = {"encoding", "errors", 0};\r
3029 char *encoding = NULL;\r
3030 char *errors = NULL;\r
3031 PyObject *v;\r
3032\r
3033 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",\r
3034 kwlist, &encoding, &errors))\r
3035 return NULL;\r
3036 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);\r
3037 if (v == NULL)\r
3038 goto onError;\r
3039 if (!PyString_Check(v) && !PyUnicode_Check(v)) {\r
3040 PyErr_Format(PyExc_TypeError,\r
3041 "decoder did not return a string/unicode object "\r
3042 "(type=%.400s)",\r
3043 Py_TYPE(v)->tp_name);\r
3044 Py_DECREF(v);\r
3045 return NULL;\r
3046 }\r
3047 return v;\r
3048\r
3049 onError:\r
3050 return NULL;\r
3051}\r
3052\r
3053\r
3054PyDoc_STRVAR(expandtabs__doc__,\r
3055"S.expandtabs([tabsize]) -> string\n\\r
3056\n\\r
3057Return a copy of S where all tab characters are expanded using spaces.\n\\r
3058If tabsize is not given, a tab size of 8 characters is assumed.");\r
3059\r
3060static PyObject*\r
3061string_expandtabs(PyStringObject *self, PyObject *args)\r
3062{\r
3063 const char *e, *p, *qe;\r
3064 char *q;\r
3065 Py_ssize_t i, j, incr;\r
3066 PyObject *u;\r
3067 int tabsize = 8;\r
3068\r
3069 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))\r
3070 return NULL;\r
3071\r
3072 /* First pass: determine size of output string */\r
3073 i = 0; /* chars up to and including most recent \n or \r */\r
3074 j = 0; /* chars since most recent \n or \r (use in tab calculations) */\r
3075 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */\r
3076 for (p = PyString_AS_STRING(self); p < e; p++)\r
3077 if (*p == '\t') {\r
3078 if (tabsize > 0) {\r
3079 incr = tabsize - (j % tabsize);\r
3080 if (j > PY_SSIZE_T_MAX - incr)\r
3081 goto overflow1;\r
3082 j += incr;\r
3083 }\r
3084 }\r
3085 else {\r
3086 if (j > PY_SSIZE_T_MAX - 1)\r
3087 goto overflow1;\r
3088 j++;\r
3089 if (*p == '\n' || *p == '\r') {\r
3090 if (i > PY_SSIZE_T_MAX - j)\r
3091 goto overflow1;\r
3092 i += j;\r
3093 j = 0;\r
3094 }\r
3095 }\r
3096\r
3097 if (i > PY_SSIZE_T_MAX - j)\r
3098 goto overflow1;\r
3099\r
3100 /* Second pass: create output string and fill it */\r
3101 u = PyString_FromStringAndSize(NULL, i + j);\r
3102 if (!u)\r
3103 return NULL;\r
3104\r
3105 j = 0; /* same as in first pass */\r
3106 q = PyString_AS_STRING(u); /* next output char */\r
3107 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */\r
3108\r
3109 for (p = PyString_AS_STRING(self); p < e; p++)\r
3110 if (*p == '\t') {\r
3111 if (tabsize > 0) {\r
3112 i = tabsize - (j % tabsize);\r
3113 j += i;\r
3114 while (i--) {\r
3115 if (q >= qe)\r
3116 goto overflow2;\r
3117 *q++ = ' ';\r
3118 }\r
3119 }\r
3120 }\r
3121 else {\r
3122 if (q >= qe)\r
3123 goto overflow2;\r
3124 *q++ = *p;\r
3125 j++;\r
3126 if (*p == '\n' || *p == '\r')\r
3127 j = 0;\r
3128 }\r
3129\r
3130 return u;\r
3131\r
3132 overflow2:\r
3133 Py_DECREF(u);\r
3134 overflow1:\r
3135 PyErr_SetString(PyExc_OverflowError, "new string is too long");\r
3136 return NULL;\r
3137}\r
3138\r
3139Py_LOCAL_INLINE(PyObject *)\r
3140pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)\r
3141{\r
3142 PyObject *u;\r
3143\r
3144 if (left < 0)\r
3145 left = 0;\r
3146 if (right < 0)\r
3147 right = 0;\r
3148\r
3149 if (left == 0 && right == 0 && PyString_CheckExact(self)) {\r
3150 Py_INCREF(self);\r
3151 return (PyObject *)self;\r
3152 }\r
3153\r
3154 u = PyString_FromStringAndSize(NULL,\r
3155 left + PyString_GET_SIZE(self) + right);\r
3156 if (u) {\r
3157 if (left)\r
3158 memset(PyString_AS_STRING(u), fill, left);\r
3159 Py_MEMCPY(PyString_AS_STRING(u) + left,\r
3160 PyString_AS_STRING(self),\r
3161 PyString_GET_SIZE(self));\r
3162 if (right)\r
3163 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),\r
3164 fill, right);\r
3165 }\r
3166\r
3167 return u;\r
3168}\r
3169\r
3170PyDoc_STRVAR(ljust__doc__,\r
3171"S.ljust(width[, fillchar]) -> string\n"\r
3172"\n"\r
3173"Return S left-justified in a string of length width. Padding is\n"\r
3174"done using the specified fill character (default is a space).");\r
3175\r
3176static PyObject *\r
3177string_ljust(PyStringObject *self, PyObject *args)\r
3178{\r
3179 Py_ssize_t width;\r
3180 char fillchar = ' ';\r
3181\r
3182 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))\r
3183 return NULL;\r
3184\r
3185 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {\r
3186 Py_INCREF(self);\r
3187 return (PyObject*) self;\r
3188 }\r
3189\r
3190 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);\r
3191}\r
3192\r
3193\r
3194PyDoc_STRVAR(rjust__doc__,\r
3195"S.rjust(width[, fillchar]) -> string\n"\r
3196"\n"\r
3197"Return S right-justified in a string of length width. Padding is\n"\r
3198"done using the specified fill character (default is a space)");\r
3199\r
3200static PyObject *\r
3201string_rjust(PyStringObject *self, PyObject *args)\r
3202{\r
3203 Py_ssize_t width;\r
3204 char fillchar = ' ';\r
3205\r
3206 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))\r
3207 return NULL;\r
3208\r
3209 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {\r
3210 Py_INCREF(self);\r
3211 return (PyObject*) self;\r
3212 }\r
3213\r
3214 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);\r
3215}\r
3216\r
3217\r
3218PyDoc_STRVAR(center__doc__,\r
3219"S.center(width[, fillchar]) -> string\n"\r
3220"\n"\r
3221"Return S centered in a string of length width. Padding is\n"\r
3222"done using the specified fill character (default is a space)");\r
3223\r
3224static PyObject *\r
3225string_center(PyStringObject *self, PyObject *args)\r
3226{\r
3227 Py_ssize_t marg, left;\r
3228 Py_ssize_t width;\r
3229 char fillchar = ' ';\r
3230\r
3231 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))\r
3232 return NULL;\r
3233\r
3234 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {\r
3235 Py_INCREF(self);\r
3236 return (PyObject*) self;\r
3237 }\r
3238\r
3239 marg = width - PyString_GET_SIZE(self);\r
3240 left = marg / 2 + (marg & width & 1);\r
3241\r
3242 return pad(self, left, marg - left, fillchar);\r
3243}\r
3244\r
3245PyDoc_STRVAR(zfill__doc__,\r
3246"S.zfill(width) -> string\n"\r
3247"\n"\r
3248"Pad a numeric string S with zeros on the left, to fill a field\n"\r
3249"of the specified width. The string S is never truncated.");\r
3250\r
3251static PyObject *\r
3252string_zfill(PyStringObject *self, PyObject *args)\r
3253{\r
3254 Py_ssize_t fill;\r
3255 PyObject *s;\r
3256 char *p;\r
3257 Py_ssize_t width;\r
3258\r
3259 if (!PyArg_ParseTuple(args, "n:zfill", &width))\r
3260 return NULL;\r
3261\r
3262 if (PyString_GET_SIZE(self) >= width) {\r
3263 if (PyString_CheckExact(self)) {\r
3264 Py_INCREF(self);\r
3265 return (PyObject*) self;\r
3266 }\r
3267 else\r
3268 return PyString_FromStringAndSize(\r
3269 PyString_AS_STRING(self),\r
3270 PyString_GET_SIZE(self)\r
3271 );\r
3272 }\r
3273\r
3274 fill = width - PyString_GET_SIZE(self);\r
3275\r
3276 s = pad(self, fill, 0, '0');\r
3277\r
3278 if (s == NULL)\r
3279 return NULL;\r
3280\r
3281 p = PyString_AS_STRING(s);\r
3282 if (p[fill] == '+' || p[fill] == '-') {\r
3283 /* move sign to beginning of string */\r
3284 p[0] = p[fill];\r
3285 p[fill] = '0';\r
3286 }\r
3287\r
3288 return (PyObject*) s;\r
3289}\r
3290\r
3291PyDoc_STRVAR(isspace__doc__,\r
3292"S.isspace() -> bool\n\\r
3293\n\\r
3294Return True if all characters in S are whitespace\n\\r
3295and there is at least one character in S, False otherwise.");\r
3296\r
3297static PyObject*\r
3298string_isspace(PyStringObject *self)\r
3299{\r
3300 register const unsigned char *p\r
3301 = (unsigned char *) PyString_AS_STRING(self);\r
3302 register const unsigned char *e;\r
3303\r
3304 /* Shortcut for single character strings */\r
3305 if (PyString_GET_SIZE(self) == 1 &&\r
3306 isspace(*p))\r
3307 return PyBool_FromLong(1);\r
3308\r
3309 /* Special case for empty strings */\r
3310 if (PyString_GET_SIZE(self) == 0)\r
3311 return PyBool_FromLong(0);\r
3312\r
3313 e = p + PyString_GET_SIZE(self);\r
3314 for (; p < e; p++) {\r
3315 if (!isspace(*p))\r
3316 return PyBool_FromLong(0);\r
3317 }\r
3318 return PyBool_FromLong(1);\r
3319}\r
3320\r
3321\r
3322PyDoc_STRVAR(isalpha__doc__,\r
3323"S.isalpha() -> bool\n\\r
3324\n\\r
3325Return True if all characters in S are alphabetic\n\\r
3326and there is at least one character in S, False otherwise.");\r
3327\r
3328static PyObject*\r
3329string_isalpha(PyStringObject *self)\r
3330{\r
3331 register const unsigned char *p\r
3332 = (unsigned char *) PyString_AS_STRING(self);\r
3333 register const unsigned char *e;\r
3334\r
3335 /* Shortcut for single character strings */\r
3336 if (PyString_GET_SIZE(self) == 1 &&\r
3337 isalpha(*p))\r
3338 return PyBool_FromLong(1);\r
3339\r
3340 /* Special case for empty strings */\r
3341 if (PyString_GET_SIZE(self) == 0)\r
3342 return PyBool_FromLong(0);\r
3343\r
3344 e = p + PyString_GET_SIZE(self);\r
3345 for (; p < e; p++) {\r
3346 if (!isalpha(*p))\r
3347 return PyBool_FromLong(0);\r
3348 }\r
3349 return PyBool_FromLong(1);\r
3350}\r
3351\r
3352\r
3353PyDoc_STRVAR(isalnum__doc__,\r
3354"S.isalnum() -> bool\n\\r
3355\n\\r
3356Return True if all characters in S are alphanumeric\n\\r
3357and there is at least one character in S, False otherwise.");\r
3358\r
3359static PyObject*\r
3360string_isalnum(PyStringObject *self)\r
3361{\r
3362 register const unsigned char *p\r
3363 = (unsigned char *) PyString_AS_STRING(self);\r
3364 register const unsigned char *e;\r
3365\r
3366 /* Shortcut for single character strings */\r
3367 if (PyString_GET_SIZE(self) == 1 &&\r
3368 isalnum(*p))\r
3369 return PyBool_FromLong(1);\r
3370\r
3371 /* Special case for empty strings */\r
3372 if (PyString_GET_SIZE(self) == 0)\r
3373 return PyBool_FromLong(0);\r
3374\r
3375 e = p + PyString_GET_SIZE(self);\r
3376 for (; p < e; p++) {\r
3377 if (!isalnum(*p))\r
3378 return PyBool_FromLong(0);\r
3379 }\r
3380 return PyBool_FromLong(1);\r
3381}\r
3382\r
3383\r
3384PyDoc_STRVAR(isdigit__doc__,\r
3385"S.isdigit() -> bool\n\\r
3386\n\\r
3387Return True if all characters in S are digits\n\\r
3388and there is at least one character in S, False otherwise.");\r
3389\r
3390static PyObject*\r
3391string_isdigit(PyStringObject *self)\r
3392{\r
3393 register const unsigned char *p\r
3394 = (unsigned char *) PyString_AS_STRING(self);\r
3395 register const unsigned char *e;\r
3396\r
3397 /* Shortcut for single character strings */\r
3398 if (PyString_GET_SIZE(self) == 1 &&\r
3399 isdigit(*p))\r
3400 return PyBool_FromLong(1);\r
3401\r
3402 /* Special case for empty strings */\r
3403 if (PyString_GET_SIZE(self) == 0)\r
3404 return PyBool_FromLong(0);\r
3405\r
3406 e = p + PyString_GET_SIZE(self);\r
3407 for (; p < e; p++) {\r
3408 if (!isdigit(*p))\r
3409 return PyBool_FromLong(0);\r
3410 }\r
3411 return PyBool_FromLong(1);\r
3412}\r
3413\r
3414\r
3415PyDoc_STRVAR(islower__doc__,\r
3416"S.islower() -> bool\n\\r
3417\n\\r
3418Return True if all cased characters in S are lowercase and there is\n\\r
3419at least one cased character in S, False otherwise.");\r
3420\r
3421static PyObject*\r
3422string_islower(PyStringObject *self)\r
3423{\r
3424 register const unsigned char *p\r
3425 = (unsigned char *) PyString_AS_STRING(self);\r
3426 register const unsigned char *e;\r
3427 int cased;\r
3428\r
3429 /* Shortcut for single character strings */\r
3430 if (PyString_GET_SIZE(self) == 1)\r
3431 return PyBool_FromLong(islower(*p) != 0);\r
3432\r
3433 /* Special case for empty strings */\r
3434 if (PyString_GET_SIZE(self) == 0)\r
3435 return PyBool_FromLong(0);\r
3436\r
3437 e = p + PyString_GET_SIZE(self);\r
3438 cased = 0;\r
3439 for (; p < e; p++) {\r
3440 if (isupper(*p))\r
3441 return PyBool_FromLong(0);\r
3442 else if (!cased && islower(*p))\r
3443 cased = 1;\r
3444 }\r
3445 return PyBool_FromLong(cased);\r
3446}\r
3447\r
3448\r
3449PyDoc_STRVAR(isupper__doc__,\r
3450"S.isupper() -> bool\n\\r
3451\n\\r
3452Return True if all cased characters in S are uppercase and there is\n\\r
3453at least one cased character in S, False otherwise.");\r
3454\r
3455static PyObject*\r
3456string_isupper(PyStringObject *self)\r
3457{\r
3458 register const unsigned char *p\r
3459 = (unsigned char *) PyString_AS_STRING(self);\r
3460 register const unsigned char *e;\r
3461 int cased;\r
3462\r
3463 /* Shortcut for single character strings */\r
3464 if (PyString_GET_SIZE(self) == 1)\r
3465 return PyBool_FromLong(isupper(*p) != 0);\r
3466\r
3467 /* Special case for empty strings */\r
3468 if (PyString_GET_SIZE(self) == 0)\r
3469 return PyBool_FromLong(0);\r
3470\r
3471 e = p + PyString_GET_SIZE(self);\r
3472 cased = 0;\r
3473 for (; p < e; p++) {\r
3474 if (islower(*p))\r
3475 return PyBool_FromLong(0);\r
3476 else if (!cased && isupper(*p))\r
3477 cased = 1;\r
3478 }\r
3479 return PyBool_FromLong(cased);\r
3480}\r
3481\r
3482\r
3483PyDoc_STRVAR(istitle__doc__,\r
3484"S.istitle() -> bool\n\\r
3485\n\\r
3486Return True if S is a titlecased string and there is at least one\n\\r
3487character in S, i.e. uppercase characters may only follow uncased\n\\r
3488characters and lowercase characters only cased ones. Return False\n\\r
3489otherwise.");\r
3490\r
3491static PyObject*\r
3492string_istitle(PyStringObject *self, PyObject *uncased)\r
3493{\r
3494 register const unsigned char *p\r
3495 = (unsigned char *) PyString_AS_STRING(self);\r
3496 register const unsigned char *e;\r
3497 int cased, previous_is_cased;\r
3498\r
3499 /* Shortcut for single character strings */\r
3500 if (PyString_GET_SIZE(self) == 1)\r
3501 return PyBool_FromLong(isupper(*p) != 0);\r
3502\r
3503 /* Special case for empty strings */\r
3504 if (PyString_GET_SIZE(self) == 0)\r
3505 return PyBool_FromLong(0);\r
3506\r
3507 e = p + PyString_GET_SIZE(self);\r
3508 cased = 0;\r
3509 previous_is_cased = 0;\r
3510 for (; p < e; p++) {\r
3511 register const unsigned char ch = *p;\r
3512\r
3513 if (isupper(ch)) {\r
3514 if (previous_is_cased)\r
3515 return PyBool_FromLong(0);\r
3516 previous_is_cased = 1;\r
3517 cased = 1;\r
3518 }\r
3519 else if (islower(ch)) {\r
3520 if (!previous_is_cased)\r
3521 return PyBool_FromLong(0);\r
3522 previous_is_cased = 1;\r
3523 cased = 1;\r
3524 }\r
3525 else\r
3526 previous_is_cased = 0;\r
3527 }\r
3528 return PyBool_FromLong(cased);\r
3529}\r
3530\r
3531\r
3532PyDoc_STRVAR(splitlines__doc__,\r
3533"S.splitlines([keepends]) -> list of strings\n\\r
3534\n\\r
3535Return a list of the lines in S, breaking at line boundaries.\n\\r
3536Line breaks are not included in the resulting list unless keepends\n\\r
3537is given and true.");\r
3538\r
3539static PyObject*\r
3540string_splitlines(PyStringObject *self, PyObject *args)\r
3541{\r
3542 int keepends = 0;\r
3543\r
3544 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))\r
3545 return NULL;\r
3546\r
3547 return stringlib_splitlines(\r
3548 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),\r
3549 keepends\r
3550 );\r
3551}\r
3552\r
3553PyDoc_STRVAR(sizeof__doc__,\r
3554"S.__sizeof__() -> size of S in memory, in bytes");\r
3555\r
3556static PyObject *\r
3557string_sizeof(PyStringObject *v)\r
3558{\r
3559 Py_ssize_t res;\r
3560 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;\r
3561 return PyInt_FromSsize_t(res);\r
3562}\r
3563\r
3564static PyObject *\r
3565string_getnewargs(PyStringObject *v)\r
3566{\r
3567 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));\r
3568}\r
3569\r
3570\r
3571#include "stringlib/string_format.h"\r
3572\r
/* Docstring for S.format(); string_methods[] attaches it to the "format"
   entry, whose implementation is do_string_format. */
3573PyDoc_STRVAR(format__doc__,\r
3574"S.format(*args, **kwargs) -> string\n\\r
3575\n\\r
3576Return a formatted version of S, using substitutions from args and kwargs.\n\\r
3577The substitutions are identified by braces ('{' and '}').");\r
3578\r
3579static PyObject *\r
3580string__format__(PyObject* self, PyObject* args)\r
3581{\r
3582 PyObject *format_spec;\r
3583 PyObject *result = NULL;\r
3584 PyObject *tmp = NULL;\r
3585\r
3586 /* If 2.x, convert format_spec to the same type as value */\r
3587 /* This is to allow things like u''.format('') */\r
3588 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))\r
3589 goto done;\r
3590 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {\r
3591 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "\r
3592 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);\r
3593 goto done;\r
3594 }\r
3595 tmp = PyObject_Str(format_spec);\r
3596 if (tmp == NULL)\r
3597 goto done;\r
3598 format_spec = tmp;\r
3599\r
3600 result = _PyBytes_FormatAdvanced(self,\r
3601 PyString_AS_STRING(format_spec),\r
3602 PyString_GET_SIZE(format_spec));\r
3603done:\r
3604 Py_XDECREF(tmp);\r
3605 return result;\r
3606}\r
3607\r
3608PyDoc_STRVAR(p_format__doc__,\r
3609"S.__format__(format_spec) -> string\n\\r
3610\n\\r
3611Return a formatted version of S as described by format_spec.");\r
3612\r
3613\r
/* Method table for the str type (installed via PyString_Type.tp_methods).
   Each entry is {python name, C implementation, calling-convention flags,
   docstring}; entries that give only three fields leave the docstring
   slot zero-initialized.  The {NULL, NULL} entry terminates the table. */
3614static PyMethodDef\r
3615string_methods[] = {\r
3616 /* Counterparts of the obsolete stropmodule functions; except\r
3617 string.maketrans(). */\r
3618 {"join", (PyCFunction)string_join, METH_O, join__doc__},\r
3619 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},\r
3620 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},\r
3621 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},\r
3622 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},\r
3623 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},\r
3624 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},\r
3625 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},\r
3626 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},\r
3627 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},\r
3628 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},\r
3629 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},\r
3630 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,\r
3631 capitalize__doc__},\r
3632 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},\r
3633 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,\r
3634 endswith__doc__},\r
3635 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},\r
3636 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},\r
3637 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},\r
3638 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},\r
3639 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},\r
3640 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},\r
3641 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},\r
3642 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},\r
3643 {"rpartition", (PyCFunction)string_rpartition, METH_O,\r
3644 rpartition__doc__},\r
3645 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,\r
3646 startswith__doc__},\r
3647 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},\r
3648 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,\r
3649 swapcase__doc__},\r
3650 {"translate", (PyCFunction)string_translate, METH_VARARGS,\r
3651 translate__doc__},\r
3652 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},\r
3653 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},\r
3654 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},\r
3655 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},\r
3656 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},\r
3657 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},\r
3658 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},\r
3659 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},\r
3660 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},\r
3661 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},\r
3662 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},\r
3663 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,\r
3664 expandtabs__doc__},\r
3665 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,\r
3666 splitlines__doc__},\r
3667 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,\r
3668 sizeof__doc__},\r
3669 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},\r
3670 {NULL, NULL} /* sentinel */\r
3671};\r
3672\r
3673static PyObject *\r
3674str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);\r
3675\r
3676static PyObject *\r
3677string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)\r
3678{\r
3679 PyObject *x = NULL;\r
3680 static char *kwlist[] = {"object", 0};\r
3681\r
3682 if (type != &PyString_Type)\r
3683 return str_subtype_new(type, args, kwds);\r
3684 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))\r
3685 return NULL;\r
3686 if (x == NULL)\r
3687 return PyString_FromString("");\r
3688 return PyObject_Str(x);\r
3689}\r
3690\r
3691static PyObject *\r
3692str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)\r
3693{\r
3694 PyObject *tmp, *pnew;\r
3695 Py_ssize_t n;\r
3696\r
3697 assert(PyType_IsSubtype(type, &PyString_Type));\r
3698 tmp = string_new(&PyString_Type, args, kwds);\r
3699 if (tmp == NULL)\r
3700 return NULL;\r
3701 assert(PyString_CheckExact(tmp));\r
3702 n = PyString_GET_SIZE(tmp);\r
3703 pnew = type->tp_alloc(type, n);\r
3704 if (pnew != NULL) {\r
3705 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);\r
3706 ((PyStringObject *)pnew)->ob_shash =\r
3707 ((PyStringObject *)tmp)->ob_shash;\r
3708 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;\r
3709 }\r
3710 Py_DECREF(tmp);\r
3711 return pnew;\r
3712}\r
3713\r
3714static PyObject *\r
3715basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)\r
3716{\r
3717 PyErr_SetString(PyExc_TypeError,\r
3718 "The basestring type cannot be instantiated");\r
3719 return NULL;\r
3720}\r
3721\r
3722static PyObject *\r
3723string_mod(PyObject *v, PyObject *w)\r
3724{\r
3725 if (!PyString_Check(v)) {\r
3726 Py_INCREF(Py_NotImplemented);\r
3727 return Py_NotImplemented;\r
3728 }\r
3729 return PyString_Format(v, w);\r
3730}\r
3731\r
/* Docstring installed as PyBaseString_Type.tp_doc. */
3732PyDoc_STRVAR(basestring_doc,\r
3733"Type basestring cannot be instantiated; it is the base for str and unicode.");\r
3734\r
/* Number-protocol table for str (PyString_Type.tp_as_number).  Only
   nb_remainder is populated, with string_mod; the slots after it are not
   listed and are therefore zero-initialized. */
3735static PyNumberMethods string_as_number = {\r
3736 0, /*nb_add*/\r
3737 0, /*nb_subtract*/\r
3738 0, /*nb_multiply*/\r
3739 0, /*nb_divide*/\r
3740 string_mod, /*nb_remainder*/\r
3741};\r
3742\r
3743\r
/* Type object for "basestring".  Almost every slot is empty: the type
   only carries a name, flags (including Py_TPFLAGS_BASETYPE so it can be
   subclassed), a docstring, PyBaseObject_Type as its base, and
   basestring_new as tp_new, which always raises TypeError. */
3744PyTypeObject PyBaseString_Type = {\r
3745 PyVarObject_HEAD_INIT(&PyType_Type, 0)\r
3746 "basestring", /* tp_name */\r
3747 0, /* tp_basicsize */\r
3748 0, /* tp_itemsize */\r
3749 0, /* tp_dealloc */\r
3750 0, /* tp_print */\r
3751 0, /* tp_getattr */\r
3752 0, /* tp_setattr */\r
3753 0, /* tp_compare */\r
3754 0, /* tp_repr */\r
3755 0, /* tp_as_number */\r
3756 0, /* tp_as_sequence */\r
3757 0, /* tp_as_mapping */\r
3758 0, /* tp_hash */\r
3759 0, /* tp_call */\r
3760 0, /* tp_str */\r
3761 0, /* tp_getattro */\r
3762 0, /* tp_setattro */\r
3763 0, /* tp_as_buffer */\r
3764 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */\r
3765 basestring_doc, /* tp_doc */\r
3766 0, /* tp_traverse */\r
3767 0, /* tp_clear */\r
3768 0, /* tp_richcompare */\r
3769 0, /* tp_weaklistoffset */\r
3770 0, /* tp_iter */\r
3771 0, /* tp_iternext */\r
3772 0, /* tp_methods */\r
3773 0, /* tp_members */\r
3774 0, /* tp_getset */\r
3775 &PyBaseObject_Type, /* tp_base */\r
3776 0, /* tp_dict */\r
3777 0, /* tp_descr_get */\r
3778 0, /* tp_descr_set */\r
3779 0, /* tp_dictoffset */\r
3780 0, /* tp_init */\r
3781 0, /* tp_alloc */\r
3782 basestring_new, /* tp_new */\r
3783 0, /* tp_free */\r
3784};\r
3785\r
/* Docstring installed as PyString_Type.tp_doc. */
3786PyDoc_STRVAR(string_doc,\r
3787"str(object) -> string\n\\r
3788\n\\r
3789Return a nice string representation of the object.\n\\r
3790If the argument is a string, the return value is the same object.");\r
3791\r
/* Type object for "str".  Instances are variable-sized: the basic size
   is PyStringObject_SIZE and each item is one char.  The type derives
   from PyBaseString_Type, exposes string_methods, and is constructed
   through string_new; memory is released with PyObject_Del. */
3792PyTypeObject PyString_Type = {\r
3793 PyVarObject_HEAD_INIT(&PyType_Type, 0)\r
3794 "str", /* tp_name */\r
3795 PyStringObject_SIZE, /* tp_basicsize */\r
3796 sizeof(char), /* tp_itemsize */\r
3797 string_dealloc, /* tp_dealloc */\r
3798 (printfunc)string_print, /* tp_print */\r
3799 0, /* tp_getattr */\r
3800 0, /* tp_setattr */\r
3801 0, /* tp_compare */\r
3802 string_repr, /* tp_repr */\r
3803 &string_as_number, /* tp_as_number */\r
3804 &string_as_sequence, /* tp_as_sequence */\r
3805 &string_as_mapping, /* tp_as_mapping */\r
3806 (hashfunc)string_hash, /* tp_hash */\r
3807 0, /* tp_call */\r
3808 string_str, /* tp_str */\r
3809 PyObject_GenericGetAttr, /* tp_getattro */\r
3810 0, /* tp_setattro */\r
3811 &string_as_buffer, /* tp_as_buffer */\r
3812 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |\r
3813 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |\r
3814 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */\r
3815 string_doc, /* tp_doc */\r
3816 0, /* tp_traverse */\r
3817 0, /* tp_clear */\r
3818 (richcmpfunc)string_richcompare, /* tp_richcompare */\r
3819 0, /* tp_weaklistoffset */\r
3820 0, /* tp_iter */\r
3821 0, /* tp_iternext */\r
3822 string_methods, /* tp_methods */\r
3823 0, /* tp_members */\r
3824 0, /* tp_getset */\r
3825 &PyBaseString_Type, /* tp_base */\r
3826 0, /* tp_dict */\r
3827 0, /* tp_descr_get */\r
3828 0, /* tp_descr_set */\r
3829 0, /* tp_dictoffset */\r
3830 0, /* tp_init */\r
3831 0, /* tp_alloc */\r
3832 string_new, /* tp_new */\r
3833 PyObject_Del, /* tp_free */\r
3834};\r
3835\r
3836void\r
3837PyString_Concat(register PyObject **pv, register PyObject *w)\r
3838{\r
3839 register PyObject *v;\r
3840 if (*pv == NULL)\r
3841 return;\r
3842 if (w == NULL || !PyString_Check(*pv)) {\r
3843 Py_DECREF(*pv);\r
3844 *pv = NULL;\r
3845 return;\r
3846 }\r
3847 v = string_concat((PyStringObject *) *pv, w);\r
3848 Py_DECREF(*pv);\r
3849 *pv = v;\r
3850}\r
3851\r
3852void\r
3853PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)\r
3854{\r
3855 PyString_Concat(pv, w);\r
3856 Py_XDECREF(w);\r
3857}\r
3858\r
3859\r
3860/* The following function breaks the notion that strings are immutable:\r
3861 it changes the size of a string. We get away with this only if there\r
3862 is only one module referencing the object. You can also think of it\r
3863 as creating a new string object and destroying the old one, only\r
3864 more efficiently. In any case, don't use this if the string may\r
3865 already be known to some other part of the code...\r
3866 Note that if there's not enough memory to resize the string, the original\r
3867 string object at *pv is deallocated, *pv is set to NULL, an "out of\r
3868 memory" exception is set, and -1 is returned. Else (on success) 0 is\r
3869 returned, and the value in *pv may or may not be the same as on input.\r
3870 As always, an extra byte is allocated for a trailing \0 byte (newsize\r
3871 does *not* include that), and a trailing \0 byte is stored.\r
3872*/\r
3873\r
3874int\r
3875_PyString_Resize(PyObject **pv, Py_ssize_t newsize)\r
3876{\r
3877 register PyObject *v;\r
3878 register PyStringObject *sv;\r
3879 v = *pv;\r
3880 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||\r
3881 PyString_CHECK_INTERNED(v)) {\r
3882 *pv = 0;\r
3883 Py_DECREF(v);\r
3884 PyErr_BadInternalCall();\r
3885 return -1;\r
3886 }\r
3887 /* XXX UNREF/NEWREF interface should be more symmetrical */\r
3888 _Py_DEC_REFTOTAL;\r
3889 _Py_ForgetReference(v);\r
3890 *pv = (PyObject *)\r
3891 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);\r
3892 if (*pv == NULL) {\r
3893 PyObject_Del(v);\r
3894 PyErr_NoMemory();\r
3895 return -1;\r
3896 }\r
3897 _Py_NewReference(*pv);\r
3898 sv = (PyStringObject *) *pv;\r
3899 Py_SIZE(sv) = newsize;\r
3900 sv->ob_sval[newsize] = '\0';\r
3901 sv->ob_shash = -1; /* invalidate cached hash value */\r
3902 return 0;\r
3903}\r
3904\r
3905/* Helpers for formatstring */\r
3906\r
3907Py_LOCAL_INLINE(PyObject *)\r
3908getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)\r
3909{\r
3910 Py_ssize_t argidx = *p_argidx;\r
3911 if (argidx < arglen) {\r
3912 (*p_argidx)++;\r
3913 if (arglen < 0)\r
3914 return args;\r
3915 else\r
3916 return PyTuple_GetItem(args, argidx);\r
3917 }\r
3918 PyErr_SetString(PyExc_TypeError,\r
3919 "not enough arguments for format string");\r
3920 return NULL;\r
3921}\r
3922\r
3923/* Format codes\r
3924 * F_LJUST '-'\r
3925 * F_SIGN '+'\r
3926 * F_BLANK ' '\r
3927 * F_ALT '#'\r
3928 * F_ZERO '0'\r
3929 */\r
3930#define F_LJUST (1<<0)\r
3931#define F_SIGN (1<<1)\r
3932#define F_BLANK (1<<2)\r
3933#define F_ALT (1<<3)\r
3934#define F_ZERO (1<<4)\r
3935\r
3936/* Returns a new reference to a PyString object, or NULL on failure. */\r
3937\r
3938static PyObject *\r
3939formatfloat(PyObject *v, int flags, int prec, int type)\r
3940{\r
3941 char *p;\r
3942 PyObject *result;\r
3943 double x;\r
3944\r
3945 x = PyFloat_AsDouble(v);\r
3946 if (x == -1.0 && PyErr_Occurred()) {\r
3947 PyErr_Format(PyExc_TypeError, "float argument required, "\r
3948 "not %.200s", Py_TYPE(v)->tp_name);\r
3949 return NULL;\r
3950 }\r
3951\r
3952 if (prec < 0)\r
3953 prec = 6;\r
3954\r
3955 p = PyOS_double_to_string(x, type, prec,\r
3956 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);\r
3957\r
3958 if (p == NULL)\r
3959 return NULL;\r
3960 result = PyString_FromStringAndSize(p, strlen(p));\r
3961 PyMem_Free(p);\r
3962 return result;\r
3963}\r
3964\r
3965/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and\r
3966 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for\r
3967 * Python's regular ints.\r
3968 * Return value: a new PyString*, or NULL if error.\r
3969 * . *pbuf is set to point into it,\r
3970 * *plen set to the # of chars following that.\r
3971 * Caller must decref it when done using pbuf.\r
3972 * The string starting at *pbuf is of the form\r
3973 * "-"? ("0x" | "0X")? digit+\r
3974 * "0x"/"0X" are present only for x and X conversions, with F_ALT\r
3975 * set in flags. The case of hex digits will be correct,\r
3976 * There will be at least prec digits, zero-filled on the left if\r
3977 * necessary to get that many.\r
3978 * val object to be converted\r
3979 * flags bitmask of format flags; only F_ALT is looked at\r
3980 * prec minimum number of digits; 0-fill on left if needed\r
3981 * type a character in [duoxX]; u acts the same as d\r
3982 *\r
3983 * CAUTION: o, x and X conversions on regular ints can never\r
3984 * produce a '-' sign, but can for Python's unbounded ints.\r
3985 */\r
3986PyObject*\r
3987_PyString_FormatLong(PyObject *val, int flags, int prec, int type,\r
3988 char **pbuf, int *plen)\r
3989{\r
3990 PyObject *result = NULL;\r
3991 char *buf;\r
3992 Py_ssize_t i;\r
3993 int sign; /* 1 if '-', else 0 */\r
3994 int len; /* number of characters */\r
3995 Py_ssize_t llen;\r
3996 int numdigits; /* len == numnondigits + numdigits */\r
3997 int numnondigits = 0;\r
3998\r
3999 switch (type) {\r
4000 case 'd':\r
4001 case 'u':\r
4002 result = Py_TYPE(val)->tp_str(val);\r
4003 break;\r
4004 case 'o':\r
4005 result = Py_TYPE(val)->tp_as_number->nb_oct(val);\r
4006 break;\r
4007 case 'x':\r
4008 case 'X':\r
4009 numnondigits = 2;\r
4010 result = Py_TYPE(val)->tp_as_number->nb_hex(val);\r
4011 break;\r
4012 default:\r
4013 assert(!"'type' not in [duoxX]");\r
4014 }\r
4015 if (!result)\r
4016 return NULL;\r
4017\r
4018 buf = PyString_AsString(result);\r
4019 if (!buf) {\r
4020 Py_DECREF(result);\r
4021 return NULL;\r
4022 }\r
4023\r
4024 /* To modify the string in-place, there can only be one reference. */\r
4025 if (Py_REFCNT(result) != 1) {\r
4026 PyErr_BadInternalCall();\r
4027 return NULL;\r
4028 }\r
4029 llen = PyString_Size(result);\r
4030 if (llen > INT_MAX) {\r
4031 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");\r
4032 return NULL;\r
4033 }\r
4034 len = (int)llen;\r
4035 if (buf[len-1] == 'L') {\r
4036 --len;\r
4037 buf[len] = '\0';\r
4038 }\r
4039 sign = buf[0] == '-';\r
4040 numnondigits += sign;\r
4041 numdigits = len - numnondigits;\r
4042 assert(numdigits > 0);\r
4043\r
4044 /* Get rid of base marker unless F_ALT */\r
4045 if ((flags & F_ALT) == 0) {\r
4046 /* Need to skip 0x, 0X or 0. */\r
4047 int skipped = 0;\r
4048 switch (type) {\r
4049 case 'o':\r
4050 assert(buf[sign] == '0');\r
4051 /* If 0 is only digit, leave it alone. */\r
4052 if (numdigits > 1) {\r
4053 skipped = 1;\r
4054 --numdigits;\r
4055 }\r
4056 break;\r
4057 case 'x':\r
4058 case 'X':\r
4059 assert(buf[sign] == '0');\r
4060 assert(buf[sign + 1] == 'x');\r
4061 skipped = 2;\r
4062 numnondigits -= 2;\r
4063 break;\r
4064 }\r
4065 if (skipped) {\r
4066 buf += skipped;\r
4067 len -= skipped;\r
4068 if (sign)\r
4069 buf[0] = '-';\r
4070 }\r
4071 assert(len == numnondigits + numdigits);\r
4072 assert(numdigits > 0);\r
4073 }\r
4074\r
4075 /* Fill with leading zeroes to meet minimum width. */\r
4076 if (prec > numdigits) {\r
4077 PyObject *r1 = PyString_FromStringAndSize(NULL,\r
4078 numnondigits + prec);\r
4079 char *b1;\r
4080 if (!r1) {\r
4081 Py_DECREF(result);\r
4082 return NULL;\r
4083 }\r
4084 b1 = PyString_AS_STRING(r1);\r
4085 for (i = 0; i < numnondigits; ++i)\r
4086 *b1++ = *buf++;\r
4087 for (i = 0; i < prec - numdigits; i++)\r
4088 *b1++ = '0';\r
4089 for (i = 0; i < numdigits; i++)\r
4090 *b1++ = *buf++;\r
4091 *b1 = '\0';\r
4092 Py_DECREF(result);\r
4093 result = r1;\r
4094 buf = PyString_AS_STRING(result);\r
4095 len = numnondigits + prec;\r
4096 }\r
4097\r
4098 /* Fix up case for hex conversions. */\r
4099 if (type == 'X') {\r
4100 /* Need to convert all lower case letters to upper case.\r
4101 and need to convert 0x to 0X (and -0x to -0X). */\r
4102 for (i = 0; i < len; i++)\r
4103 if (buf[i] >= 'a' && buf[i] <= 'x')\r
4104 buf[i] -= 'a'-'A';\r
4105 }\r
4106 *pbuf = buf;\r
4107 *plen = len;\r
4108 return result;\r
4109}\r
4110\r
4111Py_LOCAL_INLINE(int)\r
4112formatint(char *buf, size_t buflen, int flags,\r
4113 int prec, int type, PyObject *v)\r
4114{\r
4115 /* fmt = '%#.' + `prec` + 'l' + `type`\r
4116 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)\r
4117 + 1 + 1 = 24 */\r
4118 char fmt[64]; /* plenty big enough! */\r
4119 char *sign;\r
4120 long x;\r
4121\r
4122 x = PyInt_AsLong(v);\r
4123 if (x == -1 && PyErr_Occurred()) {\r
4124 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",\r
4125 Py_TYPE(v)->tp_name);\r
4126 return -1;\r
4127 }\r
4128 if (x < 0 && type == 'u') {\r
4129 type = 'd';\r
4130 }\r
4131 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))\r
4132 sign = "-";\r
4133 else\r
4134 sign = "";\r
4135 if (prec < 0)\r
4136 prec = 1;\r
4137\r
4138 if ((flags & F_ALT) &&\r
4139 (type == 'x' || type == 'X')) {\r
4140 /* When converting under %#x or %#X, there are a number\r
4141 * of issues that cause pain:\r
4142 * - when 0 is being converted, the C standard leaves off\r
4143 * the '0x' or '0X', which is inconsistent with other\r
4144 * %#x/%#X conversions and inconsistent with Python's\r
4145 * hex() function\r
4146 * - there are platforms that violate the standard and\r
4147 * convert 0 with the '0x' or '0X'\r
4148 * (Metrowerks, Compaq Tru64)\r
4149 * - there are platforms that give '0x' when converting\r
4150 * under %#X, but convert 0 in accordance with the\r
4151 * standard (OS/2 EMX)\r
4152 *\r
4153 * We can achieve the desired consistency by inserting our\r
4154 * own '0x' or '0X' prefix, and substituting %x/%X in place\r
4155 * of %#x/%#X.\r
4156 *\r
4157 * Note that this is the same approach as used in\r
4158 * formatint() in unicodeobject.c\r
4159 */\r
4160 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",\r
4161 sign, type, prec, type);\r
4162 }\r
4163 else {\r
4164 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",\r
4165 sign, (flags&F_ALT) ? "#" : "",\r
4166 prec, type);\r
4167 }\r
4168\r
4169 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))\r
4170 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11\r
4171 */\r
4172 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {\r
4173 PyErr_SetString(PyExc_OverflowError,\r
4174 "formatted integer is too long (precision too large?)");\r
4175 return -1;\r
4176 }\r
4177 if (sign[0])\r
4178 PyOS_snprintf(buf, buflen, fmt, -x);\r
4179 else\r
4180 PyOS_snprintf(buf, buflen, fmt, x);\r
4181 return (int)strlen(buf);\r
4182}\r
4183\r
4184Py_LOCAL_INLINE(int)\r
4185formatchar(char *buf, size_t buflen, PyObject *v)\r
4186{\r
4187 /* presume that the buffer is at least 2 characters long */\r
4188 if (PyString_Check(v)) {\r
4189 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))\r
4190 return -1;\r
4191 }\r
4192 else {\r
4193 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))\r
4194 return -1;\r
4195 }\r
4196 buf[1] = '\0';\r
4197 return 1;\r
4198}\r
4199\r
4200/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)\r
4201\r
4202 FORMATBUFLEN is the length of the buffer in which the ints &\r
4203 chars are formatted. XXX This is a magic number. Each formatting\r
4204 routine does bounds checking to ensure no overflow, but a better\r
4205 solution may be to malloc a buffer of appropriate size for each\r
4206 format. For now, the current solution is sufficient.\r
4207*/\r
4208#define FORMATBUFLEN (size_t)120\r
4209\r
4210PyObject *\r
4211PyString_Format(PyObject *format, PyObject *args)\r
4212{\r
4213 char *fmt, *res;\r
4214 Py_ssize_t arglen, argidx;\r
4215 Py_ssize_t reslen, rescnt, fmtcnt;\r
4216 int args_owned = 0;\r
4217 PyObject *result, *orig_args;\r
4218#ifdef Py_USING_UNICODE\r
4219 PyObject *v, *w;\r
4220#endif\r
4221 PyObject *dict = NULL;\r
4222 if (format == NULL || !PyString_Check(format) || args == NULL) {\r
4223 PyErr_BadInternalCall();\r
4224 return NULL;\r
4225 }\r
4226 orig_args = args;\r
4227 fmt = PyString_AS_STRING(format);\r
4228 fmtcnt = PyString_GET_SIZE(format);\r
4229 reslen = rescnt = fmtcnt + 100;\r
4230 result = PyString_FromStringAndSize((char *)NULL, reslen);\r
4231 if (result == NULL)\r
4232 return NULL;\r
4233 res = PyString_AsString(result);\r
4234 if (PyTuple_Check(args)) {\r
4235 arglen = PyTuple_GET_SIZE(args);\r
4236 argidx = 0;\r
4237 }\r
4238 else {\r
4239 arglen = -1;\r
4240 argidx = -2;\r
4241 }\r
4242 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&\r
4243 !PyObject_TypeCheck(args, &PyBaseString_Type))\r
4244 dict = args;\r
4245 while (--fmtcnt >= 0) {\r
4246 if (*fmt != '%') {\r
4247 if (--rescnt < 0) {\r
4248 rescnt = fmtcnt + 100;\r
4249 reslen += rescnt;\r
4250 if (_PyString_Resize(&result, reslen))\r
4251 return NULL;\r
4252 res = PyString_AS_STRING(result)\r
4253 + reslen - rescnt;\r
4254 --rescnt;\r
4255 }\r
4256 *res++ = *fmt++;\r
4257 }\r
4258 else {\r
4259 /* Got a format specifier */\r
4260 int flags = 0;\r
4261 Py_ssize_t width = -1;\r
4262 int prec = -1;\r
4263 int c = '\0';\r
4264 int fill;\r
4265 int isnumok;\r
de08c53b
DM
4266 PyObject *v = NULL;\r
4267 PyObject *temp = NULL;\r
4268 char *pbuf = NULL;\r
4710c53d 4269 int sign;\r
4270 Py_ssize_t len;\r
4271 char formatbuf[FORMATBUFLEN];\r
4272 /* For format{int,char}() */\r
4273#ifdef Py_USING_UNICODE\r
4274 char *fmt_start = fmt;\r
4275 Py_ssize_t argidx_start = argidx;\r
4276#endif\r
4277\r
4278 fmt++;\r
4279 if (*fmt == '(') {\r
4280 char *keystart;\r
4281 Py_ssize_t keylen;\r
4282 PyObject *key;\r
4283 int pcount = 1;\r
4284\r
4285 if (dict == NULL) {\r
4286 PyErr_SetString(PyExc_TypeError,\r
4287 "format requires a mapping");\r
4288 goto error;\r
4289 }\r
4290 ++fmt;\r
4291 --fmtcnt;\r
4292 keystart = fmt;\r
4293 /* Skip over balanced parentheses */\r
4294 while (pcount > 0 && --fmtcnt >= 0) {\r
4295 if (*fmt == ')')\r
4296 --pcount;\r
4297 else if (*fmt == '(')\r
4298 ++pcount;\r
4299 fmt++;\r
4300 }\r
4301 keylen = fmt - keystart - 1;\r
4302 if (fmtcnt < 0 || pcount > 0) {\r
4303 PyErr_SetString(PyExc_ValueError,\r
4304 "incomplete format key");\r
4305 goto error;\r
4306 }\r
4307 key = PyString_FromStringAndSize(keystart,\r
4308 keylen);\r
4309 if (key == NULL)\r
4310 goto error;\r
4311 if (args_owned) {\r
4312 Py_DECREF(args);\r
4313 args_owned = 0;\r
4314 }\r
4315 args = PyObject_GetItem(dict, key);\r
4316 Py_DECREF(key);\r
4317 if (args == NULL) {\r
4318 goto error;\r
4319 }\r
4320 args_owned = 1;\r
4321 arglen = -1;\r
4322 argidx = -2;\r
4323 }\r
4324 while (--fmtcnt >= 0) {\r
4325 switch (c = *fmt++) {\r
4326 case '-': flags |= F_LJUST; continue;\r
4327 case '+': flags |= F_SIGN; continue;\r
4328 case ' ': flags |= F_BLANK; continue;\r
4329 case '#': flags |= F_ALT; continue;\r
4330 case '0': flags |= F_ZERO; continue;\r
4331 }\r
4332 break;\r
4333 }\r
4334 if (c == '*') {\r
4335 v = getnextarg(args, arglen, &argidx);\r
4336 if (v == NULL)\r
4337 goto error;\r
4338 if (!PyInt_Check(v)) {\r
4339 PyErr_SetString(PyExc_TypeError,\r
4340 "* wants int");\r
4341 goto error;\r
4342 }\r
4343 width = PyInt_AsLong(v);\r
4344 if (width < 0) {\r
4345 flags |= F_LJUST;\r
4346 width = -width;\r
4347 }\r
4348 if (--fmtcnt >= 0)\r
4349 c = *fmt++;\r
4350 }\r
4351 else if (c >= 0 && isdigit(c)) {\r
4352 width = c - '0';\r
4353 while (--fmtcnt >= 0) {\r
4354 c = Py_CHARMASK(*fmt++);\r
4355 if (!isdigit(c))\r
4356 break;\r
4357 if ((width*10) / 10 != width) {\r
4358 PyErr_SetString(\r
4359 PyExc_ValueError,\r
4360 "width too big");\r
4361 goto error;\r
4362 }\r
4363 width = width*10 + (c - '0');\r
4364 }\r
4365 }\r
4366 if (c == '.') {\r
4367 prec = 0;\r
4368 if (--fmtcnt >= 0)\r
4369 c = *fmt++;\r
4370 if (c == '*') {\r
4371 v = getnextarg(args, arglen, &argidx);\r
4372 if (v == NULL)\r
4373 goto error;\r
4374 if (!PyInt_Check(v)) {\r
4375 PyErr_SetString(\r
4376 PyExc_TypeError,\r
4377 "* wants int");\r
4378 goto error;\r
4379 }\r
4380 prec = PyInt_AsLong(v);\r
4381 if (prec < 0)\r
4382 prec = 0;\r
4383 if (--fmtcnt >= 0)\r
4384 c = *fmt++;\r
4385 }\r
4386 else if (c >= 0 && isdigit(c)) {\r
4387 prec = c - '0';\r
4388 while (--fmtcnt >= 0) {\r
4389 c = Py_CHARMASK(*fmt++);\r
4390 if (!isdigit(c))\r
4391 break;\r
4392 if ((prec*10) / 10 != prec) {\r
4393 PyErr_SetString(\r
4394 PyExc_ValueError,\r
4395 "prec too big");\r
4396 goto error;\r
4397 }\r
4398 prec = prec*10 + (c - '0');\r
4399 }\r
4400 }\r
4401 } /* prec */\r
4402 if (fmtcnt >= 0) {\r
4403 if (c == 'h' || c == 'l' || c == 'L') {\r
4404 if (--fmtcnt >= 0)\r
4405 c = *fmt++;\r
4406 }\r
4407 }\r
4408 if (fmtcnt < 0) {\r
4409 PyErr_SetString(PyExc_ValueError,\r
4410 "incomplete format");\r
4411 goto error;\r
4412 }\r
4413 if (c != '%') {\r
4414 v = getnextarg(args, arglen, &argidx);\r
4415 if (v == NULL)\r
4416 goto error;\r
4417 }\r
4418 sign = 0;\r
4419 fill = ' ';\r
4420 switch (c) {\r
4421 case '%':\r
4422 pbuf = "%";\r
4423 len = 1;\r
4424 break;\r
4425 case 's':\r
4426#ifdef Py_USING_UNICODE\r
4427 if (PyUnicode_Check(v)) {\r
4428 fmt = fmt_start;\r
4429 argidx = argidx_start;\r
4430 goto unicode;\r
4431 }\r
4432#endif\r
4433 temp = _PyObject_Str(v);\r
4434#ifdef Py_USING_UNICODE\r
4435 if (temp != NULL && PyUnicode_Check(temp)) {\r
4436 Py_DECREF(temp);\r
4437 fmt = fmt_start;\r
4438 argidx = argidx_start;\r
4439 goto unicode;\r
4440 }\r
4441#endif\r
4442 /* Fall through */\r
4443 case 'r':\r
4444 if (c == 'r')\r
4445 temp = PyObject_Repr(v);\r
4446 if (temp == NULL)\r
4447 goto error;\r
4448 if (!PyString_Check(temp)) {\r
4449 PyErr_SetString(PyExc_TypeError,\r
4450 "%s argument has non-string str()");\r
4451 Py_DECREF(temp);\r
4452 goto error;\r
4453 }\r
4454 pbuf = PyString_AS_STRING(temp);\r
4455 len = PyString_GET_SIZE(temp);\r
4456 if (prec >= 0 && len > prec)\r
4457 len = prec;\r
4458 break;\r
4459 case 'i':\r
4460 case 'd':\r
4461 case 'u':\r
4462 case 'o':\r
4463 case 'x':\r
4464 case 'X':\r
4465 if (c == 'i')\r
4466 c = 'd';\r
4467 isnumok = 0;\r
4468 if (PyNumber_Check(v)) {\r
4469 PyObject *iobj=NULL;\r
4470\r
4471 if (PyInt_Check(v) || (PyLong_Check(v))) {\r
4472 iobj = v;\r
4473 Py_INCREF(iobj);\r
4474 }\r
4475 else {\r
4476 iobj = PyNumber_Int(v);\r
4477 if (iobj==NULL) iobj = PyNumber_Long(v);\r
4478 }\r
4479 if (iobj!=NULL) {\r
4480 if (PyInt_Check(iobj)) {\r
4481 isnumok = 1;\r
4482 pbuf = formatbuf;\r
4483 len = formatint(pbuf,\r
4484 sizeof(formatbuf),\r
4485 flags, prec, c, iobj);\r
4486 Py_DECREF(iobj);\r
4487 if (len < 0)\r
4488 goto error;\r
4489 sign = 1;\r
4490 }\r
4491 else if (PyLong_Check(iobj)) {\r
4492 int ilen;\r
4493\r
4494 isnumok = 1;\r
4495 temp = _PyString_FormatLong(iobj, flags,\r
4496 prec, c, &pbuf, &ilen);\r
4497 Py_DECREF(iobj);\r
4498 len = ilen;\r
4499 if (!temp)\r
4500 goto error;\r
4501 sign = 1;\r
4502 }\r
4503 else {\r
4504 Py_DECREF(iobj);\r
4505 }\r
4506 }\r
4507 }\r
4508 if (!isnumok) {\r
4509 PyErr_Format(PyExc_TypeError,\r
4510 "%%%c format: a number is required, "\r
4511 "not %.200s", c, Py_TYPE(v)->tp_name);\r
4512 goto error;\r
4513 }\r
4514 if (flags & F_ZERO)\r
4515 fill = '0';\r
4516 break;\r
4517 case 'e':\r
4518 case 'E':\r
4519 case 'f':\r
4520 case 'F':\r
4521 case 'g':\r
4522 case 'G':\r
4523 temp = formatfloat(v, flags, prec, c);\r
4524 if (temp == NULL)\r
4525 goto error;\r
4526 pbuf = PyString_AS_STRING(temp);\r
4527 len = PyString_GET_SIZE(temp);\r
4528 sign = 1;\r
4529 if (flags & F_ZERO)\r
4530 fill = '0';\r
4531 break;\r
4532 case 'c':\r
4533#ifdef Py_USING_UNICODE\r
4534 if (PyUnicode_Check(v)) {\r
4535 fmt = fmt_start;\r
4536 argidx = argidx_start;\r
4537 goto unicode;\r
4538 }\r
4539#endif\r
4540 pbuf = formatbuf;\r
4541 len = formatchar(pbuf, sizeof(formatbuf), v);\r
4542 if (len < 0)\r
4543 goto error;\r
4544 break;\r
4545 default:\r
4546 PyErr_Format(PyExc_ValueError,\r
4547 "unsupported format character '%c' (0x%x) "\r
4548 "at index %zd",\r
4549 c, c,\r
4550 (Py_ssize_t)(fmt - 1 -\r
4551 PyString_AsString(format)));\r
4552 goto error;\r
4553 }\r
4554 if (sign) {\r
4555 if (*pbuf == '-' || *pbuf == '+') {\r
4556 sign = *pbuf++;\r
4557 len--;\r
4558 }\r
4559 else if (flags & F_SIGN)\r
4560 sign = '+';\r
4561 else if (flags & F_BLANK)\r
4562 sign = ' ';\r
4563 else\r
4564 sign = 0;\r
4565 }\r
4566 if (width < len)\r
4567 width = len;\r
4568 if (rescnt - (sign != 0) < width) {\r
4569 reslen -= rescnt;\r
4570 rescnt = width + fmtcnt + 100;\r
4571 reslen += rescnt;\r
4572 if (reslen < 0) {\r
4573 Py_DECREF(result);\r
4574 Py_XDECREF(temp);\r
4575 return PyErr_NoMemory();\r
4576 }\r
4577 if (_PyString_Resize(&result, reslen)) {\r
4578 Py_XDECREF(temp);\r
4579 return NULL;\r
4580 }\r
4581 res = PyString_AS_STRING(result)\r
4582 + reslen - rescnt;\r
4583 }\r
4584 if (sign) {\r
4585 if (fill != ' ')\r
4586 *res++ = sign;\r
4587 rescnt--;\r
4588 if (width > len)\r
4589 width--;\r
4590 }\r
4591 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {\r
4592 assert(pbuf[0] == '0');\r
4593 assert(pbuf[1] == c);\r
4594 if (fill != ' ') {\r
4595 *res++ = *pbuf++;\r
4596 *res++ = *pbuf++;\r
4597 }\r
4598 rescnt -= 2;\r
4599 width -= 2;\r
4600 if (width < 0)\r
4601 width = 0;\r
4602 len -= 2;\r
4603 }\r
4604 if (width > len && !(flags & F_LJUST)) {\r
4605 do {\r
4606 --rescnt;\r
4607 *res++ = fill;\r
4608 } while (--width > len);\r
4609 }\r
4610 if (fill == ' ') {\r
4611 if (sign)\r
4612 *res++ = sign;\r
4613 if ((flags & F_ALT) &&\r
4614 (c == 'x' || c == 'X')) {\r
4615 assert(pbuf[0] == '0');\r
4616 assert(pbuf[1] == c);\r
4617 *res++ = *pbuf++;\r
4618 *res++ = *pbuf++;\r
4619 }\r
4620 }\r
4621 Py_MEMCPY(res, pbuf, len);\r
4622 res += len;\r
4623 rescnt -= len;\r
4624 while (--width >= len) {\r
4625 --rescnt;\r
4626 *res++ = ' ';\r
4627 }\r
4628 if (dict && (argidx < arglen) && c != '%') {\r
4629 PyErr_SetString(PyExc_TypeError,\r
4630 "not all arguments converted during string formatting");\r
4631 Py_XDECREF(temp);\r
4632 goto error;\r
4633 }\r
4634 Py_XDECREF(temp);\r
4635 } /* '%' */\r
4636 } /* until end */\r
4637 if (argidx < arglen && !dict) {\r
4638 PyErr_SetString(PyExc_TypeError,\r
4639 "not all arguments converted during string formatting");\r
4640 goto error;\r
4641 }\r
4642 if (args_owned) {\r
4643 Py_DECREF(args);\r
4644 }\r
4645 if (_PyString_Resize(&result, reslen - rescnt))\r
4646 return NULL;\r
4647 return result;\r
4648\r
4649#ifdef Py_USING_UNICODE\r
4650 unicode:\r
4651 if (args_owned) {\r
4652 Py_DECREF(args);\r
4653 args_owned = 0;\r
4654 }\r
4655 /* Fiddle args right (remove the first argidx arguments) */\r
4656 if (PyTuple_Check(orig_args) && argidx > 0) {\r
4657 PyObject *v;\r
4658 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;\r
4659 v = PyTuple_New(n);\r
4660 if (v == NULL)\r
4661 goto error;\r
4662 while (--n >= 0) {\r
4663 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);\r
4664 Py_INCREF(w);\r
4665 PyTuple_SET_ITEM(v, n, w);\r
4666 }\r
4667 args = v;\r
4668 } else {\r
4669 Py_INCREF(orig_args);\r
4670 args = orig_args;\r
4671 }\r
4672 args_owned = 1;\r
4673 /* Take what we have of the result and let the Unicode formatting\r
4674 function format the rest of the input. */\r
4675 rescnt = res - PyString_AS_STRING(result);\r
4676 if (_PyString_Resize(&result, rescnt))\r
4677 goto error;\r
4678 fmtcnt = PyString_GET_SIZE(format) - \\r
4679 (fmt - PyString_AS_STRING(format));\r
4680 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);\r
4681 if (format == NULL)\r
4682 goto error;\r
4683 v = PyUnicode_Format(format, args);\r
4684 Py_DECREF(format);\r
4685 if (v == NULL)\r
4686 goto error;\r
4687 /* Paste what we have (result) to what the Unicode formatting\r
4688 function returned (v) and return the result (or error) */\r
4689 w = PyUnicode_Concat(result, v);\r
4690 Py_DECREF(result);\r
4691 Py_DECREF(v);\r
4692 Py_DECREF(args);\r
4693 return w;\r
4694#endif /* Py_USING_UNICODE */\r
4695\r
4696 error:\r
4697 Py_DECREF(result);\r
4698 if (args_owned) {\r
4699 Py_DECREF(args);\r
4700 }\r
4701 return NULL;\r
4702}\r
4703\r
4704void\r
4705PyString_InternInPlace(PyObject **p)\r
4706{\r
4707 register PyStringObject *s = (PyStringObject *)(*p);\r
4708 PyObject *t;\r
4709 if (s == NULL || !PyString_Check(s))\r
4710 Py_FatalError("PyString_InternInPlace: strings only please!");\r
4711 /* If it's a string subclass, we don't really know what putting\r
4712 it in the interned dict might do. */\r
4713 if (!PyString_CheckExact(s))\r
4714 return;\r
4715 if (PyString_CHECK_INTERNED(s))\r
4716 return;\r
4717 if (interned == NULL) {\r
4718 interned = PyDict_New();\r
4719 if (interned == NULL) {\r
4720 PyErr_Clear(); /* Don't leave an exception */\r
4721 return;\r
4722 }\r
4723 }\r
4724 t = PyDict_GetItem(interned, (PyObject *)s);\r
4725 if (t) {\r
4726 Py_INCREF(t);\r
4727 Py_DECREF(*p);\r
4728 *p = t;\r
4729 return;\r
4730 }\r
4731\r
4732 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {\r
4733 PyErr_Clear();\r
4734 return;\r
4735 }\r
4736 /* The two references in interned are not counted by refcnt.\r
4737 The string deallocator will take care of this */\r
4738 Py_REFCNT(s) -= 2;\r
4739 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;\r
4740}\r
4741\r
4742void\r
4743PyString_InternImmortal(PyObject **p)\r
4744{\r
4745 PyString_InternInPlace(p);\r
4746 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {\r
4747 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;\r
4748 Py_INCREF(*p);\r
4749 }\r
4750}\r
4751\r
4752\r
4753PyObject *\r
4754PyString_InternFromString(const char *cp)\r
4755{\r
4756 PyObject *s = PyString_FromString(cp);\r
4757 if (s == NULL)\r
4758 return NULL;\r
4759 PyString_InternInPlace(&s);\r
4760 return s;\r
4761}\r
4762\r
4763void\r
4764PyString_Fini(void)\r
4765{\r
4766 int i;\r
4767 for (i = 0; i < UCHAR_MAX + 1; i++) {\r
4768 Py_XDECREF(characters[i]);\r
4769 characters[i] = NULL;\r
4770 }\r
4771 Py_XDECREF(nullstring);\r
4772 nullstring = NULL;\r
4773}\r
4774\r
4775void _Py_ReleaseInternedStrings(void)\r
4776{\r
4777 PyObject *keys;\r
4778 PyStringObject *s;\r
4779 Py_ssize_t i, n;\r
4780 Py_ssize_t immortal_size = 0, mortal_size = 0;\r
4781\r
4782 if (interned == NULL || !PyDict_Check(interned))\r
4783 return;\r
4784 keys = PyDict_Keys(interned);\r
4785 if (keys == NULL || !PyList_Check(keys)) {\r
4786 PyErr_Clear();\r
4787 return;\r
4788 }\r
4789\r
4790 /* Since _Py_ReleaseInternedStrings() is intended to help a leak\r
4791 detector, interned strings are not forcibly deallocated; rather, we\r
4792 give them their stolen references back, and then clear and DECREF\r
4793 the interned dict. */\r
4794\r
4795 n = PyList_GET_SIZE(keys);\r
4796 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",\r
4797 n);\r
4798 for (i = 0; i < n; i++) {\r
4799 s = (PyStringObject *) PyList_GET_ITEM(keys, i);\r
4800 switch (s->ob_sstate) {\r
4801 case SSTATE_NOT_INTERNED:\r
4802 /* XXX Shouldn't happen */\r
4803 break;\r
4804 case SSTATE_INTERNED_IMMORTAL:\r
4805 Py_REFCNT(s) += 1;\r
4806 immortal_size += Py_SIZE(s);\r
4807 break;\r
4808 case SSTATE_INTERNED_MORTAL:\r
4809 Py_REFCNT(s) += 2;\r
4810 mortal_size += Py_SIZE(s);\r
4811 break;\r
4812 default:\r
4813 Py_FatalError("Inconsistent interned string state.");\r
4814 }\r
4815 s->ob_sstate = SSTATE_NOT_INTERNED;\r
4816 }\r
4817 fprintf(stderr, "total size of all interned strings: "\r
4818 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "\r
4819 "mortal/immortal\n", mortal_size, immortal_size);\r
4820 Py_DECREF(keys);\r
4821 PyDict_Clear(interned);\r
4822 Py_DECREF(interned);\r
4823 interned = NULL;\r
4824}\r