]>
Commit | Line | Data |
---|---|---|
53b2ba57 DM |
1 | /* String (str/bytes) object implementation */\r |
2 | \r | |
3 | #define PY_SSIZE_T_CLEAN\r | |
4 | \r | |
5 | #include "Python.h"\r | |
6 | #include <ctype.h>\r | |
7 | #include <stddef.h>\r | |
8 | \r | |
#ifdef COUNT_ALLOCS
/* Statistics counters: incremented whenever a string constructor hands out
   the cached empty string (null_strings) or a cached one-character string
   (one_strings) instead of allocating a new object. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of one-character strings, indexed by byte value
   (`*str & UCHAR_MAX'); slots stay NULL until first filled in. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* Cached empty string; NULL until the first zero-length string is built. */
static PyStringObject *nullstring;
15 | \r | |
16 | /* This dictionary holds all interned strings. Note that references to\r | |
17 | strings in this dictionary are *not* counted in the string's ob_refcnt.\r | |
18 | When the interned string reaches a refcnt of 0 the string deallocation\r | |
19 | function will delete the reference from this dictionary.\r | |
20 | \r | |
21 | Another way to look at this is to say that the actual reference\r | |
22 | count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)\r | |
23 | */\r | |
24 | static PyObject *interned;\r | |
25 | \r | |
26 | /* PyStringObject_SIZE gives the basic size of a string; any memory allocation\r | |
27 | for a string of length n should request PyStringObject_SIZE + n bytes.\r | |
28 | \r | |
29 | Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves\r | |
30 | 3 bytes per string allocation on a typical system.\r | |
31 | */\r | |
32 | #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)\r | |
33 | \r | |
34 | /*\r | |
35 | For PyString_FromString(), the parameter `str' points to a null-terminated\r | |
36 | string containing exactly `size' bytes.\r | |
37 | \r | |
38 | For PyString_FromStringAndSize(), the parameter `str' is\r | |
39 | either NULL or else points to a string containing at least `size' bytes.\r | |
40 | For PyString_FromStringAndSize(), the string in the `str' parameter does\r | |
41 | not have to be null-terminated. (Therefore it is safe to construct a\r | |
42 | substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)\r | |
43 | If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'\r | |
44 | bytes (setting the last byte to the null terminating character) and you can\r | |
45 | fill in the data yourself. If `str' is non-NULL then the resulting\r | |
46 | PyString object must be treated as immutable and you must not fill in nor\r | |
47 | alter the data yourself, since the strings may be shared.\r | |
48 | \r | |
49 | The PyObject member `op->ob_size', which denotes the number of "extra\r | |
50 | items" in a variable-size object, will contain the number of bytes\r | |
51 | allocated for string data, not counting the null terminating character.\r | |
52 | It is therefore equal to the `size' parameter (for\r | |
53 | PyString_FromStringAndSize()) or the length of the string in the `str'\r | |
54 | parameter (for PyString_FromString()).\r | |
55 | */\r | |
56 | PyObject *\r | |
57 | PyString_FromStringAndSize(const char *str, Py_ssize_t size)\r | |
58 | {\r | |
59 | register PyStringObject *op;\r | |
60 | if (size < 0) {\r | |
61 | PyErr_SetString(PyExc_SystemError,\r | |
62 | "Negative size passed to PyString_FromStringAndSize");\r | |
63 | return NULL;\r | |
64 | }\r | |
65 | if (size == 0 && (op = nullstring) != NULL) {\r | |
66 | #ifdef COUNT_ALLOCS\r | |
67 | null_strings++;\r | |
68 | #endif\r | |
69 | Py_INCREF(op);\r | |
70 | return (PyObject *)op;\r | |
71 | }\r | |
72 | if (size == 1 && str != NULL &&\r | |
73 | (op = characters[*str & UCHAR_MAX]) != NULL)\r | |
74 | {\r | |
75 | #ifdef COUNT_ALLOCS\r | |
76 | one_strings++;\r | |
77 | #endif\r | |
78 | Py_INCREF(op);\r | |
79 | return (PyObject *)op;\r | |
80 | }\r | |
81 | \r | |
82 | if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {\r | |
83 | PyErr_SetString(PyExc_OverflowError, "string is too large");\r | |
84 | return NULL;\r | |
85 | }\r | |
86 | \r | |
87 | /* Inline PyObject_NewVar */\r | |
88 | op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);\r | |
89 | if (op == NULL)\r | |
90 | return PyErr_NoMemory();\r | |
91 | PyObject_INIT_VAR(op, &PyString_Type, size);\r | |
92 | op->ob_shash = -1;\r | |
93 | op->ob_sstate = SSTATE_NOT_INTERNED;\r | |
94 | if (str != NULL)\r | |
95 | Py_MEMCPY(op->ob_sval, str, size);\r | |
96 | op->ob_sval[size] = '\0';\r | |
97 | /* share short strings */\r | |
98 | if (size == 0) {\r | |
99 | PyObject *t = (PyObject *)op;\r | |
100 | PyString_InternInPlace(&t);\r | |
101 | op = (PyStringObject *)t;\r | |
102 | nullstring = op;\r | |
103 | Py_INCREF(op);\r | |
104 | } else if (size == 1 && str != NULL) {\r | |
105 | PyObject *t = (PyObject *)op;\r | |
106 | PyString_InternInPlace(&t);\r | |
107 | op = (PyStringObject *)t;\r | |
108 | characters[*str & UCHAR_MAX] = op;\r | |
109 | Py_INCREF(op);\r | |
110 | }\r | |
111 | return (PyObject *) op;\r | |
112 | }\r | |
113 | \r | |
114 | PyObject *\r | |
115 | PyString_FromString(const char *str)\r | |
116 | {\r | |
117 | register size_t size;\r | |
118 | register PyStringObject *op;\r | |
119 | \r | |
120 | assert(str != NULL);\r | |
121 | size = strlen(str);\r | |
122 | if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {\r | |
123 | PyErr_SetString(PyExc_OverflowError,\r | |
124 | "string is too long for a Python string");\r | |
125 | return NULL;\r | |
126 | }\r | |
127 | if (size == 0 && (op = nullstring) != NULL) {\r | |
128 | #ifdef COUNT_ALLOCS\r | |
129 | null_strings++;\r | |
130 | #endif\r | |
131 | Py_INCREF(op);\r | |
132 | return (PyObject *)op;\r | |
133 | }\r | |
134 | if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {\r | |
135 | #ifdef COUNT_ALLOCS\r | |
136 | one_strings++;\r | |
137 | #endif\r | |
138 | Py_INCREF(op);\r | |
139 | return (PyObject *)op;\r | |
140 | }\r | |
141 | \r | |
142 | /* Inline PyObject_NewVar */\r | |
143 | op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);\r | |
144 | if (op == NULL)\r | |
145 | return PyErr_NoMemory();\r | |
146 | PyObject_INIT_VAR(op, &PyString_Type, size);\r | |
147 | op->ob_shash = -1;\r | |
148 | op->ob_sstate = SSTATE_NOT_INTERNED;\r | |
149 | Py_MEMCPY(op->ob_sval, str, size+1);\r | |
150 | /* share short strings */\r | |
151 | if (size == 0) {\r | |
152 | PyObject *t = (PyObject *)op;\r | |
153 | PyString_InternInPlace(&t);\r | |
154 | op = (PyStringObject *)t;\r | |
155 | nullstring = op;\r | |
156 | Py_INCREF(op);\r | |
157 | } else if (size == 1) {\r | |
158 | PyObject *t = (PyObject *)op;\r | |
159 | PyString_InternInPlace(&t);\r | |
160 | op = (PyStringObject *)t;\r | |
161 | characters[*str & UCHAR_MAX] = op;\r | |
162 | Py_INCREF(op);\r | |
163 | }\r | |
164 | return (PyObject *) op;\r | |
165 | }\r | |
166 | \r | |
167 | PyObject *\r | |
168 | PyString_FromFormatV(const char *format, va_list vargs)\r | |
169 | {\r | |
170 | va_list count;\r | |
171 | Py_ssize_t n = 0;\r | |
172 | const char* f;\r | |
173 | char *s;\r | |
174 | PyObject* string;\r | |
175 | \r | |
176 | #ifdef VA_LIST_IS_ARRAY\r | |
177 | Py_MEMCPY(count, vargs, sizeof(va_list));\r | |
178 | #else\r | |
179 | #ifdef __va_copy\r | |
180 | __va_copy(count, vargs);\r | |
181 | #else\r | |
182 | count = vargs;\r | |
183 | #endif\r | |
184 | #endif\r | |
185 | /* step 1: figure out how large a buffer we need */\r | |
186 | for (f = format; *f; f++) {\r | |
187 | if (*f == '%') {\r | |
188 | #ifdef HAVE_LONG_LONG\r | |
189 | int longlongflag = 0;\r | |
190 | #endif\r | |
191 | const char* p = f;\r | |
192 | while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))\r | |
193 | ;\r | |
194 | \r | |
195 | /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since\r | |
196 | * they don't affect the amount of space we reserve.\r | |
197 | */\r | |
198 | if (*f == 'l') {\r | |
199 | if (f[1] == 'd' || f[1] == 'u') {\r | |
200 | ++f;\r | |
201 | }\r | |
202 | #ifdef HAVE_LONG_LONG\r | |
203 | else if (f[1] == 'l' &&\r | |
204 | (f[2] == 'd' || f[2] == 'u')) {\r | |
205 | longlongflag = 1;\r | |
206 | f += 2;\r | |
207 | }\r | |
208 | #endif\r | |
209 | }\r | |
210 | else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {\r | |
211 | ++f;\r | |
212 | }\r | |
213 | \r | |
214 | switch (*f) {\r | |
215 | case 'c':\r | |
216 | (void)va_arg(count, int);\r | |
217 | /* fall through... */\r | |
218 | case '%':\r | |
219 | n++;\r | |
220 | break;\r | |
221 | case 'd': case 'u': case 'i': case 'x':\r | |
222 | (void) va_arg(count, int);\r | |
223 | #ifdef HAVE_LONG_LONG\r | |
224 | /* Need at most\r | |
225 | ceil(log10(256)*SIZEOF_LONG_LONG) digits,\r | |
226 | plus 1 for the sign. 53/22 is an upper\r | |
227 | bound for log10(256). */\r | |
228 | if (longlongflag)\r | |
229 | n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;\r | |
230 | else\r | |
231 | #endif\r | |
232 | /* 20 bytes is enough to hold a 64-bit\r | |
233 | integer. Decimal takes the most\r | |
234 | space. This isn't enough for\r | |
235 | octal. */\r | |
236 | n += 20;\r | |
237 | \r | |
238 | break;\r | |
239 | case 's':\r | |
240 | s = va_arg(count, char*);\r | |
241 | n += strlen(s);\r | |
242 | break;\r | |
243 | case 'p':\r | |
244 | (void) va_arg(count, int);\r | |
245 | /* maximum 64-bit pointer representation:\r | |
246 | * 0xffffffffffffffff\r | |
247 | * so 19 characters is enough.\r | |
248 | * XXX I count 18 -- what's the extra for?\r | |
249 | */\r | |
250 | n += 19;\r | |
251 | break;\r | |
252 | default:\r | |
253 | /* if we stumble upon an unknown\r | |
254 | formatting code, copy the rest of\r | |
255 | the format string to the output\r | |
256 | string. (we cannot just skip the\r | |
257 | code, since there's no way to know\r | |
258 | what's in the argument list) */\r | |
259 | n += strlen(p);\r | |
260 | goto expand;\r | |
261 | }\r | |
262 | } else\r | |
263 | n++;\r | |
264 | }\r | |
265 | expand:\r | |
266 | /* step 2: fill the buffer */\r | |
267 | /* Since we've analyzed how much space we need for the worst case,\r | |
268 | use sprintf directly instead of the slower PyOS_snprintf. */\r | |
269 | string = PyString_FromStringAndSize(NULL, n);\r | |
270 | if (!string)\r | |
271 | return NULL;\r | |
272 | \r | |
273 | s = PyString_AsString(string);\r | |
274 | \r | |
275 | for (f = format; *f; f++) {\r | |
276 | if (*f == '%') {\r | |
277 | const char* p = f++;\r | |
278 | Py_ssize_t i;\r | |
279 | int longflag = 0;\r | |
280 | #ifdef HAVE_LONG_LONG\r | |
281 | int longlongflag = 0;\r | |
282 | #endif\r | |
283 | int size_tflag = 0;\r | |
284 | /* parse the width.precision part (we're only\r | |
285 | interested in the precision value, if any) */\r | |
286 | n = 0;\r | |
287 | while (isdigit(Py_CHARMASK(*f)))\r | |
288 | n = (n*10) + *f++ - '0';\r | |
289 | if (*f == '.') {\r | |
290 | f++;\r | |
291 | n = 0;\r | |
292 | while (isdigit(Py_CHARMASK(*f)))\r | |
293 | n = (n*10) + *f++ - '0';\r | |
294 | }\r | |
295 | while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))\r | |
296 | f++;\r | |
297 | /* Handle %ld, %lu, %lld and %llu. */\r | |
298 | if (*f == 'l') {\r | |
299 | if (f[1] == 'd' || f[1] == 'u') {\r | |
300 | longflag = 1;\r | |
301 | ++f;\r | |
302 | }\r | |
303 | #ifdef HAVE_LONG_LONG\r | |
304 | else if (f[1] == 'l' &&\r | |
305 | (f[2] == 'd' || f[2] == 'u')) {\r | |
306 | longlongflag = 1;\r | |
307 | f += 2;\r | |
308 | }\r | |
309 | #endif\r | |
310 | }\r | |
311 | /* handle the size_t flag. */\r | |
312 | else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {\r | |
313 | size_tflag = 1;\r | |
314 | ++f;\r | |
315 | }\r | |
316 | \r | |
317 | switch (*f) {\r | |
318 | case 'c':\r | |
319 | *s++ = va_arg(vargs, int);\r | |
320 | break;\r | |
321 | case 'd':\r | |
322 | if (longflag)\r | |
323 | sprintf(s, "%ld", va_arg(vargs, long));\r | |
324 | #ifdef HAVE_LONG_LONG\r | |
325 | else if (longlongflag)\r | |
326 | sprintf(s, "%" PY_FORMAT_LONG_LONG "d",\r | |
327 | va_arg(vargs, PY_LONG_LONG));\r | |
328 | #endif\r | |
329 | else if (size_tflag)\r | |
330 | sprintf(s, "%" PY_FORMAT_SIZE_T "d",\r | |
331 | va_arg(vargs, Py_ssize_t));\r | |
332 | else\r | |
333 | sprintf(s, "%d", va_arg(vargs, int));\r | |
334 | s += strlen(s);\r | |
335 | break;\r | |
336 | case 'u':\r | |
337 | if (longflag)\r | |
338 | sprintf(s, "%lu",\r | |
339 | va_arg(vargs, unsigned long));\r | |
340 | #ifdef HAVE_LONG_LONG\r | |
341 | else if (longlongflag)\r | |
342 | sprintf(s, "%" PY_FORMAT_LONG_LONG "u",\r | |
343 | va_arg(vargs, PY_LONG_LONG));\r | |
344 | #endif\r | |
345 | else if (size_tflag)\r | |
346 | sprintf(s, "%" PY_FORMAT_SIZE_T "u",\r | |
347 | va_arg(vargs, size_t));\r | |
348 | else\r | |
349 | sprintf(s, "%u",\r | |
350 | va_arg(vargs, unsigned int));\r | |
351 | s += strlen(s);\r | |
352 | break;\r | |
353 | case 'i':\r | |
354 | sprintf(s, "%i", va_arg(vargs, int));\r | |
355 | s += strlen(s);\r | |
356 | break;\r | |
357 | case 'x':\r | |
358 | sprintf(s, "%x", va_arg(vargs, int));\r | |
359 | s += strlen(s);\r | |
360 | break;\r | |
361 | case 's':\r | |
362 | p = va_arg(vargs, char*);\r | |
363 | i = strlen(p);\r | |
364 | if (n > 0 && i > n)\r | |
365 | i = n;\r | |
366 | Py_MEMCPY(s, p, i);\r | |
367 | s += i;\r | |
368 | break;\r | |
369 | case 'p':\r | |
370 | sprintf(s, "%p", va_arg(vargs, void*));\r | |
371 | /* %p is ill-defined: ensure leading 0x. */\r | |
372 | if (s[1] == 'X')\r | |
373 | s[1] = 'x';\r | |
374 | else if (s[1] != 'x') {\r | |
375 | memmove(s+2, s, strlen(s)+1);\r | |
376 | s[0] = '0';\r | |
377 | s[1] = 'x';\r | |
378 | }\r | |
379 | s += strlen(s);\r | |
380 | break;\r | |
381 | case '%':\r | |
382 | *s++ = '%';\r | |
383 | break;\r | |
384 | default:\r | |
385 | strcpy(s, p);\r | |
386 | s += strlen(s);\r | |
387 | goto end;\r | |
388 | }\r | |
389 | } else\r | |
390 | *s++ = *f;\r | |
391 | }\r | |
392 | \r | |
393 | end:\r | |
394 | if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))\r | |
395 | return NULL;\r | |
396 | return string;\r | |
397 | }\r | |
398 | \r | |
399 | PyObject *\r | |
400 | PyString_FromFormat(const char *format, ...)\r | |
401 | {\r | |
402 | PyObject* ret;\r | |
403 | va_list vargs;\r | |
404 | \r | |
405 | #ifdef HAVE_STDARG_PROTOTYPES\r | |
406 | va_start(vargs, format);\r | |
407 | #else\r | |
408 | va_start(vargs);\r | |
409 | #endif\r | |
410 | ret = PyString_FromFormatV(format, vargs);\r | |
411 | va_end(vargs);\r | |
412 | return ret;\r | |
413 | }\r | |
414 | \r | |
415 | \r | |
416 | PyObject *PyString_Decode(const char *s,\r | |
417 | Py_ssize_t size,\r | |
418 | const char *encoding,\r | |
419 | const char *errors)\r | |
420 | {\r | |
421 | PyObject *v, *str;\r | |
422 | \r | |
423 | str = PyString_FromStringAndSize(s, size);\r | |
424 | if (str == NULL)\r | |
425 | return NULL;\r | |
426 | v = PyString_AsDecodedString(str, encoding, errors);\r | |
427 | Py_DECREF(str);\r | |
428 | return v;\r | |
429 | }\r | |
430 | \r | |
431 | PyObject *PyString_AsDecodedObject(PyObject *str,\r | |
432 | const char *encoding,\r | |
433 | const char *errors)\r | |
434 | {\r | |
435 | PyObject *v;\r | |
436 | \r | |
437 | if (!PyString_Check(str)) {\r | |
438 | PyErr_BadArgument();\r | |
439 | goto onError;\r | |
440 | }\r | |
441 | \r | |
442 | if (encoding == NULL) {\r | |
443 | #ifdef Py_USING_UNICODE\r | |
444 | encoding = PyUnicode_GetDefaultEncoding();\r | |
445 | #else\r | |
446 | PyErr_SetString(PyExc_ValueError, "no encoding specified");\r | |
447 | goto onError;\r | |
448 | #endif\r | |
449 | }\r | |
450 | \r | |
451 | /* Decode via the codec registry */\r | |
452 | v = PyCodec_Decode(str, encoding, errors);\r | |
453 | if (v == NULL)\r | |
454 | goto onError;\r | |
455 | \r | |
456 | return v;\r | |
457 | \r | |
458 | onError:\r | |
459 | return NULL;\r | |
460 | }\r | |
461 | \r | |
462 | PyObject *PyString_AsDecodedString(PyObject *str,\r | |
463 | const char *encoding,\r | |
464 | const char *errors)\r | |
465 | {\r | |
466 | PyObject *v;\r | |
467 | \r | |
468 | v = PyString_AsDecodedObject(str, encoding, errors);\r | |
469 | if (v == NULL)\r | |
470 | goto onError;\r | |
471 | \r | |
472 | #ifdef Py_USING_UNICODE\r | |
473 | /* Convert Unicode to a string using the default encoding */\r | |
474 | if (PyUnicode_Check(v)) {\r | |
475 | PyObject *temp = v;\r | |
476 | v = PyUnicode_AsEncodedString(v, NULL, NULL);\r | |
477 | Py_DECREF(temp);\r | |
478 | if (v == NULL)\r | |
479 | goto onError;\r | |
480 | }\r | |
481 | #endif\r | |
482 | if (!PyString_Check(v)) {\r | |
483 | PyErr_Format(PyExc_TypeError,\r | |
484 | "decoder did not return a string object (type=%.400s)",\r | |
485 | Py_TYPE(v)->tp_name);\r | |
486 | Py_DECREF(v);\r | |
487 | goto onError;\r | |
488 | }\r | |
489 | \r | |
490 | return v;\r | |
491 | \r | |
492 | onError:\r | |
493 | return NULL;\r | |
494 | }\r | |
495 | \r | |
496 | PyObject *PyString_Encode(const char *s,\r | |
497 | Py_ssize_t size,\r | |
498 | const char *encoding,\r | |
499 | const char *errors)\r | |
500 | {\r | |
501 | PyObject *v, *str;\r | |
502 | \r | |
503 | str = PyString_FromStringAndSize(s, size);\r | |
504 | if (str == NULL)\r | |
505 | return NULL;\r | |
506 | v = PyString_AsEncodedString(str, encoding, errors);\r | |
507 | Py_DECREF(str);\r | |
508 | return v;\r | |
509 | }\r | |
510 | \r | |
511 | PyObject *PyString_AsEncodedObject(PyObject *str,\r | |
512 | const char *encoding,\r | |
513 | const char *errors)\r | |
514 | {\r | |
515 | PyObject *v;\r | |
516 | \r | |
517 | if (!PyString_Check(str)) {\r | |
518 | PyErr_BadArgument();\r | |
519 | goto onError;\r | |
520 | }\r | |
521 | \r | |
522 | if (encoding == NULL) {\r | |
523 | #ifdef Py_USING_UNICODE\r | |
524 | encoding = PyUnicode_GetDefaultEncoding();\r | |
525 | #else\r | |
526 | PyErr_SetString(PyExc_ValueError, "no encoding specified");\r | |
527 | goto onError;\r | |
528 | #endif\r | |
529 | }\r | |
530 | \r | |
531 | /* Encode via the codec registry */\r | |
532 | v = PyCodec_Encode(str, encoding, errors);\r | |
533 | if (v == NULL)\r | |
534 | goto onError;\r | |
535 | \r | |
536 | return v;\r | |
537 | \r | |
538 | onError:\r | |
539 | return NULL;\r | |
540 | }\r | |
541 | \r | |
542 | PyObject *PyString_AsEncodedString(PyObject *str,\r | |
543 | const char *encoding,\r | |
544 | const char *errors)\r | |
545 | {\r | |
546 | PyObject *v;\r | |
547 | \r | |
548 | v = PyString_AsEncodedObject(str, encoding, errors);\r | |
549 | if (v == NULL)\r | |
550 | goto onError;\r | |
551 | \r | |
552 | #ifdef Py_USING_UNICODE\r | |
553 | /* Convert Unicode to a string using the default encoding */\r | |
554 | if (PyUnicode_Check(v)) {\r | |
555 | PyObject *temp = v;\r | |
556 | v = PyUnicode_AsEncodedString(v, NULL, NULL);\r | |
557 | Py_DECREF(temp);\r | |
558 | if (v == NULL)\r | |
559 | goto onError;\r | |
560 | }\r | |
561 | #endif\r | |
562 | if (!PyString_Check(v)) {\r | |
563 | PyErr_Format(PyExc_TypeError,\r | |
564 | "encoder did not return a string object (type=%.400s)",\r | |
565 | Py_TYPE(v)->tp_name);\r | |
566 | Py_DECREF(v);\r | |
567 | goto onError;\r | |
568 | }\r | |
569 | \r | |
570 | return v;\r | |
571 | \r | |
572 | onError:\r | |
573 | return NULL;\r | |
574 | }\r | |
575 | \r | |
576 | static void\r | |
577 | string_dealloc(PyObject *op)\r | |
578 | {\r | |
579 | switch (PyString_CHECK_INTERNED(op)) {\r | |
580 | case SSTATE_NOT_INTERNED:\r | |
581 | break;\r | |
582 | \r | |
583 | case SSTATE_INTERNED_MORTAL:\r | |
584 | /* revive dead object temporarily for DelItem */\r | |
585 | Py_REFCNT(op) = 3;\r | |
586 | if (PyDict_DelItem(interned, op) != 0)\r | |
587 | Py_FatalError(\r | |
588 | "deletion of interned string failed");\r | |
589 | break;\r | |
590 | \r | |
591 | case SSTATE_INTERNED_IMMORTAL:\r | |
592 | Py_FatalError("Immortal interned string died.");\r | |
593 | \r | |
594 | default:\r | |
595 | Py_FatalError("Inconsistent interned string state.");\r | |
596 | }\r | |
597 | Py_TYPE(op)->tp_free(op);\r | |
598 | }\r | |
599 | \r | |
600 | /* Unescape a backslash-escaped string. If unicode is non-zero,\r | |
601 | the string is a u-literal. If recode_encoding is non-zero,\r | |
602 | the string is UTF-8 encoded and should be re-encoded in the\r | |
603 | specified encoding. */\r | |
604 | \r | |
605 | PyObject *PyString_DecodeEscape(const char *s,\r | |
606 | Py_ssize_t len,\r | |
607 | const char *errors,\r | |
608 | Py_ssize_t unicode,\r | |
609 | const char *recode_encoding)\r | |
610 | {\r | |
611 | int c;\r | |
612 | char *p, *buf;\r | |
613 | const char *end;\r | |
614 | PyObject *v;\r | |
615 | Py_ssize_t newlen = recode_encoding ? 4*len:len;\r | |
616 | v = PyString_FromStringAndSize((char *)NULL, newlen);\r | |
617 | if (v == NULL)\r | |
618 | return NULL;\r | |
619 | p = buf = PyString_AsString(v);\r | |
620 | end = s + len;\r | |
621 | while (s < end) {\r | |
622 | if (*s != '\\') {\r | |
623 | non_esc:\r | |
624 | #ifdef Py_USING_UNICODE\r | |
625 | if (recode_encoding && (*s & 0x80)) {\r | |
626 | PyObject *u, *w;\r | |
627 | char *r;\r | |
628 | const char* t;\r | |
629 | Py_ssize_t rn;\r | |
630 | t = s;\r | |
631 | /* Decode non-ASCII bytes as UTF-8. */\r | |
632 | while (t < end && (*t & 0x80)) t++;\r | |
633 | u = PyUnicode_DecodeUTF8(s, t - s, errors);\r | |
634 | if(!u) goto failed;\r | |
635 | \r | |
636 | /* Recode them in target encoding. */\r | |
637 | w = PyUnicode_AsEncodedString(\r | |
638 | u, recode_encoding, errors);\r | |
639 | Py_DECREF(u);\r | |
640 | if (!w) goto failed;\r | |
641 | \r | |
642 | /* Append bytes to output buffer. */\r | |
643 | assert(PyString_Check(w));\r | |
644 | r = PyString_AS_STRING(w);\r | |
645 | rn = PyString_GET_SIZE(w);\r | |
646 | Py_MEMCPY(p, r, rn);\r | |
647 | p += rn;\r | |
648 | Py_DECREF(w);\r | |
649 | s = t;\r | |
650 | } else {\r | |
651 | *p++ = *s++;\r | |
652 | }\r | |
653 | #else\r | |
654 | *p++ = *s++;\r | |
655 | #endif\r | |
656 | continue;\r | |
657 | }\r | |
658 | s++;\r | |
659 | if (s==end) {\r | |
660 | PyErr_SetString(PyExc_ValueError,\r | |
661 | "Trailing \\ in string");\r | |
662 | goto failed;\r | |
663 | }\r | |
664 | switch (*s++) {\r | |
665 | /* XXX This assumes ASCII! */\r | |
666 | case '\n': break;\r | |
667 | case '\\': *p++ = '\\'; break;\r | |
668 | case '\'': *p++ = '\''; break;\r | |
669 | case '\"': *p++ = '\"'; break;\r | |
670 | case 'b': *p++ = '\b'; break;\r | |
671 | case 'f': *p++ = '\014'; break; /* FF */\r | |
672 | case 't': *p++ = '\t'; break;\r | |
673 | case 'n': *p++ = '\n'; break;\r | |
674 | case 'r': *p++ = '\r'; break;\r | |
675 | case 'v': *p++ = '\013'; break; /* VT */\r | |
676 | case 'a': *p++ = '\007'; break; /* BEL, not classic C */\r | |
677 | case '0': case '1': case '2': case '3':\r | |
678 | case '4': case '5': case '6': case '7':\r | |
679 | c = s[-1] - '0';\r | |
680 | if (s < end && '0' <= *s && *s <= '7') {\r | |
681 | c = (c<<3) + *s++ - '0';\r | |
682 | if (s < end && '0' <= *s && *s <= '7')\r | |
683 | c = (c<<3) + *s++ - '0';\r | |
684 | }\r | |
685 | *p++ = c;\r | |
686 | break;\r | |
687 | case 'x':\r | |
688 | if (s+1 < end &&\r | |
689 | isxdigit(Py_CHARMASK(s[0])) &&\r | |
690 | isxdigit(Py_CHARMASK(s[1])))\r | |
691 | {\r | |
692 | unsigned int x = 0;\r | |
693 | c = Py_CHARMASK(*s);\r | |
694 | s++;\r | |
695 | if (isdigit(c))\r | |
696 | x = c - '0';\r | |
697 | else if (islower(c))\r | |
698 | x = 10 + c - 'a';\r | |
699 | else\r | |
700 | x = 10 + c - 'A';\r | |
701 | x = x << 4;\r | |
702 | c = Py_CHARMASK(*s);\r | |
703 | s++;\r | |
704 | if (isdigit(c))\r | |
705 | x += c - '0';\r | |
706 | else if (islower(c))\r | |
707 | x += 10 + c - 'a';\r | |
708 | else\r | |
709 | x += 10 + c - 'A';\r | |
710 | *p++ = x;\r | |
711 | break;\r | |
712 | }\r | |
713 | if (!errors || strcmp(errors, "strict") == 0) {\r | |
714 | PyErr_SetString(PyExc_ValueError,\r | |
715 | "invalid \\x escape");\r | |
716 | goto failed;\r | |
717 | }\r | |
718 | if (strcmp(errors, "replace") == 0) {\r | |
719 | *p++ = '?';\r | |
720 | } else if (strcmp(errors, "ignore") == 0)\r | |
721 | /* do nothing */;\r | |
722 | else {\r | |
723 | PyErr_Format(PyExc_ValueError,\r | |
724 | "decoding error; "\r | |
725 | "unknown error handling code: %.400s",\r | |
726 | errors);\r | |
727 | goto failed;\r | |
728 | }\r | |
729 | /* skip \x */\r | |
730 | if (s < end && isxdigit(Py_CHARMASK(s[0])))\r | |
731 | s++; /* and a hexdigit */\r | |
732 | break;\r | |
733 | #ifndef Py_USING_UNICODE\r | |
734 | case 'u':\r | |
735 | case 'U':\r | |
736 | case 'N':\r | |
737 | if (unicode) {\r | |
738 | PyErr_SetString(PyExc_ValueError,\r | |
739 | "Unicode escapes not legal "\r | |
740 | "when Unicode disabled");\r | |
741 | goto failed;\r | |
742 | }\r | |
743 | #endif\r | |
744 | default:\r | |
745 | *p++ = '\\';\r | |
746 | s--;\r | |
747 | goto non_esc; /* an arbitrary number of unescaped\r | |
748 | UTF-8 bytes may follow. */\r | |
749 | }\r | |
750 | }\r | |
751 | if (p-buf < newlen)\r | |
752 | _PyString_Resize(&v, p - buf); /* v is cleared on error */\r | |
753 | return v;\r | |
754 | failed:\r | |
755 | Py_DECREF(v);\r | |
756 | return NULL;\r | |
757 | }\r | |
758 | \r | |
759 | /* -------------------------------------------------------------------- */\r | |
760 | /* object api */\r | |
761 | \r | |
762 | static Py_ssize_t\r | |
763 | string_getsize(register PyObject *op)\r | |
764 | {\r | |
765 | char *s;\r | |
766 | Py_ssize_t len;\r | |
767 | if (PyString_AsStringAndSize(op, &s, &len))\r | |
768 | return -1;\r | |
769 | return len;\r | |
770 | }\r | |
771 | \r | |
772 | static /*const*/ char *\r | |
773 | string_getbuffer(register PyObject *op)\r | |
774 | {\r | |
775 | char *s;\r | |
776 | Py_ssize_t len;\r | |
777 | if (PyString_AsStringAndSize(op, &s, &len))\r | |
778 | return NULL;\r | |
779 | return s;\r | |
780 | }\r | |
781 | \r | |
782 | Py_ssize_t\r | |
783 | PyString_Size(register PyObject *op)\r | |
784 | {\r | |
785 | if (!PyString_Check(op))\r | |
786 | return string_getsize(op);\r | |
787 | return Py_SIZE(op);\r | |
788 | }\r | |
789 | \r | |
790 | /*const*/ char *\r | |
791 | PyString_AsString(register PyObject *op)\r | |
792 | {\r | |
793 | if (!PyString_Check(op))\r | |
794 | return string_getbuffer(op);\r | |
795 | return ((PyStringObject *)op) -> ob_sval;\r | |
796 | }\r | |
797 | \r | |
798 | int\r | |
799 | PyString_AsStringAndSize(register PyObject *obj,\r | |
800 | register char **s,\r | |
801 | register Py_ssize_t *len)\r | |
802 | {\r | |
803 | if (s == NULL) {\r | |
804 | PyErr_BadInternalCall();\r | |
805 | return -1;\r | |
806 | }\r | |
807 | \r | |
808 | if (!PyString_Check(obj)) {\r | |
809 | #ifdef Py_USING_UNICODE\r | |
810 | if (PyUnicode_Check(obj)) {\r | |
811 | obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);\r | |
812 | if (obj == NULL)\r | |
813 | return -1;\r | |
814 | }\r | |
815 | else\r | |
816 | #endif\r | |
817 | {\r | |
818 | PyErr_Format(PyExc_TypeError,\r | |
819 | "expected string or Unicode object, "\r | |
820 | "%.200s found", Py_TYPE(obj)->tp_name);\r | |
821 | return -1;\r | |
822 | }\r | |
823 | }\r | |
824 | \r | |
825 | *s = PyString_AS_STRING(obj);\r | |
826 | if (len != NULL)\r | |
827 | *len = PyString_GET_SIZE(obj);\r | |
828 | else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {\r | |
829 | PyErr_SetString(PyExc_TypeError,\r | |
830 | "expected string without null bytes");\r | |
831 | return -1;\r | |
832 | }\r | |
833 | return 0;\r | |
834 | }\r | |
835 | \r | |
836 | /* -------------------------------------------------------------------- */\r | |
837 | /* Methods */\r | |
838 | \r | |
839 | #include "stringlib/stringdefs.h"\r | |
840 | #include "stringlib/fastsearch.h"\r | |
841 | \r | |
842 | #include "stringlib/count.h"\r | |
843 | #include "stringlib/find.h"\r | |
844 | #include "stringlib/partition.h"\r | |
845 | #include "stringlib/split.h"\r | |
846 | \r | |
847 | #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping\r | |
848 | #include "stringlib/localeutil.h"\r | |
849 | \r | |
850 | \r | |
851 | \r | |
852 | static int\r | |
853 | string_print(PyStringObject *op, FILE *fp, int flags)\r | |
854 | {\r | |
855 | Py_ssize_t i, str_len;\r | |
856 | char c;\r | |
857 | int quote;\r | |
858 | \r | |
859 | /* XXX Ought to check for interrupts when writing long strings */\r | |
860 | if (! PyString_CheckExact(op)) {\r | |
861 | int ret;\r | |
862 | /* A str subclass may have its own __str__ method. */\r | |
863 | op = (PyStringObject *) PyObject_Str((PyObject *)op);\r | |
864 | if (op == NULL)\r | |
865 | return -1;\r | |
866 | ret = string_print(op, fp, flags);\r | |
867 | Py_DECREF(op);\r | |
868 | return ret;\r | |
869 | }\r | |
870 | if (flags & Py_PRINT_RAW) {\r | |
871 | char *data = op->ob_sval;\r | |
872 | Py_ssize_t size = Py_SIZE(op);\r | |
873 | Py_BEGIN_ALLOW_THREADS\r | |
874 | while (size > INT_MAX) {\r | |
875 | /* Very long strings cannot be written atomically.\r | |
876 | * But don't write exactly INT_MAX bytes at a time\r | |
877 | * to avoid memory aligment issues.\r | |
878 | */\r | |
879 | const int chunk_size = INT_MAX & ~0x3FFF;\r | |
880 | fwrite(data, 1, chunk_size, fp);\r | |
881 | data += chunk_size;\r | |
882 | size -= chunk_size;\r | |
883 | }\r | |
884 | #ifdef __VMS\r | |
885 | if (size) fwrite(data, (size_t)size, 1, fp);\r | |
886 | #else\r | |
887 | fwrite(data, 1, (size_t)size, fp);\r | |
888 | #endif\r | |
889 | Py_END_ALLOW_THREADS\r | |
890 | return 0;\r | |
891 | }\r | |
892 | \r | |
893 | /* figure out which quote to use; single is preferred */\r | |
894 | quote = '\'';\r | |
895 | if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&\r | |
896 | !memchr(op->ob_sval, '"', Py_SIZE(op)))\r | |
897 | quote = '"';\r | |
898 | \r | |
899 | str_len = Py_SIZE(op);\r | |
900 | Py_BEGIN_ALLOW_THREADS\r | |
901 | fputc(quote, fp);\r | |
902 | for (i = 0; i < str_len; i++) {\r | |
903 | /* Since strings are immutable and the caller should have a\r | |
904 | reference, accessing the interal buffer should not be an issue\r | |
905 | with the GIL released. */\r | |
906 | c = op->ob_sval[i];\r | |
907 | if (c == quote || c == '\\')\r | |
908 | fprintf(fp, "\\%c", c);\r | |
909 | else if (c == '\t')\r | |
910 | fprintf(fp, "\\t");\r | |
911 | else if (c == '\n')\r | |
912 | fprintf(fp, "\\n");\r | |
913 | else if (c == '\r')\r | |
914 | fprintf(fp, "\\r");\r | |
915 | else if (c < ' ' || c >= 0x7f)\r | |
916 | fprintf(fp, "\\x%02x", c & 0xff);\r | |
917 | else\r | |
918 | fputc(c, fp);\r | |
919 | }\r | |
920 | fputc(quote, fp);\r | |
921 | Py_END_ALLOW_THREADS\r | |
922 | return 0;\r | |
923 | }\r | |
924 | \r | |
925 | PyObject *\r | |
926 | PyString_Repr(PyObject *obj, int smartquotes)\r | |
927 | {\r | |
928 | register PyStringObject* op = (PyStringObject*) obj;\r | |
929 | size_t newsize;\r | |
930 | PyObject *v;\r | |
931 | if (Py_SIZE(op) > (PY_SSIZE_T_MAX - 2)/4) {\r | |
932 | PyErr_SetString(PyExc_OverflowError,\r | |
933 | "string is too large to make repr");\r | |
934 | return NULL;\r | |
935 | }\r | |
936 | newsize = 2 + 4*Py_SIZE(op);\r | |
937 | v = PyString_FromStringAndSize((char *)NULL, newsize);\r | |
938 | if (v == NULL) {\r | |
939 | return NULL;\r | |
940 | }\r | |
941 | else {\r | |
942 | register Py_ssize_t i;\r | |
943 | register char c;\r | |
944 | register char *p;\r | |
945 | int quote;\r | |
946 | \r | |
947 | /* figure out which quote to use; single is preferred */\r | |
948 | quote = '\'';\r | |
949 | if (smartquotes &&\r | |
950 | memchr(op->ob_sval, '\'', Py_SIZE(op)) &&\r | |
951 | !memchr(op->ob_sval, '"', Py_SIZE(op)))\r | |
952 | quote = '"';\r | |
953 | \r | |
954 | p = PyString_AS_STRING(v);\r | |
955 | *p++ = quote;\r | |
956 | for (i = 0; i < Py_SIZE(op); i++) {\r | |
957 | /* There's at least enough room for a hex escape\r | |
958 | and a closing quote. */\r | |
959 | assert(newsize - (p - PyString_AS_STRING(v)) >= 5);\r | |
960 | c = op->ob_sval[i];\r | |
961 | if (c == quote || c == '\\')\r | |
962 | *p++ = '\\', *p++ = c;\r | |
963 | else if (c == '\t')\r | |
964 | *p++ = '\\', *p++ = 't';\r | |
965 | else if (c == '\n')\r | |
966 | *p++ = '\\', *p++ = 'n';\r | |
967 | else if (c == '\r')\r | |
968 | *p++ = '\\', *p++ = 'r';\r | |
969 | else if (c < ' ' || c >= 0x7f) {\r | |
970 | /* For performance, we don't want to call\r | |
971 | PyOS_snprintf here (extra layers of\r | |
972 | function call). */\r | |
973 | sprintf(p, "\\x%02x", c & 0xff);\r | |
974 | p += 4;\r | |
975 | }\r | |
976 | else\r | |
977 | *p++ = c;\r | |
978 | }\r | |
979 | assert(newsize - (p - PyString_AS_STRING(v)) >= 1);\r | |
980 | *p++ = quote;\r | |
981 | *p = '\0';\r | |
982 | if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))\r | |
983 | return NULL;\r | |
984 | return v;\r | |
985 | }\r | |
986 | }\r | |
987 | \r | |
988 | static PyObject *\r | |
989 | string_repr(PyObject *op)\r | |
990 | {\r | |
991 | return PyString_Repr(op, 1);\r | |
992 | }\r | |
993 | \r | |
994 | static PyObject *\r | |
995 | string_str(PyObject *s)\r | |
996 | {\r | |
997 | assert(PyString_Check(s));\r | |
998 | if (PyString_CheckExact(s)) {\r | |
999 | Py_INCREF(s);\r | |
1000 | return s;\r | |
1001 | }\r | |
1002 | else {\r | |
1003 | /* Subtype -- return genuine string with the same value. */\r | |
1004 | PyStringObject *t = (PyStringObject *) s;\r | |
1005 | return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));\r | |
1006 | }\r | |
1007 | }\r | |
1008 | \r | |
1009 | static Py_ssize_t\r | |
1010 | string_length(PyStringObject *a)\r | |
1011 | {\r | |
1012 | return Py_SIZE(a);\r | |
1013 | }\r | |
1014 | \r | |
1015 | static PyObject *\r | |
1016 | string_concat(register PyStringObject *a, register PyObject *bb)\r | |
1017 | {\r | |
1018 | register Py_ssize_t size;\r | |
1019 | register PyStringObject *op;\r | |
1020 | if (!PyString_Check(bb)) {\r | |
1021 | #ifdef Py_USING_UNICODE\r | |
1022 | if (PyUnicode_Check(bb))\r | |
1023 | return PyUnicode_Concat((PyObject *)a, bb);\r | |
1024 | #endif\r | |
1025 | if (PyByteArray_Check(bb))\r | |
1026 | return PyByteArray_Concat((PyObject *)a, bb);\r | |
1027 | PyErr_Format(PyExc_TypeError,\r | |
1028 | "cannot concatenate 'str' and '%.200s' objects",\r | |
1029 | Py_TYPE(bb)->tp_name);\r | |
1030 | return NULL;\r | |
1031 | }\r | |
1032 | #define b ((PyStringObject *)bb)\r | |
1033 | /* Optimize cases with empty left or right operand */\r | |
1034 | if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&\r | |
1035 | PyString_CheckExact(a) && PyString_CheckExact(b)) {\r | |
1036 | if (Py_SIZE(a) == 0) {\r | |
1037 | Py_INCREF(bb);\r | |
1038 | return bb;\r | |
1039 | }\r | |
1040 | Py_INCREF(a);\r | |
1041 | return (PyObject *)a;\r | |
1042 | }\r | |
1043 | size = Py_SIZE(a) + Py_SIZE(b);\r | |
1044 | /* Check that string sizes are not negative, to prevent an\r | |
1045 | overflow in cases where we are passed incorrectly-created\r | |
1046 | strings with negative lengths (due to a bug in other code).\r | |
1047 | */\r | |
1048 | if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||\r | |
1049 | Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {\r | |
1050 | PyErr_SetString(PyExc_OverflowError,\r | |
1051 | "strings are too large to concat");\r | |
1052 | return NULL;\r | |
1053 | }\r | |
1054 | \r | |
1055 | /* Inline PyObject_NewVar */\r | |
1056 | if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {\r | |
1057 | PyErr_SetString(PyExc_OverflowError,\r | |
1058 | "strings are too large to concat");\r | |
1059 | return NULL;\r | |
1060 | }\r | |
1061 | op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);\r | |
1062 | if (op == NULL)\r | |
1063 | return PyErr_NoMemory();\r | |
1064 | PyObject_INIT_VAR(op, &PyString_Type, size);\r | |
1065 | op->ob_shash = -1;\r | |
1066 | op->ob_sstate = SSTATE_NOT_INTERNED;\r | |
1067 | Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));\r | |
1068 | Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));\r | |
1069 | op->ob_sval[size] = '\0';\r | |
1070 | return (PyObject *) op;\r | |
1071 | #undef b\r | |
1072 | }\r | |
1073 | \r | |
1074 | static PyObject *\r | |
1075 | string_repeat(register PyStringObject *a, register Py_ssize_t n)\r | |
1076 | {\r | |
1077 | register Py_ssize_t i;\r | |
1078 | register Py_ssize_t j;\r | |
1079 | register Py_ssize_t size;\r | |
1080 | register PyStringObject *op;\r | |
1081 | size_t nbytes;\r | |
1082 | if (n < 0)\r | |
1083 | n = 0;\r | |
1084 | /* watch out for overflows: the size can overflow int,\r | |
1085 | * and the # of bytes needed can overflow size_t\r | |
1086 | */\r | |
1087 | size = Py_SIZE(a) * n;\r | |
1088 | if (n && size / n != Py_SIZE(a)) {\r | |
1089 | PyErr_SetString(PyExc_OverflowError,\r | |
1090 | "repeated string is too long");\r | |
1091 | return NULL;\r | |
1092 | }\r | |
1093 | if (size == Py_SIZE(a) && PyString_CheckExact(a)) {\r | |
1094 | Py_INCREF(a);\r | |
1095 | return (PyObject *)a;\r | |
1096 | }\r | |
1097 | nbytes = (size_t)size;\r | |
1098 | if (nbytes + PyStringObject_SIZE <= nbytes) {\r | |
1099 | PyErr_SetString(PyExc_OverflowError,\r | |
1100 | "repeated string is too long");\r | |
1101 | return NULL;\r | |
1102 | }\r | |
1103 | op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);\r | |
1104 | if (op == NULL)\r | |
1105 | return PyErr_NoMemory();\r | |
1106 | PyObject_INIT_VAR(op, &PyString_Type, size);\r | |
1107 | op->ob_shash = -1;\r | |
1108 | op->ob_sstate = SSTATE_NOT_INTERNED;\r | |
1109 | op->ob_sval[size] = '\0';\r | |
1110 | if (Py_SIZE(a) == 1 && n > 0) {\r | |
1111 | memset(op->ob_sval, a->ob_sval[0] , n);\r | |
1112 | return (PyObject *) op;\r | |
1113 | }\r | |
1114 | i = 0;\r | |
1115 | if (i < size) {\r | |
1116 | Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));\r | |
1117 | i = Py_SIZE(a);\r | |
1118 | }\r | |
1119 | while (i < size) {\r | |
1120 | j = (i <= size-i) ? i : size-i;\r | |
1121 | Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);\r | |
1122 | i += j;\r | |
1123 | }\r | |
1124 | return (PyObject *) op;\r | |
1125 | }\r | |
1126 | \r | |
1127 | /* String slice a[i:j] consists of characters a[i] ... a[j-1] */\r | |
1128 | \r | |
1129 | static PyObject *\r | |
1130 | string_slice(register PyStringObject *a, register Py_ssize_t i,\r | |
1131 | register Py_ssize_t j)\r | |
1132 | /* j -- may be negative! */\r | |
1133 | {\r | |
1134 | if (i < 0)\r | |
1135 | i = 0;\r | |
1136 | if (j < 0)\r | |
1137 | j = 0; /* Avoid signed/unsigned bug in next line */\r | |
1138 | if (j > Py_SIZE(a))\r | |
1139 | j = Py_SIZE(a);\r | |
1140 | if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {\r | |
1141 | /* It's the same as a */\r | |
1142 | Py_INCREF(a);\r | |
1143 | return (PyObject *)a;\r | |
1144 | }\r | |
1145 | if (j < i)\r | |
1146 | j = i;\r | |
1147 | return PyString_FromStringAndSize(a->ob_sval + i, j-i);\r | |
1148 | }\r | |
1149 | \r | |
1150 | static int\r | |
1151 | string_contains(PyObject *str_obj, PyObject *sub_obj)\r | |
1152 | {\r | |
1153 | if (!PyString_CheckExact(sub_obj)) {\r | |
1154 | #ifdef Py_USING_UNICODE\r | |
1155 | if (PyUnicode_Check(sub_obj))\r | |
1156 | return PyUnicode_Contains(str_obj, sub_obj);\r | |
1157 | #endif\r | |
1158 | if (!PyString_Check(sub_obj)) {\r | |
1159 | PyErr_Format(PyExc_TypeError,\r | |
1160 | "'in <string>' requires string as left operand, "\r | |
1161 | "not %.200s", Py_TYPE(sub_obj)->tp_name);\r | |
1162 | return -1;\r | |
1163 | }\r | |
1164 | }\r | |
1165 | \r | |
1166 | return stringlib_contains_obj(str_obj, sub_obj);\r | |
1167 | }\r | |
1168 | \r | |
1169 | static PyObject *\r | |
1170 | string_item(PyStringObject *a, register Py_ssize_t i)\r | |
1171 | {\r | |
1172 | char pchar;\r | |
1173 | PyObject *v;\r | |
1174 | if (i < 0 || i >= Py_SIZE(a)) {\r | |
1175 | PyErr_SetString(PyExc_IndexError, "string index out of range");\r | |
1176 | return NULL;\r | |
1177 | }\r | |
1178 | pchar = a->ob_sval[i];\r | |
1179 | v = (PyObject *)characters[pchar & UCHAR_MAX];\r | |
1180 | if (v == NULL)\r | |
1181 | v = PyString_FromStringAndSize(&pchar, 1);\r | |
1182 | else {\r | |
1183 | #ifdef COUNT_ALLOCS\r | |
1184 | one_strings++;\r | |
1185 | #endif\r | |
1186 | Py_INCREF(v);\r | |
1187 | }\r | |
1188 | return v;\r | |
1189 | }\r | |
1190 | \r | |
1191 | static PyObject*\r | |
1192 | string_richcompare(PyStringObject *a, PyStringObject *b, int op)\r | |
1193 | {\r | |
1194 | int c;\r | |
1195 | Py_ssize_t len_a, len_b;\r | |
1196 | Py_ssize_t min_len;\r | |
1197 | PyObject *result;\r | |
1198 | \r | |
1199 | /* Make sure both arguments are strings. */\r | |
1200 | if (!(PyString_Check(a) && PyString_Check(b))) {\r | |
1201 | result = Py_NotImplemented;\r | |
1202 | goto out;\r | |
1203 | }\r | |
1204 | if (a == b) {\r | |
1205 | switch (op) {\r | |
1206 | case Py_EQ:case Py_LE:case Py_GE:\r | |
1207 | result = Py_True;\r | |
1208 | goto out;\r | |
1209 | case Py_NE:case Py_LT:case Py_GT:\r | |
1210 | result = Py_False;\r | |
1211 | goto out;\r | |
1212 | }\r | |
1213 | }\r | |
1214 | if (op == Py_EQ) {\r | |
1215 | /* Supporting Py_NE here as well does not save\r | |
1216 | much time, since Py_NE is rarely used. */\r | |
1217 | if (Py_SIZE(a) == Py_SIZE(b)\r | |
1218 | && (a->ob_sval[0] == b->ob_sval[0]\r | |
1219 | && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {\r | |
1220 | result = Py_True;\r | |
1221 | } else {\r | |
1222 | result = Py_False;\r | |
1223 | }\r | |
1224 | goto out;\r | |
1225 | }\r | |
1226 | len_a = Py_SIZE(a); len_b = Py_SIZE(b);\r | |
1227 | min_len = (len_a < len_b) ? len_a : len_b;\r | |
1228 | if (min_len > 0) {\r | |
1229 | c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);\r | |
1230 | if (c==0)\r | |
1231 | c = memcmp(a->ob_sval, b->ob_sval, min_len);\r | |
1232 | } else\r | |
1233 | c = 0;\r | |
1234 | if (c == 0)\r | |
1235 | c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;\r | |
1236 | switch (op) {\r | |
1237 | case Py_LT: c = c < 0; break;\r | |
1238 | case Py_LE: c = c <= 0; break;\r | |
1239 | case Py_EQ: assert(0); break; /* unreachable */\r | |
1240 | case Py_NE: c = c != 0; break;\r | |
1241 | case Py_GT: c = c > 0; break;\r | |
1242 | case Py_GE: c = c >= 0; break;\r | |
1243 | default:\r | |
1244 | result = Py_NotImplemented;\r | |
1245 | goto out;\r | |
1246 | }\r | |
1247 | result = c ? Py_True : Py_False;\r | |
1248 | out:\r | |
1249 | Py_INCREF(result);\r | |
1250 | return result;\r | |
1251 | }\r | |
1252 | \r | |
1253 | int\r | |
1254 | _PyString_Eq(PyObject *o1, PyObject *o2)\r | |
1255 | {\r | |
1256 | PyStringObject *a = (PyStringObject*) o1;\r | |
1257 | PyStringObject *b = (PyStringObject*) o2;\r | |
1258 | return Py_SIZE(a) == Py_SIZE(b)\r | |
1259 | && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;\r | |
1260 | }\r | |
1261 | \r | |
1262 | static long\r | |
1263 | string_hash(PyStringObject *a)\r | |
1264 | {\r | |
1265 | register Py_ssize_t len;\r | |
1266 | register unsigned char *p;\r | |
1267 | register long x;\r | |
1268 | \r | |
1269 | #ifdef Py_DEBUG\r | |
1270 | assert(_Py_HashSecret_Initialized);\r | |
1271 | #endif\r | |
1272 | if (a->ob_shash != -1)\r | |
1273 | return a->ob_shash;\r | |
1274 | len = Py_SIZE(a);\r | |
1275 | /*\r | |
1276 | We make the hash of the empty string be 0, rather than using\r | |
1277 | (prefix ^ suffix), since this slightly obfuscates the hash secret\r | |
1278 | */\r | |
1279 | if (len == 0) {\r | |
1280 | a->ob_shash = 0;\r | |
1281 | return 0;\r | |
1282 | }\r | |
1283 | p = (unsigned char *) a->ob_sval;\r | |
1284 | x = _Py_HashSecret.prefix;\r | |
1285 | x ^= *p << 7;\r | |
1286 | while (--len >= 0)\r | |
1287 | x = (1000003*x) ^ *p++;\r | |
1288 | x ^= Py_SIZE(a);\r | |
1289 | x ^= _Py_HashSecret.suffix;\r | |
1290 | if (x == -1)\r | |
1291 | x = -2;\r | |
1292 | a->ob_shash = x;\r | |
1293 | return x;\r | |
1294 | }\r | |
1295 | \r | |
1296 | static PyObject*\r | |
1297 | string_subscript(PyStringObject* self, PyObject* item)\r | |
1298 | {\r | |
1299 | if (PyIndex_Check(item)) {\r | |
1300 | Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);\r | |
1301 | if (i == -1 && PyErr_Occurred())\r | |
1302 | return NULL;\r | |
1303 | if (i < 0)\r | |
1304 | i += PyString_GET_SIZE(self);\r | |
1305 | return string_item(self, i);\r | |
1306 | }\r | |
1307 | else if (PySlice_Check(item)) {\r | |
1308 | Py_ssize_t start, stop, step, slicelength, cur, i;\r | |
1309 | char* source_buf;\r | |
1310 | char* result_buf;\r | |
1311 | PyObject* result;\r | |
1312 | \r | |
1313 | if (PySlice_GetIndicesEx((PySliceObject*)item,\r | |
1314 | PyString_GET_SIZE(self),\r | |
1315 | &start, &stop, &step, &slicelength) < 0) {\r | |
1316 | return NULL;\r | |
1317 | }\r | |
1318 | \r | |
1319 | if (slicelength <= 0) {\r | |
1320 | return PyString_FromStringAndSize("", 0);\r | |
1321 | }\r | |
1322 | else if (start == 0 && step == 1 &&\r | |
1323 | slicelength == PyString_GET_SIZE(self) &&\r | |
1324 | PyString_CheckExact(self)) {\r | |
1325 | Py_INCREF(self);\r | |
1326 | return (PyObject *)self;\r | |
1327 | }\r | |
1328 | else if (step == 1) {\r | |
1329 | return PyString_FromStringAndSize(\r | |
1330 | PyString_AS_STRING(self) + start,\r | |
1331 | slicelength);\r | |
1332 | }\r | |
1333 | else {\r | |
1334 | source_buf = PyString_AsString((PyObject*)self);\r | |
1335 | result_buf = (char *)PyMem_Malloc(slicelength);\r | |
1336 | if (result_buf == NULL)\r | |
1337 | return PyErr_NoMemory();\r | |
1338 | \r | |
1339 | for (cur = start, i = 0; i < slicelength;\r | |
1340 | cur += step, i++) {\r | |
1341 | result_buf[i] = source_buf[cur];\r | |
1342 | }\r | |
1343 | \r | |
1344 | result = PyString_FromStringAndSize(result_buf,\r | |
1345 | slicelength);\r | |
1346 | PyMem_Free(result_buf);\r | |
1347 | return result;\r | |
1348 | }\r | |
1349 | }\r | |
1350 | else {\r | |
1351 | PyErr_Format(PyExc_TypeError,\r | |
1352 | "string indices must be integers, not %.200s",\r | |
1353 | Py_TYPE(item)->tp_name);\r | |
1354 | return NULL;\r | |
1355 | }\r | |
1356 | }\r | |
1357 | \r | |
1358 | static Py_ssize_t\r | |
1359 | string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)\r | |
1360 | {\r | |
1361 | if ( index != 0 ) {\r | |
1362 | PyErr_SetString(PyExc_SystemError,\r | |
1363 | "accessing non-existent string segment");\r | |
1364 | return -1;\r | |
1365 | }\r | |
1366 | *ptr = (void *)self->ob_sval;\r | |
1367 | return Py_SIZE(self);\r | |
1368 | }\r | |
1369 | \r | |
1370 | static Py_ssize_t\r | |
1371 | string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)\r | |
1372 | {\r | |
1373 | PyErr_SetString(PyExc_TypeError,\r | |
1374 | "Cannot use string as modifiable buffer");\r | |
1375 | return -1;\r | |
1376 | }\r | |
1377 | \r | |
1378 | static Py_ssize_t\r | |
1379 | string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)\r | |
1380 | {\r | |
1381 | if ( lenp )\r | |
1382 | *lenp = Py_SIZE(self);\r | |
1383 | return 1;\r | |
1384 | }\r | |
1385 | \r | |
1386 | static Py_ssize_t\r | |
1387 | string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)\r | |
1388 | {\r | |
1389 | if ( index != 0 ) {\r | |
1390 | PyErr_SetString(PyExc_SystemError,\r | |
1391 | "accessing non-existent string segment");\r | |
1392 | return -1;\r | |
1393 | }\r | |
1394 | *ptr = self->ob_sval;\r | |
1395 | return Py_SIZE(self);\r | |
1396 | }\r | |
1397 | \r | |
1398 | static int\r | |
1399 | string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)\r | |
1400 | {\r | |
1401 | return PyBuffer_FillInfo(view, (PyObject*)self,\r | |
1402 | (void *)self->ob_sval, Py_SIZE(self),\r | |
1403 | 1, flags);\r | |
1404 | }\r | |
1405 | \r | |
1406 | static PySequenceMethods string_as_sequence = {\r | |
1407 | (lenfunc)string_length, /*sq_length*/\r | |
1408 | (binaryfunc)string_concat, /*sq_concat*/\r | |
1409 | (ssizeargfunc)string_repeat, /*sq_repeat*/\r | |
1410 | (ssizeargfunc)string_item, /*sq_item*/\r | |
1411 | (ssizessizeargfunc)string_slice, /*sq_slice*/\r | |
1412 | 0, /*sq_ass_item*/\r | |
1413 | 0, /*sq_ass_slice*/\r | |
1414 | (objobjproc)string_contains /*sq_contains*/\r | |
1415 | };\r | |
1416 | \r | |
1417 | static PyMappingMethods string_as_mapping = {\r | |
1418 | (lenfunc)string_length,\r | |
1419 | (binaryfunc)string_subscript,\r | |
1420 | 0,\r | |
1421 | };\r | |
1422 | \r | |
1423 | static PyBufferProcs string_as_buffer = {\r | |
1424 | (readbufferproc)string_buffer_getreadbuf,\r | |
1425 | (writebufferproc)string_buffer_getwritebuf,\r | |
1426 | (segcountproc)string_buffer_getsegcount,\r | |
1427 | (charbufferproc)string_buffer_getcharbuf,\r | |
1428 | (getbufferproc)string_buffer_getbuffer,\r | |
1429 | 0, /* XXX */\r | |
1430 | };\r | |
1431 | \r | |
1432 | \r | |
1433 | \r | |
/* Strip modes.  The values double as indices into stripformat[]. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Argument-parsing format strings, indexed by strip mode.  Each entry is
   "|O:" followed by the method name. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for strip mode i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
1442 | \r | |
1443 | PyDoc_STRVAR(split__doc__,\r | |
1444 | "S.split([sep [,maxsplit]]) -> list of strings\n\\r | |
1445 | \n\\r | |
1446 | Return a list of the words in the string S, using sep as the\n\\r | |
1447 | delimiter string. If maxsplit is given, at most maxsplit\n\\r | |
1448 | splits are done. If sep is not specified or is None, any\n\\r | |
1449 | whitespace string is a separator and empty strings are removed\n\\r | |
1450 | from the result.");\r | |
1451 | \r | |
1452 | static PyObject *\r | |
1453 | string_split(PyStringObject *self, PyObject *args)\r | |
1454 | {\r | |
1455 | Py_ssize_t len = PyString_GET_SIZE(self), n;\r | |
1456 | Py_ssize_t maxsplit = -1;\r | |
1457 | const char *s = PyString_AS_STRING(self), *sub;\r | |
1458 | PyObject *subobj = Py_None;\r | |
1459 | \r | |
1460 | if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))\r | |
1461 | return NULL;\r | |
1462 | if (maxsplit < 0)\r | |
1463 | maxsplit = PY_SSIZE_T_MAX;\r | |
1464 | if (subobj == Py_None)\r | |
1465 | return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);\r | |
1466 | if (PyString_Check(subobj)) {\r | |
1467 | sub = PyString_AS_STRING(subobj);\r | |
1468 | n = PyString_GET_SIZE(subobj);\r | |
1469 | }\r | |
1470 | #ifdef Py_USING_UNICODE\r | |
1471 | else if (PyUnicode_Check(subobj))\r | |
1472 | return PyUnicode_Split((PyObject *)self, subobj, maxsplit);\r | |
1473 | #endif\r | |
1474 | else if (PyObject_AsCharBuffer(subobj, &sub, &n))\r | |
1475 | return NULL;\r | |
1476 | \r | |
1477 | return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);\r | |
1478 | }\r | |
1479 | \r | |
1480 | PyDoc_STRVAR(partition__doc__,\r | |
1481 | "S.partition(sep) -> (head, sep, tail)\n\\r | |
1482 | \n\\r | |
1483 | Search for the separator sep in S, and return the part before it,\n\\r | |
1484 | the separator itself, and the part after it. If the separator is not\n\\r | |
1485 | found, return S and two empty strings.");\r | |
1486 | \r | |
1487 | static PyObject *\r | |
1488 | string_partition(PyStringObject *self, PyObject *sep_obj)\r | |
1489 | {\r | |
1490 | const char *sep;\r | |
1491 | Py_ssize_t sep_len;\r | |
1492 | \r | |
1493 | if (PyString_Check(sep_obj)) {\r | |
1494 | sep = PyString_AS_STRING(sep_obj);\r | |
1495 | sep_len = PyString_GET_SIZE(sep_obj);\r | |
1496 | }\r | |
1497 | #ifdef Py_USING_UNICODE\r | |
1498 | else if (PyUnicode_Check(sep_obj))\r | |
1499 | return PyUnicode_Partition((PyObject *) self, sep_obj);\r | |
1500 | #endif\r | |
1501 | else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))\r | |
1502 | return NULL;\r | |
1503 | \r | |
1504 | return stringlib_partition(\r | |
1505 | (PyObject*) self,\r | |
1506 | PyString_AS_STRING(self), PyString_GET_SIZE(self),\r | |
1507 | sep_obj, sep, sep_len\r | |
1508 | );\r | |
1509 | }\r | |
1510 | \r | |
1511 | PyDoc_STRVAR(rpartition__doc__,\r | |
1512 | "S.rpartition(sep) -> (head, sep, tail)\n\\r | |
1513 | \n\\r | |
1514 | Search for the separator sep in S, starting at the end of S, and return\n\\r | |
1515 | the part before it, the separator itself, and the part after it. If the\n\\r | |
1516 | separator is not found, return two empty strings and S.");\r | |
1517 | \r | |
1518 | static PyObject *\r | |
1519 | string_rpartition(PyStringObject *self, PyObject *sep_obj)\r | |
1520 | {\r | |
1521 | const char *sep;\r | |
1522 | Py_ssize_t sep_len;\r | |
1523 | \r | |
1524 | if (PyString_Check(sep_obj)) {\r | |
1525 | sep = PyString_AS_STRING(sep_obj);\r | |
1526 | sep_len = PyString_GET_SIZE(sep_obj);\r | |
1527 | }\r | |
1528 | #ifdef Py_USING_UNICODE\r | |
1529 | else if (PyUnicode_Check(sep_obj))\r | |
1530 | return PyUnicode_RPartition((PyObject *) self, sep_obj);\r | |
1531 | #endif\r | |
1532 | else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))\r | |
1533 | return NULL;\r | |
1534 | \r | |
1535 | return stringlib_rpartition(\r | |
1536 | (PyObject*) self,\r | |
1537 | PyString_AS_STRING(self), PyString_GET_SIZE(self),\r | |
1538 | sep_obj, sep, sep_len\r | |
1539 | );\r | |
1540 | }\r | |
1541 | \r | |
1542 | PyDoc_STRVAR(rsplit__doc__,\r | |
1543 | "S.rsplit([sep [,maxsplit]]) -> list of strings\n\\r | |
1544 | \n\\r | |
1545 | Return a list of the words in the string S, using sep as the\n\\r | |
1546 | delimiter string, starting at the end of the string and working\n\\r | |
1547 | to the front. If maxsplit is given, at most maxsplit splits are\n\\r | |
1548 | done. If sep is not specified or is None, any whitespace string\n\\r | |
1549 | is a separator.");\r | |
1550 | \r | |
1551 | static PyObject *\r | |
1552 | string_rsplit(PyStringObject *self, PyObject *args)\r | |
1553 | {\r | |
1554 | Py_ssize_t len = PyString_GET_SIZE(self), n;\r | |
1555 | Py_ssize_t maxsplit = -1;\r | |
1556 | const char *s = PyString_AS_STRING(self), *sub;\r | |
1557 | PyObject *subobj = Py_None;\r | |
1558 | \r | |
1559 | if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))\r | |
1560 | return NULL;\r | |
1561 | if (maxsplit < 0)\r | |
1562 | maxsplit = PY_SSIZE_T_MAX;\r | |
1563 | if (subobj == Py_None)\r | |
1564 | return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);\r | |
1565 | if (PyString_Check(subobj)) {\r | |
1566 | sub = PyString_AS_STRING(subobj);\r | |
1567 | n = PyString_GET_SIZE(subobj);\r | |
1568 | }\r | |
1569 | #ifdef Py_USING_UNICODE\r | |
1570 | else if (PyUnicode_Check(subobj))\r | |
1571 | return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);\r | |
1572 | #endif\r | |
1573 | else if (PyObject_AsCharBuffer(subobj, &sub, &n))\r | |
1574 | return NULL;\r | |
1575 | \r | |
1576 | return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);\r | |
1577 | }\r | |
1578 | \r | |
1579 | \r | |
1580 | PyDoc_STRVAR(join__doc__,\r | |
1581 | "S.join(iterable) -> string\n\\r | |
1582 | \n\\r | |
1583 | Return a string which is the concatenation of the strings in the\n\\r | |
1584 | iterable. The separator between elements is S.");\r | |
1585 | \r | |
1586 | static PyObject *\r | |
1587 | string_join(PyStringObject *self, PyObject *orig)\r | |
1588 | {\r | |
1589 | char *sep = PyString_AS_STRING(self);\r | |
1590 | const Py_ssize_t seplen = PyString_GET_SIZE(self);\r | |
1591 | PyObject *res = NULL;\r | |
1592 | char *p;\r | |
1593 | Py_ssize_t seqlen = 0;\r | |
1594 | size_t sz = 0;\r | |
1595 | Py_ssize_t i;\r | |
1596 | PyObject *seq, *item;\r | |
1597 | \r | |
1598 | seq = PySequence_Fast(orig, "can only join an iterable");\r | |
1599 | if (seq == NULL) {\r | |
1600 | return NULL;\r | |
1601 | }\r | |
1602 | \r | |
1603 | seqlen = PySequence_Size(seq);\r | |
1604 | if (seqlen == 0) {\r | |
1605 | Py_DECREF(seq);\r | |
1606 | return PyString_FromString("");\r | |
1607 | }\r | |
1608 | if (seqlen == 1) {\r | |
1609 | item = PySequence_Fast_GET_ITEM(seq, 0);\r | |
1610 | if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {\r | |
1611 | Py_INCREF(item);\r | |
1612 | Py_DECREF(seq);\r | |
1613 | return item;\r | |
1614 | }\r | |
1615 | }\r | |
1616 | \r | |
1617 | /* There are at least two things to join, or else we have a subclass\r | |
1618 | * of the builtin types in the sequence.\r | |
1619 | * Do a pre-pass to figure out the total amount of space we'll\r | |
1620 | * need (sz), see whether any argument is absurd, and defer to\r | |
1621 | * the Unicode join if appropriate.\r | |
1622 | */\r | |
1623 | for (i = 0; i < seqlen; i++) {\r | |
1624 | const size_t old_sz = sz;\r | |
1625 | item = PySequence_Fast_GET_ITEM(seq, i);\r | |
1626 | if (!PyString_Check(item)){\r | |
1627 | #ifdef Py_USING_UNICODE\r | |
1628 | if (PyUnicode_Check(item)) {\r | |
1629 | /* Defer to Unicode join.\r | |
1630 | * CAUTION: There's no gurantee that the\r | |
1631 | * original sequence can be iterated over\r | |
1632 | * again, so we must pass seq here.\r | |
1633 | */\r | |
1634 | PyObject *result;\r | |
1635 | result = PyUnicode_Join((PyObject *)self, seq);\r | |
1636 | Py_DECREF(seq);\r | |
1637 | return result;\r | |
1638 | }\r | |
1639 | #endif\r | |
1640 | PyErr_Format(PyExc_TypeError,\r | |
1641 | "sequence item %zd: expected string,"\r | |
1642 | " %.80s found",\r | |
1643 | i, Py_TYPE(item)->tp_name);\r | |
1644 | Py_DECREF(seq);\r | |
1645 | return NULL;\r | |
1646 | }\r | |
1647 | sz += PyString_GET_SIZE(item);\r | |
1648 | if (i != 0)\r | |
1649 | sz += seplen;\r | |
1650 | if (sz < old_sz || sz > PY_SSIZE_T_MAX) {\r | |
1651 | PyErr_SetString(PyExc_OverflowError,\r | |
1652 | "join() result is too long for a Python string");\r | |
1653 | Py_DECREF(seq);\r | |
1654 | return NULL;\r | |
1655 | }\r | |
1656 | }\r | |
1657 | \r | |
1658 | /* Allocate result space. */\r | |
1659 | res = PyString_FromStringAndSize((char*)NULL, sz);\r | |
1660 | if (res == NULL) {\r | |
1661 | Py_DECREF(seq);\r | |
1662 | return NULL;\r | |
1663 | }\r | |
1664 | \r | |
1665 | /* Catenate everything. */\r | |
1666 | p = PyString_AS_STRING(res);\r | |
1667 | for (i = 0; i < seqlen; ++i) {\r | |
1668 | size_t n;\r | |
1669 | item = PySequence_Fast_GET_ITEM(seq, i);\r | |
1670 | n = PyString_GET_SIZE(item);\r | |
1671 | Py_MEMCPY(p, PyString_AS_STRING(item), n);\r | |
1672 | p += n;\r | |
1673 | if (i < seqlen - 1) {\r | |
1674 | Py_MEMCPY(p, sep, seplen);\r | |
1675 | p += seplen;\r | |
1676 | }\r | |
1677 | }\r | |
1678 | \r | |
1679 | Py_DECREF(seq);\r | |
1680 | return res;\r | |
1681 | }\r | |
1682 | \r | |
1683 | PyObject *\r | |
1684 | _PyString_Join(PyObject *sep, PyObject *x)\r | |
1685 | {\r | |
1686 | assert(sep != NULL && PyString_Check(sep));\r | |
1687 | assert(x != NULL);\r | |
1688 | return string_join((PyStringObject *)sep, x);\r | |
1689 | }\r | |
1690 | \r | |
/* helper macro to fixup start/end slice values

   `start' and `end' must be modifiable lvalues; `len' is the length of
   the sequence being sliced.  An `end' beyond `len' is clamped to `len';
   negative values of either bound have `len' added once and are then
   clamped to 0.  `start' is not clamped against `len'.

   Every argument is parenthesized in the expansion so that an expression
   argument (for example a conditional expression passed as `len') keeps
   its meaning.  The macro expands to plain statements, not a single
   expression, so it must be used as a statement of its own. */
#define ADJUST_INDICES(start, end, len)         \
    if ((end) > (len))                          \
        (end) = (len);                          \
    else if ((end) < 0) {                       \
        (end) += (len);                         \
        if ((end) < 0)                          \
            (end) = 0;                          \
    }                                           \
    if ((start) < 0) {                          \
        (start) += (len);                       \
        if ((start) < 0)                        \
            (start) = 0;                        \
    }
1705 | \r | |
1706 | Py_LOCAL_INLINE(Py_ssize_t)\r | |
1707 | string_find_internal(PyStringObject *self, PyObject *args, int dir)\r | |
1708 | {\r | |
1709 | PyObject *subobj;\r | |
1710 | const char *sub;\r | |
1711 | Py_ssize_t sub_len;\r | |
1712 | Py_ssize_t start=0, end=PY_SSIZE_T_MAX;\r | |
1713 | \r | |
1714 | if (!stringlib_parse_args_finds("find/rfind/index/rindex",\r | |
1715 | args, &subobj, &start, &end))\r | |
1716 | return -2;\r | |
1717 | \r | |
1718 | if (PyString_Check(subobj)) {\r | |
1719 | sub = PyString_AS_STRING(subobj);\r | |
1720 | sub_len = PyString_GET_SIZE(subobj);\r | |
1721 | }\r | |
1722 | #ifdef Py_USING_UNICODE\r | |
1723 | else if (PyUnicode_Check(subobj))\r | |
1724 | return PyUnicode_Find(\r | |
1725 | (PyObject *)self, subobj, start, end, dir);\r | |
1726 | #endif\r | |
1727 | else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))\r | |
1728 | /* XXX - the "expected a character buffer object" is pretty\r | |
1729 | confusing for a non-expert. remap to something else ? */\r | |
1730 | return -2;\r | |
1731 | \r | |
1732 | if (dir > 0)\r | |
1733 | return stringlib_find_slice(\r | |
1734 | PyString_AS_STRING(self), PyString_GET_SIZE(self),\r | |
1735 | sub, sub_len, start, end);\r | |
1736 | else\r | |
1737 | return stringlib_rfind_slice(\r | |
1738 | PyString_AS_STRING(self), PyString_GET_SIZE(self),\r | |
1739 | sub, sub_len, start, end);\r | |
1740 | }\r | |
1741 | \r | |
1742 | \r | |
1743 | PyDoc_STRVAR(find__doc__,\r | |
1744 | "S.find(sub [,start [,end]]) -> int\n\\r | |
1745 | \n\\r | |
1746 | Return the lowest index in S where substring sub is found,\n\\r | |
1747 | such that sub is contained within S[start:end]. Optional\n\\r | |
1748 | arguments start and end are interpreted as in slice notation.\n\\r | |
1749 | \n\\r | |
1750 | Return -1 on failure.");\r | |
1751 | \r | |
1752 | static PyObject *\r | |
1753 | string_find(PyStringObject *self, PyObject *args)\r | |
1754 | {\r | |
1755 | Py_ssize_t result = string_find_internal(self, args, +1);\r | |
1756 | if (result == -2)\r | |
1757 | return NULL;\r | |
1758 | return PyInt_FromSsize_t(result);\r | |
1759 | }\r | |
1760 | \r | |
1761 | \r | |
1762 | PyDoc_STRVAR(index__doc__,\r | |
1763 | "S.index(sub [,start [,end]]) -> int\n\\r | |
1764 | \n\\r | |
1765 | Like S.find() but raise ValueError when the substring is not found.");\r | |
1766 | \r | |
1767 | static PyObject *\r | |
1768 | string_index(PyStringObject *self, PyObject *args)\r | |
1769 | {\r | |
1770 | Py_ssize_t result = string_find_internal(self, args, +1);\r | |
1771 | if (result == -2)\r | |
1772 | return NULL;\r | |
1773 | if (result == -1) {\r | |
1774 | PyErr_SetString(PyExc_ValueError,\r | |
1775 | "substring not found");\r | |
1776 | return NULL;\r | |
1777 | }\r | |
1778 | return PyInt_FromSsize_t(result);\r | |
1779 | }\r | |
1780 | \r | |
1781 | \r | |
1782 | PyDoc_STRVAR(rfind__doc__,\r | |
1783 | "S.rfind(sub [,start [,end]]) -> int\n\\r | |
1784 | \n\\r | |
1785 | Return the highest index in S where substring sub is found,\n\\r | |
1786 | such that sub is contained within S[start:end]. Optional\n\\r | |
1787 | arguments start and end are interpreted as in slice notation.\n\\r | |
1788 | \n\\r | |
1789 | Return -1 on failure.");\r | |
1790 | \r | |
1791 | static PyObject *\r | |
1792 | string_rfind(PyStringObject *self, PyObject *args)\r | |
1793 | {\r | |
1794 | Py_ssize_t result = string_find_internal(self, args, -1);\r | |
1795 | if (result == -2)\r | |
1796 | return NULL;\r | |
1797 | return PyInt_FromSsize_t(result);\r | |
1798 | }\r | |
1799 | \r | |
1800 | \r | |
1801 | PyDoc_STRVAR(rindex__doc__,\r | |
1802 | "S.rindex(sub [,start [,end]]) -> int\n\\r | |
1803 | \n\\r | |
1804 | Like S.rfind() but raise ValueError when the substring is not found.");\r | |
1805 | \r | |
1806 | static PyObject *\r | |
1807 | string_rindex(PyStringObject *self, PyObject *args)\r | |
1808 | {\r | |
1809 | Py_ssize_t result = string_find_internal(self, args, -1);\r | |
1810 | if (result == -2)\r | |
1811 | return NULL;\r | |
1812 | if (result == -1) {\r | |
1813 | PyErr_SetString(PyExc_ValueError,\r | |
1814 | "substring not found");\r | |
1815 | return NULL;\r | |
1816 | }\r | |
1817 | return PyInt_FromSsize_t(result);\r | |
1818 | }\r | |
1819 | \r | |
1820 | \r | |
1821 | Py_LOCAL_INLINE(PyObject *)\r | |
1822 | do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)\r | |
1823 | {\r | |
1824 | char *s = PyString_AS_STRING(self);\r | |
1825 | Py_ssize_t len = PyString_GET_SIZE(self);\r | |
1826 | char *sep = PyString_AS_STRING(sepobj);\r | |
1827 | Py_ssize_t seplen = PyString_GET_SIZE(sepobj);\r | |
1828 | Py_ssize_t i, j;\r | |
1829 | \r | |
1830 | i = 0;\r | |
1831 | if (striptype != RIGHTSTRIP) {\r | |
1832 | while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {\r | |
1833 | i++;\r | |
1834 | }\r | |
1835 | }\r | |
1836 | \r | |
1837 | j = len;\r | |
1838 | if (striptype != LEFTSTRIP) {\r | |
1839 | do {\r | |
1840 | j--;\r | |
1841 | } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));\r | |
1842 | j++;\r | |
1843 | }\r | |
1844 | \r | |
1845 | if (i == 0 && j == len && PyString_CheckExact(self)) {\r | |
1846 | Py_INCREF(self);\r | |
1847 | return (PyObject*)self;\r | |
1848 | }\r | |
1849 | else\r | |
1850 | return PyString_FromStringAndSize(s+i, j-i);\r | |
1851 | }\r | |
1852 | \r | |
1853 | \r | |
1854 | Py_LOCAL_INLINE(PyObject *)\r | |
1855 | do_strip(PyStringObject *self, int striptype)\r | |
1856 | {\r | |
1857 | char *s = PyString_AS_STRING(self);\r | |
1858 | Py_ssize_t len = PyString_GET_SIZE(self), i, j;\r | |
1859 | \r | |
1860 | i = 0;\r | |
1861 | if (striptype != RIGHTSTRIP) {\r | |
1862 | while (i < len && isspace(Py_CHARMASK(s[i]))) {\r | |
1863 | i++;\r | |
1864 | }\r | |
1865 | }\r | |
1866 | \r | |
1867 | j = len;\r | |
1868 | if (striptype != LEFTSTRIP) {\r | |
1869 | do {\r | |
1870 | j--;\r | |
1871 | } while (j >= i && isspace(Py_CHARMASK(s[j])));\r | |
1872 | j++;\r | |
1873 | }\r | |
1874 | \r | |
1875 | if (i == 0 && j == len && PyString_CheckExact(self)) {\r | |
1876 | Py_INCREF(self);\r | |
1877 | return (PyObject*)self;\r | |
1878 | }\r | |
1879 | else\r | |
1880 | return PyString_FromStringAndSize(s+i, j-i);\r | |
1881 | }\r | |
1882 | \r | |
1883 | \r | |
1884 | Py_LOCAL_INLINE(PyObject *)\r | |
1885 | do_argstrip(PyStringObject *self, int striptype, PyObject *args)\r | |
1886 | {\r | |
1887 | PyObject *sep = NULL;\r | |
1888 | \r | |
1889 | if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))\r | |
1890 | return NULL;\r | |
1891 | \r | |
1892 | if (sep != NULL && sep != Py_None) {\r | |
1893 | if (PyString_Check(sep))\r | |
1894 | return do_xstrip(self, striptype, sep);\r | |
1895 | #ifdef Py_USING_UNICODE\r | |
1896 | else if (PyUnicode_Check(sep)) {\r | |
1897 | PyObject *uniself = PyUnicode_FromObject((PyObject *)self);\r | |
1898 | PyObject *res;\r | |
1899 | if (uniself==NULL)\r | |
1900 | return NULL;\r | |
1901 | res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,\r | |
1902 | striptype, sep);\r | |
1903 | Py_DECREF(uniself);\r | |
1904 | return res;\r | |
1905 | }\r | |
1906 | #endif\r | |
1907 | PyErr_Format(PyExc_TypeError,\r | |
1908 | #ifdef Py_USING_UNICODE\r | |
1909 | "%s arg must be None, str or unicode",\r | |
1910 | #else\r | |
1911 | "%s arg must be None or str",\r | |
1912 | #endif\r | |
1913 | STRIPNAME(striptype));\r | |
1914 | return NULL;\r | |
1915 | }\r | |
1916 | \r | |
1917 | return do_strip(self, striptype);\r | |
1918 | }\r | |
1919 | \r | |
1920 | \r | |
1921 | PyDoc_STRVAR(strip__doc__,\r | |
1922 | "S.strip([chars]) -> string or unicode\n\\r | |
1923 | \n\\r | |
1924 | Return a copy of the string S with leading and trailing\n\\r | |
1925 | whitespace removed.\n\\r | |
1926 | If chars is given and not None, remove characters in chars instead.\n\\r | |
1927 | If chars is unicode, S will be converted to unicode before stripping");\r | |
1928 | \r | |
1929 | static PyObject *\r | |
1930 | string_strip(PyStringObject *self, PyObject *args)\r | |
1931 | {\r | |
1932 | if (PyTuple_GET_SIZE(args) == 0)\r | |
1933 | return do_strip(self, BOTHSTRIP); /* Common case */\r | |
1934 | else\r | |
1935 | return do_argstrip(self, BOTHSTRIP, args);\r | |
1936 | }\r | |
1937 | \r | |
1938 | \r | |
1939 | PyDoc_STRVAR(lstrip__doc__,\r | |
1940 | "S.lstrip([chars]) -> string or unicode\n\\r | |
1941 | \n\\r | |
1942 | Return a copy of the string S with leading whitespace removed.\n\\r | |
1943 | If chars is given and not None, remove characters in chars instead.\n\\r | |
1944 | If chars is unicode, S will be converted to unicode before stripping");\r | |
1945 | \r | |
1946 | static PyObject *\r | |
1947 | string_lstrip(PyStringObject *self, PyObject *args)\r | |
1948 | {\r | |
1949 | if (PyTuple_GET_SIZE(args) == 0)\r | |
1950 | return do_strip(self, LEFTSTRIP); /* Common case */\r | |
1951 | else\r | |
1952 | return do_argstrip(self, LEFTSTRIP, args);\r | |
1953 | }\r | |
1954 | \r | |
1955 | \r | |
1956 | PyDoc_STRVAR(rstrip__doc__,\r | |
1957 | "S.rstrip([chars]) -> string or unicode\n\\r | |
1958 | \n\\r | |
1959 | Return a copy of the string S with trailing whitespace removed.\n\\r | |
1960 | If chars is given and not None, remove characters in chars instead.\n\\r | |
1961 | If chars is unicode, S will be converted to unicode before stripping");\r | |
1962 | \r | |
1963 | static PyObject *\r | |
1964 | string_rstrip(PyStringObject *self, PyObject *args)\r | |
1965 | {\r | |
1966 | if (PyTuple_GET_SIZE(args) == 0)\r | |
1967 | return do_strip(self, RIGHTSTRIP); /* Common case */\r | |
1968 | else\r | |
1969 | return do_argstrip(self, RIGHTSTRIP, args);\r | |
1970 | }\r | |
1971 | \r | |
1972 | \r | |
1973 | PyDoc_STRVAR(lower__doc__,\r | |
1974 | "S.lower() -> string\n\\r | |
1975 | \n\\r | |
1976 | Return a copy of the string S converted to lowercase.");\r | |
1977 | \r | |
1978 | /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */\r | |
1979 | #ifndef _tolower\r | |
1980 | #define _tolower tolower\r | |
1981 | #endif\r | |
1982 | \r | |
1983 | static PyObject *\r | |
1984 | string_lower(PyStringObject *self)\r | |
1985 | {\r | |
1986 | char *s;\r | |
1987 | Py_ssize_t i, n = PyString_GET_SIZE(self);\r | |
1988 | PyObject *newobj;\r | |
1989 | \r | |
1990 | newobj = PyString_FromStringAndSize(NULL, n);\r | |
1991 | if (!newobj)\r | |
1992 | return NULL;\r | |
1993 | \r | |
1994 | s = PyString_AS_STRING(newobj);\r | |
1995 | \r | |
1996 | Py_MEMCPY(s, PyString_AS_STRING(self), n);\r | |
1997 | \r | |
1998 | for (i = 0; i < n; i++) {\r | |
1999 | int c = Py_CHARMASK(s[i]);\r | |
2000 | if (isupper(c))\r | |
2001 | s[i] = _tolower(c);\r | |
2002 | }\r | |
2003 | \r | |
2004 | return newobj;\r | |
2005 | }\r | |
2006 | \r | |
2007 | PyDoc_STRVAR(upper__doc__,\r | |
2008 | "S.upper() -> string\n\\r | |
2009 | \n\\r | |
2010 | Return a copy of the string S converted to uppercase.");\r | |
2011 | \r | |
2012 | #ifndef _toupper\r | |
2013 | #define _toupper toupper\r | |
2014 | #endif\r | |
2015 | \r | |
2016 | static PyObject *\r | |
2017 | string_upper(PyStringObject *self)\r | |
2018 | {\r | |
2019 | char *s;\r | |
2020 | Py_ssize_t i, n = PyString_GET_SIZE(self);\r | |
2021 | PyObject *newobj;\r | |
2022 | \r | |
2023 | newobj = PyString_FromStringAndSize(NULL, n);\r | |
2024 | if (!newobj)\r | |
2025 | return NULL;\r | |
2026 | \r | |
2027 | s = PyString_AS_STRING(newobj);\r | |
2028 | \r | |
2029 | Py_MEMCPY(s, PyString_AS_STRING(self), n);\r | |
2030 | \r | |
2031 | for (i = 0; i < n; i++) {\r | |
2032 | int c = Py_CHARMASK(s[i]);\r | |
2033 | if (islower(c))\r | |
2034 | s[i] = _toupper(c);\r | |
2035 | }\r | |
2036 | \r | |
2037 | return newobj;\r | |
2038 | }\r | |
2039 | \r | |
2040 | PyDoc_STRVAR(title__doc__,\r | |
2041 | "S.title() -> string\n\\r | |
2042 | \n\\r | |
2043 | Return a titlecased version of S, i.e. words start with uppercase\n\\r | |
2044 | characters, all remaining cased characters have lowercase.");\r | |
2045 | \r | |
2046 | static PyObject*\r | |
2047 | string_title(PyStringObject *self)\r | |
2048 | {\r | |
2049 | char *s = PyString_AS_STRING(self), *s_new;\r | |
2050 | Py_ssize_t i, n = PyString_GET_SIZE(self);\r | |
2051 | int previous_is_cased = 0;\r | |
2052 | PyObject *newobj;\r | |
2053 | \r | |
2054 | newobj = PyString_FromStringAndSize(NULL, n);\r | |
2055 | if (newobj == NULL)\r | |
2056 | return NULL;\r | |
2057 | s_new = PyString_AsString(newobj);\r | |
2058 | for (i = 0; i < n; i++) {\r | |
2059 | int c = Py_CHARMASK(*s++);\r | |
2060 | if (islower(c)) {\r | |
2061 | if (!previous_is_cased)\r | |
2062 | c = toupper(c);\r | |
2063 | previous_is_cased = 1;\r | |
2064 | } else if (isupper(c)) {\r | |
2065 | if (previous_is_cased)\r | |
2066 | c = tolower(c);\r | |
2067 | previous_is_cased = 1;\r | |
2068 | } else\r | |
2069 | previous_is_cased = 0;\r | |
2070 | *s_new++ = c;\r | |
2071 | }\r | |
2072 | return newobj;\r | |
2073 | }\r | |
2074 | \r | |
2075 | PyDoc_STRVAR(capitalize__doc__,\r | |
2076 | "S.capitalize() -> string\n\\r | |
2077 | \n\\r | |
2078 | Return a copy of the string S with only its first character\n\\r | |
2079 | capitalized.");\r | |
2080 | \r | |
2081 | static PyObject *\r | |
2082 | string_capitalize(PyStringObject *self)\r | |
2083 | {\r | |
2084 | char *s = PyString_AS_STRING(self), *s_new;\r | |
2085 | Py_ssize_t i, n = PyString_GET_SIZE(self);\r | |
2086 | PyObject *newobj;\r | |
2087 | \r | |
2088 | newobj = PyString_FromStringAndSize(NULL, n);\r | |
2089 | if (newobj == NULL)\r | |
2090 | return NULL;\r | |
2091 | s_new = PyString_AsString(newobj);\r | |
2092 | if (0 < n) {\r | |
2093 | int c = Py_CHARMASK(*s++);\r | |
2094 | if (islower(c))\r | |
2095 | *s_new = toupper(c);\r | |
2096 | else\r | |
2097 | *s_new = c;\r | |
2098 | s_new++;\r | |
2099 | }\r | |
2100 | for (i = 1; i < n; i++) {\r | |
2101 | int c = Py_CHARMASK(*s++);\r | |
2102 | if (isupper(c))\r | |
2103 | *s_new = tolower(c);\r | |
2104 | else\r | |
2105 | *s_new = c;\r | |
2106 | s_new++;\r | |
2107 | }\r | |
2108 | return newobj;\r | |
2109 | }\r | |
2110 | \r | |
2111 | \r | |
2112 | PyDoc_STRVAR(count__doc__,\r | |
2113 | "S.count(sub[, start[, end]]) -> int\n\\r | |
2114 | \n\\r | |
2115 | Return the number of non-overlapping occurrences of substring sub in\n\\r | |
2116 | string S[start:end]. Optional arguments start and end are interpreted\n\\r | |
2117 | as in slice notation.");\r | |
2118 | \r | |
2119 | static PyObject *\r | |
2120 | string_count(PyStringObject *self, PyObject *args)\r | |
2121 | {\r | |
2122 | PyObject *sub_obj;\r | |
2123 | const char *str = PyString_AS_STRING(self), *sub;\r | |
2124 | Py_ssize_t sub_len;\r | |
2125 | Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;\r | |
2126 | \r | |
2127 | if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))\r | |
2128 | return NULL;\r | |
2129 | \r | |
2130 | if (PyString_Check(sub_obj)) {\r | |
2131 | sub = PyString_AS_STRING(sub_obj);\r | |
2132 | sub_len = PyString_GET_SIZE(sub_obj);\r | |
2133 | }\r | |
2134 | #ifdef Py_USING_UNICODE\r | |
2135 | else if (PyUnicode_Check(sub_obj)) {\r | |
2136 | Py_ssize_t count;\r | |
2137 | count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);\r | |
2138 | if (count == -1)\r | |
2139 | return NULL;\r | |
2140 | else\r | |
2141 | return PyInt_FromSsize_t(count);\r | |
2142 | }\r | |
2143 | #endif\r | |
2144 | else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))\r | |
2145 | return NULL;\r | |
2146 | \r | |
2147 | ADJUST_INDICES(start, end, PyString_GET_SIZE(self));\r | |
2148 | \r | |
2149 | return PyInt_FromSsize_t(\r | |
2150 | stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)\r | |
2151 | );\r | |
2152 | }\r | |
2153 | \r | |
2154 | PyDoc_STRVAR(swapcase__doc__,\r | |
2155 | "S.swapcase() -> string\n\\r | |
2156 | \n\\r | |
2157 | Return a copy of the string S with uppercase characters\n\\r | |
2158 | converted to lowercase and vice versa.");\r | |
2159 | \r | |
2160 | static PyObject *\r | |
2161 | string_swapcase(PyStringObject *self)\r | |
2162 | {\r | |
2163 | char *s = PyString_AS_STRING(self), *s_new;\r | |
2164 | Py_ssize_t i, n = PyString_GET_SIZE(self);\r | |
2165 | PyObject *newobj;\r | |
2166 | \r | |
2167 | newobj = PyString_FromStringAndSize(NULL, n);\r | |
2168 | if (newobj == NULL)\r | |
2169 | return NULL;\r | |
2170 | s_new = PyString_AsString(newobj);\r | |
2171 | for (i = 0; i < n; i++) {\r | |
2172 | int c = Py_CHARMASK(*s++);\r | |
2173 | if (islower(c)) {\r | |
2174 | *s_new = toupper(c);\r | |
2175 | }\r | |
2176 | else if (isupper(c)) {\r | |
2177 | *s_new = tolower(c);\r | |
2178 | }\r | |
2179 | else\r | |
2180 | *s_new = c;\r | |
2181 | s_new++;\r | |
2182 | }\r | |
2183 | return newobj;\r | |
2184 | }\r | |
2185 | \r | |
2186 | \r | |
2187 | PyDoc_STRVAR(translate__doc__,\r | |
2188 | "S.translate(table [,deletechars]) -> string\n\\r | |
2189 | \n\\r | |
2190 | Return a copy of the string S, where all characters occurring\n\\r | |
2191 | in the optional argument deletechars are removed, and the\n\\r | |
2192 | remaining characters have been mapped through the given\n\\r | |
2193 | translation table, which must be a string of length 256 or None.\n\\r | |
2194 | If the table argument is None, no translation is applied and\n\\r | |
2195 | the operation simply removes the characters in deletechars.");\r | |
2196 | \r | |
2197 | static PyObject *\r | |
2198 | string_translate(PyStringObject *self, PyObject *args)\r | |
2199 | {\r | |
2200 | register char *input, *output;\r | |
2201 | const char *table;\r | |
2202 | register Py_ssize_t i, c, changed = 0;\r | |
2203 | PyObject *input_obj = (PyObject*)self;\r | |
2204 | const char *output_start, *del_table=NULL;\r | |
2205 | Py_ssize_t inlen, tablen, dellen = 0;\r | |
2206 | PyObject *result;\r | |
2207 | int trans_table[256];\r | |
2208 | PyObject *tableobj, *delobj = NULL;\r | |
2209 | \r | |
2210 | if (!PyArg_UnpackTuple(args, "translate", 1, 2,\r | |
2211 | &tableobj, &delobj))\r | |
2212 | return NULL;\r | |
2213 | \r | |
2214 | if (PyString_Check(tableobj)) {\r | |
2215 | table = PyString_AS_STRING(tableobj);\r | |
2216 | tablen = PyString_GET_SIZE(tableobj);\r | |
2217 | }\r | |
2218 | else if (tableobj == Py_None) {\r | |
2219 | table = NULL;\r | |
2220 | tablen = 256;\r | |
2221 | }\r | |
2222 | #ifdef Py_USING_UNICODE\r | |
2223 | else if (PyUnicode_Check(tableobj)) {\r | |
2224 | /* Unicode .translate() does not support the deletechars\r | |
2225 | parameter; instead a mapping to None will cause characters\r | |
2226 | to be deleted. */\r | |
2227 | if (delobj != NULL) {\r | |
2228 | PyErr_SetString(PyExc_TypeError,\r | |
2229 | "deletions are implemented differently for unicode");\r | |
2230 | return NULL;\r | |
2231 | }\r | |
2232 | return PyUnicode_Translate((PyObject *)self, tableobj, NULL);\r | |
2233 | }\r | |
2234 | #endif\r | |
2235 | else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))\r | |
2236 | return NULL;\r | |
2237 | \r | |
2238 | if (tablen != 256) {\r | |
2239 | PyErr_SetString(PyExc_ValueError,\r | |
2240 | "translation table must be 256 characters long");\r | |
2241 | return NULL;\r | |
2242 | }\r | |
2243 | \r | |
2244 | if (delobj != NULL) {\r | |
2245 | if (PyString_Check(delobj)) {\r | |
2246 | del_table = PyString_AS_STRING(delobj);\r | |
2247 | dellen = PyString_GET_SIZE(delobj);\r | |
2248 | }\r | |
2249 | #ifdef Py_USING_UNICODE\r | |
2250 | else if (PyUnicode_Check(delobj)) {\r | |
2251 | PyErr_SetString(PyExc_TypeError,\r | |
2252 | "deletions are implemented differently for unicode");\r | |
2253 | return NULL;\r | |
2254 | }\r | |
2255 | #endif\r | |
2256 | else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))\r | |
2257 | return NULL;\r | |
2258 | }\r | |
2259 | else {\r | |
2260 | del_table = NULL;\r | |
2261 | dellen = 0;\r | |
2262 | }\r | |
2263 | \r | |
2264 | inlen = PyString_GET_SIZE(input_obj);\r | |
2265 | result = PyString_FromStringAndSize((char *)NULL, inlen);\r | |
2266 | if (result == NULL)\r | |
2267 | return NULL;\r | |
2268 | output_start = output = PyString_AsString(result);\r | |
2269 | input = PyString_AS_STRING(input_obj);\r | |
2270 | \r | |
2271 | if (dellen == 0 && table != NULL) {\r | |
2272 | /* If no deletions are required, use faster code */\r | |
2273 | for (i = inlen; --i >= 0; ) {\r | |
2274 | c = Py_CHARMASK(*input++);\r | |
2275 | if (Py_CHARMASK((*output++ = table[c])) != c)\r | |
2276 | changed = 1;\r | |
2277 | }\r | |
2278 | if (changed || !PyString_CheckExact(input_obj))\r | |
2279 | return result;\r | |
2280 | Py_DECREF(result);\r | |
2281 | Py_INCREF(input_obj);\r | |
2282 | return input_obj;\r | |
2283 | }\r | |
2284 | \r | |
2285 | if (table == NULL) {\r | |
2286 | for (i = 0; i < 256; i++)\r | |
2287 | trans_table[i] = Py_CHARMASK(i);\r | |
2288 | } else {\r | |
2289 | for (i = 0; i < 256; i++)\r | |
2290 | trans_table[i] = Py_CHARMASK(table[i]);\r | |
2291 | }\r | |
2292 | \r | |
2293 | for (i = 0; i < dellen; i++)\r | |
2294 | trans_table[(int) Py_CHARMASK(del_table[i])] = -1;\r | |
2295 | \r | |
2296 | for (i = inlen; --i >= 0; ) {\r | |
2297 | c = Py_CHARMASK(*input++);\r | |
2298 | if (trans_table[c] != -1)\r | |
2299 | if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)\r | |
2300 | continue;\r | |
2301 | changed = 1;\r | |
2302 | }\r | |
2303 | if (!changed && PyString_CheckExact(input_obj)) {\r | |
2304 | Py_DECREF(result);\r | |
2305 | Py_INCREF(input_obj);\r | |
2306 | return input_obj;\r | |
2307 | }\r | |
2308 | /* Fix the size of the resulting string */\r | |
2309 | if (inlen > 0 && _PyString_Resize(&result, output - output_start))\r | |
2310 | return NULL;\r | |
2311 | return result;\r | |
2312 | }\r | |
2313 | \r | |
2314 | \r | |
2315 | /* find and count characters and substrings */\r | |
2316 | \r | |
/* Locate the first occurrence of byte `c` within the first `target_len`
   bytes of `target`; evaluates to a char * to the match or NULL. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2319 | \r | |
2320 | /* String ops must return a string. */\r | |
2321 | /* If the object is subclass of string, create a copy */\r | |
2322 | Py_LOCAL(PyStringObject *)\r | |
2323 | return_self(PyStringObject *self)\r | |
2324 | {\r | |
2325 | if (PyString_CheckExact(self)) {\r | |
2326 | Py_INCREF(self);\r | |
2327 | return self;\r | |
2328 | }\r | |
2329 | return (PyStringObject *)PyString_FromStringAndSize(\r | |
2330 | PyString_AS_STRING(self),\r | |
2331 | PyString_GET_SIZE(self));\r | |
2332 | }\r | |
2333 | \r | |
2334 | Py_LOCAL_INLINE(Py_ssize_t)\r | |
2335 | countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)\r | |
2336 | {\r | |
2337 | Py_ssize_t count=0;\r | |
2338 | const char *start=target;\r | |
2339 | const char *end=target+target_len;\r | |
2340 | \r | |
2341 | while ( (start=findchar(start, end-start, c)) != NULL ) {\r | |
2342 | count++;\r | |
2343 | if (count >= maxcount)\r | |
2344 | break;\r | |
2345 | start += 1;\r | |
2346 | }\r | |
2347 | return count;\r | |
2348 | }\r | |
2349 | \r | |
2350 | \r | |
2351 | /* Algorithms for different cases of string replacement */\r | |
2352 | \r | |
2353 | /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */\r | |
2354 | Py_LOCAL(PyStringObject *)\r | |
2355 | replace_interleave(PyStringObject *self,\r | |
2356 | const char *to_s, Py_ssize_t to_len,\r | |
2357 | Py_ssize_t maxcount)\r | |
2358 | {\r | |
2359 | char *self_s, *result_s;\r | |
2360 | Py_ssize_t self_len, result_len;\r | |
2361 | Py_ssize_t count, i, product;\r | |
2362 | PyStringObject *result;\r | |
2363 | \r | |
2364 | self_len = PyString_GET_SIZE(self);\r | |
2365 | \r | |
2366 | /* 1 at the end plus 1 after every character */\r | |
2367 | count = self_len+1;\r | |
2368 | if (maxcount < count)\r | |
2369 | count = maxcount;\r | |
2370 | \r | |
2371 | /* Check for overflow */\r | |
2372 | /* result_len = count * to_len + self_len; */\r | |
2373 | product = count * to_len;\r | |
2374 | if (product / to_len != count) {\r | |
2375 | PyErr_SetString(PyExc_OverflowError,\r | |
2376 | "replace string is too long");\r | |
2377 | return NULL;\r | |
2378 | }\r | |
2379 | result_len = product + self_len;\r | |
2380 | if (result_len < 0) {\r | |
2381 | PyErr_SetString(PyExc_OverflowError,\r | |
2382 | "replace string is too long");\r | |
2383 | return NULL;\r | |
2384 | }\r | |
2385 | \r | |
2386 | if (! (result = (PyStringObject *)\r | |
2387 | PyString_FromStringAndSize(NULL, result_len)) )\r | |
2388 | return NULL;\r | |
2389 | \r | |
2390 | self_s = PyString_AS_STRING(self);\r | |
2391 | result_s = PyString_AS_STRING(result);\r | |
2392 | \r | |
2393 | /* TODO: special case single character, which doesn't need memcpy */\r | |
2394 | \r | |
2395 | /* Lay the first one down (guaranteed this will occur) */\r | |
2396 | Py_MEMCPY(result_s, to_s, to_len);\r | |
2397 | result_s += to_len;\r | |
2398 | count -= 1;\r | |
2399 | \r | |
2400 | for (i=0; i<count; i++) {\r | |
2401 | *result_s++ = *self_s++;\r | |
2402 | Py_MEMCPY(result_s, to_s, to_len);\r | |
2403 | result_s += to_len;\r | |
2404 | }\r | |
2405 | \r | |
2406 | /* Copy the rest of the original string */\r | |
2407 | Py_MEMCPY(result_s, self_s, self_len-i);\r | |
2408 | \r | |
2409 | return result;\r | |
2410 | }\r | |
2411 | \r | |
2412 | /* Special case for deleting a single character */\r | |
2413 | /* len(self)>=1, len(from)==1, to="", maxcount>=1 */\r | |
2414 | Py_LOCAL(PyStringObject *)\r | |
2415 | replace_delete_single_character(PyStringObject *self,\r | |
2416 | char from_c, Py_ssize_t maxcount)\r | |
2417 | {\r | |
2418 | char *self_s, *result_s;\r | |
2419 | char *start, *next, *end;\r | |
2420 | Py_ssize_t self_len, result_len;\r | |
2421 | Py_ssize_t count;\r | |
2422 | PyStringObject *result;\r | |
2423 | \r | |
2424 | self_len = PyString_GET_SIZE(self);\r | |
2425 | self_s = PyString_AS_STRING(self);\r | |
2426 | \r | |
2427 | count = countchar(self_s, self_len, from_c, maxcount);\r | |
2428 | if (count == 0) {\r | |
2429 | return return_self(self);\r | |
2430 | }\r | |
2431 | \r | |
2432 | result_len = self_len - count; /* from_len == 1 */\r | |
2433 | assert(result_len>=0);\r | |
2434 | \r | |
2435 | if ( (result = (PyStringObject *)\r | |
2436 | PyString_FromStringAndSize(NULL, result_len)) == NULL)\r | |
2437 | return NULL;\r | |
2438 | result_s = PyString_AS_STRING(result);\r | |
2439 | \r | |
2440 | start = self_s;\r | |
2441 | end = self_s + self_len;\r | |
2442 | while (count-- > 0) {\r | |
2443 | next = findchar(start, end-start, from_c);\r | |
2444 | if (next == NULL)\r | |
2445 | break;\r | |
2446 | Py_MEMCPY(result_s, start, next-start);\r | |
2447 | result_s += (next-start);\r | |
2448 | start = next+1;\r | |
2449 | }\r | |
2450 | Py_MEMCPY(result_s, start, end-start);\r | |
2451 | \r | |
2452 | return result;\r | |
2453 | }\r | |
2454 | \r | |
2455 | /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */\r | |
2456 | \r | |
2457 | Py_LOCAL(PyStringObject *)\r | |
2458 | replace_delete_substring(PyStringObject *self,\r | |
2459 | const char *from_s, Py_ssize_t from_len,\r | |
2460 | Py_ssize_t maxcount) {\r | |
2461 | char *self_s, *result_s;\r | |
2462 | char *start, *next, *end;\r | |
2463 | Py_ssize_t self_len, result_len;\r | |
2464 | Py_ssize_t count, offset;\r | |
2465 | PyStringObject *result;\r | |
2466 | \r | |
2467 | self_len = PyString_GET_SIZE(self);\r | |
2468 | self_s = PyString_AS_STRING(self);\r | |
2469 | \r | |
2470 | count = stringlib_count(self_s, self_len,\r | |
2471 | from_s, from_len,\r | |
2472 | maxcount);\r | |
2473 | \r | |
2474 | if (count == 0) {\r | |
2475 | /* no matches */\r | |
2476 | return return_self(self);\r | |
2477 | }\r | |
2478 | \r | |
2479 | result_len = self_len - (count * from_len);\r | |
2480 | assert (result_len>=0);\r | |
2481 | \r | |
2482 | if ( (result = (PyStringObject *)\r | |
2483 | PyString_FromStringAndSize(NULL, result_len)) == NULL )\r | |
2484 | return NULL;\r | |
2485 | \r | |
2486 | result_s = PyString_AS_STRING(result);\r | |
2487 | \r | |
2488 | start = self_s;\r | |
2489 | end = self_s + self_len;\r | |
2490 | while (count-- > 0) {\r | |
2491 | offset = stringlib_find(start, end-start,\r | |
2492 | from_s, from_len,\r | |
2493 | 0);\r | |
2494 | if (offset == -1)\r | |
2495 | break;\r | |
2496 | next = start + offset;\r | |
2497 | \r | |
2498 | Py_MEMCPY(result_s, start, next-start);\r | |
2499 | \r | |
2500 | result_s += (next-start);\r | |
2501 | start = next+from_len;\r | |
2502 | }\r | |
2503 | Py_MEMCPY(result_s, start, end-start);\r | |
2504 | return result;\r | |
2505 | }\r | |
2506 | \r | |
2507 | /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */\r | |
2508 | Py_LOCAL(PyStringObject *)\r | |
2509 | replace_single_character_in_place(PyStringObject *self,\r | |
2510 | char from_c, char to_c,\r | |
2511 | Py_ssize_t maxcount)\r | |
2512 | {\r | |
2513 | char *self_s, *result_s, *start, *end, *next;\r | |
2514 | Py_ssize_t self_len;\r | |
2515 | PyStringObject *result;\r | |
2516 | \r | |
2517 | /* The result string will be the same size */\r | |
2518 | self_s = PyString_AS_STRING(self);\r | |
2519 | self_len = PyString_GET_SIZE(self);\r | |
2520 | \r | |
2521 | next = findchar(self_s, self_len, from_c);\r | |
2522 | \r | |
2523 | if (next == NULL) {\r | |
2524 | /* No matches; return the original string */\r | |
2525 | return return_self(self);\r | |
2526 | }\r | |
2527 | \r | |
2528 | /* Need to make a new string */\r | |
2529 | result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);\r | |
2530 | if (result == NULL)\r | |
2531 | return NULL;\r | |
2532 | result_s = PyString_AS_STRING(result);\r | |
2533 | Py_MEMCPY(result_s, self_s, self_len);\r | |
2534 | \r | |
2535 | /* change everything in-place, starting with this one */\r | |
2536 | start = result_s + (next-self_s);\r | |
2537 | *start = to_c;\r | |
2538 | start++;\r | |
2539 | end = result_s + self_len;\r | |
2540 | \r | |
2541 | while (--maxcount > 0) {\r | |
2542 | next = findchar(start, end-start, from_c);\r | |
2543 | if (next == NULL)\r | |
2544 | break;\r | |
2545 | *next = to_c;\r | |
2546 | start = next+1;\r | |
2547 | }\r | |
2548 | \r | |
2549 | return result;\r | |
2550 | }\r | |
2551 | \r | |
2552 | /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */\r | |
2553 | Py_LOCAL(PyStringObject *)\r | |
2554 | replace_substring_in_place(PyStringObject *self,\r | |
2555 | const char *from_s, Py_ssize_t from_len,\r | |
2556 | const char *to_s, Py_ssize_t to_len,\r | |
2557 | Py_ssize_t maxcount)\r | |
2558 | {\r | |
2559 | char *result_s, *start, *end;\r | |
2560 | char *self_s;\r | |
2561 | Py_ssize_t self_len, offset;\r | |
2562 | PyStringObject *result;\r | |
2563 | \r | |
2564 | /* The result string will be the same size */\r | |
2565 | \r | |
2566 | self_s = PyString_AS_STRING(self);\r | |
2567 | self_len = PyString_GET_SIZE(self);\r | |
2568 | \r | |
2569 | offset = stringlib_find(self_s, self_len,\r | |
2570 | from_s, from_len,\r | |
2571 | 0);\r | |
2572 | if (offset == -1) {\r | |
2573 | /* No matches; return the original string */\r | |
2574 | return return_self(self);\r | |
2575 | }\r | |
2576 | \r | |
2577 | /* Need to make a new string */\r | |
2578 | result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);\r | |
2579 | if (result == NULL)\r | |
2580 | return NULL;\r | |
2581 | result_s = PyString_AS_STRING(result);\r | |
2582 | Py_MEMCPY(result_s, self_s, self_len);\r | |
2583 | \r | |
2584 | /* change everything in-place, starting with this one */\r | |
2585 | start = result_s + offset;\r | |
2586 | Py_MEMCPY(start, to_s, from_len);\r | |
2587 | start += from_len;\r | |
2588 | end = result_s + self_len;\r | |
2589 | \r | |
2590 | while ( --maxcount > 0) {\r | |
2591 | offset = stringlib_find(start, end-start,\r | |
2592 | from_s, from_len,\r | |
2593 | 0);\r | |
2594 | if (offset==-1)\r | |
2595 | break;\r | |
2596 | Py_MEMCPY(start+offset, to_s, from_len);\r | |
2597 | start += offset+from_len;\r | |
2598 | }\r | |
2599 | \r | |
2600 | return result;\r | |
2601 | }\r | |
2602 | \r | |
2603 | /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */\r | |
2604 | Py_LOCAL(PyStringObject *)\r | |
2605 | replace_single_character(PyStringObject *self,\r | |
2606 | char from_c,\r | |
2607 | const char *to_s, Py_ssize_t to_len,\r | |
2608 | Py_ssize_t maxcount)\r | |
2609 | {\r | |
2610 | char *self_s, *result_s;\r | |
2611 | char *start, *next, *end;\r | |
2612 | Py_ssize_t self_len, result_len;\r | |
2613 | Py_ssize_t count, product;\r | |
2614 | PyStringObject *result;\r | |
2615 | \r | |
2616 | self_s = PyString_AS_STRING(self);\r | |
2617 | self_len = PyString_GET_SIZE(self);\r | |
2618 | \r | |
2619 | count = countchar(self_s, self_len, from_c, maxcount);\r | |
2620 | if (count == 0) {\r | |
2621 | /* no matches, return unchanged */\r | |
2622 | return return_self(self);\r | |
2623 | }\r | |
2624 | \r | |
2625 | /* use the difference between current and new, hence the "-1" */\r | |
2626 | /* result_len = self_len + count * (to_len-1) */\r | |
2627 | product = count * (to_len-1);\r | |
2628 | if (product / (to_len-1) != count) {\r | |
2629 | PyErr_SetString(PyExc_OverflowError, "replace string is too long");\r | |
2630 | return NULL;\r | |
2631 | }\r | |
2632 | result_len = self_len + product;\r | |
2633 | if (result_len < 0) {\r | |
2634 | PyErr_SetString(PyExc_OverflowError, "replace string is too long");\r | |
2635 | return NULL;\r | |
2636 | }\r | |
2637 | \r | |
2638 | if ( (result = (PyStringObject *)\r | |
2639 | PyString_FromStringAndSize(NULL, result_len)) == NULL)\r | |
2640 | return NULL;\r | |
2641 | result_s = PyString_AS_STRING(result);\r | |
2642 | \r | |
2643 | start = self_s;\r | |
2644 | end = self_s + self_len;\r | |
2645 | while (count-- > 0) {\r | |
2646 | next = findchar(start, end-start, from_c);\r | |
2647 | if (next == NULL)\r | |
2648 | break;\r | |
2649 | \r | |
2650 | if (next == start) {\r | |
2651 | /* replace with the 'to' */\r | |
2652 | Py_MEMCPY(result_s, to_s, to_len);\r | |
2653 | result_s += to_len;\r | |
2654 | start += 1;\r | |
2655 | } else {\r | |
2656 | /* copy the unchanged old then the 'to' */\r | |
2657 | Py_MEMCPY(result_s, start, next-start);\r | |
2658 | result_s += (next-start);\r | |
2659 | Py_MEMCPY(result_s, to_s, to_len);\r | |
2660 | result_s += to_len;\r | |
2661 | start = next+1;\r | |
2662 | }\r | |
2663 | }\r | |
2664 | /* Copy the remainder of the remaining string */\r | |
2665 | Py_MEMCPY(result_s, start, end-start);\r | |
2666 | \r | |
2667 | return result;\r | |
2668 | }\r | |
2669 | \r | |
2670 | /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */\r | |
2671 | Py_LOCAL(PyStringObject *)\r | |
2672 | replace_substring(PyStringObject *self,\r | |
2673 | const char *from_s, Py_ssize_t from_len,\r | |
2674 | const char *to_s, Py_ssize_t to_len,\r | |
2675 | Py_ssize_t maxcount) {\r | |
2676 | char *self_s, *result_s;\r | |
2677 | char *start, *next, *end;\r | |
2678 | Py_ssize_t self_len, result_len;\r | |
2679 | Py_ssize_t count, offset, product;\r | |
2680 | PyStringObject *result;\r | |
2681 | \r | |
2682 | self_s = PyString_AS_STRING(self);\r | |
2683 | self_len = PyString_GET_SIZE(self);\r | |
2684 | \r | |
2685 | count = stringlib_count(self_s, self_len,\r | |
2686 | from_s, from_len,\r | |
2687 | maxcount);\r | |
2688 | \r | |
2689 | if (count == 0) {\r | |
2690 | /* no matches, return unchanged */\r | |
2691 | return return_self(self);\r | |
2692 | }\r | |
2693 | \r | |
2694 | /* Check for overflow */\r | |
2695 | /* result_len = self_len + count * (to_len-from_len) */\r | |
2696 | product = count * (to_len-from_len);\r | |
2697 | if (product / (to_len-from_len) != count) {\r | |
2698 | PyErr_SetString(PyExc_OverflowError, "replace string is too long");\r | |
2699 | return NULL;\r | |
2700 | }\r | |
2701 | result_len = self_len + product;\r | |
2702 | if (result_len < 0) {\r | |
2703 | PyErr_SetString(PyExc_OverflowError, "replace string is too long");\r | |
2704 | return NULL;\r | |
2705 | }\r | |
2706 | \r | |
2707 | if ( (result = (PyStringObject *)\r | |
2708 | PyString_FromStringAndSize(NULL, result_len)) == NULL)\r | |
2709 | return NULL;\r | |
2710 | result_s = PyString_AS_STRING(result);\r | |
2711 | \r | |
2712 | start = self_s;\r | |
2713 | end = self_s + self_len;\r | |
2714 | while (count-- > 0) {\r | |
2715 | offset = stringlib_find(start, end-start,\r | |
2716 | from_s, from_len,\r | |
2717 | 0);\r | |
2718 | if (offset == -1)\r | |
2719 | break;\r | |
2720 | next = start+offset;\r | |
2721 | if (next == start) {\r | |
2722 | /* replace with the 'to' */\r | |
2723 | Py_MEMCPY(result_s, to_s, to_len);\r | |
2724 | result_s += to_len;\r | |
2725 | start += from_len;\r | |
2726 | } else {\r | |
2727 | /* copy the unchanged old then the 'to' */\r | |
2728 | Py_MEMCPY(result_s, start, next-start);\r | |
2729 | result_s += (next-start);\r | |
2730 | Py_MEMCPY(result_s, to_s, to_len);\r | |
2731 | result_s += to_len;\r | |
2732 | start = next+from_len;\r | |
2733 | }\r | |
2734 | }\r | |
2735 | /* Copy the remainder of the remaining string */\r | |
2736 | Py_MEMCPY(result_s, start, end-start);\r | |
2737 | \r | |
2738 | return result;\r | |
2739 | }\r | |
2740 | \r | |
2741 | \r | |
2742 | Py_LOCAL(PyStringObject *)\r | |
2743 | replace(PyStringObject *self,\r | |
2744 | const char *from_s, Py_ssize_t from_len,\r | |
2745 | const char *to_s, Py_ssize_t to_len,\r | |
2746 | Py_ssize_t maxcount)\r | |
2747 | {\r | |
2748 | if (maxcount < 0) {\r | |
2749 | maxcount = PY_SSIZE_T_MAX;\r | |
2750 | } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {\r | |
2751 | /* nothing to do; return the original string */\r | |
2752 | return return_self(self);\r | |
2753 | }\r | |
2754 | \r | |
2755 | if (maxcount == 0 ||\r | |
2756 | (from_len == 0 && to_len == 0)) {\r | |
2757 | /* nothing to do; return the original string */\r | |
2758 | return return_self(self);\r | |
2759 | }\r | |
2760 | \r | |
2761 | /* Handle zero-length special cases */\r | |
2762 | \r | |
2763 | if (from_len == 0) {\r | |
2764 | /* insert the 'to' string everywhere. */\r | |
2765 | /* >>> "Python".replace("", ".") */\r | |
2766 | /* '.P.y.t.h.o.n.' */\r | |
2767 | return replace_interleave(self, to_s, to_len, maxcount);\r | |
2768 | }\r | |
2769 | \r | |
2770 | /* Except for "".replace("", "A") == "A" there is no way beyond this */\r | |
2771 | /* point for an empty self string to generate a non-empty string */\r | |
2772 | /* Special case so the remaining code always gets a non-empty string */\r | |
2773 | if (PyString_GET_SIZE(self) == 0) {\r | |
2774 | return return_self(self);\r | |
2775 | }\r | |
2776 | \r | |
2777 | if (to_len == 0) {\r | |
2778 | /* delete all occurances of 'from' string */\r | |
2779 | if (from_len == 1) {\r | |
2780 | return replace_delete_single_character(\r | |
2781 | self, from_s[0], maxcount);\r | |
2782 | } else {\r | |
2783 | return replace_delete_substring(self, from_s, from_len, maxcount);\r | |
2784 | }\r | |
2785 | }\r | |
2786 | \r | |
2787 | /* Handle special case where both strings have the same length */\r | |
2788 | \r | |
2789 | if (from_len == to_len) {\r | |
2790 | if (from_len == 1) {\r | |
2791 | return replace_single_character_in_place(\r | |
2792 | self,\r | |
2793 | from_s[0],\r | |
2794 | to_s[0],\r | |
2795 | maxcount);\r | |
2796 | } else {\r | |
2797 | return replace_substring_in_place(\r | |
2798 | self, from_s, from_len, to_s, to_len, maxcount);\r | |
2799 | }\r | |
2800 | }\r | |
2801 | \r | |
2802 | /* Otherwise use the more generic algorithms */\r | |
2803 | if (from_len == 1) {\r | |
2804 | return replace_single_character(self, from_s[0],\r | |
2805 | to_s, to_len, maxcount);\r | |
2806 | } else {\r | |
2807 | /* len('from')>=2, len('to')>=1 */\r | |
2808 | return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);\r | |
2809 | }\r | |
2810 | }\r | |
2811 | \r | |
2812 | PyDoc_STRVAR(replace__doc__,\r | |
2813 | "S.replace(old, new[, count]) -> string\n\\r | |
2814 | \n\\r | |
2815 | Return a copy of string S with all occurrences of substring\n\\r | |
2816 | old replaced by new. If the optional argument count is\n\\r | |
2817 | given, only the first count occurrences are replaced.");\r | |
2818 | \r | |
2819 | static PyObject *\r | |
2820 | string_replace(PyStringObject *self, PyObject *args)\r | |
2821 | {\r | |
2822 | Py_ssize_t count = -1;\r | |
2823 | PyObject *from, *to;\r | |
2824 | const char *from_s, *to_s;\r | |
2825 | Py_ssize_t from_len, to_len;\r | |
2826 | \r | |
2827 | if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))\r | |
2828 | return NULL;\r | |
2829 | \r | |
2830 | if (PyString_Check(from)) {\r | |
2831 | from_s = PyString_AS_STRING(from);\r | |
2832 | from_len = PyString_GET_SIZE(from);\r | |
2833 | }\r | |
2834 | #ifdef Py_USING_UNICODE\r | |
2835 | if (PyUnicode_Check(from))\r | |
2836 | return PyUnicode_Replace((PyObject *)self,\r | |
2837 | from, to, count);\r | |
2838 | #endif\r | |
2839 | else if (PyObject_AsCharBuffer(from, &from_s, &from_len))\r | |
2840 | return NULL;\r | |
2841 | \r | |
2842 | if (PyString_Check(to)) {\r | |
2843 | to_s = PyString_AS_STRING(to);\r | |
2844 | to_len = PyString_GET_SIZE(to);\r | |
2845 | }\r | |
2846 | #ifdef Py_USING_UNICODE\r | |
2847 | else if (PyUnicode_Check(to))\r | |
2848 | return PyUnicode_Replace((PyObject *)self,\r | |
2849 | from, to, count);\r | |
2850 | #endif\r | |
2851 | else if (PyObject_AsCharBuffer(to, &to_s, &to_len))\r | |
2852 | return NULL;\r | |
2853 | \r | |
2854 | return (PyObject *)replace((PyStringObject *) self,\r | |
2855 | from_s, from_len,\r | |
2856 | to_s, to_len, count);\r | |
2857 | }\r | |
2858 | \r | |
2859 | /** End DALKE **/\r | |
2860 | \r | |
2861 | /* Matches the end (direction >= 0) or start (direction < 0) of self\r | |
2862 | * against substr, using the start and end arguments. Returns\r | |
2863 | * -1 on error, 0 if not found and 1 if found.\r | |
2864 | */\r | |
2865 | Py_LOCAL(int)\r | |
2866 | _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,\r | |
2867 | Py_ssize_t end, int direction)\r | |
2868 | {\r | |
2869 | Py_ssize_t len = PyString_GET_SIZE(self);\r | |
2870 | Py_ssize_t slen;\r | |
2871 | const char* sub;\r | |
2872 | const char* str;\r | |
2873 | \r | |
2874 | if (PyString_Check(substr)) {\r | |
2875 | sub = PyString_AS_STRING(substr);\r | |
2876 | slen = PyString_GET_SIZE(substr);\r | |
2877 | }\r | |
2878 | #ifdef Py_USING_UNICODE\r | |
2879 | else if (PyUnicode_Check(substr))\r | |
2880 | return PyUnicode_Tailmatch((PyObject *)self,\r | |
2881 | substr, start, end, direction);\r | |
2882 | #endif\r | |
2883 | else if (PyObject_AsCharBuffer(substr, &sub, &slen))\r | |
2884 | return -1;\r | |
2885 | str = PyString_AS_STRING(self);\r | |
2886 | \r | |
2887 | ADJUST_INDICES(start, end, len);\r | |
2888 | \r | |
2889 | if (direction < 0) {\r | |
2890 | /* startswith */\r | |
2891 | if (start+slen > len)\r | |
2892 | return 0;\r | |
2893 | } else {\r | |
2894 | /* endswith */\r | |
2895 | if (end-start < slen || start > len)\r | |
2896 | return 0;\r | |
2897 | \r | |
2898 | if (end-slen > start)\r | |
2899 | start = end - slen;\r | |
2900 | }\r | |
2901 | if (end-start >= slen)\r | |
2902 | return ! memcmp(str+start, sub, slen);\r | |
2903 | return 0;\r | |
2904 | }\r | |
2905 | \r | |
2906 | \r | |
2907 | PyDoc_STRVAR(startswith__doc__,\r | |
2908 | "S.startswith(prefix[, start[, end]]) -> bool\n\\r | |
2909 | \n\\r | |
2910 | Return True if S starts with the specified prefix, False otherwise.\n\\r | |
2911 | With optional start, test S beginning at that position.\n\\r | |
2912 | With optional end, stop comparing S at that position.\n\\r | |
2913 | prefix can also be a tuple of strings to try.");\r | |
2914 | \r | |
2915 | static PyObject *\r | |
2916 | string_startswith(PyStringObject *self, PyObject *args)\r | |
2917 | {\r | |
2918 | Py_ssize_t start = 0;\r | |
2919 | Py_ssize_t end = PY_SSIZE_T_MAX;\r | |
2920 | PyObject *subobj;\r | |
2921 | int result;\r | |
2922 | \r | |
2923 | if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))\r | |
2924 | return NULL;\r | |
2925 | if (PyTuple_Check(subobj)) {\r | |
2926 | Py_ssize_t i;\r | |
2927 | for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {\r | |
2928 | result = _string_tailmatch(self,\r | |
2929 | PyTuple_GET_ITEM(subobj, i),\r | |
2930 | start, end, -1);\r | |
2931 | if (result == -1)\r | |
2932 | return NULL;\r | |
2933 | else if (result) {\r | |
2934 | Py_RETURN_TRUE;\r | |
2935 | }\r | |
2936 | }\r | |
2937 | Py_RETURN_FALSE;\r | |
2938 | }\r | |
2939 | result = _string_tailmatch(self, subobj, start, end, -1);\r | |
2940 | if (result == -1) {\r | |
2941 | if (PyErr_ExceptionMatches(PyExc_TypeError))\r | |
2942 | PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "\r | |
2943 | "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);\r | |
2944 | return NULL;\r | |
2945 | }\r | |
2946 | else\r | |
2947 | return PyBool_FromLong(result);\r | |
2948 | }\r | |
2949 | \r | |
2950 | \r | |
2951 | PyDoc_STRVAR(endswith__doc__,\r | |
2952 | "S.endswith(suffix[, start[, end]]) -> bool\n\\r | |
2953 | \n\\r | |
2954 | Return True if S ends with the specified suffix, False otherwise.\n\\r | |
2955 | With optional start, test S beginning at that position.\n\\r | |
2956 | With optional end, stop comparing S at that position.\n\\r | |
2957 | suffix can also be a tuple of strings to try.");\r | |
2958 | \r | |
2959 | static PyObject *\r | |
2960 | string_endswith(PyStringObject *self, PyObject *args)\r | |
2961 | {\r | |
2962 | Py_ssize_t start = 0;\r | |
2963 | Py_ssize_t end = PY_SSIZE_T_MAX;\r | |
2964 | PyObject *subobj;\r | |
2965 | int result;\r | |
2966 | \r | |
2967 | if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))\r | |
2968 | return NULL;\r | |
2969 | if (PyTuple_Check(subobj)) {\r | |
2970 | Py_ssize_t i;\r | |
2971 | for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {\r | |
2972 | result = _string_tailmatch(self,\r | |
2973 | PyTuple_GET_ITEM(subobj, i),\r | |
2974 | start, end, +1);\r | |
2975 | if (result == -1)\r | |
2976 | return NULL;\r | |
2977 | else if (result) {\r | |
2978 | Py_RETURN_TRUE;\r | |
2979 | }\r | |
2980 | }\r | |
2981 | Py_RETURN_FALSE;\r | |
2982 | }\r | |
2983 | result = _string_tailmatch(self, subobj, start, end, +1);\r | |
2984 | if (result == -1) {\r | |
2985 | if (PyErr_ExceptionMatches(PyExc_TypeError))\r | |
2986 | PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "\r | |
2987 | "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);\r | |
2988 | return NULL;\r | |
2989 | }\r | |
2990 | else\r | |
2991 | return PyBool_FromLong(result);\r | |
2992 | }\r | |
2993 | \r | |
2994 | \r | |
2995 | PyDoc_STRVAR(encode__doc__,\r | |
2996 | "S.encode([encoding[,errors]]) -> object\n\\r | |
2997 | \n\\r | |
2998 | Encodes S using the codec registered for encoding. encoding defaults\n\\r | |
2999 | to the default encoding. errors may be given to set a different error\n\\r | |
3000 | handling scheme. Default is 'strict' meaning that encoding errors raise\n\\r | |
3001 | a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\\r | |
3002 | 'xmlcharrefreplace' as well as any other name registered with\n\\r | |
3003 | codecs.register_error that is able to handle UnicodeEncodeErrors.");\r | |
3004 | \r | |
3005 | static PyObject *\r | |
3006 | string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)\r | |
3007 | {\r | |
3008 | static char *kwlist[] = {"encoding", "errors", 0};\r | |
3009 | char *encoding = NULL;\r | |
3010 | char *errors = NULL;\r | |
3011 | PyObject *v;\r | |
3012 | \r | |
3013 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",\r | |
3014 | kwlist, &encoding, &errors))\r | |
3015 | return NULL;\r | |
3016 | v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);\r | |
3017 | if (v == NULL)\r | |
3018 | goto onError;\r | |
3019 | if (!PyString_Check(v) && !PyUnicode_Check(v)) {\r | |
3020 | PyErr_Format(PyExc_TypeError,\r | |
3021 | "encoder did not return a string/unicode object "\r | |
3022 | "(type=%.400s)",\r | |
3023 | Py_TYPE(v)->tp_name);\r | |
3024 | Py_DECREF(v);\r | |
3025 | return NULL;\r | |
3026 | }\r | |
3027 | return v;\r | |
3028 | \r | |
3029 | onError:\r | |
3030 | return NULL;\r | |
3031 | }\r | |
3032 | \r | |
3033 | \r | |
3034 | PyDoc_STRVAR(decode__doc__,\r | |
3035 | "S.decode([encoding[,errors]]) -> object\n\\r | |
3036 | \n\\r | |
3037 | Decodes S using the codec registered for encoding. encoding defaults\n\\r | |
3038 | to the default encoding. errors may be given to set a different error\n\\r | |
3039 | handling scheme. Default is 'strict' meaning that encoding errors raise\n\\r | |
3040 | a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\\r | |
3041 | as well as any other name registered with codecs.register_error that is\n\\r | |
3042 | able to handle UnicodeDecodeErrors.");\r | |
3043 | \r | |
3044 | static PyObject *\r | |
3045 | string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)\r | |
3046 | {\r | |
3047 | static char *kwlist[] = {"encoding", "errors", 0};\r | |
3048 | char *encoding = NULL;\r | |
3049 | char *errors = NULL;\r | |
3050 | PyObject *v;\r | |
3051 | \r | |
3052 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",\r | |
3053 | kwlist, &encoding, &errors))\r | |
3054 | return NULL;\r | |
3055 | v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);\r | |
3056 | if (v == NULL)\r | |
3057 | goto onError;\r | |
3058 | if (!PyString_Check(v) && !PyUnicode_Check(v)) {\r | |
3059 | PyErr_Format(PyExc_TypeError,\r | |
3060 | "decoder did not return a string/unicode object "\r | |
3061 | "(type=%.400s)",\r | |
3062 | Py_TYPE(v)->tp_name);\r | |
3063 | Py_DECREF(v);\r | |
3064 | return NULL;\r | |
3065 | }\r | |
3066 | return v;\r | |
3067 | \r | |
3068 | onError:\r | |
3069 | return NULL;\r | |
3070 | }\r | |
3071 | \r | |
3072 | \r | |
3073 | PyDoc_STRVAR(expandtabs__doc__,\r | |
3074 | "S.expandtabs([tabsize]) -> string\n\\r | |
3075 | \n\\r | |
3076 | Return a copy of S where all tab characters are expanded using spaces.\n\\r | |
3077 | If tabsize is not given, a tab size of 8 characters is assumed.");\r | |
3078 | \r | |
3079 | static PyObject*\r | |
3080 | string_expandtabs(PyStringObject *self, PyObject *args)\r | |
3081 | {\r | |
3082 | const char *e, *p, *qe;\r | |
3083 | char *q;\r | |
3084 | Py_ssize_t i, j, incr;\r | |
3085 | PyObject *u;\r | |
3086 | int tabsize = 8;\r | |
3087 | \r | |
3088 | if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))\r | |
3089 | return NULL;\r | |
3090 | \r | |
3091 | /* First pass: determine size of output string */\r | |
3092 | i = 0; /* chars up to and including most recent \n or \r */\r | |
3093 | j = 0; /* chars since most recent \n or \r (use in tab calculations) */\r | |
3094 | e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */\r | |
3095 | for (p = PyString_AS_STRING(self); p < e; p++) {\r | |
3096 | if (*p == '\t') {\r | |
3097 | if (tabsize > 0) {\r | |
3098 | incr = tabsize - (j % tabsize);\r | |
3099 | if (j > PY_SSIZE_T_MAX - incr)\r | |
3100 | goto overflow1;\r | |
3101 | j += incr;\r | |
3102 | }\r | |
3103 | }\r | |
3104 | else {\r | |
3105 | if (j > PY_SSIZE_T_MAX - 1)\r | |
3106 | goto overflow1;\r | |
3107 | j++;\r | |
3108 | if (*p == '\n' || *p == '\r') {\r | |
3109 | if (i > PY_SSIZE_T_MAX - j)\r | |
3110 | goto overflow1;\r | |
3111 | i += j;\r | |
3112 | j = 0;\r | |
3113 | }\r | |
3114 | }\r | |
3115 | }\r | |
3116 | \r | |
3117 | if (i > PY_SSIZE_T_MAX - j)\r | |
3118 | goto overflow1;\r | |
3119 | \r | |
3120 | /* Second pass: create output string and fill it */\r | |
3121 | u = PyString_FromStringAndSize(NULL, i + j);\r | |
3122 | if (!u)\r | |
3123 | return NULL;\r | |
3124 | \r | |
3125 | j = 0; /* same as in first pass */\r | |
3126 | q = PyString_AS_STRING(u); /* next output char */\r | |
3127 | qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */\r | |
3128 | \r | |
3129 | for (p = PyString_AS_STRING(self); p < e; p++) {\r | |
3130 | if (*p == '\t') {\r | |
3131 | if (tabsize > 0) {\r | |
3132 | i = tabsize - (j % tabsize);\r | |
3133 | j += i;\r | |
3134 | while (i--) {\r | |
3135 | if (q >= qe)\r | |
3136 | goto overflow2;\r | |
3137 | *q++ = ' ';\r | |
3138 | }\r | |
3139 | }\r | |
3140 | }\r | |
3141 | else {\r | |
3142 | if (q >= qe)\r | |
3143 | goto overflow2;\r | |
3144 | *q++ = *p;\r | |
3145 | j++;\r | |
3146 | if (*p == '\n' || *p == '\r')\r | |
3147 | j = 0;\r | |
3148 | }\r | |
3149 | }\r | |
3150 | \r | |
3151 | return u;\r | |
3152 | \r | |
3153 | overflow2:\r | |
3154 | Py_DECREF(u);\r | |
3155 | overflow1:\r | |
3156 | PyErr_SetString(PyExc_OverflowError, "new string is too long");\r | |
3157 | return NULL;\r | |
3158 | }\r | |
3159 | \r | |
3160 | Py_LOCAL_INLINE(PyObject *)\r | |
3161 | pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)\r | |
3162 | {\r | |
3163 | PyObject *u;\r | |
3164 | \r | |
3165 | if (left < 0)\r | |
3166 | left = 0;\r | |
3167 | if (right < 0)\r | |
3168 | right = 0;\r | |
3169 | \r | |
3170 | if (left == 0 && right == 0 && PyString_CheckExact(self)) {\r | |
3171 | Py_INCREF(self);\r | |
3172 | return (PyObject *)self;\r | |
3173 | }\r | |
3174 | \r | |
3175 | u = PyString_FromStringAndSize(NULL,\r | |
3176 | left + PyString_GET_SIZE(self) + right);\r | |
3177 | if (u) {\r | |
3178 | if (left)\r | |
3179 | memset(PyString_AS_STRING(u), fill, left);\r | |
3180 | Py_MEMCPY(PyString_AS_STRING(u) + left,\r | |
3181 | PyString_AS_STRING(self),\r | |
3182 | PyString_GET_SIZE(self));\r | |
3183 | if (right)\r | |
3184 | memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),\r | |
3185 | fill, right);\r | |
3186 | }\r | |
3187 | \r | |
3188 | return u;\r | |
3189 | }\r | |
3190 | \r | |
3191 | PyDoc_STRVAR(ljust__doc__,\r | |
3192 | "S.ljust(width[, fillchar]) -> string\n"\r | |
3193 | "\n"\r | |
3194 | "Return S left-justified in a string of length width. Padding is\n"\r | |
3195 | "done using the specified fill character (default is a space).");\r | |
3196 | \r | |
3197 | static PyObject *\r | |
3198 | string_ljust(PyStringObject *self, PyObject *args)\r | |
3199 | {\r | |
3200 | Py_ssize_t width;\r | |
3201 | char fillchar = ' ';\r | |
3202 | \r | |
3203 | if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))\r | |
3204 | return NULL;\r | |
3205 | \r | |
3206 | if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {\r | |
3207 | Py_INCREF(self);\r | |
3208 | return (PyObject*) self;\r | |
3209 | }\r | |
3210 | \r | |
3211 | return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);\r | |
3212 | }\r | |
3213 | \r | |
3214 | \r | |
3215 | PyDoc_STRVAR(rjust__doc__,\r | |
3216 | "S.rjust(width[, fillchar]) -> string\n"\r | |
3217 | "\n"\r | |
3218 | "Return S right-justified in a string of length width. Padding is\n"\r | |
3219 | "done using the specified fill character (default is a space)");\r | |
3220 | \r | |
3221 | static PyObject *\r | |
3222 | string_rjust(PyStringObject *self, PyObject *args)\r | |
3223 | {\r | |
3224 | Py_ssize_t width;\r | |
3225 | char fillchar = ' ';\r | |
3226 | \r | |
3227 | if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))\r | |
3228 | return NULL;\r | |
3229 | \r | |
3230 | if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {\r | |
3231 | Py_INCREF(self);\r | |
3232 | return (PyObject*) self;\r | |
3233 | }\r | |
3234 | \r | |
3235 | return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);\r | |
3236 | }\r | |
3237 | \r | |
3238 | \r | |
3239 | PyDoc_STRVAR(center__doc__,\r | |
3240 | "S.center(width[, fillchar]) -> string\n"\r | |
3241 | "\n"\r | |
3242 | "Return S centered in a string of length width. Padding is\n"\r | |
3243 | "done using the specified fill character (default is a space)");\r | |
3244 | \r | |
3245 | static PyObject *\r | |
3246 | string_center(PyStringObject *self, PyObject *args)\r | |
3247 | {\r | |
3248 | Py_ssize_t marg, left;\r | |
3249 | Py_ssize_t width;\r | |
3250 | char fillchar = ' ';\r | |
3251 | \r | |
3252 | if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))\r | |
3253 | return NULL;\r | |
3254 | \r | |
3255 | if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {\r | |
3256 | Py_INCREF(self);\r | |
3257 | return (PyObject*) self;\r | |
3258 | }\r | |
3259 | \r | |
3260 | marg = width - PyString_GET_SIZE(self);\r | |
3261 | left = marg / 2 + (marg & width & 1);\r | |
3262 | \r | |
3263 | return pad(self, left, marg - left, fillchar);\r | |
3264 | }\r | |
3265 | \r | |
3266 | PyDoc_STRVAR(zfill__doc__,\r | |
3267 | "S.zfill(width) -> string\n"\r | |
3268 | "\n"\r | |
3269 | "Pad a numeric string S with zeros on the left, to fill a field\n"\r | |
3270 | "of the specified width. The string S is never truncated.");\r | |
3271 | \r | |
3272 | static PyObject *\r | |
3273 | string_zfill(PyStringObject *self, PyObject *args)\r | |
3274 | {\r | |
3275 | Py_ssize_t fill;\r | |
3276 | PyObject *s;\r | |
3277 | char *p;\r | |
3278 | Py_ssize_t width;\r | |
3279 | \r | |
3280 | if (!PyArg_ParseTuple(args, "n:zfill", &width))\r | |
3281 | return NULL;\r | |
3282 | \r | |
3283 | if (PyString_GET_SIZE(self) >= width) {\r | |
3284 | if (PyString_CheckExact(self)) {\r | |
3285 | Py_INCREF(self);\r | |
3286 | return (PyObject*) self;\r | |
3287 | }\r | |
3288 | else\r | |
3289 | return PyString_FromStringAndSize(\r | |
3290 | PyString_AS_STRING(self),\r | |
3291 | PyString_GET_SIZE(self)\r | |
3292 | );\r | |
3293 | }\r | |
3294 | \r | |
3295 | fill = width - PyString_GET_SIZE(self);\r | |
3296 | \r | |
3297 | s = pad(self, fill, 0, '0');\r | |
3298 | \r | |
3299 | if (s == NULL)\r | |
3300 | return NULL;\r | |
3301 | \r | |
3302 | p = PyString_AS_STRING(s);\r | |
3303 | if (p[fill] == '+' || p[fill] == '-') {\r | |
3304 | /* move sign to beginning of string */\r | |
3305 | p[0] = p[fill];\r | |
3306 | p[fill] = '0';\r | |
3307 | }\r | |
3308 | \r | |
3309 | return (PyObject*) s;\r | |
3310 | }\r | |
3311 | \r | |
3312 | PyDoc_STRVAR(isspace__doc__,\r | |
3313 | "S.isspace() -> bool\n\\r | |
3314 | \n\\r | |
3315 | Return True if all characters in S are whitespace\n\\r | |
3316 | and there is at least one character in S, False otherwise.");\r | |
3317 | \r | |
3318 | static PyObject*\r | |
3319 | string_isspace(PyStringObject *self)\r | |
3320 | {\r | |
3321 | register const unsigned char *p\r | |
3322 | = (unsigned char *) PyString_AS_STRING(self);\r | |
3323 | register const unsigned char *e;\r | |
3324 | \r | |
3325 | /* Shortcut for single character strings */\r | |
3326 | if (PyString_GET_SIZE(self) == 1 &&\r | |
3327 | isspace(*p))\r | |
3328 | return PyBool_FromLong(1);\r | |
3329 | \r | |
3330 | /* Special case for empty strings */\r | |
3331 | if (PyString_GET_SIZE(self) == 0)\r | |
3332 | return PyBool_FromLong(0);\r | |
3333 | \r | |
3334 | e = p + PyString_GET_SIZE(self);\r | |
3335 | for (; p < e; p++) {\r | |
3336 | if (!isspace(*p))\r | |
3337 | return PyBool_FromLong(0);\r | |
3338 | }\r | |
3339 | return PyBool_FromLong(1);\r | |
3340 | }\r | |
3341 | \r | |
3342 | \r | |
3343 | PyDoc_STRVAR(isalpha__doc__,\r | |
3344 | "S.isalpha() -> bool\n\\r | |
3345 | \n\\r | |
3346 | Return True if all characters in S are alphabetic\n\\r | |
3347 | and there is at least one character in S, False otherwise.");\r | |
3348 | \r | |
3349 | static PyObject*\r | |
3350 | string_isalpha(PyStringObject *self)\r | |
3351 | {\r | |
3352 | register const unsigned char *p\r | |
3353 | = (unsigned char *) PyString_AS_STRING(self);\r | |
3354 | register const unsigned char *e;\r | |
3355 | \r | |
3356 | /* Shortcut for single character strings */\r | |
3357 | if (PyString_GET_SIZE(self) == 1 &&\r | |
3358 | isalpha(*p))\r | |
3359 | return PyBool_FromLong(1);\r | |
3360 | \r | |
3361 | /* Special case for empty strings */\r | |
3362 | if (PyString_GET_SIZE(self) == 0)\r | |
3363 | return PyBool_FromLong(0);\r | |
3364 | \r | |
3365 | e = p + PyString_GET_SIZE(self);\r | |
3366 | for (; p < e; p++) {\r | |
3367 | if (!isalpha(*p))\r | |
3368 | return PyBool_FromLong(0);\r | |
3369 | }\r | |
3370 | return PyBool_FromLong(1);\r | |
3371 | }\r | |
3372 | \r | |
3373 | \r | |
3374 | PyDoc_STRVAR(isalnum__doc__,\r | |
3375 | "S.isalnum() -> bool\n\\r | |
3376 | \n\\r | |
3377 | Return True if all characters in S are alphanumeric\n\\r | |
3378 | and there is at least one character in S, False otherwise.");\r | |
3379 | \r | |
3380 | static PyObject*\r | |
3381 | string_isalnum(PyStringObject *self)\r | |
3382 | {\r | |
3383 | register const unsigned char *p\r | |
3384 | = (unsigned char *) PyString_AS_STRING(self);\r | |
3385 | register const unsigned char *e;\r | |
3386 | \r | |
3387 | /* Shortcut for single character strings */\r | |
3388 | if (PyString_GET_SIZE(self) == 1 &&\r | |
3389 | isalnum(*p))\r | |
3390 | return PyBool_FromLong(1);\r | |
3391 | \r | |
3392 | /* Special case for empty strings */\r | |
3393 | if (PyString_GET_SIZE(self) == 0)\r | |
3394 | return PyBool_FromLong(0);\r | |
3395 | \r | |
3396 | e = p + PyString_GET_SIZE(self);\r | |
3397 | for (; p < e; p++) {\r | |
3398 | if (!isalnum(*p))\r | |
3399 | return PyBool_FromLong(0);\r | |
3400 | }\r | |
3401 | return PyBool_FromLong(1);\r | |
3402 | }\r | |
3403 | \r | |
3404 | \r | |
3405 | PyDoc_STRVAR(isdigit__doc__,\r | |
3406 | "S.isdigit() -> bool\n\\r | |
3407 | \n\\r | |
3408 | Return True if all characters in S are digits\n\\r | |
3409 | and there is at least one character in S, False otherwise.");\r | |
3410 | \r | |
3411 | static PyObject*\r | |
3412 | string_isdigit(PyStringObject *self)\r | |
3413 | {\r | |
3414 | register const unsigned char *p\r | |
3415 | = (unsigned char *) PyString_AS_STRING(self);\r | |
3416 | register const unsigned char *e;\r | |
3417 | \r | |
3418 | /* Shortcut for single character strings */\r | |
3419 | if (PyString_GET_SIZE(self) == 1 &&\r | |
3420 | isdigit(*p))\r | |
3421 | return PyBool_FromLong(1);\r | |
3422 | \r | |
3423 | /* Special case for empty strings */\r | |
3424 | if (PyString_GET_SIZE(self) == 0)\r | |
3425 | return PyBool_FromLong(0);\r | |
3426 | \r | |
3427 | e = p + PyString_GET_SIZE(self);\r | |
3428 | for (; p < e; p++) {\r | |
3429 | if (!isdigit(*p))\r | |
3430 | return PyBool_FromLong(0);\r | |
3431 | }\r | |
3432 | return PyBool_FromLong(1);\r | |
3433 | }\r | |
3434 | \r | |
3435 | \r | |
3436 | PyDoc_STRVAR(islower__doc__,\r | |
3437 | "S.islower() -> bool\n\\r | |
3438 | \n\\r | |
3439 | Return True if all cased characters in S are lowercase and there is\n\\r | |
3440 | at least one cased character in S, False otherwise.");\r | |
3441 | \r | |
3442 | static PyObject*\r | |
3443 | string_islower(PyStringObject *self)\r | |
3444 | {\r | |
3445 | register const unsigned char *p\r | |
3446 | = (unsigned char *) PyString_AS_STRING(self);\r | |
3447 | register const unsigned char *e;\r | |
3448 | int cased;\r | |
3449 | \r | |
3450 | /* Shortcut for single character strings */\r | |
3451 | if (PyString_GET_SIZE(self) == 1)\r | |
3452 | return PyBool_FromLong(islower(*p) != 0);\r | |
3453 | \r | |
3454 | /* Special case for empty strings */\r | |
3455 | if (PyString_GET_SIZE(self) == 0)\r | |
3456 | return PyBool_FromLong(0);\r | |
3457 | \r | |
3458 | e = p + PyString_GET_SIZE(self);\r | |
3459 | cased = 0;\r | |
3460 | for (; p < e; p++) {\r | |
3461 | if (isupper(*p))\r | |
3462 | return PyBool_FromLong(0);\r | |
3463 | else if (!cased && islower(*p))\r | |
3464 | cased = 1;\r | |
3465 | }\r | |
3466 | return PyBool_FromLong(cased);\r | |
3467 | }\r | |
3468 | \r | |
3469 | \r | |
3470 | PyDoc_STRVAR(isupper__doc__,\r | |
3471 | "S.isupper() -> bool\n\\r | |
3472 | \n\\r | |
3473 | Return True if all cased characters in S are uppercase and there is\n\\r | |
3474 | at least one cased character in S, False otherwise.");\r | |
3475 | \r | |
3476 | static PyObject*\r | |
3477 | string_isupper(PyStringObject *self)\r | |
3478 | {\r | |
3479 | register const unsigned char *p\r | |
3480 | = (unsigned char *) PyString_AS_STRING(self);\r | |
3481 | register const unsigned char *e;\r | |
3482 | int cased;\r | |
3483 | \r | |
3484 | /* Shortcut for single character strings */\r | |
3485 | if (PyString_GET_SIZE(self) == 1)\r | |
3486 | return PyBool_FromLong(isupper(*p) != 0);\r | |
3487 | \r | |
3488 | /* Special case for empty strings */\r | |
3489 | if (PyString_GET_SIZE(self) == 0)\r | |
3490 | return PyBool_FromLong(0);\r | |
3491 | \r | |
3492 | e = p + PyString_GET_SIZE(self);\r | |
3493 | cased = 0;\r | |
3494 | for (; p < e; p++) {\r | |
3495 | if (islower(*p))\r | |
3496 | return PyBool_FromLong(0);\r | |
3497 | else if (!cased && isupper(*p))\r | |
3498 | cased = 1;\r | |
3499 | }\r | |
3500 | return PyBool_FromLong(cased);\r | |
3501 | }\r | |
3502 | \r | |
3503 | \r | |
3504 | PyDoc_STRVAR(istitle__doc__,\r | |
3505 | "S.istitle() -> bool\n\\r | |
3506 | \n\\r | |
3507 | Return True if S is a titlecased string and there is at least one\n\\r | |
3508 | character in S, i.e. uppercase characters may only follow uncased\n\\r | |
3509 | characters and lowercase characters only cased ones. Return False\n\\r | |
3510 | otherwise.");\r | |
3511 | \r | |
3512 | static PyObject*\r | |
3513 | string_istitle(PyStringObject *self, PyObject *uncased)\r | |
3514 | {\r | |
3515 | register const unsigned char *p\r | |
3516 | = (unsigned char *) PyString_AS_STRING(self);\r | |
3517 | register const unsigned char *e;\r | |
3518 | int cased, previous_is_cased;\r | |
3519 | \r | |
3520 | /* Shortcut for single character strings */\r | |
3521 | if (PyString_GET_SIZE(self) == 1)\r | |
3522 | return PyBool_FromLong(isupper(*p) != 0);\r | |
3523 | \r | |
3524 | /* Special case for empty strings */\r | |
3525 | if (PyString_GET_SIZE(self) == 0)\r | |
3526 | return PyBool_FromLong(0);\r | |
3527 | \r | |
3528 | e = p + PyString_GET_SIZE(self);\r | |
3529 | cased = 0;\r | |
3530 | previous_is_cased = 0;\r | |
3531 | for (; p < e; p++) {\r | |
3532 | register const unsigned char ch = *p;\r | |
3533 | \r | |
3534 | if (isupper(ch)) {\r | |
3535 | if (previous_is_cased)\r | |
3536 | return PyBool_FromLong(0);\r | |
3537 | previous_is_cased = 1;\r | |
3538 | cased = 1;\r | |
3539 | }\r | |
3540 | else if (islower(ch)) {\r | |
3541 | if (!previous_is_cased)\r | |
3542 | return PyBool_FromLong(0);\r | |
3543 | previous_is_cased = 1;\r | |
3544 | cased = 1;\r | |
3545 | }\r | |
3546 | else\r | |
3547 | previous_is_cased = 0;\r | |
3548 | }\r | |
3549 | return PyBool_FromLong(cased);\r | |
3550 | }\r | |
3551 | \r | |
3552 | \r | |
3553 | PyDoc_STRVAR(splitlines__doc__,\r | |
3554 | "S.splitlines(keepends=False) -> list of strings\n\\r | |
3555 | \n\\r | |
3556 | Return a list of the lines in S, breaking at line boundaries.\n\\r | |
3557 | Line breaks are not included in the resulting list unless keepends\n\\r | |
3558 | is given and true.");\r | |
3559 | \r | |
3560 | static PyObject*\r | |
3561 | string_splitlines(PyStringObject *self, PyObject *args)\r | |
3562 | {\r | |
3563 | int keepends = 0;\r | |
3564 | \r | |
3565 | if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))\r | |
3566 | return NULL;\r | |
3567 | \r | |
3568 | return stringlib_splitlines(\r | |
3569 | (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),\r | |
3570 | keepends\r | |
3571 | );\r | |
3572 | }\r | |
3573 | \r | |
3574 | PyDoc_STRVAR(sizeof__doc__,\r | |
3575 | "S.__sizeof__() -> size of S in memory, in bytes");\r | |
3576 | \r | |
3577 | static PyObject *\r | |
3578 | string_sizeof(PyStringObject *v)\r | |
3579 | {\r | |
3580 | Py_ssize_t res;\r | |
3581 | res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;\r | |
3582 | return PyInt_FromSsize_t(res);\r | |
3583 | }\r | |
3584 | \r | |
3585 | static PyObject *\r | |
3586 | string_getnewargs(PyStringObject *v)\r | |
3587 | {\r | |
3588 | return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));\r | |
3589 | }\r | |
3590 | \r | |
3591 | \r | |
3592 | #include "stringlib/string_format.h"\r | |
3593 | \r | |
3594 | PyDoc_STRVAR(format__doc__,\r | |
3595 | "S.format(*args, **kwargs) -> string\n\\r | |
3596 | \n\\r | |
3597 | Return a formatted version of S, using substitutions from args and kwargs.\n\\r | |
3598 | The substitutions are identified by braces ('{' and '}').");\r | |
3599 | \r | |
3600 | static PyObject *\r | |
3601 | string__format__(PyObject* self, PyObject* args)\r | |
3602 | {\r | |
3603 | PyObject *format_spec;\r | |
3604 | PyObject *result = NULL;\r | |
3605 | PyObject *tmp = NULL;\r | |
3606 | \r | |
3607 | /* If 2.x, convert format_spec to the same type as value */\r | |
3608 | /* This is to allow things like u''.format('') */\r | |
3609 | if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))\r | |
3610 | goto done;\r | |
3611 | if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {\r | |
3612 | PyErr_Format(PyExc_TypeError, "__format__ arg must be str "\r | |
3613 | "or unicode, not %s", Py_TYPE(format_spec)->tp_name);\r | |
3614 | goto done;\r | |
3615 | }\r | |
3616 | tmp = PyObject_Str(format_spec);\r | |
3617 | if (tmp == NULL)\r | |
3618 | goto done;\r | |
3619 | format_spec = tmp;\r | |
3620 | \r | |
3621 | result = _PyBytes_FormatAdvanced(self,\r | |
3622 | PyString_AS_STRING(format_spec),\r | |
3623 | PyString_GET_SIZE(format_spec));\r | |
3624 | done:\r | |
3625 | Py_XDECREF(tmp);\r | |
3626 | return result;\r | |
3627 | }\r | |
3628 | \r | |
3629 | PyDoc_STRVAR(p_format__doc__,\r | |
3630 | "S.__format__(format_spec) -> string\n\\r | |
3631 | \n\\r | |
3632 | Return a formatted version of S as described by format_spec.");\r | |
3633 | \r | |
3634 | \r | |
3635 | static PyMethodDef\r | |
3636 | string_methods[] = {\r | |
3637 | /* Counterparts of the obsolete stropmodule functions; except\r | |
3638 | string.maketrans(). */\r | |
3639 | {"join", (PyCFunction)string_join, METH_O, join__doc__},\r | |
3640 | {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},\r | |
3641 | {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},\r | |
3642 | {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},\r | |
3643 | {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},\r | |
3644 | {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},\r | |
3645 | {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},\r | |
3646 | {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},\r | |
3647 | {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},\r | |
3648 | {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},\r | |
3649 | {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},\r | |
3650 | {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},\r | |
3651 | {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,\r | |
3652 | capitalize__doc__},\r | |
3653 | {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},\r | |
3654 | {"endswith", (PyCFunction)string_endswith, METH_VARARGS,\r | |
3655 | endswith__doc__},\r | |
3656 | {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},\r | |
3657 | {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},\r | |
3658 | {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},\r | |
3659 | {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},\r | |
3660 | {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},\r | |
3661 | {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},\r | |
3662 | {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},\r | |
3663 | {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},\r | |
3664 | {"rpartition", (PyCFunction)string_rpartition, METH_O,\r | |
3665 | rpartition__doc__},\r | |
3666 | {"startswith", (PyCFunction)string_startswith, METH_VARARGS,\r | |
3667 | startswith__doc__},\r | |
3668 | {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},\r | |
3669 | {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,\r | |
3670 | swapcase__doc__},\r | |
3671 | {"translate", (PyCFunction)string_translate, METH_VARARGS,\r | |
3672 | translate__doc__},\r | |
3673 | {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},\r | |
3674 | {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},\r | |
3675 | {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},\r | |
3676 | {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},\r | |
3677 | {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},\r | |
3678 | {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},\r | |
3679 | {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},\r | |
3680 | {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},\r | |
3681 | {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},\r | |
3682 | {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},\r | |
3683 | {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},\r | |
3684 | {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,\r | |
3685 | expandtabs__doc__},\r | |
3686 | {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,\r | |
3687 | splitlines__doc__},\r | |
3688 | {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,\r | |
3689 | sizeof__doc__},\r | |
3690 | {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},\r | |
3691 | {NULL, NULL} /* sentinel */\r | |
3692 | };\r | |
3693 | \r | |
3694 | static PyObject *\r | |
3695 | str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);\r | |
3696 | \r | |
3697 | static PyObject *\r | |
3698 | string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)\r | |
3699 | {\r | |
3700 | PyObject *x = NULL;\r | |
3701 | static char *kwlist[] = {"object", 0};\r | |
3702 | \r | |
3703 | if (type != &PyString_Type)\r | |
3704 | return str_subtype_new(type, args, kwds);\r | |
3705 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))\r | |
3706 | return NULL;\r | |
3707 | if (x == NULL)\r | |
3708 | return PyString_FromString("");\r | |
3709 | return PyObject_Str(x);\r | |
3710 | }\r | |
3711 | \r | |
3712 | static PyObject *\r | |
3713 | str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)\r | |
3714 | {\r | |
3715 | PyObject *tmp, *pnew;\r | |
3716 | Py_ssize_t n;\r | |
3717 | \r | |
3718 | assert(PyType_IsSubtype(type, &PyString_Type));\r | |
3719 | tmp = string_new(&PyString_Type, args, kwds);\r | |
3720 | if (tmp == NULL)\r | |
3721 | return NULL;\r | |
3722 | assert(PyString_CheckExact(tmp));\r | |
3723 | n = PyString_GET_SIZE(tmp);\r | |
3724 | pnew = type->tp_alloc(type, n);\r | |
3725 | if (pnew != NULL) {\r | |
3726 | Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);\r | |
3727 | ((PyStringObject *)pnew)->ob_shash =\r | |
3728 | ((PyStringObject *)tmp)->ob_shash;\r | |
3729 | ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;\r | |
3730 | }\r | |
3731 | Py_DECREF(tmp);\r | |
3732 | return pnew;\r | |
3733 | }\r | |
3734 | \r | |
3735 | static PyObject *\r | |
3736 | basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)\r | |
3737 | {\r | |
3738 | PyErr_SetString(PyExc_TypeError,\r | |
3739 | "The basestring type cannot be instantiated");\r | |
3740 | return NULL;\r | |
3741 | }\r | |
3742 | \r | |
3743 | static PyObject *\r | |
3744 | string_mod(PyObject *v, PyObject *w)\r | |
3745 | {\r | |
3746 | if (!PyString_Check(v)) {\r | |
3747 | Py_INCREF(Py_NotImplemented);\r | |
3748 | return Py_NotImplemented;\r | |
3749 | }\r | |
3750 | return PyString_Format(v, w);\r | |
3751 | }\r | |
3752 | \r | |
3753 | PyDoc_STRVAR(basestring_doc,\r | |
3754 | "Type basestring cannot be instantiated; it is the base for str and unicode.");\r | |
3755 | \r | |
3756 | static PyNumberMethods string_as_number = {\r | |
3757 | 0, /*nb_add*/\r | |
3758 | 0, /*nb_subtract*/\r | |
3759 | 0, /*nb_multiply*/\r | |
3760 | 0, /*nb_divide*/\r | |
3761 | string_mod, /*nb_remainder*/\r | |
3762 | };\r | |
3763 | \r | |
3764 | \r | |
3765 | PyTypeObject PyBaseString_Type = {\r | |
3766 | PyVarObject_HEAD_INIT(&PyType_Type, 0)\r | |
3767 | "basestring",\r | |
3768 | 0,\r | |
3769 | 0,\r | |
3770 | 0, /* tp_dealloc */\r | |
3771 | 0, /* tp_print */\r | |
3772 | 0, /* tp_getattr */\r | |
3773 | 0, /* tp_setattr */\r | |
3774 | 0, /* tp_compare */\r | |
3775 | 0, /* tp_repr */\r | |
3776 | 0, /* tp_as_number */\r | |
3777 | 0, /* tp_as_sequence */\r | |
3778 | 0, /* tp_as_mapping */\r | |
3779 | 0, /* tp_hash */\r | |
3780 | 0, /* tp_call */\r | |
3781 | 0, /* tp_str */\r | |
3782 | 0, /* tp_getattro */\r | |
3783 | 0, /* tp_setattro */\r | |
3784 | 0, /* tp_as_buffer */\r | |
3785 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */\r | |
3786 | basestring_doc, /* tp_doc */\r | |
3787 | 0, /* tp_traverse */\r | |
3788 | 0, /* tp_clear */\r | |
3789 | 0, /* tp_richcompare */\r | |
3790 | 0, /* tp_weaklistoffset */\r | |
3791 | 0, /* tp_iter */\r | |
3792 | 0, /* tp_iternext */\r | |
3793 | 0, /* tp_methods */\r | |
3794 | 0, /* tp_members */\r | |
3795 | 0, /* tp_getset */\r | |
3796 | &PyBaseObject_Type, /* tp_base */\r | |
3797 | 0, /* tp_dict */\r | |
3798 | 0, /* tp_descr_get */\r | |
3799 | 0, /* tp_descr_set */\r | |
3800 | 0, /* tp_dictoffset */\r | |
3801 | 0, /* tp_init */\r | |
3802 | 0, /* tp_alloc */\r | |
3803 | basestring_new, /* tp_new */\r | |
3804 | 0, /* tp_free */\r | |
3805 | };\r | |
3806 | \r | |
3807 | PyDoc_STRVAR(string_doc,\r | |
3808 | "str(object='') -> string\n\\r | |
3809 | \n\\r | |
3810 | Return a nice string representation of the object.\n\\r | |
3811 | If the argument is a string, the return value is the same object.");\r | |
3812 | \r | |
3813 | PyTypeObject PyString_Type = {\r | |
3814 | PyVarObject_HEAD_INIT(&PyType_Type, 0)\r | |
3815 | "str",\r | |
3816 | PyStringObject_SIZE,\r | |
3817 | sizeof(char),\r | |
3818 | string_dealloc, /* tp_dealloc */\r | |
3819 | (printfunc)string_print, /* tp_print */\r | |
3820 | 0, /* tp_getattr */\r | |
3821 | 0, /* tp_setattr */\r | |
3822 | 0, /* tp_compare */\r | |
3823 | string_repr, /* tp_repr */\r | |
3824 | &string_as_number, /* tp_as_number */\r | |
3825 | &string_as_sequence, /* tp_as_sequence */\r | |
3826 | &string_as_mapping, /* tp_as_mapping */\r | |
3827 | (hashfunc)string_hash, /* tp_hash */\r | |
3828 | 0, /* tp_call */\r | |
3829 | string_str, /* tp_str */\r | |
3830 | PyObject_GenericGetAttr, /* tp_getattro */\r | |
3831 | 0, /* tp_setattro */\r | |
3832 | &string_as_buffer, /* tp_as_buffer */\r | |
3833 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |\r | |
3834 | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |\r | |
3835 | Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */\r | |
3836 | string_doc, /* tp_doc */\r | |
3837 | 0, /* tp_traverse */\r | |
3838 | 0, /* tp_clear */\r | |
3839 | (richcmpfunc)string_richcompare, /* tp_richcompare */\r | |
3840 | 0, /* tp_weaklistoffset */\r | |
3841 | 0, /* tp_iter */\r | |
3842 | 0, /* tp_iternext */\r | |
3843 | string_methods, /* tp_methods */\r | |
3844 | 0, /* tp_members */\r | |
3845 | 0, /* tp_getset */\r | |
3846 | &PyBaseString_Type, /* tp_base */\r | |
3847 | 0, /* tp_dict */\r | |
3848 | 0, /* tp_descr_get */\r | |
3849 | 0, /* tp_descr_set */\r | |
3850 | 0, /* tp_dictoffset */\r | |
3851 | 0, /* tp_init */\r | |
3852 | 0, /* tp_alloc */\r | |
3853 | string_new, /* tp_new */\r | |
3854 | PyObject_Del, /* tp_free */\r | |
3855 | };\r | |
3856 | \r | |
3857 | void\r | |
3858 | PyString_Concat(register PyObject **pv, register PyObject *w)\r | |
3859 | {\r | |
3860 | register PyObject *v;\r | |
3861 | if (*pv == NULL)\r | |
3862 | return;\r | |
3863 | if (w == NULL || !PyString_Check(*pv)) {\r | |
3864 | Py_CLEAR(*pv);\r | |
3865 | return;\r | |
3866 | }\r | |
3867 | v = string_concat((PyStringObject *) *pv, w);\r | |
3868 | Py_DECREF(*pv);\r | |
3869 | *pv = v;\r | |
3870 | }\r | |
3871 | \r | |
3872 | void\r | |
3873 | PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)\r | |
3874 | {\r | |
3875 | PyString_Concat(pv, w);\r | |
3876 | Py_XDECREF(w);\r | |
3877 | }\r | |
3878 | \r | |
3879 | \r | |
3880 | /* The following function breaks the notion that strings are immutable:\r | |
3881 | it changes the size of a string. We get away with this only if there\r | |
3882 | is only one module referencing the object. You can also think of it\r | |
3883 | as creating a new string object and destroying the old one, only\r | |
3884 | more efficiently. In any case, don't use this if the string may\r | |
3885 | already be known to some other part of the code...\r | |
3886 | Note that if there's not enough memory to resize the string, the original\r | |
3887 | string object at *pv is deallocated, *pv is set to NULL, an "out of\r | |
3888 | memory" exception is set, and -1 is returned. Else (on success) 0 is\r | |
3889 | returned, and the value in *pv may or may not be the same as on input.\r | |
3890 | As always, an extra byte is allocated for a trailing \0 byte (newsize\r | |
3891 | does *not* include that), and a trailing \0 byte is stored.\r | |
3892 | */\r | |
3893 | \r | |
3894 | int\r | |
3895 | _PyString_Resize(PyObject **pv, Py_ssize_t newsize)\r | |
3896 | {\r | |
3897 | register PyObject *v;\r | |
3898 | register PyStringObject *sv;\r | |
3899 | v = *pv;\r | |
3900 | if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||\r | |
3901 | PyString_CHECK_INTERNED(v)) {\r | |
3902 | *pv = 0;\r | |
3903 | Py_DECREF(v);\r | |
3904 | PyErr_BadInternalCall();\r | |
3905 | return -1;\r | |
3906 | }\r | |
3907 | /* XXX UNREF/NEWREF interface should be more symmetrical */\r | |
3908 | _Py_DEC_REFTOTAL;\r | |
3909 | _Py_ForgetReference(v);\r | |
3910 | *pv = (PyObject *)\r | |
3911 | PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);\r | |
3912 | if (*pv == NULL) {\r | |
3913 | PyObject_Del(v);\r | |
3914 | PyErr_NoMemory();\r | |
3915 | return -1;\r | |
3916 | }\r | |
3917 | _Py_NewReference(*pv);\r | |
3918 | sv = (PyStringObject *) *pv;\r | |
3919 | Py_SIZE(sv) = newsize;\r | |
3920 | sv->ob_sval[newsize] = '\0';\r | |
3921 | sv->ob_shash = -1; /* invalidate cached hash value */\r | |
3922 | return 0;\r | |
3923 | }\r | |
3924 | \r | |
3925 | /* Helpers for formatstring */\r | |
3926 | \r | |
3927 | Py_LOCAL_INLINE(PyObject *)\r | |
3928 | getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)\r | |
3929 | {\r | |
3930 | Py_ssize_t argidx = *p_argidx;\r | |
3931 | if (argidx < arglen) {\r | |
3932 | (*p_argidx)++;\r | |
3933 | if (arglen < 0)\r | |
3934 | return args;\r | |
3935 | else\r | |
3936 | return PyTuple_GetItem(args, argidx);\r | |
3937 | }\r | |
3938 | PyErr_SetString(PyExc_TypeError,\r | |
3939 | "not enough arguments for format string");\r | |
3940 | return NULL;\r | |
3941 | }\r | |
3942 | \r | |
/* Flag bits for the '%' format codes; each bit stands for the flag
   character noted next to it. */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
3955 | \r | |
3956 | /* Returns a new reference to a PyString object, or NULL on failure. */\r | |
3957 | \r | |
3958 | static PyObject *\r | |
3959 | formatfloat(PyObject *v, int flags, int prec, int type)\r | |
3960 | {\r | |
3961 | char *p;\r | |
3962 | PyObject *result;\r | |
3963 | double x;\r | |
3964 | \r | |
3965 | x = PyFloat_AsDouble(v);\r | |
3966 | if (x == -1.0 && PyErr_Occurred()) {\r | |
3967 | PyErr_Format(PyExc_TypeError, "float argument required, "\r | |
3968 | "not %.200s", Py_TYPE(v)->tp_name);\r | |
3969 | return NULL;\r | |
3970 | }\r | |
3971 | \r | |
3972 | if (prec < 0)\r | |
3973 | prec = 6;\r | |
3974 | \r | |
3975 | p = PyOS_double_to_string(x, type, prec,\r | |
3976 | (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);\r | |
3977 | \r | |
3978 | if (p == NULL)\r | |
3979 | return NULL;\r | |
3980 | result = PyString_FromStringAndSize(p, strlen(p));\r | |
3981 | PyMem_Free(p);\r | |
3982 | return result;\r | |
3983 | }\r | |
3984 | \r | |
3985 | /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and\r | |
3986 | * the F_ALT flag, for Python's long (unbounded) ints. It's not used for\r | |
3987 | * Python's regular ints.\r | |
3988 | * Return value: a new PyString*, or NULL if error.\r | |
3989 | * . *pbuf is set to point into it,\r | |
3990 | * *plen set to the # of chars following that.\r | |
3991 | * Caller must decref it when done using pbuf.\r | |
3992 | * The string starting at *pbuf is of the form\r | |
3993 | * "-"? ("0x" | "0X")? digit+\r | |
3994 | * "0x"/"0X" are present only for x and X conversions, with F_ALT\r | |
3995 | * set in flags. The case of hex digits will be correct,\r | |
3996 | * There will be at least prec digits, zero-filled on the left if\r | |
3997 | * necessary to get that many.\r | |
3998 | * val object to be converted\r | |
3999 | * flags bitmask of format flags; only F_ALT is looked at\r | |
4000 | * prec minimum number of digits; 0-fill on left if needed\r | |
4001 | * type a character in [duoxX]; u acts the same as d\r | |
4002 | *\r | |
4003 | * CAUTION: o, x and X conversions on regular ints can never\r | |
4004 | * produce a '-' sign, but can for Python's unbounded ints.\r | |
4005 | */\r | |
4006 | PyObject*\r | |
4007 | _PyString_FormatLong(PyObject *val, int flags, int prec, int type,\r | |
4008 | char **pbuf, int *plen)\r | |
4009 | {\r | |
4010 | PyObject *result = NULL;\r | |
4011 | char *buf;\r | |
4012 | Py_ssize_t i;\r | |
4013 | int sign; /* 1 if '-', else 0 */\r | |
4014 | int len; /* number of characters */\r | |
4015 | Py_ssize_t llen;\r | |
4016 | int numdigits; /* len == numnondigits + numdigits */\r | |
4017 | int numnondigits = 0;\r | |
4018 | \r | |
4019 | switch (type) {\r | |
4020 | case 'd':\r | |
4021 | case 'u':\r | |
4022 | result = Py_TYPE(val)->tp_str(val);\r | |
4023 | break;\r | |
4024 | case 'o':\r | |
4025 | result = Py_TYPE(val)->tp_as_number->nb_oct(val);\r | |
4026 | break;\r | |
4027 | case 'x':\r | |
4028 | case 'X':\r | |
4029 | numnondigits = 2;\r | |
4030 | result = Py_TYPE(val)->tp_as_number->nb_hex(val);\r | |
4031 | break;\r | |
4032 | default:\r | |
4033 | assert(!"'type' not in [duoxX]");\r | |
4034 | }\r | |
4035 | if (!result)\r | |
4036 | return NULL;\r | |
4037 | \r | |
4038 | buf = PyString_AsString(result);\r | |
4039 | if (!buf) {\r | |
4040 | Py_DECREF(result);\r | |
4041 | return NULL;\r | |
4042 | }\r | |
4043 | \r | |
4044 | /* To modify the string in-place, there can only be one reference. */\r | |
4045 | if (Py_REFCNT(result) != 1) {\r | |
4046 | PyErr_BadInternalCall();\r | |
4047 | return NULL;\r | |
4048 | }\r | |
4049 | llen = PyString_Size(result);\r | |
4050 | if (llen > INT_MAX) {\r | |
4051 | PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");\r | |
4052 | return NULL;\r | |
4053 | }\r | |
4054 | len = (int)llen;\r | |
4055 | if (buf[len-1] == 'L') {\r | |
4056 | --len;\r | |
4057 | buf[len] = '\0';\r | |
4058 | }\r | |
4059 | sign = buf[0] == '-';\r | |
4060 | numnondigits += sign;\r | |
4061 | numdigits = len - numnondigits;\r | |
4062 | assert(numdigits > 0);\r | |
4063 | \r | |
4064 | /* Get rid of base marker unless F_ALT */\r | |
4065 | if ((flags & F_ALT) == 0) {\r | |
4066 | /* Need to skip 0x, 0X or 0. */\r | |
4067 | int skipped = 0;\r | |
4068 | switch (type) {\r | |
4069 | case 'o':\r | |
4070 | assert(buf[sign] == '0');\r | |
4071 | /* If 0 is only digit, leave it alone. */\r | |
4072 | if (numdigits > 1) {\r | |
4073 | skipped = 1;\r | |
4074 | --numdigits;\r | |
4075 | }\r | |
4076 | break;\r | |
4077 | case 'x':\r | |
4078 | case 'X':\r | |
4079 | assert(buf[sign] == '0');\r | |
4080 | assert(buf[sign + 1] == 'x');\r | |
4081 | skipped = 2;\r | |
4082 | numnondigits -= 2;\r | |
4083 | break;\r | |
4084 | }\r | |
4085 | if (skipped) {\r | |
4086 | buf += skipped;\r | |
4087 | len -= skipped;\r | |
4088 | if (sign)\r | |
4089 | buf[0] = '-';\r | |
4090 | }\r | |
4091 | assert(len == numnondigits + numdigits);\r | |
4092 | assert(numdigits > 0);\r | |
4093 | }\r | |
4094 | \r | |
4095 | /* Fill with leading zeroes to meet minimum width. */\r | |
4096 | if (prec > numdigits) {\r | |
4097 | PyObject *r1 = PyString_FromStringAndSize(NULL,\r | |
4098 | numnondigits + prec);\r | |
4099 | char *b1;\r | |
4100 | if (!r1) {\r | |
4101 | Py_DECREF(result);\r | |
4102 | return NULL;\r | |
4103 | }\r | |
4104 | b1 = PyString_AS_STRING(r1);\r | |
4105 | for (i = 0; i < numnondigits; ++i)\r | |
4106 | *b1++ = *buf++;\r | |
4107 | for (i = 0; i < prec - numdigits; i++)\r | |
4108 | *b1++ = '0';\r | |
4109 | for (i = 0; i < numdigits; i++)\r | |
4110 | *b1++ = *buf++;\r | |
4111 | *b1 = '\0';\r | |
4112 | Py_DECREF(result);\r | |
4113 | result = r1;\r | |
4114 | buf = PyString_AS_STRING(result);\r | |
4115 | len = numnondigits + prec;\r | |
4116 | }\r | |
4117 | \r | |
4118 | /* Fix up case for hex conversions. */\r | |
4119 | if (type == 'X') {\r | |
4120 | /* Need to convert all lower case letters to upper case.\r | |
4121 | and need to convert 0x to 0X (and -0x to -0X). */\r | |
4122 | for (i = 0; i < len; i++)\r | |
4123 | if (buf[i] >= 'a' && buf[i] <= 'x')\r | |
4124 | buf[i] -= 'a'-'A';\r | |
4125 | }\r | |
4126 | *pbuf = buf;\r | |
4127 | *plen = len;\r | |
4128 | return result;\r | |
4129 | }\r | |
4130 | \r | |
4131 | Py_LOCAL_INLINE(int)\r | |
4132 | formatint(char *buf, size_t buflen, int flags,\r | |
4133 | int prec, int type, PyObject *v)\r | |
4134 | {\r | |
4135 | /* fmt = '%#.' + `prec` + 'l' + `type`\r | |
4136 | worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)\r | |
4137 | + 1 + 1 = 24 */\r | |
4138 | char fmt[64]; /* plenty big enough! */\r | |
4139 | char *sign;\r | |
4140 | long x;\r | |
4141 | \r | |
4142 | x = PyInt_AsLong(v);\r | |
4143 | if (x == -1 && PyErr_Occurred()) {\r | |
4144 | PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",\r | |
4145 | Py_TYPE(v)->tp_name);\r | |
4146 | return -1;\r | |
4147 | }\r | |
4148 | if (x < 0 && type == 'u') {\r | |
4149 | type = 'd';\r | |
4150 | }\r | |
4151 | if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))\r | |
4152 | sign = "-";\r | |
4153 | else\r | |
4154 | sign = "";\r | |
4155 | if (prec < 0)\r | |
4156 | prec = 1;\r | |
4157 | \r | |
4158 | if ((flags & F_ALT) &&\r | |
4159 | (type == 'x' || type == 'X')) {\r | |
4160 | /* When converting under %#x or %#X, there are a number\r | |
4161 | * of issues that cause pain:\r | |
4162 | * - when 0 is being converted, the C standard leaves off\r | |
4163 | * the '0x' or '0X', which is inconsistent with other\r | |
4164 | * %#x/%#X conversions and inconsistent with Python's\r | |
4165 | * hex() function\r | |
4166 | * - there are platforms that violate the standard and\r | |
4167 | * convert 0 with the '0x' or '0X'\r | |
4168 | * (Metrowerks, Compaq Tru64)\r | |
4169 | * - there are platforms that give '0x' when converting\r | |
4170 | * under %#X, but convert 0 in accordance with the\r | |
4171 | * standard (OS/2 EMX)\r | |
4172 | *\r | |
4173 | * We can achieve the desired consistency by inserting our\r | |
4174 | * own '0x' or '0X' prefix, and substituting %x/%X in place\r | |
4175 | * of %#x/%#X.\r | |
4176 | *\r | |
4177 | * Note that this is the same approach as used in\r | |
4178 | * formatint() in unicodeobject.c\r | |
4179 | */\r | |
4180 | PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",\r | |
4181 | sign, type, prec, type);\r | |
4182 | }\r | |
4183 | else {\r | |
4184 | PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",\r | |
4185 | sign, (flags&F_ALT) ? "#" : "",\r | |
4186 | prec, type);\r | |
4187 | }\r | |
4188 | \r | |
4189 | /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))\r | |
4190 | * worst case buf = '-0x' + [0-9]*prec, where prec >= 11\r | |
4191 | */\r | |
4192 | if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {\r | |
4193 | PyErr_SetString(PyExc_OverflowError,\r | |
4194 | "formatted integer is too long (precision too large?)");\r | |
4195 | return -1;\r | |
4196 | }\r | |
4197 | if (sign[0])\r | |
4198 | PyOS_snprintf(buf, buflen, fmt, -x);\r | |
4199 | else\r | |
4200 | PyOS_snprintf(buf, buflen, fmt, x);\r | |
4201 | return (int)strlen(buf);\r | |
4202 | }\r | |
4203 | \r | |
4204 | Py_LOCAL_INLINE(int)\r | |
4205 | formatchar(char *buf, size_t buflen, PyObject *v)\r | |
4206 | {\r | |
4207 | /* presume that the buffer is at least 2 characters long */\r | |
4208 | if (PyString_Check(v)) {\r | |
4209 | if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))\r | |
4210 | return -1;\r | |
4211 | }\r | |
4212 | else {\r | |
4213 | if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))\r | |
4214 | return -1;\r | |
4215 | }\r | |
4216 | buf[1] = '\0';\r | |
4217 | return 1;\r | |
4218 | }\r | |
4219 | \r | |
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the ints &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   The expansion is fully parenthesised so the macro behaves as a single
   operand wherever it is used.
*/
#define FORMATBUFLEN ((size_t)120)
4229 | \r | |
4230 | PyObject *\r | |
4231 | PyString_Format(PyObject *format, PyObject *args)\r | |
4232 | {\r | |
4233 | char *fmt, *res;\r | |
4234 | Py_ssize_t arglen, argidx;\r | |
4235 | Py_ssize_t reslen, rescnt, fmtcnt;\r | |
4236 | int args_owned = 0;\r | |
4237 | PyObject *result, *orig_args;\r | |
4238 | #ifdef Py_USING_UNICODE\r | |
4239 | PyObject *v, *w;\r | |
4240 | #endif\r | |
4241 | PyObject *dict = NULL;\r | |
4242 | if (format == NULL || !PyString_Check(format) || args == NULL) {\r | |
4243 | PyErr_BadInternalCall();\r | |
4244 | return NULL;\r | |
4245 | }\r | |
4246 | orig_args = args;\r | |
4247 | fmt = PyString_AS_STRING(format);\r | |
4248 | fmtcnt = PyString_GET_SIZE(format);\r | |
4249 | reslen = rescnt = fmtcnt + 100;\r | |
4250 | result = PyString_FromStringAndSize((char *)NULL, reslen);\r | |
4251 | if (result == NULL)\r | |
4252 | return NULL;\r | |
4253 | res = PyString_AsString(result);\r | |
4254 | if (PyTuple_Check(args)) {\r | |
4255 | arglen = PyTuple_GET_SIZE(args);\r | |
4256 | argidx = 0;\r | |
4257 | }\r | |
4258 | else {\r | |
4259 | arglen = -1;\r | |
4260 | argidx = -2;\r | |
4261 | }\r | |
4262 | if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&\r | |
4263 | !PyTuple_Check(args) && !PyObject_TypeCheck(args, &PyBaseString_Type))\r | |
4264 | dict = args;\r | |
4265 | while (--fmtcnt >= 0) {\r | |
4266 | if (*fmt != '%') {\r | |
4267 | if (--rescnt < 0) {\r | |
4268 | rescnt = fmtcnt + 100;\r | |
4269 | reslen += rescnt;\r | |
4270 | if (_PyString_Resize(&result, reslen))\r | |
4271 | return NULL;\r | |
4272 | res = PyString_AS_STRING(result)\r | |
4273 | + reslen - rescnt;\r | |
4274 | --rescnt;\r | |
4275 | }\r | |
4276 | *res++ = *fmt++;\r | |
4277 | }\r | |
4278 | else {\r | |
4279 | /* Got a format specifier */\r | |
4280 | int flags = 0;\r | |
4281 | Py_ssize_t width = -1;\r | |
4282 | int prec = -1;\r | |
4283 | int c = '\0';\r | |
4284 | int fill;\r | |
4285 | int isnumok;\r | |
4286 | PyObject *v = NULL;\r | |
4287 | PyObject *temp = NULL;\r | |
4288 | char *pbuf;\r | |
4289 | int sign;\r | |
4290 | Py_ssize_t len;\r | |
4291 | char formatbuf[FORMATBUFLEN];\r | |
4292 | /* For format{int,char}() */\r | |
4293 | #ifdef Py_USING_UNICODE\r | |
4294 | char *fmt_start = fmt;\r | |
4295 | Py_ssize_t argidx_start = argidx;\r | |
4296 | #endif\r | |
4297 | \r | |
4298 | fmt++;\r | |
4299 | if (*fmt == '(') {\r | |
4300 | char *keystart;\r | |
4301 | Py_ssize_t keylen;\r | |
4302 | PyObject *key;\r | |
4303 | int pcount = 1;\r | |
4304 | \r | |
4305 | if (dict == NULL) {\r | |
4306 | PyErr_SetString(PyExc_TypeError,\r | |
4307 | "format requires a mapping");\r | |
4308 | goto error;\r | |
4309 | }\r | |
4310 | ++fmt;\r | |
4311 | --fmtcnt;\r | |
4312 | keystart = fmt;\r | |
4313 | /* Skip over balanced parentheses */\r | |
4314 | while (pcount > 0 && --fmtcnt >= 0) {\r | |
4315 | if (*fmt == ')')\r | |
4316 | --pcount;\r | |
4317 | else if (*fmt == '(')\r | |
4318 | ++pcount;\r | |
4319 | fmt++;\r | |
4320 | }\r | |
4321 | keylen = fmt - keystart - 1;\r | |
4322 | if (fmtcnt < 0 || pcount > 0) {\r | |
4323 | PyErr_SetString(PyExc_ValueError,\r | |
4324 | "incomplete format key");\r | |
4325 | goto error;\r | |
4326 | }\r | |
4327 | key = PyString_FromStringAndSize(keystart,\r | |
4328 | keylen);\r | |
4329 | if (key == NULL)\r | |
4330 | goto error;\r | |
4331 | if (args_owned) {\r | |
4332 | Py_DECREF(args);\r | |
4333 | args_owned = 0;\r | |
4334 | }\r | |
4335 | args = PyObject_GetItem(dict, key);\r | |
4336 | Py_DECREF(key);\r | |
4337 | if (args == NULL) {\r | |
4338 | goto error;\r | |
4339 | }\r | |
4340 | args_owned = 1;\r | |
4341 | arglen = -1;\r | |
4342 | argidx = -2;\r | |
4343 | }\r | |
4344 | while (--fmtcnt >= 0) {\r | |
4345 | switch (c = *fmt++) {\r | |
4346 | case '-': flags |= F_LJUST; continue;\r | |
4347 | case '+': flags |= F_SIGN; continue;\r | |
4348 | case ' ': flags |= F_BLANK; continue;\r | |
4349 | case '#': flags |= F_ALT; continue;\r | |
4350 | case '0': flags |= F_ZERO; continue;\r | |
4351 | }\r | |
4352 | break;\r | |
4353 | }\r | |
4354 | if (c == '*') {\r | |
4355 | v = getnextarg(args, arglen, &argidx);\r | |
4356 | if (v == NULL)\r | |
4357 | goto error;\r | |
4358 | if (!PyInt_Check(v)) {\r | |
4359 | PyErr_SetString(PyExc_TypeError,\r | |
4360 | "* wants int");\r | |
4361 | goto error;\r | |
4362 | }\r | |
4363 | width = PyInt_AsSsize_t(v);\r | |
4364 | if (width == -1 && PyErr_Occurred())\r | |
4365 | goto error;\r | |
4366 | if (width < 0) {\r | |
4367 | flags |= F_LJUST;\r | |
4368 | width = -width;\r | |
4369 | }\r | |
4370 | if (--fmtcnt >= 0)\r | |
4371 | c = *fmt++;\r | |
4372 | }\r | |
4373 | else if (c >= 0 && isdigit(c)) {\r | |
4374 | width = c - '0';\r | |
4375 | while (--fmtcnt >= 0) {\r | |
4376 | c = Py_CHARMASK(*fmt++);\r | |
4377 | if (!isdigit(c))\r | |
4378 | break;\r | |
4379 | if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {\r | |
4380 | PyErr_SetString(\r | |
4381 | PyExc_ValueError,\r | |
4382 | "width too big");\r | |
4383 | goto error;\r | |
4384 | }\r | |
4385 | width = width*10 + (c - '0');\r | |
4386 | }\r | |
4387 | }\r | |
4388 | if (c == '.') {\r | |
4389 | prec = 0;\r | |
4390 | if (--fmtcnt >= 0)\r | |
4391 | c = *fmt++;\r | |
4392 | if (c == '*') {\r | |
4393 | v = getnextarg(args, arglen, &argidx);\r | |
4394 | if (v == NULL)\r | |
4395 | goto error;\r | |
4396 | if (!PyInt_Check(v)) {\r | |
4397 | PyErr_SetString(\r | |
4398 | PyExc_TypeError,\r | |
4399 | "* wants int");\r | |
4400 | goto error;\r | |
4401 | }\r | |
4402 | prec = _PyInt_AsInt(v);\r | |
4403 | if (prec == -1 && PyErr_Occurred())\r | |
4404 | goto error;\r | |
4405 | if (prec < 0)\r | |
4406 | prec = 0;\r | |
4407 | if (--fmtcnt >= 0)\r | |
4408 | c = *fmt++;\r | |
4409 | }\r | |
4410 | else if (c >= 0 && isdigit(c)) {\r | |
4411 | prec = c - '0';\r | |
4412 | while (--fmtcnt >= 0) {\r | |
4413 | c = Py_CHARMASK(*fmt++);\r | |
4414 | if (!isdigit(c))\r | |
4415 | break;\r | |
4416 | if (prec > (INT_MAX - ((int)c - '0')) / 10) {\r | |
4417 | PyErr_SetString(\r | |
4418 | PyExc_ValueError,\r | |
4419 | "prec too big");\r | |
4420 | goto error;\r | |
4421 | }\r | |
4422 | prec = prec*10 + (c - '0');\r | |
4423 | }\r | |
4424 | }\r | |
4425 | } /* prec */\r | |
4426 | if (fmtcnt >= 0) {\r | |
4427 | if (c == 'h' || c == 'l' || c == 'L') {\r | |
4428 | if (--fmtcnt >= 0)\r | |
4429 | c = *fmt++;\r | |
4430 | }\r | |
4431 | }\r | |
4432 | if (fmtcnt < 0) {\r | |
4433 | PyErr_SetString(PyExc_ValueError,\r | |
4434 | "incomplete format");\r | |
4435 | goto error;\r | |
4436 | }\r | |
4437 | if (c != '%') {\r | |
4438 | v = getnextarg(args, arglen, &argidx);\r | |
4439 | if (v == NULL)\r | |
4440 | goto error;\r | |
4441 | }\r | |
4442 | sign = 0;\r | |
4443 | fill = ' ';\r | |
4444 | switch (c) {\r | |
4445 | case '%':\r | |
4446 | pbuf = "%";\r | |
4447 | len = 1;\r | |
4448 | break;\r | |
4449 | case 's':\r | |
4450 | #ifdef Py_USING_UNICODE\r | |
4451 | if (PyUnicode_Check(v)) {\r | |
4452 | fmt = fmt_start;\r | |
4453 | argidx = argidx_start;\r | |
4454 | goto unicode;\r | |
4455 | }\r | |
4456 | #endif\r | |
4457 | temp = _PyObject_Str(v);\r | |
4458 | #ifdef Py_USING_UNICODE\r | |
4459 | if (temp != NULL && PyUnicode_Check(temp)) {\r | |
4460 | Py_DECREF(temp);\r | |
4461 | fmt = fmt_start;\r | |
4462 | argidx = argidx_start;\r | |
4463 | goto unicode;\r | |
4464 | }\r | |
4465 | #endif\r | |
4466 | /* Fall through */\r | |
4467 | case 'r':\r | |
4468 | if (c == 'r')\r | |
4469 | temp = PyObject_Repr(v);\r | |
4470 | if (temp == NULL)\r | |
4471 | goto error;\r | |
4472 | if (!PyString_Check(temp)) {\r | |
4473 | PyErr_SetString(PyExc_TypeError,\r | |
4474 | "%s argument has non-string str()");\r | |
4475 | Py_DECREF(temp);\r | |
4476 | goto error;\r | |
4477 | }\r | |
4478 | pbuf = PyString_AS_STRING(temp);\r | |
4479 | len = PyString_GET_SIZE(temp);\r | |
4480 | if (prec >= 0 && len > prec)\r | |
4481 | len = prec;\r | |
4482 | break;\r | |
4483 | case 'i':\r | |
4484 | case 'd':\r | |
4485 | case 'u':\r | |
4486 | case 'o':\r | |
4487 | case 'x':\r | |
4488 | case 'X':\r | |
4489 | if (c == 'i')\r | |
4490 | c = 'd';\r | |
4491 | isnumok = 0;\r | |
4492 | if (PyNumber_Check(v)) {\r | |
4493 | PyObject *iobj=NULL;\r | |
4494 | \r | |
4495 | if (PyInt_Check(v) || (PyLong_Check(v))) {\r | |
4496 | iobj = v;\r | |
4497 | Py_INCREF(iobj);\r | |
4498 | }\r | |
4499 | else {\r | |
4500 | iobj = PyNumber_Int(v);\r | |
4501 | if (iobj==NULL) {\r | |
4502 | PyErr_Clear();\r | |
4503 | iobj = PyNumber_Long(v);\r | |
4504 | }\r | |
4505 | }\r | |
4506 | if (iobj!=NULL) {\r | |
4507 | if (PyInt_Check(iobj)) {\r | |
4508 | isnumok = 1;\r | |
4509 | pbuf = formatbuf;\r | |
4510 | len = formatint(pbuf,\r | |
4511 | sizeof(formatbuf),\r | |
4512 | flags, prec, c, iobj);\r | |
4513 | Py_DECREF(iobj);\r | |
4514 | if (len < 0)\r | |
4515 | goto error;\r | |
4516 | sign = 1;\r | |
4517 | }\r | |
4518 | else if (PyLong_Check(iobj)) {\r | |
4519 | int ilen;\r | |
4520 | \r | |
4521 | isnumok = 1;\r | |
4522 | temp = _PyString_FormatLong(iobj, flags,\r | |
4523 | prec, c, &pbuf, &ilen);\r | |
4524 | Py_DECREF(iobj);\r | |
4525 | len = ilen;\r | |
4526 | if (!temp)\r | |
4527 | goto error;\r | |
4528 | sign = 1;\r | |
4529 | }\r | |
4530 | else {\r | |
4531 | Py_DECREF(iobj);\r | |
4532 | }\r | |
4533 | }\r | |
4534 | }\r | |
4535 | if (!isnumok) {\r | |
4536 | PyErr_Format(PyExc_TypeError,\r | |
4537 | "%%%c format: a number is required, "\r | |
4538 | "not %.200s", c, Py_TYPE(v)->tp_name);\r | |
4539 | goto error;\r | |
4540 | }\r | |
4541 | if (flags & F_ZERO)\r | |
4542 | fill = '0';\r | |
4543 | break;\r | |
4544 | case 'e':\r | |
4545 | case 'E':\r | |
4546 | case 'f':\r | |
4547 | case 'F':\r | |
4548 | case 'g':\r | |
4549 | case 'G':\r | |
4550 | temp = formatfloat(v, flags, prec, c);\r | |
4551 | if (temp == NULL)\r | |
4552 | goto error;\r | |
4553 | pbuf = PyString_AS_STRING(temp);\r | |
4554 | len = PyString_GET_SIZE(temp);\r | |
4555 | sign = 1;\r | |
4556 | if (flags & F_ZERO)\r | |
4557 | fill = '0';\r | |
4558 | break;\r | |
4559 | case 'c':\r | |
4560 | #ifdef Py_USING_UNICODE\r | |
4561 | if (PyUnicode_Check(v)) {\r | |
4562 | fmt = fmt_start;\r | |
4563 | argidx = argidx_start;\r | |
4564 | goto unicode;\r | |
4565 | }\r | |
4566 | #endif\r | |
4567 | pbuf = formatbuf;\r | |
4568 | len = formatchar(pbuf, sizeof(formatbuf), v);\r | |
4569 | if (len < 0)\r | |
4570 | goto error;\r | |
4571 | break;\r | |
4572 | default:\r | |
4573 | PyErr_Format(PyExc_ValueError,\r | |
4574 | "unsupported format character '%c' (0x%x) "\r | |
4575 | "at index %zd",\r | |
4576 | c, c,\r | |
4577 | (Py_ssize_t)(fmt - 1 -\r | |
4578 | PyString_AsString(format)));\r | |
4579 | goto error;\r | |
4580 | }\r | |
4581 | if (sign) {\r | |
4582 | if (*pbuf == '-' || *pbuf == '+') {\r | |
4583 | sign = *pbuf++;\r | |
4584 | len--;\r | |
4585 | }\r | |
4586 | else if (flags & F_SIGN)\r | |
4587 | sign = '+';\r | |
4588 | else if (flags & F_BLANK)\r | |
4589 | sign = ' ';\r | |
4590 | else\r | |
4591 | sign = 0;\r | |
4592 | }\r | |
4593 | if (width < len)\r | |
4594 | width = len;\r | |
4595 | if (rescnt - (sign != 0) < width) {\r | |
4596 | reslen -= rescnt;\r | |
4597 | rescnt = width + fmtcnt + 100;\r | |
4598 | reslen += rescnt;\r | |
4599 | if (reslen < 0) {\r | |
4600 | Py_DECREF(result);\r | |
4601 | Py_XDECREF(temp);\r | |
4602 | return PyErr_NoMemory();\r | |
4603 | }\r | |
4604 | if (_PyString_Resize(&result, reslen)) {\r | |
4605 | Py_XDECREF(temp);\r | |
4606 | return NULL;\r | |
4607 | }\r | |
4608 | res = PyString_AS_STRING(result)\r | |
4609 | + reslen - rescnt;\r | |
4610 | }\r | |
4611 | if (sign) {\r | |
4612 | if (fill != ' ')\r | |
4613 | *res++ = sign;\r | |
4614 | rescnt--;\r | |
4615 | if (width > len)\r | |
4616 | width--;\r | |
4617 | }\r | |
4618 | if ((flags & F_ALT) && (c == 'x' || c == 'X')) {\r | |
4619 | assert(pbuf[0] == '0');\r | |
4620 | assert(pbuf[1] == c);\r | |
4621 | if (fill != ' ') {\r | |
4622 | *res++ = *pbuf++;\r | |
4623 | *res++ = *pbuf++;\r | |
4624 | }\r | |
4625 | rescnt -= 2;\r | |
4626 | width -= 2;\r | |
4627 | if (width < 0)\r | |
4628 | width = 0;\r | |
4629 | len -= 2;\r | |
4630 | }\r | |
4631 | if (width > len && !(flags & F_LJUST)) {\r | |
4632 | do {\r | |
4633 | --rescnt;\r | |
4634 | *res++ = fill;\r | |
4635 | } while (--width > len);\r | |
4636 | }\r | |
4637 | if (fill == ' ') {\r | |
4638 | if (sign)\r | |
4639 | *res++ = sign;\r | |
4640 | if ((flags & F_ALT) &&\r | |
4641 | (c == 'x' || c == 'X')) {\r | |
4642 | assert(pbuf[0] == '0');\r | |
4643 | assert(pbuf[1] == c);\r | |
4644 | *res++ = *pbuf++;\r | |
4645 | *res++ = *pbuf++;\r | |
4646 | }\r | |
4647 | }\r | |
4648 | Py_MEMCPY(res, pbuf, len);\r | |
4649 | res += len;\r | |
4650 | rescnt -= len;\r | |
4651 | while (--width >= len) {\r | |
4652 | --rescnt;\r | |
4653 | *res++ = ' ';\r | |
4654 | }\r | |
4655 | if (dict && (argidx < arglen) && c != '%') {\r | |
4656 | PyErr_SetString(PyExc_TypeError,\r | |
4657 | "not all arguments converted during string formatting");\r | |
4658 | Py_XDECREF(temp);\r | |
4659 | goto error;\r | |
4660 | }\r | |
4661 | Py_XDECREF(temp);\r | |
4662 | } /* '%' */\r | |
4663 | } /* until end */\r | |
4664 | if (argidx < arglen && !dict) {\r | |
4665 | PyErr_SetString(PyExc_TypeError,\r | |
4666 | "not all arguments converted during string formatting");\r | |
4667 | goto error;\r | |
4668 | }\r | |
4669 | if (args_owned) {\r | |
4670 | Py_DECREF(args);\r | |
4671 | }\r | |
4672 | if (_PyString_Resize(&result, reslen - rescnt))\r | |
4673 | return NULL;\r | |
4674 | return result;\r | |
4675 | \r | |
4676 | #ifdef Py_USING_UNICODE\r | |
4677 | unicode:\r | |
4678 | if (args_owned) {\r | |
4679 | Py_DECREF(args);\r | |
4680 | args_owned = 0;\r | |
4681 | }\r | |
4682 | /* Fiddle args right (remove the first argidx arguments) */\r | |
4683 | if (PyTuple_Check(orig_args) && argidx > 0) {\r | |
4684 | PyObject *v;\r | |
4685 | Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;\r | |
4686 | v = PyTuple_New(n);\r | |
4687 | if (v == NULL)\r | |
4688 | goto error;\r | |
4689 | while (--n >= 0) {\r | |
4690 | PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);\r | |
4691 | Py_INCREF(w);\r | |
4692 | PyTuple_SET_ITEM(v, n, w);\r | |
4693 | }\r | |
4694 | args = v;\r | |
4695 | } else {\r | |
4696 | Py_INCREF(orig_args);\r | |
4697 | args = orig_args;\r | |
4698 | }\r | |
4699 | args_owned = 1;\r | |
4700 | /* Take what we have of the result and let the Unicode formatting\r | |
4701 | function format the rest of the input. */\r | |
4702 | rescnt = res - PyString_AS_STRING(result);\r | |
4703 | if (_PyString_Resize(&result, rescnt))\r | |
4704 | goto error;\r | |
4705 | fmtcnt = PyString_GET_SIZE(format) - \\r | |
4706 | (fmt - PyString_AS_STRING(format));\r | |
4707 | format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);\r | |
4708 | if (format == NULL)\r | |
4709 | goto error;\r | |
4710 | v = PyUnicode_Format(format, args);\r | |
4711 | Py_DECREF(format);\r | |
4712 | if (v == NULL)\r | |
4713 | goto error;\r | |
4714 | /* Paste what we have (result) to what the Unicode formatting\r | |
4715 | function returned (v) and return the result (or error) */\r | |
4716 | w = PyUnicode_Concat(result, v);\r | |
4717 | Py_DECREF(result);\r | |
4718 | Py_DECREF(v);\r | |
4719 | Py_DECREF(args);\r | |
4720 | return w;\r | |
4721 | #endif /* Py_USING_UNICODE */\r | |
4722 | \r | |
4723 | error:\r | |
4724 | Py_DECREF(result);\r | |
4725 | if (args_owned) {\r | |
4726 | Py_DECREF(args);\r | |
4727 | }\r | |
4728 | return NULL;\r | |
4729 | }\r | |
4730 | \r | |
4731 | void\r | |
4732 | PyString_InternInPlace(PyObject **p)\r | |
4733 | {\r | |
4734 | register PyStringObject *s = (PyStringObject *)(*p);\r | |
4735 | PyObject *t;\r | |
4736 | if (s == NULL || !PyString_Check(s))\r | |
4737 | Py_FatalError("PyString_InternInPlace: strings only please!");\r | |
4738 | /* If it's a string subclass, we don't really know what putting\r | |
4739 | it in the interned dict might do. */\r | |
4740 | if (!PyString_CheckExact(s))\r | |
4741 | return;\r | |
4742 | if (PyString_CHECK_INTERNED(s))\r | |
4743 | return;\r | |
4744 | if (interned == NULL) {\r | |
4745 | interned = PyDict_New();\r | |
4746 | if (interned == NULL) {\r | |
4747 | PyErr_Clear(); /* Don't leave an exception */\r | |
4748 | return;\r | |
4749 | }\r | |
4750 | }\r | |
4751 | t = PyDict_GetItem(interned, (PyObject *)s);\r | |
4752 | if (t) {\r | |
4753 | Py_INCREF(t);\r | |
4754 | Py_DECREF(*p);\r | |
4755 | *p = t;\r | |
4756 | return;\r | |
4757 | }\r | |
4758 | \r | |
4759 | if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {\r | |
4760 | PyErr_Clear();\r | |
4761 | return;\r | |
4762 | }\r | |
4763 | /* The two references in interned are not counted by refcnt.\r | |
4764 | The string deallocator will take care of this */\r | |
4765 | Py_REFCNT(s) -= 2;\r | |
4766 | PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;\r | |
4767 | }\r | |
4768 | \r | |
4769 | void\r | |
4770 | PyString_InternImmortal(PyObject **p)\r | |
4771 | {\r | |
4772 | PyString_InternInPlace(p);\r | |
4773 | if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {\r | |
4774 | PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;\r | |
4775 | Py_INCREF(*p);\r | |
4776 | }\r | |
4777 | }\r | |
4778 | \r | |
4779 | \r | |
4780 | PyObject *\r | |
4781 | PyString_InternFromString(const char *cp)\r | |
4782 | {\r | |
4783 | PyObject *s = PyString_FromString(cp);\r | |
4784 | if (s == NULL)\r | |
4785 | return NULL;\r | |
4786 | PyString_InternInPlace(&s);\r | |
4787 | return s;\r | |
4788 | }\r | |
4789 | \r | |
4790 | void\r | |
4791 | PyString_Fini(void)\r | |
4792 | {\r | |
4793 | int i;\r | |
4794 | for (i = 0; i < UCHAR_MAX + 1; i++)\r | |
4795 | Py_CLEAR(characters[i]);\r | |
4796 | Py_CLEAR(nullstring);\r | |
4797 | }\r | |
4798 | \r | |
4799 | void _Py_ReleaseInternedStrings(void)\r | |
4800 | {\r | |
4801 | PyObject *keys;\r | |
4802 | PyStringObject *s;\r | |
4803 | Py_ssize_t i, n;\r | |
4804 | Py_ssize_t immortal_size = 0, mortal_size = 0;\r | |
4805 | \r | |
4806 | if (interned == NULL || !PyDict_Check(interned))\r | |
4807 | return;\r | |
4808 | keys = PyDict_Keys(interned);\r | |
4809 | if (keys == NULL || !PyList_Check(keys)) {\r | |
4810 | PyErr_Clear();\r | |
4811 | return;\r | |
4812 | }\r | |
4813 | \r | |
4814 | /* Since _Py_ReleaseInternedStrings() is intended to help a leak\r | |
4815 | detector, interned strings are not forcibly deallocated; rather, we\r | |
4816 | give them their stolen references back, and then clear and DECREF\r | |
4817 | the interned dict. */\r | |
4818 | \r | |
4819 | n = PyList_GET_SIZE(keys);\r | |
4820 | fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",\r | |
4821 | n);\r | |
4822 | for (i = 0; i < n; i++) {\r | |
4823 | s = (PyStringObject *) PyList_GET_ITEM(keys, i);\r | |
4824 | switch (s->ob_sstate) {\r | |
4825 | case SSTATE_NOT_INTERNED:\r | |
4826 | /* XXX Shouldn't happen */\r | |
4827 | break;\r | |
4828 | case SSTATE_INTERNED_IMMORTAL:\r | |
4829 | Py_REFCNT(s) += 1;\r | |
4830 | immortal_size += Py_SIZE(s);\r | |
4831 | break;\r | |
4832 | case SSTATE_INTERNED_MORTAL:\r | |
4833 | Py_REFCNT(s) += 2;\r | |
4834 | mortal_size += Py_SIZE(s);\r | |
4835 | break;\r | |
4836 | default:\r | |
4837 | Py_FatalError("Inconsistent interned string state.");\r | |
4838 | }\r | |
4839 | s->ob_sstate = SSTATE_NOT_INTERNED;\r | |
4840 | }\r | |
4841 | fprintf(stderr, "total size of all interned strings: "\r | |
4842 | "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "\r | |
4843 | "mortal/immortal\n", mortal_size, immortal_size);\r | |
4844 | Py_DECREF(keys);\r | |
4845 | PyDict_Clear(interned);\r | |
4846 | Py_CLEAR(interned);\r | |
4847 | }\r |