]>
Commit | Line | Data |
---|---|---|
4710c53d | 1 | /* String (str/bytes) object implementation */\r |
2 | \r | |
3 | #define PY_SSIZE_T_CLEAN\r | |
4 | \r | |
5 | #include "Python.h"\r | |
6 | #include <ctype.h>\r | |
7 | #include <stddef.h>\r | |
8 | \r | |
9 | #ifdef COUNT_ALLOCS\r | |
10 | Py_ssize_t null_strings, one_strings;\r | |
11 | #endif\r | |
12 | \r | |
13 | static PyStringObject *characters[UCHAR_MAX + 1];\r | |
14 | static PyStringObject *nullstring;\r | |
15 | \r | |
16 | /* This dictionary holds all interned strings. Note that references to\r | |
17 | strings in this dictionary are *not* counted in the string's ob_refcnt.\r | |
18 | When the interned string reaches a refcnt of 0 the string deallocation\r | |
19 | function will delete the reference from this dictionary.\r | |
20 | \r | |
21 | Another way to look at this is to say that the actual reference\r | |
22 | count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)\r | |
23 | */\r | |
24 | static PyObject *interned;\r | |
25 | \r | |
26 | /* PyStringObject_SIZE gives the basic size of a string; any memory allocation\r | |
27 | for a string of length n should request PyStringObject_SIZE + n bytes.\r | |
28 | \r | |
29 | Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves\r | |
30 | 3 bytes per string allocation on a typical system.\r | |
31 | */\r | |
32 | #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)\r | |
33 | \r | |
34 | /*\r | |
35 | For PyString_FromString(), the parameter `str' points to a null-terminated\r | |
36 | string containing exactly `size' bytes.\r | |
37 | \r | |
38 | For PyString_FromStringAndSize(), the parameter `str' is\r | |
39 | either NULL or else points to a string containing at least `size' bytes.\r | |
40 | For PyString_FromStringAndSize(), the string in the `str' parameter does\r | |
41 | not have to be null-terminated. (Therefore it is safe to construct a\r | |
42 | substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)\r | |
43 | If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'\r | |
44 | bytes (setting the last byte to the null terminating character) and you can\r | |
45 | fill in the data yourself. If `str' is non-NULL then the resulting\r | |
46 | PyString object must be treated as immutable and you must not fill in nor\r | |
47 | alter the data yourself, since the strings may be shared.\r | |
48 | \r | |
49 | The PyObject member `op->ob_size', which denotes the number of "extra\r | |
50 | items" in a variable-size object, will contain the number of bytes\r | |
51 | allocated for string data, not counting the null terminating character.\r | |
52 | It is therefore equal to the `size' parameter (for\r | |
53 | PyString_FromStringAndSize()) or the length of the string in the `str'\r | |
54 | parameter (for PyString_FromString()).\r | |
55 | */\r | |
56 | PyObject *\r | |
57 | PyString_FromStringAndSize(const char *str, Py_ssize_t size)\r | |
58 | {\r | |
59 | register PyStringObject *op;\r | |
60 | if (size < 0) {\r | |
61 | PyErr_SetString(PyExc_SystemError,\r | |
62 | "Negative size passed to PyString_FromStringAndSize");\r | |
63 | return NULL;\r | |
64 | }\r | |
65 | if (size == 0 && (op = nullstring) != NULL) {\r | |
66 | #ifdef COUNT_ALLOCS\r | |
67 | null_strings++;\r | |
68 | #endif\r | |
69 | Py_INCREF(op);\r | |
70 | return (PyObject *)op;\r | |
71 | }\r | |
72 | if (size == 1 && str != NULL &&\r | |
73 | (op = characters[*str & UCHAR_MAX]) != NULL)\r | |
74 | {\r | |
75 | #ifdef COUNT_ALLOCS\r | |
76 | one_strings++;\r | |
77 | #endif\r | |
78 | Py_INCREF(op);\r | |
79 | return (PyObject *)op;\r | |
80 | }\r | |
81 | \r | |
82 | if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {\r | |
83 | PyErr_SetString(PyExc_OverflowError, "string is too large");\r | |
84 | return NULL;\r | |
85 | }\r | |
86 | \r | |
87 | /* Inline PyObject_NewVar */\r | |
88 | op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);\r | |
89 | if (op == NULL)\r | |
90 | return PyErr_NoMemory();\r | |
91 | PyObject_INIT_VAR(op, &PyString_Type, size);\r | |
92 | op->ob_shash = -1;\r | |
93 | op->ob_sstate = SSTATE_NOT_INTERNED;\r | |
94 | if (str != NULL)\r | |
95 | Py_MEMCPY(op->ob_sval, str, size);\r | |
96 | op->ob_sval[size] = '\0';\r | |
97 | /* share short strings */\r | |
98 | if (size == 0) {\r | |
99 | PyObject *t = (PyObject *)op;\r | |
100 | PyString_InternInPlace(&t);\r | |
101 | op = (PyStringObject *)t;\r | |
102 | nullstring = op;\r | |
103 | Py_INCREF(op);\r | |
104 | } else if (size == 1 && str != NULL) {\r | |
105 | PyObject *t = (PyObject *)op;\r | |
106 | PyString_InternInPlace(&t);\r | |
107 | op = (PyStringObject *)t;\r | |
108 | characters[*str & UCHAR_MAX] = op;\r | |
109 | Py_INCREF(op);\r | |
110 | }\r | |
111 | return (PyObject *) op;\r | |
112 | }\r | |
113 | \r | |
114 | PyObject *\r | |
115 | PyString_FromString(const char *str)\r | |
116 | {\r | |
117 | register size_t size;\r | |
118 | register PyStringObject *op;\r | |
119 | \r | |
120 | assert(str != NULL);\r | |
121 | size = strlen(str);\r | |
122 | if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {\r | |
123 | PyErr_SetString(PyExc_OverflowError,\r | |
124 | "string is too long for a Python string");\r | |
125 | return NULL;\r | |
126 | }\r | |
127 | if (size == 0 && (op = nullstring) != NULL) {\r | |
128 | #ifdef COUNT_ALLOCS\r | |
129 | null_strings++;\r | |
130 | #endif\r | |
131 | Py_INCREF(op);\r | |
132 | return (PyObject *)op;\r | |
133 | }\r | |
134 | if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {\r | |
135 | #ifdef COUNT_ALLOCS\r | |
136 | one_strings++;\r | |
137 | #endif\r | |
138 | Py_INCREF(op);\r | |
139 | return (PyObject *)op;\r | |
140 | }\r | |
141 | \r | |
142 | /* Inline PyObject_NewVar */\r | |
143 | op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);\r | |
144 | if (op == NULL)\r | |
145 | return PyErr_NoMemory();\r | |
146 | PyObject_INIT_VAR(op, &PyString_Type, size);\r | |
147 | op->ob_shash = -1;\r | |
148 | op->ob_sstate = SSTATE_NOT_INTERNED;\r | |
149 | Py_MEMCPY(op->ob_sval, str, size+1);\r | |
150 | /* share short strings */\r | |
151 | if (size == 0) {\r | |
152 | PyObject *t = (PyObject *)op;\r | |
153 | PyString_InternInPlace(&t);\r | |
154 | op = (PyStringObject *)t;\r | |
155 | nullstring = op;\r | |
156 | Py_INCREF(op);\r | |
157 | } else if (size == 1) {\r | |
158 | PyObject *t = (PyObject *)op;\r | |
159 | PyString_InternInPlace(&t);\r | |
160 | op = (PyStringObject *)t;\r | |
161 | characters[*str & UCHAR_MAX] = op;\r | |
162 | Py_INCREF(op);\r | |
163 | }\r | |
164 | return (PyObject *) op;\r | |
165 | }\r | |
166 | \r | |
167 | PyObject *\r | |
168 | PyString_FromFormatV(const char *format, va_list vargs)\r | |
169 | {\r | |
170 | va_list count;\r | |
171 | Py_ssize_t n = 0;\r | |
172 | const char* f;\r | |
173 | char *s;\r | |
174 | PyObject* string;\r | |
175 | \r | |
176 | #ifdef VA_LIST_IS_ARRAY\r | |
177 | Py_MEMCPY(count, vargs, sizeof(va_list));\r | |
178 | #else\r | |
179 | #ifdef __va_copy\r | |
180 | __va_copy(count, vargs);\r | |
181 | #else\r | |
182 | count = vargs;\r | |
183 | #endif\r | |
184 | #endif\r | |
185 | /* step 1: figure out how large a buffer we need */\r | |
186 | for (f = format; *f; f++) {\r | |
187 | if (*f == '%') {\r | |
188 | #ifdef HAVE_LONG_LONG\r | |
189 | int longlongflag = 0;\r | |
190 | #endif\r | |
191 | const char* p = f;\r | |
192 | while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))\r | |
193 | ;\r | |
194 | \r | |
195 | /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since\r | |
196 | * they don't affect the amount of space we reserve.\r | |
197 | */\r | |
198 | if (*f == 'l') {\r | |
199 | if (f[1] == 'd' || f[1] == 'u') {\r | |
200 | ++f;\r | |
201 | }\r | |
202 | #ifdef HAVE_LONG_LONG\r | |
203 | else if (f[1] == 'l' &&\r | |
204 | (f[2] == 'd' || f[2] == 'u')) {\r | |
205 | longlongflag = 1;\r | |
206 | f += 2;\r | |
207 | }\r | |
208 | #endif\r | |
209 | }\r | |
210 | else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {\r | |
211 | ++f;\r | |
212 | }\r | |
213 | \r | |
214 | switch (*f) {\r | |
215 | case 'c':\r | |
216 | (void)va_arg(count, int);\r | |
217 | /* fall through... */\r | |
218 | case '%':\r | |
219 | n++;\r | |
220 | break;\r | |
221 | case 'd': case 'u': case 'i': case 'x':\r | |
222 | (void) va_arg(count, int);\r | |
223 | #ifdef HAVE_LONG_LONG\r | |
224 | /* Need at most\r | |
225 | ceil(log10(256)*SIZEOF_LONG_LONG) digits,\r | |
226 | plus 1 for the sign. 53/22 is an upper\r | |
227 | bound for log10(256). */\r | |
228 | if (longlongflag)\r | |
229 | n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;\r | |
230 | else\r | |
231 | #endif\r | |
232 | /* 20 bytes is enough to hold a 64-bit\r | |
233 | integer. Decimal takes the most\r | |
234 | space. This isn't enough for\r | |
235 | octal. */\r | |
236 | n += 20;\r | |
237 | \r | |
238 | break;\r | |
239 | case 's':\r | |
240 | s = va_arg(count, char*);\r | |
241 | n += strlen(s);\r | |
242 | break;\r | |
243 | case 'p':\r | |
244 | (void) va_arg(count, int);\r | |
245 | /* maximum 64-bit pointer representation:\r | |
246 | * 0xffffffffffffffff\r | |
247 | * so 19 characters is enough.\r | |
248 | * XXX I count 18 -- what's the extra for?\r | |
249 | */\r | |
250 | n += 19;\r | |
251 | break;\r | |
252 | default:\r | |
253 | /* if we stumble upon an unknown\r | |
254 | formatting code, copy the rest of\r | |
255 | the format string to the output\r | |
256 | string. (we cannot just skip the\r | |
257 | code, since there's no way to know\r | |
258 | what's in the argument list) */\r | |
259 | n += strlen(p);\r | |
260 | goto expand;\r | |
261 | }\r | |
262 | } else\r | |
263 | n++;\r | |
264 | }\r | |
265 | expand:\r | |
266 | /* step 2: fill the buffer */\r | |
267 | /* Since we've analyzed how much space we need for the worst case,\r | |
268 | use sprintf directly instead of the slower PyOS_snprintf. */\r | |
269 | string = PyString_FromStringAndSize(NULL, n);\r | |
270 | if (!string)\r | |
271 | return NULL;\r | |
272 | \r | |
273 | s = PyString_AsString(string);\r | |
274 | \r | |
275 | for (f = format; *f; f++) {\r | |
276 | if (*f == '%') {\r | |
277 | const char* p = f++;\r | |
278 | Py_ssize_t i;\r | |
279 | int longflag = 0;\r | |
280 | #ifdef HAVE_LONG_LONG\r | |
281 | int longlongflag = 0;\r | |
282 | #endif\r | |
283 | int size_tflag = 0;\r | |
284 | /* parse the width.precision part (we're only\r | |
285 | interested in the precision value, if any) */\r | |
286 | n = 0;\r | |
287 | while (isdigit(Py_CHARMASK(*f)))\r | |
288 | n = (n*10) + *f++ - '0';\r | |
289 | if (*f == '.') {\r | |
290 | f++;\r | |
291 | n = 0;\r | |
292 | while (isdigit(Py_CHARMASK(*f)))\r | |
293 | n = (n*10) + *f++ - '0';\r | |
294 | }\r | |
295 | while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))\r | |
296 | f++;\r | |
297 | /* Handle %ld, %lu, %lld and %llu. */\r | |
298 | if (*f == 'l') {\r | |
299 | if (f[1] == 'd' || f[1] == 'u') {\r | |
300 | longflag = 1;\r | |
301 | ++f;\r | |
302 | }\r | |
303 | #ifdef HAVE_LONG_LONG\r | |
304 | else if (f[1] == 'l' &&\r | |
305 | (f[2] == 'd' || f[2] == 'u')) {\r | |
306 | longlongflag = 1;\r | |
307 | f += 2;\r | |
308 | }\r | |
309 | #endif\r | |
310 | }\r | |
311 | /* handle the size_t flag. */\r | |
312 | else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {\r | |
313 | size_tflag = 1;\r | |
314 | ++f;\r | |
315 | }\r | |
316 | \r | |
317 | switch (*f) {\r | |
318 | case 'c':\r | |
319 | *s++ = va_arg(vargs, int);\r | |
320 | break;\r | |
321 | case 'd':\r | |
322 | if (longflag)\r | |
323 | sprintf(s, "%ld", va_arg(vargs, long));\r | |
324 | #ifdef HAVE_LONG_LONG\r | |
325 | else if (longlongflag)\r | |
326 | sprintf(s, "%" PY_FORMAT_LONG_LONG "d",\r | |
327 | va_arg(vargs, PY_LONG_LONG));\r | |
328 | #endif\r | |
329 | else if (size_tflag)\r | |
330 | sprintf(s, "%" PY_FORMAT_SIZE_T "d",\r | |
331 | va_arg(vargs, Py_ssize_t));\r | |
332 | else\r | |
333 | sprintf(s, "%d", va_arg(vargs, int));\r | |
334 | s += strlen(s);\r | |
335 | break;\r | |
336 | case 'u':\r | |
337 | if (longflag)\r | |
338 | sprintf(s, "%lu",\r | |
339 | va_arg(vargs, unsigned long));\r | |
340 | #ifdef HAVE_LONG_LONG\r | |
341 | else if (longlongflag)\r | |
342 | sprintf(s, "%" PY_FORMAT_LONG_LONG "u",\r | |
343 | va_arg(vargs, PY_LONG_LONG));\r | |
344 | #endif\r | |
345 | else if (size_tflag)\r | |
346 | sprintf(s, "%" PY_FORMAT_SIZE_T "u",\r | |
347 | va_arg(vargs, size_t));\r | |
348 | else\r | |
349 | sprintf(s, "%u",\r | |
350 | va_arg(vargs, unsigned int));\r | |
351 | s += strlen(s);\r | |
352 | break;\r | |
353 | case 'i':\r | |
354 | sprintf(s, "%i", va_arg(vargs, int));\r | |
355 | s += strlen(s);\r | |
356 | break;\r | |
357 | case 'x':\r | |
358 | sprintf(s, "%x", va_arg(vargs, int));\r | |
359 | s += strlen(s);\r | |
360 | break;\r | |
361 | case 's':\r | |
362 | p = va_arg(vargs, char*);\r | |
363 | i = strlen(p);\r | |
364 | if (n > 0 && i > n)\r | |
365 | i = n;\r | |
366 | Py_MEMCPY(s, p, i);\r | |
367 | s += i;\r | |
368 | break;\r | |
369 | case 'p':\r | |
370 | sprintf(s, "%p", va_arg(vargs, void*));\r | |
371 | /* %p is ill-defined: ensure leading 0x. */\r | |
372 | if (s[1] == 'X')\r | |
373 | s[1] = 'x';\r | |
374 | else if (s[1] != 'x') {\r | |
375 | memmove(s+2, s, strlen(s)+1);\r | |
376 | s[0] = '0';\r | |
377 | s[1] = 'x';\r | |
378 | }\r | |
379 | s += strlen(s);\r | |
380 | break;\r | |
381 | case '%':\r | |
382 | *s++ = '%';\r | |
383 | break;\r | |
384 | default:\r | |
385 | strcpy(s, p);\r | |
386 | s += strlen(s);\r | |
387 | goto end;\r | |
388 | }\r | |
389 | } else\r | |
390 | *s++ = *f;\r | |
391 | }\r | |
392 | \r | |
393 | end:\r | |
394 | if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))\r | |
395 | return NULL;\r | |
396 | return string;\r | |
397 | }\r | |
398 | \r | |
399 | PyObject *\r | |
400 | PyString_FromFormat(const char *format, ...)\r | |
401 | {\r | |
402 | PyObject* ret;\r | |
403 | va_list vargs;\r | |
404 | \r | |
405 | #ifdef HAVE_STDARG_PROTOTYPES\r | |
406 | va_start(vargs, format);\r | |
407 | #else\r | |
408 | va_start(vargs);\r | |
409 | #endif\r | |
410 | ret = PyString_FromFormatV(format, vargs);\r | |
411 | va_end(vargs);\r | |
412 | return ret;\r | |
413 | }\r | |
414 | \r | |
415 | \r | |
416 | PyObject *PyString_Decode(const char *s,\r | |
417 | Py_ssize_t size,\r | |
418 | const char *encoding,\r | |
419 | const char *errors)\r | |
420 | {\r | |
421 | PyObject *v, *str;\r | |
422 | \r | |
423 | str = PyString_FromStringAndSize(s, size);\r | |
424 | if (str == NULL)\r | |
425 | return NULL;\r | |
426 | v = PyString_AsDecodedString(str, encoding, errors);\r | |
427 | Py_DECREF(str);\r | |
428 | return v;\r | |
429 | }\r | |
430 | \r | |
431 | PyObject *PyString_AsDecodedObject(PyObject *str,\r | |
432 | const char *encoding,\r | |
433 | const char *errors)\r | |
434 | {\r | |
435 | PyObject *v;\r | |
436 | \r | |
437 | if (!PyString_Check(str)) {\r | |
438 | PyErr_BadArgument();\r | |
439 | goto onError;\r | |
440 | }\r | |
441 | \r | |
442 | if (encoding == NULL) {\r | |
443 | #ifdef Py_USING_UNICODE\r | |
444 | encoding = PyUnicode_GetDefaultEncoding();\r | |
445 | #else\r | |
446 | PyErr_SetString(PyExc_ValueError, "no encoding specified");\r | |
447 | goto onError;\r | |
448 | #endif\r | |
449 | }\r | |
450 | \r | |
451 | /* Decode via the codec registry */\r | |
452 | v = PyCodec_Decode(str, encoding, errors);\r | |
453 | if (v == NULL)\r | |
454 | goto onError;\r | |
455 | \r | |
456 | return v;\r | |
457 | \r | |
458 | onError:\r | |
459 | return NULL;\r | |
460 | }\r | |
461 | \r | |
462 | PyObject *PyString_AsDecodedString(PyObject *str,\r | |
463 | const char *encoding,\r | |
464 | const char *errors)\r | |
465 | {\r | |
466 | PyObject *v;\r | |
467 | \r | |
468 | v = PyString_AsDecodedObject(str, encoding, errors);\r | |
469 | if (v == NULL)\r | |
470 | goto onError;\r | |
471 | \r | |
472 | #ifdef Py_USING_UNICODE\r | |
473 | /* Convert Unicode to a string using the default encoding */\r | |
474 | if (PyUnicode_Check(v)) {\r | |
475 | PyObject *temp = v;\r | |
476 | v = PyUnicode_AsEncodedString(v, NULL, NULL);\r | |
477 | Py_DECREF(temp);\r | |
478 | if (v == NULL)\r | |
479 | goto onError;\r | |
480 | }\r | |
481 | #endif\r | |
482 | if (!PyString_Check(v)) {\r | |
483 | PyErr_Format(PyExc_TypeError,\r | |
484 | "decoder did not return a string object (type=%.400s)",\r | |
485 | Py_TYPE(v)->tp_name);\r | |
486 | Py_DECREF(v);\r | |
487 | goto onError;\r | |
488 | }\r | |
489 | \r | |
490 | return v;\r | |
491 | \r | |
492 | onError:\r | |
493 | return NULL;\r | |
494 | }\r | |
495 | \r | |
496 | PyObject *PyString_Encode(const char *s,\r | |
497 | Py_ssize_t size,\r | |
498 | const char *encoding,\r | |
499 | const char *errors)\r | |
500 | {\r | |
501 | PyObject *v, *str;\r | |
502 | \r | |
503 | str = PyString_FromStringAndSize(s, size);\r | |
504 | if (str == NULL)\r | |
505 | return NULL;\r | |
506 | v = PyString_AsEncodedString(str, encoding, errors);\r | |
507 | Py_DECREF(str);\r | |
508 | return v;\r | |
509 | }\r | |
510 | \r | |
511 | PyObject *PyString_AsEncodedObject(PyObject *str,\r | |
512 | const char *encoding,\r | |
513 | const char *errors)\r | |
514 | {\r | |
515 | PyObject *v;\r | |
516 | \r | |
517 | if (!PyString_Check(str)) {\r | |
518 | PyErr_BadArgument();\r | |
519 | goto onError;\r | |
520 | }\r | |
521 | \r | |
522 | if (encoding == NULL) {\r | |
523 | #ifdef Py_USING_UNICODE\r | |
524 | encoding = PyUnicode_GetDefaultEncoding();\r | |
525 | #else\r | |
526 | PyErr_SetString(PyExc_ValueError, "no encoding specified");\r | |
527 | goto onError;\r | |
528 | #endif\r | |
529 | }\r | |
530 | \r | |
531 | /* Encode via the codec registry */\r | |
532 | v = PyCodec_Encode(str, encoding, errors);\r | |
533 | if (v == NULL)\r | |
534 | goto onError;\r | |
535 | \r | |
536 | return v;\r | |
537 | \r | |
538 | onError:\r | |
539 | return NULL;\r | |
540 | }\r | |
541 | \r | |
542 | PyObject *PyString_AsEncodedString(PyObject *str,\r | |
543 | const char *encoding,\r | |
544 | const char *errors)\r | |
545 | {\r | |
546 | PyObject *v;\r | |
547 | \r | |
548 | v = PyString_AsEncodedObject(str, encoding, errors);\r | |
549 | if (v == NULL)\r | |
550 | goto onError;\r | |
551 | \r | |
552 | #ifdef Py_USING_UNICODE\r | |
553 | /* Convert Unicode to a string using the default encoding */\r | |
554 | if (PyUnicode_Check(v)) {\r | |
555 | PyObject *temp = v;\r | |
556 | v = PyUnicode_AsEncodedString(v, NULL, NULL);\r | |
557 | Py_DECREF(temp);\r | |
558 | if (v == NULL)\r | |
559 | goto onError;\r | |
560 | }\r | |
561 | #endif\r | |
562 | if (!PyString_Check(v)) {\r | |
563 | PyErr_Format(PyExc_TypeError,\r | |
564 | "encoder did not return a string object (type=%.400s)",\r | |
565 | Py_TYPE(v)->tp_name);\r | |
566 | Py_DECREF(v);\r | |
567 | goto onError;\r | |
568 | }\r | |
569 | \r | |
570 | return v;\r | |
571 | \r | |
572 | onError:\r | |
573 | return NULL;\r | |
574 | }\r | |
575 | \r | |
576 | static void\r | |
577 | string_dealloc(PyObject *op)\r | |
578 | {\r | |
579 | switch (PyString_CHECK_INTERNED(op)) {\r | |
580 | case SSTATE_NOT_INTERNED:\r | |
581 | break;\r | |
582 | \r | |
583 | case SSTATE_INTERNED_MORTAL:\r | |
584 | /* revive dead object temporarily for DelItem */\r | |
585 | Py_REFCNT(op) = 3;\r | |
586 | if (PyDict_DelItem(interned, op) != 0)\r | |
587 | Py_FatalError(\r | |
588 | "deletion of interned string failed");\r | |
589 | break;\r | |
590 | \r | |
591 | case SSTATE_INTERNED_IMMORTAL:\r | |
592 | Py_FatalError("Immortal interned string died.");\r | |
593 | \r | |
594 | default:\r | |
595 | Py_FatalError("Inconsistent interned string state.");\r | |
596 | }\r | |
597 | Py_TYPE(op)->tp_free(op);\r | |
598 | }\r | |
599 | \r | |
600 | /* Unescape a backslash-escaped string. If unicode is non-zero,\r | |
601 | the string is a u-literal. If recode_encoding is non-zero,\r | |
602 | the string is UTF-8 encoded and should be re-encoded in the\r | |
603 | specified encoding. */\r | |
604 | \r | |
605 | PyObject *PyString_DecodeEscape(const char *s,\r | |
606 | Py_ssize_t len,\r | |
607 | const char *errors,\r | |
608 | Py_ssize_t unicode,\r | |
609 | const char *recode_encoding)\r | |
610 | {\r | |
611 | int c;\r | |
612 | char *p, *buf;\r | |
613 | const char *end;\r | |
614 | PyObject *v;\r | |
615 | Py_ssize_t newlen = recode_encoding ? 4*len:len;\r | |
616 | v = PyString_FromStringAndSize((char *)NULL, newlen);\r | |
617 | if (v == NULL)\r | |
618 | return NULL;\r | |
619 | p = buf = PyString_AsString(v);\r | |
620 | end = s + len;\r | |
621 | while (s < end) {\r | |
622 | if (*s != '\\') {\r | |
623 | non_esc:\r | |
624 | #ifdef Py_USING_UNICODE\r | |
625 | if (recode_encoding && (*s & 0x80)) {\r | |
626 | PyObject *u, *w;\r | |
627 | char *r;\r | |
628 | const char* t;\r | |
629 | Py_ssize_t rn;\r | |
630 | t = s;\r | |
631 | /* Decode non-ASCII bytes as UTF-8. */\r | |
632 | while (t < end && (*t & 0x80)) t++;\r | |
633 | u = PyUnicode_DecodeUTF8(s, t - s, errors);\r | |
634 | if(!u) goto failed;\r | |
635 | \r | |
636 | /* Recode them in target encoding. */\r | |
637 | w = PyUnicode_AsEncodedString(\r | |
638 | u, recode_encoding, errors);\r | |
639 | Py_DECREF(u);\r | |
640 | if (!w) goto failed;\r | |
641 | \r | |
642 | /* Append bytes to output buffer. */\r | |
643 | assert(PyString_Check(w));\r | |
644 | r = PyString_AS_STRING(w);\r | |
645 | rn = PyString_GET_SIZE(w);\r | |
646 | Py_MEMCPY(p, r, rn);\r | |
647 | p += rn;\r | |
648 | Py_DECREF(w);\r | |
649 | s = t;\r | |
650 | } else {\r | |
651 | *p++ = *s++;\r | |
652 | }\r | |
653 | #else\r | |
654 | *p++ = *s++;\r | |
655 | #endif\r | |
656 | continue;\r | |
657 | }\r | |
658 | s++;\r | |
659 | if (s==end) {\r | |
660 | PyErr_SetString(PyExc_ValueError,\r | |
661 | "Trailing \\ in string");\r | |
662 | goto failed;\r | |
663 | }\r | |
664 | switch (*s++) {\r | |
665 | /* XXX This assumes ASCII! */\r | |
666 | case '\n': break;\r | |
667 | case '\\': *p++ = '\\'; break;\r | |
668 | case '\'': *p++ = '\''; break;\r | |
669 | case '\"': *p++ = '\"'; break;\r | |
670 | case 'b': *p++ = '\b'; break;\r | |
671 | case 'f': *p++ = '\014'; break; /* FF */\r | |
672 | case 't': *p++ = '\t'; break;\r | |
673 | case 'n': *p++ = '\n'; break;\r | |
674 | case 'r': *p++ = '\r'; break;\r | |
675 | case 'v': *p++ = '\013'; break; /* VT */\r | |
676 | case 'a': *p++ = '\007'; break; /* BEL, not classic C */\r | |
677 | case '0': case '1': case '2': case '3':\r | |
678 | case '4': case '5': case '6': case '7':\r | |
679 | c = s[-1] - '0';\r | |
680 | if (s < end && '0' <= *s && *s <= '7') {\r | |
681 | c = (c<<3) + *s++ - '0';\r | |
682 | if (s < end && '0' <= *s && *s <= '7')\r | |
683 | c = (c<<3) + *s++ - '0';\r | |
684 | }\r | |
685 | *p++ = c;\r | |
686 | break;\r | |
687 | case 'x':\r | |
688 | if (s+1 < end &&\r | |
689 | isxdigit(Py_CHARMASK(s[0])) &&\r | |
690 | isxdigit(Py_CHARMASK(s[1])))\r | |
691 | {\r | |
692 | unsigned int x = 0;\r | |
693 | c = Py_CHARMASK(*s);\r | |
694 | s++;\r | |
695 | if (isdigit(c))\r | |
696 | x = c - '0';\r | |
697 | else if (islower(c))\r | |
698 | x = 10 + c - 'a';\r | |
699 | else\r | |
700 | x = 10 + c - 'A';\r | |
701 | x = x << 4;\r | |
702 | c = Py_CHARMASK(*s);\r | |
703 | s++;\r | |
704 | if (isdigit(c))\r | |
705 | x += c - '0';\r | |
706 | else if (islower(c))\r | |
707 | x += 10 + c - 'a';\r | |
708 | else\r | |
709 | x += 10 + c - 'A';\r | |
710 | *p++ = x;\r | |
711 | break;\r | |
712 | }\r | |
713 | if (!errors || strcmp(errors, "strict") == 0) {\r | |
714 | PyErr_SetString(PyExc_ValueError,\r | |
715 | "invalid \\x escape");\r | |
716 | goto failed;\r | |
717 | }\r | |
718 | if (strcmp(errors, "replace") == 0) {\r | |
719 | *p++ = '?';\r | |
720 | } else if (strcmp(errors, "ignore") == 0)\r | |
721 | /* do nothing */;\r | |
722 | else {\r | |
723 | PyErr_Format(PyExc_ValueError,\r | |
724 | "decoding error; "\r | |
725 | "unknown error handling code: %.400s",\r | |
726 | errors);\r | |
727 | goto failed;\r | |
728 | }\r | |
729 | #ifndef Py_USING_UNICODE\r | |
730 | case 'u':\r | |
731 | case 'U':\r | |
732 | case 'N':\r | |
733 | if (unicode) {\r | |
734 | PyErr_SetString(PyExc_ValueError,\r | |
735 | "Unicode escapes not legal "\r | |
736 | "when Unicode disabled");\r | |
737 | goto failed;\r | |
738 | }\r | |
739 | #endif\r | |
740 | default:\r | |
741 | *p++ = '\\';\r | |
742 | s--;\r | |
743 | goto non_esc; /* an arbitrary number of unescaped\r | |
744 | UTF-8 bytes may follow. */\r | |
745 | }\r | |
746 | }\r | |
747 | if (p-buf < newlen && _PyString_Resize(&v, p - buf))\r | |
748 | goto failed;\r | |
749 | return v;\r | |
750 | failed:\r | |
751 | Py_DECREF(v);\r | |
752 | return NULL;\r | |
753 | }\r | |
754 | \r | |
755 | /* -------------------------------------------------------------------- */\r | |
756 | /* object api */\r | |
757 | \r | |
758 | static Py_ssize_t\r | |
759 | string_getsize(register PyObject *op)\r | |
760 | {\r | |
761 | char *s;\r | |
762 | Py_ssize_t len;\r | |
763 | if (PyString_AsStringAndSize(op, &s, &len))\r | |
764 | return -1;\r | |
765 | return len;\r | |
766 | }\r | |
767 | \r | |
768 | static /*const*/ char *\r | |
769 | string_getbuffer(register PyObject *op)\r | |
770 | {\r | |
771 | char *s;\r | |
772 | Py_ssize_t len;\r | |
773 | if (PyString_AsStringAndSize(op, &s, &len))\r | |
774 | return NULL;\r | |
775 | return s;\r | |
776 | }\r | |
777 | \r | |
778 | Py_ssize_t\r | |
779 | PyString_Size(register PyObject *op)\r | |
780 | {\r | |
781 | if (!PyString_Check(op))\r | |
782 | return string_getsize(op);\r | |
783 | return Py_SIZE(op);\r | |
784 | }\r | |
785 | \r | |
786 | /*const*/ char *\r | |
787 | PyString_AsString(register PyObject *op)\r | |
788 | {\r | |
789 | if (!PyString_Check(op))\r | |
790 | return string_getbuffer(op);\r | |
791 | return ((PyStringObject *)op) -> ob_sval;\r | |
792 | }\r | |
793 | \r | |
794 | int\r | |
795 | PyString_AsStringAndSize(register PyObject *obj,\r | |
796 | register char **s,\r | |
797 | register Py_ssize_t *len)\r | |
798 | {\r | |
799 | if (s == NULL) {\r | |
800 | PyErr_BadInternalCall();\r | |
801 | return -1;\r | |
802 | }\r | |
803 | \r | |
804 | if (!PyString_Check(obj)) {\r | |
805 | #ifdef Py_USING_UNICODE\r | |
806 | if (PyUnicode_Check(obj)) {\r | |
807 | obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);\r | |
808 | if (obj == NULL)\r | |
809 | return -1;\r | |
810 | }\r | |
811 | else\r | |
812 | #endif\r | |
813 | {\r | |
814 | PyErr_Format(PyExc_TypeError,\r | |
815 | "expected string or Unicode object, "\r | |
816 | "%.200s found", Py_TYPE(obj)->tp_name);\r | |
817 | return -1;\r | |
818 | }\r | |
819 | }\r | |
820 | \r | |
821 | *s = PyString_AS_STRING(obj);\r | |
822 | if (len != NULL)\r | |
823 | *len = PyString_GET_SIZE(obj);\r | |
824 | else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {\r | |
825 | PyErr_SetString(PyExc_TypeError,\r | |
826 | "expected string without null bytes");\r | |
827 | return -1;\r | |
828 | }\r | |
829 | return 0;\r | |
830 | }\r | |
831 | \r | |
832 | /* -------------------------------------------------------------------- */\r | |
833 | /* Methods */\r | |
834 | \r | |
835 | #include "stringlib/stringdefs.h"\r | |
836 | #include "stringlib/fastsearch.h"\r | |
837 | \r | |
838 | #include "stringlib/count.h"\r | |
839 | #include "stringlib/find.h"\r | |
840 | #include "stringlib/partition.h"\r | |
841 | #include "stringlib/split.h"\r | |
842 | \r | |
843 | #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping\r | |
844 | #include "stringlib/localeutil.h"\r | |
845 | \r | |
846 | \r | |
847 | \r | |
848 | static int\r | |
849 | string_print(PyStringObject *op, FILE *fp, int flags)\r | |
850 | {\r | |
851 | Py_ssize_t i, str_len;\r | |
852 | char c;\r | |
853 | int quote;\r | |
854 | \r | |
855 | /* XXX Ought to check for interrupts when writing long strings */\r | |
856 | if (! PyString_CheckExact(op)) {\r | |
857 | int ret;\r | |
858 | /* A str subclass may have its own __str__ method. */\r | |
859 | op = (PyStringObject *) PyObject_Str((PyObject *)op);\r | |
860 | if (op == NULL)\r | |
861 | return -1;\r | |
862 | ret = string_print(op, fp, flags);\r | |
863 | Py_DECREF(op);\r | |
864 | return ret;\r | |
865 | }\r | |
866 | if (flags & Py_PRINT_RAW) {\r | |
867 | char *data = op->ob_sval;\r | |
868 | Py_ssize_t size = Py_SIZE(op);\r | |
869 | Py_BEGIN_ALLOW_THREADS\r | |
870 | while (size > INT_MAX) {\r | |
871 | /* Very long strings cannot be written atomically.\r | |
872 | * But don't write exactly INT_MAX bytes at a time\r | |
873 | * to avoid memory aligment issues.\r | |
874 | */\r | |
875 | const int chunk_size = INT_MAX & ~0x3FFF;\r | |
876 | fwrite(data, 1, chunk_size, fp);\r | |
877 | data += chunk_size;\r | |
878 | size -= chunk_size;\r | |
879 | }\r | |
880 | #ifdef __VMS\r | |
881 | if (size) fwrite(data, (int)size, 1, fp);\r | |
882 | #else\r | |
883 | fwrite(data, 1, (int)size, fp);\r | |
884 | #endif\r | |
885 | Py_END_ALLOW_THREADS\r | |
886 | return 0;\r | |
887 | }\r | |
888 | \r | |
889 | /* figure out which quote to use; single is preferred */\r | |
890 | quote = '\'';\r | |
891 | if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&\r | |
892 | !memchr(op->ob_sval, '"', Py_SIZE(op)))\r | |
893 | quote = '"';\r | |
894 | \r | |
895 | str_len = Py_SIZE(op);\r | |
896 | Py_BEGIN_ALLOW_THREADS\r | |
897 | fputc(quote, fp);\r | |
898 | for (i = 0; i < str_len; i++) {\r | |
899 | /* Since strings are immutable and the caller should have a\r | |
900 | reference, accessing the interal buffer should not be an issue\r | |
901 | with the GIL released. */\r | |
902 | c = op->ob_sval[i];\r | |
903 | if (c == quote || c == '\\')\r | |
904 | fprintf(fp, "\\%c", c);\r | |
905 | else if (c == '\t')\r | |
906 | fprintf(fp, "\\t");\r | |
907 | else if (c == '\n')\r | |
908 | fprintf(fp, "\\n");\r | |
909 | else if (c == '\r')\r | |
910 | fprintf(fp, "\\r");\r | |
911 | else if (c < ' ' || c >= 0x7f)\r | |
912 | fprintf(fp, "\\x%02x", c & 0xff);\r | |
913 | else\r | |
914 | fputc(c, fp);\r | |
915 | }\r | |
916 | fputc(quote, fp);\r | |
917 | Py_END_ALLOW_THREADS\r | |
918 | return 0;\r | |
919 | }\r | |
920 | \r | |
921 | PyObject *\r | |
922 | PyString_Repr(PyObject *obj, int smartquotes)\r | |
923 | {\r | |
924 | register PyStringObject* op = (PyStringObject*) obj;\r | |
925 | size_t newsize = 2 + 4 * Py_SIZE(op);\r | |
926 | PyObject *v;\r | |
927 | if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {\r | |
928 | PyErr_SetString(PyExc_OverflowError,\r | |
929 | "string is too large to make repr");\r | |
930 | return NULL;\r | |
931 | }\r | |
932 | v = PyString_FromStringAndSize((char *)NULL, newsize);\r | |
933 | if (v == NULL) {\r | |
934 | return NULL;\r | |
935 | }\r | |
936 | else {\r | |
937 | register Py_ssize_t i;\r | |
938 | register char c;\r | |
939 | register char *p;\r | |
940 | int quote;\r | |
941 | \r | |
942 | /* figure out which quote to use; single is preferred */\r | |
943 | quote = '\'';\r | |
944 | if (smartquotes &&\r | |
945 | memchr(op->ob_sval, '\'', Py_SIZE(op)) &&\r | |
946 | !memchr(op->ob_sval, '"', Py_SIZE(op)))\r | |
947 | quote = '"';\r | |
948 | \r | |
949 | p = PyString_AS_STRING(v);\r | |
950 | *p++ = quote;\r | |
951 | for (i = 0; i < Py_SIZE(op); i++) {\r | |
952 | /* There's at least enough room for a hex escape\r | |
953 | and a closing quote. */\r | |
954 | assert(newsize - (p - PyString_AS_STRING(v)) >= 5);\r | |
955 | c = op->ob_sval[i];\r | |
956 | if (c == quote || c == '\\')\r | |
957 | *p++ = '\\', *p++ = c;\r | |
958 | else if (c == '\t')\r | |
959 | *p++ = '\\', *p++ = 't';\r | |
960 | else if (c == '\n')\r | |
961 | *p++ = '\\', *p++ = 'n';\r | |
962 | else if (c == '\r')\r | |
963 | *p++ = '\\', *p++ = 'r';\r | |
964 | else if (c < ' ' || c >= 0x7f) {\r | |
965 | /* For performance, we don't want to call\r | |
966 | PyOS_snprintf here (extra layers of\r | |
967 | function call). */\r | |
968 | sprintf(p, "\\x%02x", c & 0xff);\r | |
969 | p += 4;\r | |
970 | }\r | |
971 | else\r | |
972 | *p++ = c;\r | |
973 | }\r | |
974 | assert(newsize - (p - PyString_AS_STRING(v)) >= 1);\r | |
975 | *p++ = quote;\r | |
976 | *p = '\0';\r | |
977 | if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))\r | |
978 | return NULL;\r | |
979 | return v;\r | |
980 | }\r | |
981 | }\r | |
982 | \r | |
983 | static PyObject *\r | |
984 | string_repr(PyObject *op)\r | |
985 | {\r | |
986 | return PyString_Repr(op, 1);\r | |
987 | }\r | |
988 | \r | |
989 | static PyObject *\r | |
990 | string_str(PyObject *s)\r | |
991 | {\r | |
992 | assert(PyString_Check(s));\r | |
993 | if (PyString_CheckExact(s)) {\r | |
994 | Py_INCREF(s);\r | |
995 | return s;\r | |
996 | }\r | |
997 | else {\r | |
998 | /* Subtype -- return genuine string with the same value. */\r | |
999 | PyStringObject *t = (PyStringObject *) s;\r | |
1000 | return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));\r | |
1001 | }\r | |
1002 | }\r | |
1003 | \r | |
1004 | static Py_ssize_t\r | |
1005 | string_length(PyStringObject *a)\r | |
1006 | {\r | |
1007 | return Py_SIZE(a);\r | |
1008 | }\r | |
1009 | \r | |
1010 | static PyObject *\r | |
1011 | string_concat(register PyStringObject *a, register PyObject *bb)\r | |
1012 | {\r | |
1013 | register Py_ssize_t size;\r | |
1014 | register PyStringObject *op;\r | |
1015 | if (!PyString_Check(bb)) {\r | |
1016 | #ifdef Py_USING_UNICODE\r | |
1017 | if (PyUnicode_Check(bb))\r | |
1018 | return PyUnicode_Concat((PyObject *)a, bb);\r | |
1019 | #endif\r | |
1020 | if (PyByteArray_Check(bb))\r | |
1021 | return PyByteArray_Concat((PyObject *)a, bb);\r | |
1022 | PyErr_Format(PyExc_TypeError,\r | |
1023 | "cannot concatenate 'str' and '%.200s' objects",\r | |
1024 | Py_TYPE(bb)->tp_name);\r | |
1025 | return NULL;\r | |
1026 | }\r | |
1027 | #define b ((PyStringObject *)bb)\r | |
1028 | /* Optimize cases with empty left or right operand */\r | |
1029 | if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&\r | |
1030 | PyString_CheckExact(a) && PyString_CheckExact(b)) {\r | |
1031 | if (Py_SIZE(a) == 0) {\r | |
1032 | Py_INCREF(bb);\r | |
1033 | return bb;\r | |
1034 | }\r | |
1035 | Py_INCREF(a);\r | |
1036 | return (PyObject *)a;\r | |
1037 | }\r | |
1038 | size = Py_SIZE(a) + Py_SIZE(b);\r | |
1039 | /* Check that string sizes are not negative, to prevent an\r | |
1040 | overflow in cases where we are passed incorrectly-created\r | |
1041 | strings with negative lengths (due to a bug in other code).\r | |
1042 | */\r | |
1043 | if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||\r | |
1044 | Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {\r | |
1045 | PyErr_SetString(PyExc_OverflowError,\r | |
1046 | "strings are too large to concat");\r | |
1047 | return NULL;\r | |
1048 | }\r | |
1049 | \r | |
1050 | /* Inline PyObject_NewVar */\r | |
1051 | if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {\r | |
1052 | PyErr_SetString(PyExc_OverflowError,\r | |
1053 | "strings are too large to concat");\r | |
1054 | return NULL;\r | |
1055 | }\r | |
1056 | op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);\r | |
1057 | if (op == NULL)\r | |
1058 | return PyErr_NoMemory();\r | |
1059 | PyObject_INIT_VAR(op, &PyString_Type, size);\r | |
1060 | op->ob_shash = -1;\r | |
1061 | op->ob_sstate = SSTATE_NOT_INTERNED;\r | |
1062 | Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));\r | |
1063 | Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));\r | |
1064 | op->ob_sval[size] = '\0';\r | |
1065 | return (PyObject *) op;\r | |
1066 | #undef b\r | |
1067 | }\r | |
1068 | \r | |
1069 | static PyObject *\r | |
1070 | string_repeat(register PyStringObject *a, register Py_ssize_t n)\r | |
1071 | {\r | |
1072 | register Py_ssize_t i;\r | |
1073 | register Py_ssize_t j;\r | |
1074 | register Py_ssize_t size;\r | |
1075 | register PyStringObject *op;\r | |
1076 | size_t nbytes;\r | |
1077 | if (n < 0)\r | |
1078 | n = 0;\r | |
1079 | /* watch out for overflows: the size can overflow int,\r | |
1080 | * and the # of bytes needed can overflow size_t\r | |
1081 | */\r | |
1082 | size = Py_SIZE(a) * n;\r | |
1083 | if (n && size / n != Py_SIZE(a)) {\r | |
1084 | PyErr_SetString(PyExc_OverflowError,\r | |
1085 | "repeated string is too long");\r | |
1086 | return NULL;\r | |
1087 | }\r | |
1088 | if (size == Py_SIZE(a) && PyString_CheckExact(a)) {\r | |
1089 | Py_INCREF(a);\r | |
1090 | return (PyObject *)a;\r | |
1091 | }\r | |
1092 | nbytes = (size_t)size;\r | |
1093 | if (nbytes + PyStringObject_SIZE <= nbytes) {\r | |
1094 | PyErr_SetString(PyExc_OverflowError,\r | |
1095 | "repeated string is too long");\r | |
1096 | return NULL;\r | |
1097 | }\r | |
1098 | op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);\r | |
1099 | if (op == NULL)\r | |
1100 | return PyErr_NoMemory();\r | |
1101 | PyObject_INIT_VAR(op, &PyString_Type, size);\r | |
1102 | op->ob_shash = -1;\r | |
1103 | op->ob_sstate = SSTATE_NOT_INTERNED;\r | |
1104 | op->ob_sval[size] = '\0';\r | |
1105 | if (Py_SIZE(a) == 1 && n > 0) {\r | |
1106 | memset(op->ob_sval, a->ob_sval[0] , n);\r | |
1107 | return (PyObject *) op;\r | |
1108 | }\r | |
1109 | i = 0;\r | |
1110 | if (i < size) {\r | |
1111 | Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));\r | |
1112 | i = Py_SIZE(a);\r | |
1113 | }\r | |
1114 | while (i < size) {\r | |
1115 | j = (i <= size-i) ? i : size-i;\r | |
1116 | Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);\r | |
1117 | i += j;\r | |
1118 | }\r | |
1119 | return (PyObject *) op;\r | |
1120 | }\r | |
1121 | \r | |
1122 | /* String slice a[i:j] consists of characters a[i] ... a[j-1] */\r | |
1123 | \r | |
1124 | static PyObject *\r | |
1125 | string_slice(register PyStringObject *a, register Py_ssize_t i,\r | |
1126 | register Py_ssize_t j)\r | |
1127 | /* j -- may be negative! */\r | |
1128 | {\r | |
1129 | if (i < 0)\r | |
1130 | i = 0;\r | |
1131 | if (j < 0)\r | |
1132 | j = 0; /* Avoid signed/unsigned bug in next line */\r | |
1133 | if (j > Py_SIZE(a))\r | |
1134 | j = Py_SIZE(a);\r | |
1135 | if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {\r | |
1136 | /* It's the same as a */\r | |
1137 | Py_INCREF(a);\r | |
1138 | return (PyObject *)a;\r | |
1139 | }\r | |
1140 | if (j < i)\r | |
1141 | j = i;\r | |
1142 | return PyString_FromStringAndSize(a->ob_sval + i, j-i);\r | |
1143 | }\r | |
1144 | \r | |
1145 | static int\r | |
1146 | string_contains(PyObject *str_obj, PyObject *sub_obj)\r | |
1147 | {\r | |
1148 | if (!PyString_CheckExact(sub_obj)) {\r | |
1149 | #ifdef Py_USING_UNICODE\r | |
1150 | if (PyUnicode_Check(sub_obj))\r | |
1151 | return PyUnicode_Contains(str_obj, sub_obj);\r | |
1152 | #endif\r | |
1153 | if (!PyString_Check(sub_obj)) {\r | |
1154 | PyErr_Format(PyExc_TypeError,\r | |
1155 | "'in <string>' requires string as left operand, "\r | |
1156 | "not %.200s", Py_TYPE(sub_obj)->tp_name);\r | |
1157 | return -1;\r | |
1158 | }\r | |
1159 | }\r | |
1160 | \r | |
1161 | return stringlib_contains_obj(str_obj, sub_obj);\r | |
1162 | }\r | |
1163 | \r | |
1164 | static PyObject *\r | |
1165 | string_item(PyStringObject *a, register Py_ssize_t i)\r | |
1166 | {\r | |
1167 | char pchar;\r | |
1168 | PyObject *v;\r | |
1169 | if (i < 0 || i >= Py_SIZE(a)) {\r | |
1170 | PyErr_SetString(PyExc_IndexError, "string index out of range");\r | |
1171 | return NULL;\r | |
1172 | }\r | |
1173 | pchar = a->ob_sval[i];\r | |
1174 | v = (PyObject *)characters[pchar & UCHAR_MAX];\r | |
1175 | if (v == NULL)\r | |
1176 | v = PyString_FromStringAndSize(&pchar, 1);\r | |
1177 | else {\r | |
1178 | #ifdef COUNT_ALLOCS\r | |
1179 | one_strings++;\r | |
1180 | #endif\r | |
1181 | Py_INCREF(v);\r | |
1182 | }\r | |
1183 | return v;\r | |
1184 | }\r | |
1185 | \r | |
1186 | static PyObject*\r | |
1187 | string_richcompare(PyStringObject *a, PyStringObject *b, int op)\r | |
1188 | {\r | |
1189 | int c;\r | |
1190 | Py_ssize_t len_a, len_b;\r | |
1191 | Py_ssize_t min_len;\r | |
1192 | PyObject *result;\r | |
1193 | \r | |
1194 | /* Make sure both arguments are strings. */\r | |
1195 | if (!(PyString_Check(a) && PyString_Check(b))) {\r | |
1196 | result = Py_NotImplemented;\r | |
1197 | goto out;\r | |
1198 | }\r | |
1199 | if (a == b) {\r | |
1200 | switch (op) {\r | |
1201 | case Py_EQ:case Py_LE:case Py_GE:\r | |
1202 | result = Py_True;\r | |
1203 | goto out;\r | |
1204 | case Py_NE:case Py_LT:case Py_GT:\r | |
1205 | result = Py_False;\r | |
1206 | goto out;\r | |
1207 | }\r | |
1208 | }\r | |
1209 | if (op == Py_EQ) {\r | |
1210 | /* Supporting Py_NE here as well does not save\r | |
1211 | much time, since Py_NE is rarely used. */\r | |
1212 | if (Py_SIZE(a) == Py_SIZE(b)\r | |
1213 | && (a->ob_sval[0] == b->ob_sval[0]\r | |
1214 | && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {\r | |
1215 | result = Py_True;\r | |
1216 | } else {\r | |
1217 | result = Py_False;\r | |
1218 | }\r | |
1219 | goto out;\r | |
1220 | }\r | |
1221 | len_a = Py_SIZE(a); len_b = Py_SIZE(b);\r | |
1222 | min_len = (len_a < len_b) ? len_a : len_b;\r | |
1223 | if (min_len > 0) {\r | |
1224 | c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);\r | |
1225 | if (c==0)\r | |
1226 | c = memcmp(a->ob_sval, b->ob_sval, min_len);\r | |
1227 | } else\r | |
1228 | c = 0;\r | |
1229 | if (c == 0)\r | |
1230 | c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;\r | |
1231 | switch (op) {\r | |
1232 | case Py_LT: c = c < 0; break;\r | |
1233 | case Py_LE: c = c <= 0; break;\r | |
1234 | case Py_EQ: assert(0); break; /* unreachable */\r | |
1235 | case Py_NE: c = c != 0; break;\r | |
1236 | case Py_GT: c = c > 0; break;\r | |
1237 | case Py_GE: c = c >= 0; break;\r | |
1238 | default:\r | |
1239 | result = Py_NotImplemented;\r | |
1240 | goto out;\r | |
1241 | }\r | |
1242 | result = c ? Py_True : Py_False;\r | |
1243 | out:\r | |
1244 | Py_INCREF(result);\r | |
1245 | return result;\r | |
1246 | }\r | |
1247 | \r | |
1248 | int\r | |
1249 | _PyString_Eq(PyObject *o1, PyObject *o2)\r | |
1250 | {\r | |
1251 | PyStringObject *a = (PyStringObject*) o1;\r | |
1252 | PyStringObject *b = (PyStringObject*) o2;\r | |
1253 | return Py_SIZE(a) == Py_SIZE(b)\r | |
1254 | && *a->ob_sval == *b->ob_sval\r | |
1255 | && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;\r | |
1256 | }\r | |
1257 | \r | |
1258 | static long\r | |
1259 | string_hash(PyStringObject *a)\r | |
1260 | {\r | |
1261 | register Py_ssize_t len;\r | |
1262 | register unsigned char *p;\r | |
1263 | register long x;\r | |
1264 | \r | |
1265 | if (a->ob_shash != -1)\r | |
1266 | return a->ob_shash;\r | |
1267 | len = Py_SIZE(a);\r | |
1268 | p = (unsigned char *) a->ob_sval;\r | |
1269 | x = *p << 7;\r | |
1270 | while (--len >= 0)\r | |
1271 | x = (1000003*x) ^ *p++;\r | |
1272 | x ^= Py_SIZE(a);\r | |
1273 | if (x == -1)\r | |
1274 | x = -2;\r | |
1275 | a->ob_shash = x;\r | |
1276 | return x;\r | |
1277 | }\r | |
1278 | \r | |
1279 | static PyObject*\r | |
1280 | string_subscript(PyStringObject* self, PyObject* item)\r | |
1281 | {\r | |
1282 | if (PyIndex_Check(item)) {\r | |
1283 | Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);\r | |
1284 | if (i == -1 && PyErr_Occurred())\r | |
1285 | return NULL;\r | |
1286 | if (i < 0)\r | |
1287 | i += PyString_GET_SIZE(self);\r | |
1288 | return string_item(self, i);\r | |
1289 | }\r | |
1290 | else if (PySlice_Check(item)) {\r | |
1291 | Py_ssize_t start, stop, step, slicelength, cur, i;\r | |
1292 | char* source_buf;\r | |
1293 | char* result_buf;\r | |
1294 | PyObject* result;\r | |
1295 | \r | |
1296 | if (PySlice_GetIndicesEx((PySliceObject*)item,\r | |
1297 | PyString_GET_SIZE(self),\r | |
1298 | &start, &stop, &step, &slicelength) < 0) {\r | |
1299 | return NULL;\r | |
1300 | }\r | |
1301 | \r | |
1302 | if (slicelength <= 0) {\r | |
1303 | return PyString_FromStringAndSize("", 0);\r | |
1304 | }\r | |
1305 | else if (start == 0 && step == 1 &&\r | |
1306 | slicelength == PyString_GET_SIZE(self) &&\r | |
1307 | PyString_CheckExact(self)) {\r | |
1308 | Py_INCREF(self);\r | |
1309 | return (PyObject *)self;\r | |
1310 | }\r | |
1311 | else if (step == 1) {\r | |
1312 | return PyString_FromStringAndSize(\r | |
1313 | PyString_AS_STRING(self) + start,\r | |
1314 | slicelength);\r | |
1315 | }\r | |
1316 | else {\r | |
1317 | source_buf = PyString_AsString((PyObject*)self);\r | |
1318 | result_buf = (char *)PyMem_Malloc(slicelength);\r | |
1319 | if (result_buf == NULL)\r | |
1320 | return PyErr_NoMemory();\r | |
1321 | \r | |
1322 | for (cur = start, i = 0; i < slicelength;\r | |
1323 | cur += step, i++) {\r | |
1324 | result_buf[i] = source_buf[cur];\r | |
1325 | }\r | |
1326 | \r | |
1327 | result = PyString_FromStringAndSize(result_buf,\r | |
1328 | slicelength);\r | |
1329 | PyMem_Free(result_buf);\r | |
1330 | return result;\r | |
1331 | }\r | |
1332 | }\r | |
1333 | else {\r | |
1334 | PyErr_Format(PyExc_TypeError,\r | |
1335 | "string indices must be integers, not %.200s",\r | |
1336 | Py_TYPE(item)->tp_name);\r | |
1337 | return NULL;\r | |
1338 | }\r | |
1339 | }\r | |
1340 | \r | |
1341 | static Py_ssize_t\r | |
1342 | string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)\r | |
1343 | {\r | |
1344 | if ( index != 0 ) {\r | |
1345 | PyErr_SetString(PyExc_SystemError,\r | |
1346 | "accessing non-existent string segment");\r | |
1347 | return -1;\r | |
1348 | }\r | |
1349 | *ptr = (void *)self->ob_sval;\r | |
1350 | return Py_SIZE(self);\r | |
1351 | }\r | |
1352 | \r | |
/* Write-buffer slot.  Always fails: sets TypeError and returns -1
   without looking at any of its arguments. */
static Py_ssize_t
string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
{
    PyErr_SetString(PyExc_TypeError,
                    "Cannot use string as modifiable buffer");
    return -1;
}
1360 | \r | |
1361 | static Py_ssize_t\r | |
1362 | string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)\r | |
1363 | {\r | |
1364 | if ( lenp )\r | |
1365 | *lenp = Py_SIZE(self);\r | |
1366 | return 1;\r | |
1367 | }\r | |
1368 | \r | |
1369 | static Py_ssize_t\r | |
1370 | string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)\r | |
1371 | {\r | |
1372 | if ( index != 0 ) {\r | |
1373 | PyErr_SetString(PyExc_SystemError,\r | |
1374 | "accessing non-existent string segment");\r | |
1375 | return -1;\r | |
1376 | }\r | |
1377 | *ptr = self->ob_sval;\r | |
1378 | return Py_SIZE(self);\r | |
1379 | }\r | |
1380 | \r | |
1381 | static int\r | |
1382 | string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)\r | |
1383 | {\r | |
1384 | return PyBuffer_FillInfo(view, (PyObject*)self,\r | |
1385 | (void *)self->ob_sval, Py_SIZE(self),\r | |
1386 | 1, flags);\r | |
1387 | }\r | |
1388 | \r | |
/* Sequence slot table for the string type.  Initializers are positional,
   so their order must match the PySequenceMethods layout.  The two
   assignment slots are 0. */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (ssizeargfunc)string_repeat, /*sq_repeat*/
    (ssizeargfunc)string_item, /*sq_item*/
    (ssizessizeargfunc)string_slice, /*sq_slice*/
    0, /*sq_ass_item*/
    0, /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
1399 | \r | |
/* Mapping slot table for the string type: length, subscript (which also
   handles slice objects), and a 0 third slot.
   NOTE(review): slot names below follow the documented PyMappingMethods
   layout -- confirm against object.h. */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length, /* mp_length */
    (binaryfunc)string_subscript, /* mp_subscript */
    0, /* mp_ass_subscript */
};
1405 | \r | |
/* Buffer slot table for the string type.
   NOTE(review): slot names below follow the documented PyBufferProcs
   layout -- confirm against object.h.  The final slot is left 0. */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf, /* bf_getreadbuffer */
    (writebufferproc)string_buffer_getwritebuf, /* bf_getwritebuffer */
    (segcountproc)string_buffer_getsegcount, /* bf_getsegcount */
    (charbufferproc)string_buffer_getcharbuf, /* bf_getcharbuffer */
    (getbufferproc)string_buffer_getbuffer, /* bf_getbuffer */
    0, /* XXX */ /* presumably bf_releasebuffer -- TODO confirm */
};
1414 | \r | |
1415 | \r | |
1416 | \r | |
/* Selectors telling the strip helpers which end(s) of the string to
   trim.  They double as indices into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
/* Argument-parsing format strings, one per selector. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for selector i: the format string with its 3-character
   "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1425 | \r | |
1426 | PyDoc_STRVAR(split__doc__,\r | |
1427 | "S.split([sep [,maxsplit]]) -> list of strings\n\\r | |
1428 | \n\\r | |
1429 | Return a list of the words in the string S, using sep as the\n\\r | |
1430 | delimiter string. If maxsplit is given, at most maxsplit\n\\r | |
1431 | splits are done. If sep is not specified or is None, any\n\\r | |
1432 | whitespace string is a separator and empty strings are removed\n\\r | |
1433 | from the result.");\r | |
1434 | \r | |
1435 | static PyObject *\r | |
1436 | string_split(PyStringObject *self, PyObject *args)\r | |
1437 | {\r | |
1438 | Py_ssize_t len = PyString_GET_SIZE(self), n;\r | |
1439 | Py_ssize_t maxsplit = -1;\r | |
1440 | const char *s = PyString_AS_STRING(self), *sub;\r | |
1441 | PyObject *subobj = Py_None;\r | |
1442 | \r | |
1443 | if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))\r | |
1444 | return NULL;\r | |
1445 | if (maxsplit < 0)\r | |
1446 | maxsplit = PY_SSIZE_T_MAX;\r | |
1447 | if (subobj == Py_None)\r | |
1448 | return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);\r | |
1449 | if (PyString_Check(subobj)) {\r | |
1450 | sub = PyString_AS_STRING(subobj);\r | |
1451 | n = PyString_GET_SIZE(subobj);\r | |
1452 | }\r | |
1453 | #ifdef Py_USING_UNICODE\r | |
1454 | else if (PyUnicode_Check(subobj))\r | |
1455 | return PyUnicode_Split((PyObject *)self, subobj, maxsplit);\r | |
1456 | #endif\r | |
1457 | else if (PyObject_AsCharBuffer(subobj, &sub, &n))\r | |
1458 | return NULL;\r | |
1459 | \r | |
1460 | return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);\r | |
1461 | }\r | |
1462 | \r | |
1463 | PyDoc_STRVAR(partition__doc__,\r | |
1464 | "S.partition(sep) -> (head, sep, tail)\n\\r | |
1465 | \n\\r | |
1466 | Search for the separator sep in S, and return the part before it,\n\\r | |
1467 | the separator itself, and the part after it. If the separator is not\n\\r | |
1468 | found, return S and two empty strings.");\r | |
1469 | \r | |
1470 | static PyObject *\r | |
1471 | string_partition(PyStringObject *self, PyObject *sep_obj)\r | |
1472 | {\r | |
1473 | const char *sep;\r | |
1474 | Py_ssize_t sep_len;\r | |
1475 | \r | |
1476 | if (PyString_Check(sep_obj)) {\r | |
1477 | sep = PyString_AS_STRING(sep_obj);\r | |
1478 | sep_len = PyString_GET_SIZE(sep_obj);\r | |
1479 | }\r | |
1480 | #ifdef Py_USING_UNICODE\r | |
1481 | else if (PyUnicode_Check(sep_obj))\r | |
1482 | return PyUnicode_Partition((PyObject *) self, sep_obj);\r | |
1483 | #endif\r | |
1484 | else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))\r | |
1485 | return NULL;\r | |
1486 | \r | |
1487 | return stringlib_partition(\r | |
1488 | (PyObject*) self,\r | |
1489 | PyString_AS_STRING(self), PyString_GET_SIZE(self),\r | |
1490 | sep_obj, sep, sep_len\r | |
1491 | );\r | |
1492 | }\r | |
1493 | \r | |
1494 | PyDoc_STRVAR(rpartition__doc__,\r | |
1495 | "S.rpartition(sep) -> (head, sep, tail)\n\\r | |
1496 | \n\\r | |
1497 | Search for the separator sep in S, starting at the end of S, and return\n\\r | |
1498 | the part before it, the separator itself, and the part after it. If the\n\\r | |
1499 | separator is not found, return two empty strings and S.");\r | |
1500 | \r | |
1501 | static PyObject *\r | |
1502 | string_rpartition(PyStringObject *self, PyObject *sep_obj)\r | |
1503 | {\r | |
1504 | const char *sep;\r | |
1505 | Py_ssize_t sep_len;\r | |
1506 | \r | |
1507 | if (PyString_Check(sep_obj)) {\r | |
1508 | sep = PyString_AS_STRING(sep_obj);\r | |
1509 | sep_len = PyString_GET_SIZE(sep_obj);\r | |
1510 | }\r | |
1511 | #ifdef Py_USING_UNICODE\r | |
1512 | else if (PyUnicode_Check(sep_obj))\r | |
1513 | return PyUnicode_RPartition((PyObject *) self, sep_obj);\r | |
1514 | #endif\r | |
1515 | else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))\r | |
1516 | return NULL;\r | |
1517 | \r | |
1518 | return stringlib_rpartition(\r | |
1519 | (PyObject*) self,\r | |
1520 | PyString_AS_STRING(self), PyString_GET_SIZE(self),\r | |
1521 | sep_obj, sep, sep_len\r | |
1522 | );\r | |
1523 | }\r | |
1524 | \r | |
1525 | PyDoc_STRVAR(rsplit__doc__,\r | |
1526 | "S.rsplit([sep [,maxsplit]]) -> list of strings\n\\r | |
1527 | \n\\r | |
1528 | Return a list of the words in the string S, using sep as the\n\\r | |
1529 | delimiter string, starting at the end of the string and working\n\\r | |
1530 | to the front. If maxsplit is given, at most maxsplit splits are\n\\r | |
1531 | done. If sep is not specified or is None, any whitespace string\n\\r | |
1532 | is a separator.");\r | |
1533 | \r | |
1534 | static PyObject *\r | |
1535 | string_rsplit(PyStringObject *self, PyObject *args)\r | |
1536 | {\r | |
1537 | Py_ssize_t len = PyString_GET_SIZE(self), n;\r | |
1538 | Py_ssize_t maxsplit = -1;\r | |
1539 | const char *s = PyString_AS_STRING(self), *sub;\r | |
1540 | PyObject *subobj = Py_None;\r | |
1541 | \r | |
1542 | if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))\r | |
1543 | return NULL;\r | |
1544 | if (maxsplit < 0)\r | |
1545 | maxsplit = PY_SSIZE_T_MAX;\r | |
1546 | if (subobj == Py_None)\r | |
1547 | return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);\r | |
1548 | if (PyString_Check(subobj)) {\r | |
1549 | sub = PyString_AS_STRING(subobj);\r | |
1550 | n = PyString_GET_SIZE(subobj);\r | |
1551 | }\r | |
1552 | #ifdef Py_USING_UNICODE\r | |
1553 | else if (PyUnicode_Check(subobj))\r | |
1554 | return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);\r | |
1555 | #endif\r | |
1556 | else if (PyObject_AsCharBuffer(subobj, &sub, &n))\r | |
1557 | return NULL;\r | |
1558 | \r | |
1559 | return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);\r | |
1560 | }\r | |
1561 | \r | |
1562 | \r | |
1563 | PyDoc_STRVAR(join__doc__,\r | |
1564 | "S.join(iterable) -> string\n\\r | |
1565 | \n\\r | |
1566 | Return a string which is the concatenation of the strings in the\n\\r | |
1567 | iterable. The separator between elements is S.");\r | |
1568 | \r | |
1569 | static PyObject *\r | |
1570 | string_join(PyStringObject *self, PyObject *orig)\r | |
1571 | {\r | |
1572 | char *sep = PyString_AS_STRING(self);\r | |
1573 | const Py_ssize_t seplen = PyString_GET_SIZE(self);\r | |
1574 | PyObject *res = NULL;\r | |
1575 | char *p;\r | |
1576 | Py_ssize_t seqlen = 0;\r | |
1577 | size_t sz = 0;\r | |
1578 | Py_ssize_t i;\r | |
1579 | PyObject *seq, *item;\r | |
1580 | \r | |
1581 | seq = PySequence_Fast(orig, "");\r | |
1582 | if (seq == NULL) {\r | |
1583 | return NULL;\r | |
1584 | }\r | |
1585 | \r | |
1586 | seqlen = PySequence_Size(seq);\r | |
1587 | if (seqlen == 0) {\r | |
1588 | Py_DECREF(seq);\r | |
1589 | return PyString_FromString("");\r | |
1590 | }\r | |
1591 | if (seqlen == 1) {\r | |
1592 | item = PySequence_Fast_GET_ITEM(seq, 0);\r | |
1593 | if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {\r | |
1594 | Py_INCREF(item);\r | |
1595 | Py_DECREF(seq);\r | |
1596 | return item;\r | |
1597 | }\r | |
1598 | }\r | |
1599 | \r | |
1600 | /* There are at least two things to join, or else we have a subclass\r | |
1601 | * of the builtin types in the sequence.\r | |
1602 | * Do a pre-pass to figure out the total amount of space we'll\r | |
1603 | * need (sz), see whether any argument is absurd, and defer to\r | |
1604 | * the Unicode join if appropriate.\r | |
1605 | */\r | |
1606 | for (i = 0; i < seqlen; i++) {\r | |
1607 | const size_t old_sz = sz;\r | |
1608 | item = PySequence_Fast_GET_ITEM(seq, i);\r | |
1609 | if (!PyString_Check(item)){\r | |
1610 | #ifdef Py_USING_UNICODE\r | |
1611 | if (PyUnicode_Check(item)) {\r | |
1612 | /* Defer to Unicode join.\r | |
1613 | * CAUTION: There's no gurantee that the\r | |
1614 | * original sequence can be iterated over\r | |
1615 | * again, so we must pass seq here.\r | |
1616 | */\r | |
1617 | PyObject *result;\r | |
1618 | result = PyUnicode_Join((PyObject *)self, seq);\r | |
1619 | Py_DECREF(seq);\r | |
1620 | return result;\r | |
1621 | }\r | |
1622 | #endif\r | |
1623 | PyErr_Format(PyExc_TypeError,\r | |
1624 | "sequence item %zd: expected string,"\r | |
1625 | " %.80s found",\r | |
1626 | i, Py_TYPE(item)->tp_name);\r | |
1627 | Py_DECREF(seq);\r | |
1628 | return NULL;\r | |
1629 | }\r | |
1630 | sz += PyString_GET_SIZE(item);\r | |
1631 | if (i != 0)\r | |
1632 | sz += seplen;\r | |
1633 | if (sz < old_sz || sz > PY_SSIZE_T_MAX) {\r | |
1634 | PyErr_SetString(PyExc_OverflowError,\r | |
1635 | "join() result is too long for a Python string");\r | |
1636 | Py_DECREF(seq);\r | |
1637 | return NULL;\r | |
1638 | }\r | |
1639 | }\r | |
1640 | \r | |
1641 | /* Allocate result space. */\r | |
1642 | res = PyString_FromStringAndSize((char*)NULL, sz);\r | |
1643 | if (res == NULL) {\r | |
1644 | Py_DECREF(seq);\r | |
1645 | return NULL;\r | |
1646 | }\r | |
1647 | \r | |
1648 | /* Catenate everything. */\r | |
1649 | p = PyString_AS_STRING(res);\r | |
1650 | for (i = 0; i < seqlen; ++i) {\r | |
1651 | size_t n;\r | |
1652 | item = PySequence_Fast_GET_ITEM(seq, i);\r | |
1653 | n = PyString_GET_SIZE(item);\r | |
1654 | Py_MEMCPY(p, PyString_AS_STRING(item), n);\r | |
1655 | p += n;\r | |
1656 | if (i < seqlen - 1) {\r | |
1657 | Py_MEMCPY(p, sep, seplen);\r | |
1658 | p += seplen;\r | |
1659 | }\r | |
1660 | }\r | |
1661 | \r | |
1662 | Py_DECREF(seq);\r | |
1663 | return res;\r | |
1664 | }\r | |
1665 | \r | |
1666 | PyObject *\r | |
1667 | _PyString_Join(PyObject *sep, PyObject *x)\r | |
1668 | {\r | |
1669 | assert(sep != NULL && PyString_Check(sep));\r | |
1670 | assert(x != NULL);\r | |
1671 | return string_join((PyStringObject *)sep, x);\r | |
1672 | }\r | |
1673 | \r | |
/* helper macro to fixup start/end slice values.

   `start' and `end' must be modifiable lvalues; `len' may be any
   expression and is evaluated more than once.  Afterwards `end' is
   capped at `len', and a negative `start' or `end' has had `len' added
   and is then clamped to 0 if still negative.

   Every argument is parenthesized so that an expression argument (for
   example a conditional expression) is not re-associated with the
   surrounding operators.  The expansion is deliberately left as plain
   statements rather than do { } while (0) so that existing call sites
   keep compiling unchanged. */
#define ADJUST_INDICES(start, end, len)         \
    if ((end) > (len))                          \
        (end) = (len);                          \
    else if ((end) < 0) {                       \
        (end) += (len);                         \
        if ((end) < 0)                          \
            (end) = 0;                          \
    }                                           \
    if ((start) < 0) {                          \
        (start) += (len);                       \
        if ((start) < 0)                        \
            (start) = 0;                        \
    }
1688 | \r | |
1689 | Py_LOCAL_INLINE(Py_ssize_t)\r | |
1690 | string_find_internal(PyStringObject *self, PyObject *args, int dir)\r | |
1691 | {\r | |
1692 | PyObject *subobj;\r | |
1693 | const char *sub;\r | |
1694 | Py_ssize_t sub_len;\r | |
1695 | Py_ssize_t start=0, end=PY_SSIZE_T_MAX;\r | |
1696 | \r | |
1697 | if (!stringlib_parse_args_finds("find/rfind/index/rindex",\r | |
1698 | args, &subobj, &start, &end))\r | |
1699 | return -2;\r | |
1700 | \r | |
1701 | if (PyString_Check(subobj)) {\r | |
1702 | sub = PyString_AS_STRING(subobj);\r | |
1703 | sub_len = PyString_GET_SIZE(subobj);\r | |
1704 | }\r | |
1705 | #ifdef Py_USING_UNICODE\r | |
1706 | else if (PyUnicode_Check(subobj))\r | |
1707 | return PyUnicode_Find(\r | |
1708 | (PyObject *)self, subobj, start, end, dir);\r | |
1709 | #endif\r | |
1710 | else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))\r | |
1711 | /* XXX - the "expected a character buffer object" is pretty\r | |
1712 | confusing for a non-expert. remap to something else ? */\r | |
1713 | return -2;\r | |
1714 | \r | |
1715 | if (dir > 0)\r | |
1716 | return stringlib_find_slice(\r | |
1717 | PyString_AS_STRING(self), PyString_GET_SIZE(self),\r | |
1718 | sub, sub_len, start, end);\r | |
1719 | else\r | |
1720 | return stringlib_rfind_slice(\r | |
1721 | PyString_AS_STRING(self), PyString_GET_SIZE(self),\r | |
1722 | sub, sub_len, start, end);\r | |
1723 | }\r | |
1724 | \r | |
1725 | \r | |
1726 | PyDoc_STRVAR(find__doc__,\r | |
1727 | "S.find(sub [,start [,end]]) -> int\n\\r | |
1728 | \n\\r | |
1729 | Return the lowest index in S where substring sub is found,\n\\r | |
1730 | such that sub is contained within s[start:end]. Optional\n\\r | |
1731 | arguments start and end are interpreted as in slice notation.\n\\r | |
1732 | \n\\r | |
1733 | Return -1 on failure.");\r | |
1734 | \r | |
1735 | static PyObject *\r | |
1736 | string_find(PyStringObject *self, PyObject *args)\r | |
1737 | {\r | |
1738 | Py_ssize_t result = string_find_internal(self, args, +1);\r | |
1739 | if (result == -2)\r | |
1740 | return NULL;\r | |
1741 | return PyInt_FromSsize_t(result);\r | |
1742 | }\r | |
1743 | \r | |
1744 | \r | |
1745 | PyDoc_STRVAR(index__doc__,\r | |
1746 | "S.index(sub [,start [,end]]) -> int\n\\r | |
1747 | \n\\r | |
1748 | Like S.find() but raise ValueError when the substring is not found.");\r | |
1749 | \r | |
1750 | static PyObject *\r | |
1751 | string_index(PyStringObject *self, PyObject *args)\r | |
1752 | {\r | |
1753 | Py_ssize_t result = string_find_internal(self, args, +1);\r | |
1754 | if (result == -2)\r | |
1755 | return NULL;\r | |
1756 | if (result == -1) {\r | |
1757 | PyErr_SetString(PyExc_ValueError,\r | |
1758 | "substring not found");\r | |
1759 | return NULL;\r | |
1760 | }\r | |
1761 | return PyInt_FromSsize_t(result);\r | |
1762 | }\r | |
1763 | \r | |
1764 | \r | |
1765 | PyDoc_STRVAR(rfind__doc__,\r | |
1766 | "S.rfind(sub [,start [,end]]) -> int\n\\r | |
1767 | \n\\r | |
1768 | Return the highest index in S where substring sub is found,\n\\r | |
1769 | such that sub is contained within s[start:end]. Optional\n\\r | |
1770 | arguments start and end are interpreted as in slice notation.\n\\r | |
1771 | \n\\r | |
1772 | Return -1 on failure.");\r | |
1773 | \r | |
1774 | static PyObject *\r | |
1775 | string_rfind(PyStringObject *self, PyObject *args)\r | |
1776 | {\r | |
1777 | Py_ssize_t result = string_find_internal(self, args, -1);\r | |
1778 | if (result == -2)\r | |
1779 | return NULL;\r | |
1780 | return PyInt_FromSsize_t(result);\r | |
1781 | }\r | |
1782 | \r | |
1783 | \r | |
1784 | PyDoc_STRVAR(rindex__doc__,\r | |
1785 | "S.rindex(sub [,start [,end]]) -> int\n\\r | |
1786 | \n\\r | |
1787 | Like S.rfind() but raise ValueError when the substring is not found.");\r | |
1788 | \r | |
1789 | static PyObject *\r | |
1790 | string_rindex(PyStringObject *self, PyObject *args)\r | |
1791 | {\r | |
1792 | Py_ssize_t result = string_find_internal(self, args, -1);\r | |
1793 | if (result == -2)\r | |
1794 | return NULL;\r | |
1795 | if (result == -1) {\r | |
1796 | PyErr_SetString(PyExc_ValueError,\r | |
1797 | "substring not found");\r | |
1798 | return NULL;\r | |
1799 | }\r | |
1800 | return PyInt_FromSsize_t(result);\r | |
1801 | }\r | |
1802 | \r | |
1803 | \r | |
1804 | Py_LOCAL_INLINE(PyObject *)\r | |
1805 | do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)\r | |
1806 | {\r | |
1807 | char *s = PyString_AS_STRING(self);\r | |
1808 | Py_ssize_t len = PyString_GET_SIZE(self);\r | |
1809 | char *sep = PyString_AS_STRING(sepobj);\r | |
1810 | Py_ssize_t seplen = PyString_GET_SIZE(sepobj);\r | |
1811 | Py_ssize_t i, j;\r | |
1812 | \r | |
1813 | i = 0;\r | |
1814 | if (striptype != RIGHTSTRIP) {\r | |
1815 | while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {\r | |
1816 | i++;\r | |
1817 | }\r | |
1818 | }\r | |
1819 | \r | |
1820 | j = len;\r | |
1821 | if (striptype != LEFTSTRIP) {\r | |
1822 | do {\r | |
1823 | j--;\r | |
1824 | } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));\r | |
1825 | j++;\r | |
1826 | }\r | |
1827 | \r | |
1828 | if (i == 0 && j == len && PyString_CheckExact(self)) {\r | |
1829 | Py_INCREF(self);\r | |
1830 | return (PyObject*)self;\r | |
1831 | }\r | |
1832 | else\r | |
1833 | return PyString_FromStringAndSize(s+i, j-i);\r | |
1834 | }\r | |
1835 | \r | |
1836 | \r | |
1837 | Py_LOCAL_INLINE(PyObject *)\r | |
1838 | do_strip(PyStringObject *self, int striptype)\r | |
1839 | {\r | |
1840 | char *s = PyString_AS_STRING(self);\r | |
1841 | Py_ssize_t len = PyString_GET_SIZE(self), i, j;\r | |
1842 | \r | |
1843 | i = 0;\r | |
1844 | if (striptype != RIGHTSTRIP) {\r | |
1845 | while (i < len && isspace(Py_CHARMASK(s[i]))) {\r | |
1846 | i++;\r | |
1847 | }\r | |
1848 | }\r | |
1849 | \r | |
1850 | j = len;\r | |
1851 | if (striptype != LEFTSTRIP) {\r | |
1852 | do {\r | |
1853 | j--;\r | |
1854 | } while (j >= i && isspace(Py_CHARMASK(s[j])));\r | |
1855 | j++;\r | |
1856 | }\r | |
1857 | \r | |
1858 | if (i == 0 && j == len && PyString_CheckExact(self)) {\r | |
1859 | Py_INCREF(self);\r | |
1860 | return (PyObject*)self;\r | |
1861 | }\r | |
1862 | else\r | |
1863 | return PyString_FromStringAndSize(s+i, j-i);\r | |
1864 | }\r | |
1865 | \r | |
1866 | \r | |
1867 | Py_LOCAL_INLINE(PyObject *)\r | |
1868 | do_argstrip(PyStringObject *self, int striptype, PyObject *args)\r | |
1869 | {\r | |
1870 | PyObject *sep = NULL;\r | |
1871 | \r | |
1872 | if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))\r | |
1873 | return NULL;\r | |
1874 | \r | |
1875 | if (sep != NULL && sep != Py_None) {\r | |
1876 | if (PyString_Check(sep))\r | |
1877 | return do_xstrip(self, striptype, sep);\r | |
1878 | #ifdef Py_USING_UNICODE\r | |
1879 | else if (PyUnicode_Check(sep)) {\r | |
1880 | PyObject *uniself = PyUnicode_FromObject((PyObject *)self);\r | |
1881 | PyObject *res;\r | |
1882 | if (uniself==NULL)\r | |
1883 | return NULL;\r | |
1884 | res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,\r | |
1885 | striptype, sep);\r | |
1886 | Py_DECREF(uniself);\r | |
1887 | return res;\r | |
1888 | }\r | |
1889 | #endif\r | |
1890 | PyErr_Format(PyExc_TypeError,\r | |
1891 | #ifdef Py_USING_UNICODE\r | |
1892 | "%s arg must be None, str or unicode",\r | |
1893 | #else\r | |
1894 | "%s arg must be None or str",\r | |
1895 | #endif\r | |
1896 | STRIPNAME(striptype));\r | |
1897 | return NULL;\r | |
1898 | }\r | |
1899 | \r | |
1900 | return do_strip(self, striptype);\r | |
1901 | }\r | |
1902 | \r | |
1903 | \r | |
1904 | PyDoc_STRVAR(strip__doc__,\r | |
1905 | "S.strip([chars]) -> string or unicode\n\\r | |
1906 | \n\\r | |
1907 | Return a copy of the string S with leading and trailing\n\\r | |
1908 | whitespace removed.\n\\r | |
1909 | If chars is given and not None, remove characters in chars instead.\n\\r | |
1910 | If chars is unicode, S will be converted to unicode before stripping");\r | |
1911 | \r | |
1912 | static PyObject *\r | |
1913 | string_strip(PyStringObject *self, PyObject *args)\r | |
1914 | {\r | |
1915 | if (PyTuple_GET_SIZE(args) == 0)\r | |
1916 | return do_strip(self, BOTHSTRIP); /* Common case */\r | |
1917 | else\r | |
1918 | return do_argstrip(self, BOTHSTRIP, args);\r | |
1919 | }\r | |
1920 | \r | |
1921 | \r | |
1922 | PyDoc_STRVAR(lstrip__doc__,\r | |
1923 | "S.lstrip([chars]) -> string or unicode\n\\r | |
1924 | \n\\r | |
1925 | Return a copy of the string S with leading whitespace removed.\n\\r | |
1926 | If chars is given and not None, remove characters in chars instead.\n\\r | |
1927 | If chars is unicode, S will be converted to unicode before stripping");\r | |
1928 | \r | |
1929 | static PyObject *\r | |
1930 | string_lstrip(PyStringObject *self, PyObject *args)\r | |
1931 | {\r | |
1932 | if (PyTuple_GET_SIZE(args) == 0)\r | |
1933 | return do_strip(self, LEFTSTRIP); /* Common case */\r | |
1934 | else\r | |
1935 | return do_argstrip(self, LEFTSTRIP, args);\r | |
1936 | }\r | |
1937 | \r | |
1938 | \r | |
1939 | PyDoc_STRVAR(rstrip__doc__,\r | |
1940 | "S.rstrip([chars]) -> string or unicode\n\\r | |
1941 | \n\\r | |
1942 | Return a copy of the string S with trailing whitespace removed.\n\\r | |
1943 | If chars is given and not None, remove characters in chars instead.\n\\r | |
1944 | If chars is unicode, S will be converted to unicode before stripping");\r | |
1945 | \r | |
1946 | static PyObject *\r | |
1947 | string_rstrip(PyStringObject *self, PyObject *args)\r | |
1948 | {\r | |
1949 | if (PyTuple_GET_SIZE(args) == 0)\r | |
1950 | return do_strip(self, RIGHTSTRIP); /* Common case */\r | |
1951 | else\r | |
1952 | return do_argstrip(self, RIGHTSTRIP, args);\r | |
1953 | }\r | |
1954 | \r | |
1955 | \r | |
1956 | PyDoc_STRVAR(lower__doc__,\r | |
1957 | "S.lower() -> string\n\\r | |
1958 | \n\\r | |
1959 | Return a copy of the string S converted to lowercase.");\r | |
1960 | \r | |
1961 | /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */\r | |
1962 | #ifndef _tolower\r | |
1963 | #define _tolower tolower\r | |
1964 | #endif\r | |
1965 | \r | |
1966 | static PyObject *\r | |
1967 | string_lower(PyStringObject *self)\r | |
1968 | {\r | |
1969 | char *s;\r | |
1970 | Py_ssize_t i, n = PyString_GET_SIZE(self);\r | |
1971 | PyObject *newobj;\r | |
1972 | \r | |
1973 | newobj = PyString_FromStringAndSize(NULL, n);\r | |
1974 | if (!newobj)\r | |
1975 | return NULL;\r | |
1976 | \r | |
1977 | s = PyString_AS_STRING(newobj);\r | |
1978 | \r | |
1979 | Py_MEMCPY(s, PyString_AS_STRING(self), n);\r | |
1980 | \r | |
1981 | for (i = 0; i < n; i++) {\r | |
1982 | int c = Py_CHARMASK(s[i]);\r | |
1983 | if (isupper(c))\r | |
1984 | s[i] = _tolower(c);\r | |
1985 | }\r | |
1986 | \r | |
1987 | return newobj;\r | |
1988 | }\r | |
1989 | \r | |
1990 | PyDoc_STRVAR(upper__doc__,\r | |
1991 | "S.upper() -> string\n\\r | |
1992 | \n\\r | |
1993 | Return a copy of the string S converted to uppercase.");\r | |
1994 | \r | |
1995 | #ifndef _toupper\r | |
1996 | #define _toupper toupper\r | |
1997 | #endif\r | |
1998 | \r | |
1999 | static PyObject *\r | |
2000 | string_upper(PyStringObject *self)\r | |
2001 | {\r | |
2002 | char *s;\r | |
2003 | Py_ssize_t i, n = PyString_GET_SIZE(self);\r | |
2004 | PyObject *newobj;\r | |
2005 | \r | |
2006 | newobj = PyString_FromStringAndSize(NULL, n);\r | |
2007 | if (!newobj)\r | |
2008 | return NULL;\r | |
2009 | \r | |
2010 | s = PyString_AS_STRING(newobj);\r | |
2011 | \r | |
2012 | Py_MEMCPY(s, PyString_AS_STRING(self), n);\r | |
2013 | \r | |
2014 | for (i = 0; i < n; i++) {\r | |
2015 | int c = Py_CHARMASK(s[i]);\r | |
2016 | if (islower(c))\r | |
2017 | s[i] = _toupper(c);\r | |
2018 | }\r | |
2019 | \r | |
2020 | return newobj;\r | |
2021 | }\r | |
2022 | \r | |
2023 | PyDoc_STRVAR(title__doc__,\r | |
2024 | "S.title() -> string\n\\r | |
2025 | \n\\r | |
2026 | Return a titlecased version of S, i.e. words start with uppercase\n\\r | |
2027 | characters, all remaining cased characters have lowercase.");\r | |
2028 | \r | |
2029 | static PyObject*\r | |
2030 | string_title(PyStringObject *self)\r | |
2031 | {\r | |
2032 | char *s = PyString_AS_STRING(self), *s_new;\r | |
2033 | Py_ssize_t i, n = PyString_GET_SIZE(self);\r | |
2034 | int previous_is_cased = 0;\r | |
2035 | PyObject *newobj;\r | |
2036 | \r | |
2037 | newobj = PyString_FromStringAndSize(NULL, n);\r | |
2038 | if (newobj == NULL)\r | |
2039 | return NULL;\r | |
2040 | s_new = PyString_AsString(newobj);\r | |
2041 | for (i = 0; i < n; i++) {\r | |
2042 | int c = Py_CHARMASK(*s++);\r | |
2043 | if (islower(c)) {\r | |
2044 | if (!previous_is_cased)\r | |
2045 | c = toupper(c);\r | |
2046 | previous_is_cased = 1;\r | |
2047 | } else if (isupper(c)) {\r | |
2048 | if (previous_is_cased)\r | |
2049 | c = tolower(c);\r | |
2050 | previous_is_cased = 1;\r | |
2051 | } else\r | |
2052 | previous_is_cased = 0;\r | |
2053 | *s_new++ = c;\r | |
2054 | }\r | |
2055 | return newobj;\r | |
2056 | }\r | |
2057 | \r | |
2058 | PyDoc_STRVAR(capitalize__doc__,\r | |
2059 | "S.capitalize() -> string\n\\r | |
2060 | \n\\r | |
2061 | Return a copy of the string S with only its first character\n\\r | |
2062 | capitalized.");\r | |
2063 | \r | |
2064 | static PyObject *\r | |
2065 | string_capitalize(PyStringObject *self)\r | |
2066 | {\r | |
2067 | char *s = PyString_AS_STRING(self), *s_new;\r | |
2068 | Py_ssize_t i, n = PyString_GET_SIZE(self);\r | |
2069 | PyObject *newobj;\r | |
2070 | \r | |
2071 | newobj = PyString_FromStringAndSize(NULL, n);\r | |
2072 | if (newobj == NULL)\r | |
2073 | return NULL;\r | |
2074 | s_new = PyString_AsString(newobj);\r | |
2075 | if (0 < n) {\r | |
2076 | int c = Py_CHARMASK(*s++);\r | |
2077 | if (islower(c))\r | |
2078 | *s_new = toupper(c);\r | |
2079 | else\r | |
2080 | *s_new = c;\r | |
2081 | s_new++;\r | |
2082 | }\r | |
2083 | for (i = 1; i < n; i++) {\r | |
2084 | int c = Py_CHARMASK(*s++);\r | |
2085 | if (isupper(c))\r | |
2086 | *s_new = tolower(c);\r | |
2087 | else\r | |
2088 | *s_new = c;\r | |
2089 | s_new++;\r | |
2090 | }\r | |
2091 | return newobj;\r | |
2092 | }\r | |
2093 | \r | |
2094 | \r | |
2095 | PyDoc_STRVAR(count__doc__,\r | |
2096 | "S.count(sub[, start[, end]]) -> int\n\\r | |
2097 | \n\\r | |
2098 | Return the number of non-overlapping occurrences of substring sub in\n\\r | |
2099 | string S[start:end]. Optional arguments start and end are interpreted\n\\r | |
2100 | as in slice notation.");\r | |
2101 | \r | |
2102 | static PyObject *\r | |
2103 | string_count(PyStringObject *self, PyObject *args)\r | |
2104 | {\r | |
2105 | PyObject *sub_obj;\r | |
2106 | const char *str = PyString_AS_STRING(self), *sub;\r | |
2107 | Py_ssize_t sub_len;\r | |
2108 | Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;\r | |
2109 | \r | |
2110 | if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))\r | |
2111 | return NULL;\r | |
2112 | \r | |
2113 | if (PyString_Check(sub_obj)) {\r | |
2114 | sub = PyString_AS_STRING(sub_obj);\r | |
2115 | sub_len = PyString_GET_SIZE(sub_obj);\r | |
2116 | }\r | |
2117 | #ifdef Py_USING_UNICODE\r | |
2118 | else if (PyUnicode_Check(sub_obj)) {\r | |
2119 | Py_ssize_t count;\r | |
2120 | count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);\r | |
2121 | if (count == -1)\r | |
2122 | return NULL;\r | |
2123 | else\r | |
2124 | return PyInt_FromSsize_t(count);\r | |
2125 | }\r | |
2126 | #endif\r | |
2127 | else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))\r | |
2128 | return NULL;\r | |
2129 | \r | |
2130 | ADJUST_INDICES(start, end, PyString_GET_SIZE(self));\r | |
2131 | \r | |
2132 | return PyInt_FromSsize_t(\r | |
2133 | stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)\r | |
2134 | );\r | |
2135 | }\r | |
2136 | \r | |
2137 | PyDoc_STRVAR(swapcase__doc__,\r | |
2138 | "S.swapcase() -> string\n\\r | |
2139 | \n\\r | |
2140 | Return a copy of the string S with uppercase characters\n\\r | |
2141 | converted to lowercase and vice versa.");\r | |
2142 | \r | |
2143 | static PyObject *\r | |
2144 | string_swapcase(PyStringObject *self)\r | |
2145 | {\r | |
2146 | char *s = PyString_AS_STRING(self), *s_new;\r | |
2147 | Py_ssize_t i, n = PyString_GET_SIZE(self);\r | |
2148 | PyObject *newobj;\r | |
2149 | \r | |
2150 | newobj = PyString_FromStringAndSize(NULL, n);\r | |
2151 | if (newobj == NULL)\r | |
2152 | return NULL;\r | |
2153 | s_new = PyString_AsString(newobj);\r | |
2154 | for (i = 0; i < n; i++) {\r | |
2155 | int c = Py_CHARMASK(*s++);\r | |
2156 | if (islower(c)) {\r | |
2157 | *s_new = toupper(c);\r | |
2158 | }\r | |
2159 | else if (isupper(c)) {\r | |
2160 | *s_new = tolower(c);\r | |
2161 | }\r | |
2162 | else\r | |
2163 | *s_new = c;\r | |
2164 | s_new++;\r | |
2165 | }\r | |
2166 | return newobj;\r | |
2167 | }\r | |
2168 | \r | |
2169 | \r | |
2170 | PyDoc_STRVAR(translate__doc__,\r | |
2171 | "S.translate(table [,deletechars]) -> string\n\\r | |
2172 | \n\\r | |
2173 | Return a copy of the string S, where all characters occurring\n\\r | |
2174 | in the optional argument deletechars are removed, and the\n\\r | |
2175 | remaining characters have been mapped through the given\n\\r | |
2176 | translation table, which must be a string of length 256.");\r | |
2177 | \r | |
2178 | static PyObject *\r | |
2179 | string_translate(PyStringObject *self, PyObject *args)\r | |
2180 | {\r | |
2181 | register char *input, *output;\r | |
2182 | const char *table;\r | |
2183 | register Py_ssize_t i, c, changed = 0;\r | |
2184 | PyObject *input_obj = (PyObject*)self;\r | |
2185 | const char *output_start, *del_table=NULL;\r | |
2186 | Py_ssize_t inlen, tablen, dellen = 0;\r | |
2187 | PyObject *result;\r | |
2188 | int trans_table[256];\r | |
2189 | PyObject *tableobj, *delobj = NULL;\r | |
2190 | \r | |
2191 | if (!PyArg_UnpackTuple(args, "translate", 1, 2,\r | |
2192 | &tableobj, &delobj))\r | |
2193 | return NULL;\r | |
2194 | \r | |
2195 | if (PyString_Check(tableobj)) {\r | |
2196 | table = PyString_AS_STRING(tableobj);\r | |
2197 | tablen = PyString_GET_SIZE(tableobj);\r | |
2198 | }\r | |
2199 | else if (tableobj == Py_None) {\r | |
2200 | table = NULL;\r | |
2201 | tablen = 256;\r | |
2202 | }\r | |
2203 | #ifdef Py_USING_UNICODE\r | |
2204 | else if (PyUnicode_Check(tableobj)) {\r | |
2205 | /* Unicode .translate() does not support the deletechars\r | |
2206 | parameter; instead a mapping to None will cause characters\r | |
2207 | to be deleted. */\r | |
2208 | if (delobj != NULL) {\r | |
2209 | PyErr_SetString(PyExc_TypeError,\r | |
2210 | "deletions are implemented differently for unicode");\r | |
2211 | return NULL;\r | |
2212 | }\r | |
2213 | return PyUnicode_Translate((PyObject *)self, tableobj, NULL);\r | |
2214 | }\r | |
2215 | #endif\r | |
2216 | else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))\r | |
2217 | return NULL;\r | |
2218 | \r | |
2219 | if (tablen != 256) {\r | |
2220 | PyErr_SetString(PyExc_ValueError,\r | |
2221 | "translation table must be 256 characters long");\r | |
2222 | return NULL;\r | |
2223 | }\r | |
2224 | \r | |
2225 | if (delobj != NULL) {\r | |
2226 | if (PyString_Check(delobj)) {\r | |
2227 | del_table = PyString_AS_STRING(delobj);\r | |
2228 | dellen = PyString_GET_SIZE(delobj);\r | |
2229 | }\r | |
2230 | #ifdef Py_USING_UNICODE\r | |
2231 | else if (PyUnicode_Check(delobj)) {\r | |
2232 | PyErr_SetString(PyExc_TypeError,\r | |
2233 | "deletions are implemented differently for unicode");\r | |
2234 | return NULL;\r | |
2235 | }\r | |
2236 | #endif\r | |
2237 | else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))\r | |
2238 | return NULL;\r | |
2239 | }\r | |
2240 | else {\r | |
2241 | del_table = NULL;\r | |
2242 | dellen = 0;\r | |
2243 | }\r | |
2244 | \r | |
2245 | inlen = PyString_GET_SIZE(input_obj);\r | |
2246 | result = PyString_FromStringAndSize((char *)NULL, inlen);\r | |
2247 | if (result == NULL)\r | |
2248 | return NULL;\r | |
2249 | output_start = output = PyString_AsString(result);\r | |
2250 | input = PyString_AS_STRING(input_obj);\r | |
2251 | \r | |
2252 | if (dellen == 0 && table != NULL) {\r | |
2253 | /* If no deletions are required, use faster code */\r | |
2254 | for (i = inlen; --i >= 0; ) {\r | |
2255 | c = Py_CHARMASK(*input++);\r | |
2256 | if (Py_CHARMASK((*output++ = table[c])) != c)\r | |
2257 | changed = 1;\r | |
2258 | }\r | |
2259 | if (changed || !PyString_CheckExact(input_obj))\r | |
2260 | return result;\r | |
2261 | Py_DECREF(result);\r | |
2262 | Py_INCREF(input_obj);\r | |
2263 | return input_obj;\r | |
2264 | }\r | |
2265 | \r | |
2266 | if (table == NULL) {\r | |
2267 | for (i = 0; i < 256; i++)\r | |
2268 | trans_table[i] = Py_CHARMASK(i);\r | |
2269 | } else {\r | |
2270 | for (i = 0; i < 256; i++)\r | |
2271 | trans_table[i] = Py_CHARMASK(table[i]);\r | |
2272 | }\r | |
2273 | \r | |
2274 | for (i = 0; i < dellen; i++)\r | |
2275 | trans_table[(int) Py_CHARMASK(del_table[i])] = -1;\r | |
2276 | \r | |
2277 | for (i = inlen; --i >= 0; ) {\r | |
2278 | c = Py_CHARMASK(*input++);\r | |
2279 | if (trans_table[c] != -1)\r | |
2280 | if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)\r | |
2281 | continue;\r | |
2282 | changed = 1;\r | |
2283 | }\r | |
2284 | if (!changed && PyString_CheckExact(input_obj)) {\r | |
2285 | Py_DECREF(result);\r | |
2286 | Py_INCREF(input_obj);\r | |
2287 | return input_obj;\r | |
2288 | }\r | |
2289 | /* Fix the size of the resulting string */\r | |
2290 | if (inlen > 0 && _PyString_Resize(&result, output - output_start))\r | |
2291 | return NULL;\r | |
2292 | return result;\r | |
2293 | }\r | |
2294 | \r | |
2295 | \r | |
2296 | /* find and count characters and substrings */\r | |
2297 | \r | |
/* findchar(target, target_len, c): thin wrapper over memchr() that yields
   a char * to the first byte equal to `c` within the first `target_len`
   bytes of `target`, or NULL when there is none. */
#define findchar(target, target_len, c)                                 \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2300 | \r | |
2301 | /* String ops must return a string. */\r | |
2302 | /* If the object is subclass of string, create a copy */\r | |
2303 | Py_LOCAL(PyStringObject *)\r | |
2304 | return_self(PyStringObject *self)\r | |
2305 | {\r | |
2306 | if (PyString_CheckExact(self)) {\r | |
2307 | Py_INCREF(self);\r | |
2308 | return self;\r | |
2309 | }\r | |
2310 | return (PyStringObject *)PyString_FromStringAndSize(\r | |
2311 | PyString_AS_STRING(self),\r | |
2312 | PyString_GET_SIZE(self));\r | |
2313 | }\r | |
2314 | \r | |
2315 | Py_LOCAL_INLINE(Py_ssize_t)\r | |
2316 | countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)\r | |
2317 | {\r | |
2318 | Py_ssize_t count=0;\r | |
2319 | const char *start=target;\r | |
2320 | const char *end=target+target_len;\r | |
2321 | \r | |
2322 | while ( (start=findchar(start, end-start, c)) != NULL ) {\r | |
2323 | count++;\r | |
2324 | if (count >= maxcount)\r | |
2325 | break;\r | |
2326 | start += 1;\r | |
2327 | }\r | |
2328 | return count;\r | |
2329 | }\r | |
2330 | \r | |
2331 | \r | |
2332 | /* Algorithms for different cases of string replacement */\r | |
2333 | \r | |
2334 | /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */\r | |
2335 | Py_LOCAL(PyStringObject *)\r | |
2336 | replace_interleave(PyStringObject *self,\r | |
2337 | const char *to_s, Py_ssize_t to_len,\r | |
2338 | Py_ssize_t maxcount)\r | |
2339 | {\r | |
2340 | char *self_s, *result_s;\r | |
2341 | Py_ssize_t self_len, result_len;\r | |
2342 | Py_ssize_t count, i, product;\r | |
2343 | PyStringObject *result;\r | |
2344 | \r | |
2345 | self_len = PyString_GET_SIZE(self);\r | |
2346 | \r | |
2347 | /* 1 at the end plus 1 after every character */\r | |
2348 | count = self_len+1;\r | |
2349 | if (maxcount < count)\r | |
2350 | count = maxcount;\r | |
2351 | \r | |
2352 | /* Check for overflow */\r | |
2353 | /* result_len = count * to_len + self_len; */\r | |
2354 | product = count * to_len;\r | |
2355 | if (product / to_len != count) {\r | |
2356 | PyErr_SetString(PyExc_OverflowError,\r | |
2357 | "replace string is too long");\r | |
2358 | return NULL;\r | |
2359 | }\r | |
2360 | result_len = product + self_len;\r | |
2361 | if (result_len < 0) {\r | |
2362 | PyErr_SetString(PyExc_OverflowError,\r | |
2363 | "replace string is too long");\r | |
2364 | return NULL;\r | |
2365 | }\r | |
2366 | \r | |
2367 | if (! (result = (PyStringObject *)\r | |
2368 | PyString_FromStringAndSize(NULL, result_len)) )\r | |
2369 | return NULL;\r | |
2370 | \r | |
2371 | self_s = PyString_AS_STRING(self);\r | |
2372 | result_s = PyString_AS_STRING(result);\r | |
2373 | \r | |
2374 | /* TODO: special case single character, which doesn't need memcpy */\r | |
2375 | \r | |
2376 | /* Lay the first one down (guaranteed this will occur) */\r | |
2377 | Py_MEMCPY(result_s, to_s, to_len);\r | |
2378 | result_s += to_len;\r | |
2379 | count -= 1;\r | |
2380 | \r | |
2381 | for (i=0; i<count; i++) {\r | |
2382 | *result_s++ = *self_s++;\r | |
2383 | Py_MEMCPY(result_s, to_s, to_len);\r | |
2384 | result_s += to_len;\r | |
2385 | }\r | |
2386 | \r | |
2387 | /* Copy the rest of the original string */\r | |
2388 | Py_MEMCPY(result_s, self_s, self_len-i);\r | |
2389 | \r | |
2390 | return result;\r | |
2391 | }\r | |
2392 | \r | |
2393 | /* Special case for deleting a single character */\r | |
2394 | /* len(self)>=1, len(from)==1, to="", maxcount>=1 */\r | |
2395 | Py_LOCAL(PyStringObject *)\r | |
2396 | replace_delete_single_character(PyStringObject *self,\r | |
2397 | char from_c, Py_ssize_t maxcount)\r | |
2398 | {\r | |
2399 | char *self_s, *result_s;\r | |
2400 | char *start, *next, *end;\r | |
2401 | Py_ssize_t self_len, result_len;\r | |
2402 | Py_ssize_t count;\r | |
2403 | PyStringObject *result;\r | |
2404 | \r | |
2405 | self_len = PyString_GET_SIZE(self);\r | |
2406 | self_s = PyString_AS_STRING(self);\r | |
2407 | \r | |
2408 | count = countchar(self_s, self_len, from_c, maxcount);\r | |
2409 | if (count == 0) {\r | |
2410 | return return_self(self);\r | |
2411 | }\r | |
2412 | \r | |
2413 | result_len = self_len - count; /* from_len == 1 */\r | |
2414 | assert(result_len>=0);\r | |
2415 | \r | |
2416 | if ( (result = (PyStringObject *)\r | |
2417 | PyString_FromStringAndSize(NULL, result_len)) == NULL)\r | |
2418 | return NULL;\r | |
2419 | result_s = PyString_AS_STRING(result);\r | |
2420 | \r | |
2421 | start = self_s;\r | |
2422 | end = self_s + self_len;\r | |
2423 | while (count-- > 0) {\r | |
2424 | next = findchar(start, end-start, from_c);\r | |
2425 | if (next == NULL)\r | |
2426 | break;\r | |
2427 | Py_MEMCPY(result_s, start, next-start);\r | |
2428 | result_s += (next-start);\r | |
2429 | start = next+1;\r | |
2430 | }\r | |
2431 | Py_MEMCPY(result_s, start, end-start);\r | |
2432 | \r | |
2433 | return result;\r | |
2434 | }\r | |
2435 | \r | |
2436 | /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */\r | |
2437 | \r | |
2438 | Py_LOCAL(PyStringObject *)\r | |
2439 | replace_delete_substring(PyStringObject *self,\r | |
2440 | const char *from_s, Py_ssize_t from_len,\r | |
2441 | Py_ssize_t maxcount) {\r | |
2442 | char *self_s, *result_s;\r | |
2443 | char *start, *next, *end;\r | |
2444 | Py_ssize_t self_len, result_len;\r | |
2445 | Py_ssize_t count, offset;\r | |
2446 | PyStringObject *result;\r | |
2447 | \r | |
2448 | self_len = PyString_GET_SIZE(self);\r | |
2449 | self_s = PyString_AS_STRING(self);\r | |
2450 | \r | |
2451 | count = stringlib_count(self_s, self_len,\r | |
2452 | from_s, from_len,\r | |
2453 | maxcount);\r | |
2454 | \r | |
2455 | if (count == 0) {\r | |
2456 | /* no matches */\r | |
2457 | return return_self(self);\r | |
2458 | }\r | |
2459 | \r | |
2460 | result_len = self_len - (count * from_len);\r | |
2461 | assert (result_len>=0);\r | |
2462 | \r | |
2463 | if ( (result = (PyStringObject *)\r | |
2464 | PyString_FromStringAndSize(NULL, result_len)) == NULL )\r | |
2465 | return NULL;\r | |
2466 | \r | |
2467 | result_s = PyString_AS_STRING(result);\r | |
2468 | \r | |
2469 | start = self_s;\r | |
2470 | end = self_s + self_len;\r | |
2471 | while (count-- > 0) {\r | |
2472 | offset = stringlib_find(start, end-start,\r | |
2473 | from_s, from_len,\r | |
2474 | 0);\r | |
2475 | if (offset == -1)\r | |
2476 | break;\r | |
2477 | next = start + offset;\r | |
2478 | \r | |
2479 | Py_MEMCPY(result_s, start, next-start);\r | |
2480 | \r | |
2481 | result_s += (next-start);\r | |
2482 | start = next+from_len;\r | |
2483 | }\r | |
2484 | Py_MEMCPY(result_s, start, end-start);\r | |
2485 | return result;\r | |
2486 | }\r | |
2487 | \r | |
2488 | /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */\r | |
2489 | Py_LOCAL(PyStringObject *)\r | |
2490 | replace_single_character_in_place(PyStringObject *self,\r | |
2491 | char from_c, char to_c,\r | |
2492 | Py_ssize_t maxcount)\r | |
2493 | {\r | |
2494 | char *self_s, *result_s, *start, *end, *next;\r | |
2495 | Py_ssize_t self_len;\r | |
2496 | PyStringObject *result;\r | |
2497 | \r | |
2498 | /* The result string will be the same size */\r | |
2499 | self_s = PyString_AS_STRING(self);\r | |
2500 | self_len = PyString_GET_SIZE(self);\r | |
2501 | \r | |
2502 | next = findchar(self_s, self_len, from_c);\r | |
2503 | \r | |
2504 | if (next == NULL) {\r | |
2505 | /* No matches; return the original string */\r | |
2506 | return return_self(self);\r | |
2507 | }\r | |
2508 | \r | |
2509 | /* Need to make a new string */\r | |
2510 | result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);\r | |
2511 | if (result == NULL)\r | |
2512 | return NULL;\r | |
2513 | result_s = PyString_AS_STRING(result);\r | |
2514 | Py_MEMCPY(result_s, self_s, self_len);\r | |
2515 | \r | |
2516 | /* change everything in-place, starting with this one */\r | |
2517 | start = result_s + (next-self_s);\r | |
2518 | *start = to_c;\r | |
2519 | start++;\r | |
2520 | end = result_s + self_len;\r | |
2521 | \r | |
2522 | while (--maxcount > 0) {\r | |
2523 | next = findchar(start, end-start, from_c);\r | |
2524 | if (next == NULL)\r | |
2525 | break;\r | |
2526 | *next = to_c;\r | |
2527 | start = next+1;\r | |
2528 | }\r | |
2529 | \r | |
2530 | return result;\r | |
2531 | }\r | |
2532 | \r | |
2533 | /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */\r | |
2534 | Py_LOCAL(PyStringObject *)\r | |
2535 | replace_substring_in_place(PyStringObject *self,\r | |
2536 | const char *from_s, Py_ssize_t from_len,\r | |
2537 | const char *to_s, Py_ssize_t to_len,\r | |
2538 | Py_ssize_t maxcount)\r | |
2539 | {\r | |
2540 | char *result_s, *start, *end;\r | |
2541 | char *self_s;\r | |
2542 | Py_ssize_t self_len, offset;\r | |
2543 | PyStringObject *result;\r | |
2544 | \r | |
2545 | /* The result string will be the same size */\r | |
2546 | \r | |
2547 | self_s = PyString_AS_STRING(self);\r | |
2548 | self_len = PyString_GET_SIZE(self);\r | |
2549 | \r | |
2550 | offset = stringlib_find(self_s, self_len,\r | |
2551 | from_s, from_len,\r | |
2552 | 0);\r | |
2553 | if (offset == -1) {\r | |
2554 | /* No matches; return the original string */\r | |
2555 | return return_self(self);\r | |
2556 | }\r | |
2557 | \r | |
2558 | /* Need to make a new string */\r | |
2559 | result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);\r | |
2560 | if (result == NULL)\r | |
2561 | return NULL;\r | |
2562 | result_s = PyString_AS_STRING(result);\r | |
2563 | Py_MEMCPY(result_s, self_s, self_len);\r | |
2564 | \r | |
2565 | /* change everything in-place, starting with this one */\r | |
2566 | start = result_s + offset;\r | |
2567 | Py_MEMCPY(start, to_s, from_len);\r | |
2568 | start += from_len;\r | |
2569 | end = result_s + self_len;\r | |
2570 | \r | |
2571 | while ( --maxcount > 0) {\r | |
2572 | offset = stringlib_find(start, end-start,\r | |
2573 | from_s, from_len,\r | |
2574 | 0);\r | |
2575 | if (offset==-1)\r | |
2576 | break;\r | |
2577 | Py_MEMCPY(start+offset, to_s, from_len);\r | |
2578 | start += offset+from_len;\r | |
2579 | }\r | |
2580 | \r | |
2581 | return result;\r | |
2582 | }\r | |
2583 | \r | |
2584 | /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */\r | |
2585 | Py_LOCAL(PyStringObject *)\r | |
2586 | replace_single_character(PyStringObject *self,\r | |
2587 | char from_c,\r | |
2588 | const char *to_s, Py_ssize_t to_len,\r | |
2589 | Py_ssize_t maxcount)\r | |
2590 | {\r | |
2591 | char *self_s, *result_s;\r | |
2592 | char *start, *next, *end;\r | |
2593 | Py_ssize_t self_len, result_len;\r | |
2594 | Py_ssize_t count, product;\r | |
2595 | PyStringObject *result;\r | |
2596 | \r | |
2597 | self_s = PyString_AS_STRING(self);\r | |
2598 | self_len = PyString_GET_SIZE(self);\r | |
2599 | \r | |
2600 | count = countchar(self_s, self_len, from_c, maxcount);\r | |
2601 | if (count == 0) {\r | |
2602 | /* no matches, return unchanged */\r | |
2603 | return return_self(self);\r | |
2604 | }\r | |
2605 | \r | |
2606 | /* use the difference between current and new, hence the "-1" */\r | |
2607 | /* result_len = self_len + count * (to_len-1) */\r | |
2608 | product = count * (to_len-1);\r | |
2609 | if (product / (to_len-1) != count) {\r | |
2610 | PyErr_SetString(PyExc_OverflowError, "replace string is too long");\r | |
2611 | return NULL;\r | |
2612 | }\r | |
2613 | result_len = self_len + product;\r | |
2614 | if (result_len < 0) {\r | |
2615 | PyErr_SetString(PyExc_OverflowError, "replace string is too long");\r | |
2616 | return NULL;\r | |
2617 | }\r | |
2618 | \r | |
2619 | if ( (result = (PyStringObject *)\r | |
2620 | PyString_FromStringAndSize(NULL, result_len)) == NULL)\r | |
2621 | return NULL;\r | |
2622 | result_s = PyString_AS_STRING(result);\r | |
2623 | \r | |
2624 | start = self_s;\r | |
2625 | end = self_s + self_len;\r | |
2626 | while (count-- > 0) {\r | |
2627 | next = findchar(start, end-start, from_c);\r | |
2628 | if (next == NULL)\r | |
2629 | break;\r | |
2630 | \r | |
2631 | if (next == start) {\r | |
2632 | /* replace with the 'to' */\r | |
2633 | Py_MEMCPY(result_s, to_s, to_len);\r | |
2634 | result_s += to_len;\r | |
2635 | start += 1;\r | |
2636 | } else {\r | |
2637 | /* copy the unchanged old then the 'to' */\r | |
2638 | Py_MEMCPY(result_s, start, next-start);\r | |
2639 | result_s += (next-start);\r | |
2640 | Py_MEMCPY(result_s, to_s, to_len);\r | |
2641 | result_s += to_len;\r | |
2642 | start = next+1;\r | |
2643 | }\r | |
2644 | }\r | |
2645 | /* Copy the remainder of the remaining string */\r | |
2646 | Py_MEMCPY(result_s, start, end-start);\r | |
2647 | \r | |
2648 | return result;\r | |
2649 | }\r | |
2650 | \r | |
2651 | /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */\r | |
2652 | Py_LOCAL(PyStringObject *)\r | |
2653 | replace_substring(PyStringObject *self,\r | |
2654 | const char *from_s, Py_ssize_t from_len,\r | |
2655 | const char *to_s, Py_ssize_t to_len,\r | |
2656 | Py_ssize_t maxcount) {\r | |
2657 | char *self_s, *result_s;\r | |
2658 | char *start, *next, *end;\r | |
2659 | Py_ssize_t self_len, result_len;\r | |
2660 | Py_ssize_t count, offset, product;\r | |
2661 | PyStringObject *result;\r | |
2662 | \r | |
2663 | self_s = PyString_AS_STRING(self);\r | |
2664 | self_len = PyString_GET_SIZE(self);\r | |
2665 | \r | |
2666 | count = stringlib_count(self_s, self_len,\r | |
2667 | from_s, from_len,\r | |
2668 | maxcount);\r | |
2669 | \r | |
2670 | if (count == 0) {\r | |
2671 | /* no matches, return unchanged */\r | |
2672 | return return_self(self);\r | |
2673 | }\r | |
2674 | \r | |
2675 | /* Check for overflow */\r | |
2676 | /* result_len = self_len + count * (to_len-from_len) */\r | |
2677 | product = count * (to_len-from_len);\r | |
2678 | if (product / (to_len-from_len) != count) {\r | |
2679 | PyErr_SetString(PyExc_OverflowError, "replace string is too long");\r | |
2680 | return NULL;\r | |
2681 | }\r | |
2682 | result_len = self_len + product;\r | |
2683 | if (result_len < 0) {\r | |
2684 | PyErr_SetString(PyExc_OverflowError, "replace string is too long");\r | |
2685 | return NULL;\r | |
2686 | }\r | |
2687 | \r | |
2688 | if ( (result = (PyStringObject *)\r | |
2689 | PyString_FromStringAndSize(NULL, result_len)) == NULL)\r | |
2690 | return NULL;\r | |
2691 | result_s = PyString_AS_STRING(result);\r | |
2692 | \r | |
2693 | start = self_s;\r | |
2694 | end = self_s + self_len;\r | |
2695 | while (count-- > 0) {\r | |
2696 | offset = stringlib_find(start, end-start,\r | |
2697 | from_s, from_len,\r | |
2698 | 0);\r | |
2699 | if (offset == -1)\r | |
2700 | break;\r | |
2701 | next = start+offset;\r | |
2702 | if (next == start) {\r | |
2703 | /* replace with the 'to' */\r | |
2704 | Py_MEMCPY(result_s, to_s, to_len);\r | |
2705 | result_s += to_len;\r | |
2706 | start += from_len;\r | |
2707 | } else {\r | |
2708 | /* copy the unchanged old then the 'to' */\r | |
2709 | Py_MEMCPY(result_s, start, next-start);\r | |
2710 | result_s += (next-start);\r | |
2711 | Py_MEMCPY(result_s, to_s, to_len);\r | |
2712 | result_s += to_len;\r | |
2713 | start = next+from_len;\r | |
2714 | }\r | |
2715 | }\r | |
2716 | /* Copy the remainder of the remaining string */\r | |
2717 | Py_MEMCPY(result_s, start, end-start);\r | |
2718 | \r | |
2719 | return result;\r | |
2720 | }\r | |
2721 | \r | |
2722 | \r | |
2723 | Py_LOCAL(PyStringObject *)\r | |
2724 | replace(PyStringObject *self,\r | |
2725 | const char *from_s, Py_ssize_t from_len,\r | |
2726 | const char *to_s, Py_ssize_t to_len,\r | |
2727 | Py_ssize_t maxcount)\r | |
2728 | {\r | |
2729 | if (maxcount < 0) {\r | |
2730 | maxcount = PY_SSIZE_T_MAX;\r | |
2731 | } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {\r | |
2732 | /* nothing to do; return the original string */\r | |
2733 | return return_self(self);\r | |
2734 | }\r | |
2735 | \r | |
2736 | if (maxcount == 0 ||\r | |
2737 | (from_len == 0 && to_len == 0)) {\r | |
2738 | /* nothing to do; return the original string */\r | |
2739 | return return_self(self);\r | |
2740 | }\r | |
2741 | \r | |
2742 | /* Handle zero-length special cases */\r | |
2743 | \r | |
2744 | if (from_len == 0) {\r | |
2745 | /* insert the 'to' string everywhere. */\r | |
2746 | /* >>> "Python".replace("", ".") */\r | |
2747 | /* '.P.y.t.h.o.n.' */\r | |
2748 | return replace_interleave(self, to_s, to_len, maxcount);\r | |
2749 | }\r | |
2750 | \r | |
2751 | /* Except for "".replace("", "A") == "A" there is no way beyond this */\r | |
2752 | /* point for an empty self string to generate a non-empty string */\r | |
2753 | /* Special case so the remaining code always gets a non-empty string */\r | |
2754 | if (PyString_GET_SIZE(self) == 0) {\r | |
2755 | return return_self(self);\r | |
2756 | }\r | |
2757 | \r | |
2758 | if (to_len == 0) {\r | |
2759 | /* delete all occurances of 'from' string */\r | |
2760 | if (from_len == 1) {\r | |
2761 | return replace_delete_single_character(\r | |
2762 | self, from_s[0], maxcount);\r | |
2763 | } else {\r | |
2764 | return replace_delete_substring(self, from_s, from_len, maxcount);\r | |
2765 | }\r | |
2766 | }\r | |
2767 | \r | |
2768 | /* Handle special case where both strings have the same length */\r | |
2769 | \r | |
2770 | if (from_len == to_len) {\r | |
2771 | if (from_len == 1) {\r | |
2772 | return replace_single_character_in_place(\r | |
2773 | self,\r | |
2774 | from_s[0],\r | |
2775 | to_s[0],\r | |
2776 | maxcount);\r | |
2777 | } else {\r | |
2778 | return replace_substring_in_place(\r | |
2779 | self, from_s, from_len, to_s, to_len, maxcount);\r | |
2780 | }\r | |
2781 | }\r | |
2782 | \r | |
2783 | /* Otherwise use the more generic algorithms */\r | |
2784 | if (from_len == 1) {\r | |
2785 | return replace_single_character(self, from_s[0],\r | |
2786 | to_s, to_len, maxcount);\r | |
2787 | } else {\r | |
2788 | /* len('from')>=2, len('to')>=1 */\r | |
2789 | return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);\r | |
2790 | }\r | |
2791 | }\r | |
2792 | \r | |
2793 | PyDoc_STRVAR(replace__doc__,\r | |
2794 | "S.replace(old, new[, count]) -> string\n\\r | |
2795 | \n\\r | |
2796 | Return a copy of string S with all occurrences of substring\n\\r | |
2797 | old replaced by new. If the optional argument count is\n\\r | |
2798 | given, only the first count occurrences are replaced.");\r | |
2799 | \r | |
2800 | static PyObject *\r | |
2801 | string_replace(PyStringObject *self, PyObject *args)\r | |
2802 | {\r | |
2803 | Py_ssize_t count = -1;\r | |
2804 | PyObject *from, *to;\r | |
2805 | const char *from_s, *to_s;\r | |
2806 | Py_ssize_t from_len, to_len;\r | |
2807 | \r | |
2808 | if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))\r | |
2809 | return NULL;\r | |
2810 | \r | |
2811 | if (PyString_Check(from)) {\r | |
2812 | from_s = PyString_AS_STRING(from);\r | |
2813 | from_len = PyString_GET_SIZE(from);\r | |
2814 | }\r | |
2815 | #ifdef Py_USING_UNICODE\r | |
2816 | if (PyUnicode_Check(from))\r | |
2817 | return PyUnicode_Replace((PyObject *)self,\r | |
2818 | from, to, count);\r | |
2819 | #endif\r | |
2820 | else if (PyObject_AsCharBuffer(from, &from_s, &from_len))\r | |
2821 | return NULL;\r | |
2822 | \r | |
2823 | if (PyString_Check(to)) {\r | |
2824 | to_s = PyString_AS_STRING(to);\r | |
2825 | to_len = PyString_GET_SIZE(to);\r | |
2826 | }\r | |
2827 | #ifdef Py_USING_UNICODE\r | |
2828 | else if (PyUnicode_Check(to))\r | |
2829 | return PyUnicode_Replace((PyObject *)self,\r | |
2830 | from, to, count);\r | |
2831 | #endif\r | |
2832 | else if (PyObject_AsCharBuffer(to, &to_s, &to_len))\r | |
2833 | return NULL;\r | |
2834 | \r | |
2835 | return (PyObject *)replace((PyStringObject *) self,\r | |
2836 | from_s, from_len,\r | |
2837 | to_s, to_len, count);\r | |
2838 | }\r | |
2839 | \r | |
2840 | /** End DALKE **/\r | |
2841 | \r | |
2842 | /* Matches the end (direction >= 0) or start (direction < 0) of self\r | |
2843 | * against substr, using the start and end arguments. Returns\r | |
2844 | * -1 on error, 0 if not found and 1 if found.\r | |
2845 | */\r | |
2846 | Py_LOCAL(int)\r | |
2847 | _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,\r | |
2848 | Py_ssize_t end, int direction)\r | |
2849 | {\r | |
2850 | Py_ssize_t len = PyString_GET_SIZE(self);\r | |
2851 | Py_ssize_t slen;\r | |
2852 | const char* sub;\r | |
2853 | const char* str;\r | |
2854 | \r | |
2855 | if (PyString_Check(substr)) {\r | |
2856 | sub = PyString_AS_STRING(substr);\r | |
2857 | slen = PyString_GET_SIZE(substr);\r | |
2858 | }\r | |
2859 | #ifdef Py_USING_UNICODE\r | |
2860 | else if (PyUnicode_Check(substr))\r | |
2861 | return PyUnicode_Tailmatch((PyObject *)self,\r | |
2862 | substr, start, end, direction);\r | |
2863 | #endif\r | |
2864 | else if (PyObject_AsCharBuffer(substr, &sub, &slen))\r | |
2865 | return -1;\r | |
2866 | str = PyString_AS_STRING(self);\r | |
2867 | \r | |
2868 | ADJUST_INDICES(start, end, len);\r | |
2869 | \r | |
2870 | if (direction < 0) {\r | |
2871 | /* startswith */\r | |
2872 | if (start+slen > len)\r | |
2873 | return 0;\r | |
2874 | } else {\r | |
2875 | /* endswith */\r | |
2876 | if (end-start < slen || start > len)\r | |
2877 | return 0;\r | |
2878 | \r | |
2879 | if (end-slen > start)\r | |
2880 | start = end - slen;\r | |
2881 | }\r | |
2882 | if (end-start >= slen)\r | |
2883 | return ! memcmp(str+start, sub, slen);\r | |
2884 | return 0;\r | |
2885 | }\r | |
2886 | \r | |
2887 | \r | |
2888 | PyDoc_STRVAR(startswith__doc__,\r | |
2889 | "S.startswith(prefix[, start[, end]]) -> bool\n\\r | |
2890 | \n\\r | |
2891 | Return True if S starts with the specified prefix, False otherwise.\n\\r | |
2892 | With optional start, test S beginning at that position.\n\\r | |
2893 | With optional end, stop comparing S at that position.\n\\r | |
2894 | prefix can also be a tuple of strings to try.");\r | |
2895 | \r | |
2896 | static PyObject *\r | |
2897 | string_startswith(PyStringObject *self, PyObject *args)\r | |
2898 | {\r | |
2899 | Py_ssize_t start = 0;\r | |
2900 | Py_ssize_t end = PY_SSIZE_T_MAX;\r | |
2901 | PyObject *subobj;\r | |
2902 | int result;\r | |
2903 | \r | |
2904 | if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))\r | |
2905 | return NULL;\r | |
2906 | if (PyTuple_Check(subobj)) {\r | |
2907 | Py_ssize_t i;\r | |
2908 | for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {\r | |
2909 | result = _string_tailmatch(self,\r | |
2910 | PyTuple_GET_ITEM(subobj, i),\r | |
2911 | start, end, -1);\r | |
2912 | if (result == -1)\r | |
2913 | return NULL;\r | |
2914 | else if (result) {\r | |
2915 | Py_RETURN_TRUE;\r | |
2916 | }\r | |
2917 | }\r | |
2918 | Py_RETURN_FALSE;\r | |
2919 | }\r | |
2920 | result = _string_tailmatch(self, subobj, start, end, -1);\r | |
2921 | if (result == -1) {\r | |
2922 | if (PyErr_ExceptionMatches(PyExc_TypeError))\r | |
2923 | PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "\r | |
2924 | "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);\r | |
2925 | return NULL;\r | |
2926 | }\r | |
2927 | else\r | |
2928 | return PyBool_FromLong(result);\r | |
2929 | }\r | |
2930 | \r | |
2931 | \r | |
2932 | PyDoc_STRVAR(endswith__doc__,\r | |
2933 | "S.endswith(suffix[, start[, end]]) -> bool\n\\r | |
2934 | \n\\r | |
2935 | Return True if S ends with the specified suffix, False otherwise.\n\\r | |
2936 | With optional start, test S beginning at that position.\n\\r | |
2937 | With optional end, stop comparing S at that position.\n\\r | |
2938 | suffix can also be a tuple of strings to try.");\r | |
2939 | \r | |
2940 | static PyObject *\r | |
2941 | string_endswith(PyStringObject *self, PyObject *args)\r | |
2942 | {\r | |
2943 | Py_ssize_t start = 0;\r | |
2944 | Py_ssize_t end = PY_SSIZE_T_MAX;\r | |
2945 | PyObject *subobj;\r | |
2946 | int result;\r | |
2947 | \r | |
2948 | if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))\r | |
2949 | return NULL;\r | |
2950 | if (PyTuple_Check(subobj)) {\r | |
2951 | Py_ssize_t i;\r | |
2952 | for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {\r | |
2953 | result = _string_tailmatch(self,\r | |
2954 | PyTuple_GET_ITEM(subobj, i),\r | |
2955 | start, end, +1);\r | |
2956 | if (result == -1)\r | |
2957 | return NULL;\r | |
2958 | else if (result) {\r | |
2959 | Py_RETURN_TRUE;\r | |
2960 | }\r | |
2961 | }\r | |
2962 | Py_RETURN_FALSE;\r | |
2963 | }\r | |
2964 | result = _string_tailmatch(self, subobj, start, end, +1);\r | |
2965 | if (result == -1) {\r | |
2966 | if (PyErr_ExceptionMatches(PyExc_TypeError))\r | |
2967 | PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "\r | |
2968 | "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);\r | |
2969 | return NULL;\r | |
2970 | }\r | |
2971 | else\r | |
2972 | return PyBool_FromLong(result);\r | |
2973 | }\r | |
2974 | \r | |
2975 | \r | |
2976 | PyDoc_STRVAR(encode__doc__,\r | |
2977 | "S.encode([encoding[,errors]]) -> object\n\\r | |
2978 | \n\\r | |
2979 | Encodes S using the codec registered for encoding. encoding defaults\n\\r | |
2980 | to the default encoding. errors may be given to set a different error\n\\r | |
2981 | handling scheme. Default is 'strict' meaning that encoding errors raise\n\\r | |
2982 | a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\\r | |
2983 | 'xmlcharrefreplace' as well as any other name registered with\n\\r | |
2984 | codecs.register_error that is able to handle UnicodeEncodeErrors.");\r | |
2985 | \r | |
2986 | static PyObject *\r | |
2987 | string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)\r | |
2988 | {\r | |
2989 | static char *kwlist[] = {"encoding", "errors", 0};\r | |
2990 | char *encoding = NULL;\r | |
2991 | char *errors = NULL;\r | |
2992 | PyObject *v;\r | |
2993 | \r | |
2994 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",\r | |
2995 | kwlist, &encoding, &errors))\r | |
2996 | return NULL;\r | |
2997 | v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);\r | |
2998 | if (v == NULL)\r | |
2999 | goto onError;\r | |
3000 | if (!PyString_Check(v) && !PyUnicode_Check(v)) {\r | |
3001 | PyErr_Format(PyExc_TypeError,\r | |
3002 | "encoder did not return a string/unicode object "\r | |
3003 | "(type=%.400s)",\r | |
3004 | Py_TYPE(v)->tp_name);\r | |
3005 | Py_DECREF(v);\r | |
3006 | return NULL;\r | |
3007 | }\r | |
3008 | return v;\r | |
3009 | \r | |
3010 | onError:\r | |
3011 | return NULL;\r | |
3012 | }\r | |
3013 | \r | |
3014 | \r | |
3015 | PyDoc_STRVAR(decode__doc__,\r | |
3016 | "S.decode([encoding[,errors]]) -> object\n\\r | |
3017 | \n\\r | |
3018 | Decodes S using the codec registered for encoding. encoding defaults\n\\r | |
3019 | to the default encoding. errors may be given to set a different error\n\\r | |
3020 | handling scheme. Default is 'strict' meaning that encoding errors raise\n\\r | |
3021 | a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\\r | |
3022 | as well as any other name registered with codecs.register_error that is\n\\r | |
3023 | able to handle UnicodeDecodeErrors.");\r | |
3024 | \r | |
3025 | static PyObject *\r | |
3026 | string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)\r | |
3027 | {\r | |
3028 | static char *kwlist[] = {"encoding", "errors", 0};\r | |
3029 | char *encoding = NULL;\r | |
3030 | char *errors = NULL;\r | |
3031 | PyObject *v;\r | |
3032 | \r | |
3033 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",\r | |
3034 | kwlist, &encoding, &errors))\r | |
3035 | return NULL;\r | |
3036 | v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);\r | |
3037 | if (v == NULL)\r | |
3038 | goto onError;\r | |
3039 | if (!PyString_Check(v) && !PyUnicode_Check(v)) {\r | |
3040 | PyErr_Format(PyExc_TypeError,\r | |
3041 | "decoder did not return a string/unicode object "\r | |
3042 | "(type=%.400s)",\r | |
3043 | Py_TYPE(v)->tp_name);\r | |
3044 | Py_DECREF(v);\r | |
3045 | return NULL;\r | |
3046 | }\r | |
3047 | return v;\r | |
3048 | \r | |
3049 | onError:\r | |
3050 | return NULL;\r | |
3051 | }\r | |
3052 | \r | |
3053 | \r | |
3054 | PyDoc_STRVAR(expandtabs__doc__,\r | |
3055 | "S.expandtabs([tabsize]) -> string\n\\r | |
3056 | \n\\r | |
3057 | Return a copy of S where all tab characters are expanded using spaces.\n\\r | |
3058 | If tabsize is not given, a tab size of 8 characters is assumed.");\r | |
3059 | \r | |
3060 | static PyObject*\r | |
3061 | string_expandtabs(PyStringObject *self, PyObject *args)\r | |
3062 | {\r | |
3063 | const char *e, *p, *qe;\r | |
3064 | char *q;\r | |
3065 | Py_ssize_t i, j, incr;\r | |
3066 | PyObject *u;\r | |
3067 | int tabsize = 8;\r | |
3068 | \r | |
3069 | if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))\r | |
3070 | return NULL;\r | |
3071 | \r | |
3072 | /* First pass: determine size of output string */\r | |
3073 | i = 0; /* chars up to and including most recent \n or \r */\r | |
3074 | j = 0; /* chars since most recent \n or \r (use in tab calculations) */\r | |
3075 | e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */\r | |
3076 | for (p = PyString_AS_STRING(self); p < e; p++)\r | |
3077 | if (*p == '\t') {\r | |
3078 | if (tabsize > 0) {\r | |
3079 | incr = tabsize - (j % tabsize);\r | |
3080 | if (j > PY_SSIZE_T_MAX - incr)\r | |
3081 | goto overflow1;\r | |
3082 | j += incr;\r | |
3083 | }\r | |
3084 | }\r | |
3085 | else {\r | |
3086 | if (j > PY_SSIZE_T_MAX - 1)\r | |
3087 | goto overflow1;\r | |
3088 | j++;\r | |
3089 | if (*p == '\n' || *p == '\r') {\r | |
3090 | if (i > PY_SSIZE_T_MAX - j)\r | |
3091 | goto overflow1;\r | |
3092 | i += j;\r | |
3093 | j = 0;\r | |
3094 | }\r | |
3095 | }\r | |
3096 | \r | |
3097 | if (i > PY_SSIZE_T_MAX - j)\r | |
3098 | goto overflow1;\r | |
3099 | \r | |
3100 | /* Second pass: create output string and fill it */\r | |
3101 | u = PyString_FromStringAndSize(NULL, i + j);\r | |
3102 | if (!u)\r | |
3103 | return NULL;\r | |
3104 | \r | |
3105 | j = 0; /* same as in first pass */\r | |
3106 | q = PyString_AS_STRING(u); /* next output char */\r | |
3107 | qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */\r | |
3108 | \r | |
3109 | for (p = PyString_AS_STRING(self); p < e; p++)\r | |
3110 | if (*p == '\t') {\r | |
3111 | if (tabsize > 0) {\r | |
3112 | i = tabsize - (j % tabsize);\r | |
3113 | j += i;\r | |
3114 | while (i--) {\r | |
3115 | if (q >= qe)\r | |
3116 | goto overflow2;\r | |
3117 | *q++ = ' ';\r | |
3118 | }\r | |
3119 | }\r | |
3120 | }\r | |
3121 | else {\r | |
3122 | if (q >= qe)\r | |
3123 | goto overflow2;\r | |
3124 | *q++ = *p;\r | |
3125 | j++;\r | |
3126 | if (*p == '\n' || *p == '\r')\r | |
3127 | j = 0;\r | |
3128 | }\r | |
3129 | \r | |
3130 | return u;\r | |
3131 | \r | |
3132 | overflow2:\r | |
3133 | Py_DECREF(u);\r | |
3134 | overflow1:\r | |
3135 | PyErr_SetString(PyExc_OverflowError, "new string is too long");\r | |
3136 | return NULL;\r | |
3137 | }\r | |
3138 | \r | |
3139 | Py_LOCAL_INLINE(PyObject *)\r | |
3140 | pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)\r | |
3141 | {\r | |
3142 | PyObject *u;\r | |
3143 | \r | |
3144 | if (left < 0)\r | |
3145 | left = 0;\r | |
3146 | if (right < 0)\r | |
3147 | right = 0;\r | |
3148 | \r | |
3149 | if (left == 0 && right == 0 && PyString_CheckExact(self)) {\r | |
3150 | Py_INCREF(self);\r | |
3151 | return (PyObject *)self;\r | |
3152 | }\r | |
3153 | \r | |
3154 | u = PyString_FromStringAndSize(NULL,\r | |
3155 | left + PyString_GET_SIZE(self) + right);\r | |
3156 | if (u) {\r | |
3157 | if (left)\r | |
3158 | memset(PyString_AS_STRING(u), fill, left);\r | |
3159 | Py_MEMCPY(PyString_AS_STRING(u) + left,\r | |
3160 | PyString_AS_STRING(self),\r | |
3161 | PyString_GET_SIZE(self));\r | |
3162 | if (right)\r | |
3163 | memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),\r | |
3164 | fill, right);\r | |
3165 | }\r | |
3166 | \r | |
3167 | return u;\r | |
3168 | }\r | |
3169 | \r | |
3170 | PyDoc_STRVAR(ljust__doc__,\r | |
3171 | "S.ljust(width[, fillchar]) -> string\n"\r | |
3172 | "\n"\r | |
3173 | "Return S left-justified in a string of length width. Padding is\n"\r | |
3174 | "done using the specified fill character (default is a space).");\r | |
3175 | \r | |
3176 | static PyObject *\r | |
3177 | string_ljust(PyStringObject *self, PyObject *args)\r | |
3178 | {\r | |
3179 | Py_ssize_t width;\r | |
3180 | char fillchar = ' ';\r | |
3181 | \r | |
3182 | if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))\r | |
3183 | return NULL;\r | |
3184 | \r | |
3185 | if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {\r | |
3186 | Py_INCREF(self);\r | |
3187 | return (PyObject*) self;\r | |
3188 | }\r | |
3189 | \r | |
3190 | return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);\r | |
3191 | }\r | |
3192 | \r | |
3193 | \r | |
3194 | PyDoc_STRVAR(rjust__doc__,\r | |
3195 | "S.rjust(width[, fillchar]) -> string\n"\r | |
3196 | "\n"\r | |
3197 | "Return S right-justified in a string of length width. Padding is\n"\r | |
3198 | "done using the specified fill character (default is a space)");\r | |
3199 | \r | |
3200 | static PyObject *\r | |
3201 | string_rjust(PyStringObject *self, PyObject *args)\r | |
3202 | {\r | |
3203 | Py_ssize_t width;\r | |
3204 | char fillchar = ' ';\r | |
3205 | \r | |
3206 | if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))\r | |
3207 | return NULL;\r | |
3208 | \r | |
3209 | if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {\r | |
3210 | Py_INCREF(self);\r | |
3211 | return (PyObject*) self;\r | |
3212 | }\r | |
3213 | \r | |
3214 | return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);\r | |
3215 | }\r | |
3216 | \r | |
3217 | \r | |
3218 | PyDoc_STRVAR(center__doc__,\r | |
3219 | "S.center(width[, fillchar]) -> string\n"\r | |
3220 | "\n"\r | |
3221 | "Return S centered in a string of length width. Padding is\n"\r | |
3222 | "done using the specified fill character (default is a space)");\r | |
3223 | \r | |
3224 | static PyObject *\r | |
3225 | string_center(PyStringObject *self, PyObject *args)\r | |
3226 | {\r | |
3227 | Py_ssize_t marg, left;\r | |
3228 | Py_ssize_t width;\r | |
3229 | char fillchar = ' ';\r | |
3230 | \r | |
3231 | if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))\r | |
3232 | return NULL;\r | |
3233 | \r | |
3234 | if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {\r | |
3235 | Py_INCREF(self);\r | |
3236 | return (PyObject*) self;\r | |
3237 | }\r | |
3238 | \r | |
3239 | marg = width - PyString_GET_SIZE(self);\r | |
3240 | left = marg / 2 + (marg & width & 1);\r | |
3241 | \r | |
3242 | return pad(self, left, marg - left, fillchar);\r | |
3243 | }\r | |
3244 | \r | |
3245 | PyDoc_STRVAR(zfill__doc__,\r | |
3246 | "S.zfill(width) -> string\n"\r | |
3247 | "\n"\r | |
3248 | "Pad a numeric string S with zeros on the left, to fill a field\n"\r | |
3249 | "of the specified width. The string S is never truncated.");\r | |
3250 | \r | |
3251 | static PyObject *\r | |
3252 | string_zfill(PyStringObject *self, PyObject *args)\r | |
3253 | {\r | |
3254 | Py_ssize_t fill;\r | |
3255 | PyObject *s;\r | |
3256 | char *p;\r | |
3257 | Py_ssize_t width;\r | |
3258 | \r | |
3259 | if (!PyArg_ParseTuple(args, "n:zfill", &width))\r | |
3260 | return NULL;\r | |
3261 | \r | |
3262 | if (PyString_GET_SIZE(self) >= width) {\r | |
3263 | if (PyString_CheckExact(self)) {\r | |
3264 | Py_INCREF(self);\r | |
3265 | return (PyObject*) self;\r | |
3266 | }\r | |
3267 | else\r | |
3268 | return PyString_FromStringAndSize(\r | |
3269 | PyString_AS_STRING(self),\r | |
3270 | PyString_GET_SIZE(self)\r | |
3271 | );\r | |
3272 | }\r | |
3273 | \r | |
3274 | fill = width - PyString_GET_SIZE(self);\r | |
3275 | \r | |
3276 | s = pad(self, fill, 0, '0');\r | |
3277 | \r | |
3278 | if (s == NULL)\r | |
3279 | return NULL;\r | |
3280 | \r | |
3281 | p = PyString_AS_STRING(s);\r | |
3282 | if (p[fill] == '+' || p[fill] == '-') {\r | |
3283 | /* move sign to beginning of string */\r | |
3284 | p[0] = p[fill];\r | |
3285 | p[fill] = '0';\r | |
3286 | }\r | |
3287 | \r | |
3288 | return (PyObject*) s;\r | |
3289 | }\r | |
3290 | \r | |
3291 | PyDoc_STRVAR(isspace__doc__,\r | |
3292 | "S.isspace() -> bool\n\\r | |
3293 | \n\\r | |
3294 | Return True if all characters in S are whitespace\n\\r | |
3295 | and there is at least one character in S, False otherwise.");\r | |
3296 | \r | |
3297 | static PyObject*\r | |
3298 | string_isspace(PyStringObject *self)\r | |
3299 | {\r | |
3300 | register const unsigned char *p\r | |
3301 | = (unsigned char *) PyString_AS_STRING(self);\r | |
3302 | register const unsigned char *e;\r | |
3303 | \r | |
3304 | /* Shortcut for single character strings */\r | |
3305 | if (PyString_GET_SIZE(self) == 1 &&\r | |
3306 | isspace(*p))\r | |
3307 | return PyBool_FromLong(1);\r | |
3308 | \r | |
3309 | /* Special case for empty strings */\r | |
3310 | if (PyString_GET_SIZE(self) == 0)\r | |
3311 | return PyBool_FromLong(0);\r | |
3312 | \r | |
3313 | e = p + PyString_GET_SIZE(self);\r | |
3314 | for (; p < e; p++) {\r | |
3315 | if (!isspace(*p))\r | |
3316 | return PyBool_FromLong(0);\r | |
3317 | }\r | |
3318 | return PyBool_FromLong(1);\r | |
3319 | }\r | |
3320 | \r | |
3321 | \r | |
3322 | PyDoc_STRVAR(isalpha__doc__,\r | |
3323 | "S.isalpha() -> bool\n\\r | |
3324 | \n\\r | |
3325 | Return True if all characters in S are alphabetic\n\\r | |
3326 | and there is at least one character in S, False otherwise.");\r | |
3327 | \r | |
3328 | static PyObject*\r | |
3329 | string_isalpha(PyStringObject *self)\r | |
3330 | {\r | |
3331 | register const unsigned char *p\r | |
3332 | = (unsigned char *) PyString_AS_STRING(self);\r | |
3333 | register const unsigned char *e;\r | |
3334 | \r | |
3335 | /* Shortcut for single character strings */\r | |
3336 | if (PyString_GET_SIZE(self) == 1 &&\r | |
3337 | isalpha(*p))\r | |
3338 | return PyBool_FromLong(1);\r | |
3339 | \r | |
3340 | /* Special case for empty strings */\r | |
3341 | if (PyString_GET_SIZE(self) == 0)\r | |
3342 | return PyBool_FromLong(0);\r | |
3343 | \r | |
3344 | e = p + PyString_GET_SIZE(self);\r | |
3345 | for (; p < e; p++) {\r | |
3346 | if (!isalpha(*p))\r | |
3347 | return PyBool_FromLong(0);\r | |
3348 | }\r | |
3349 | return PyBool_FromLong(1);\r | |
3350 | }\r | |
3351 | \r | |
3352 | \r | |
3353 | PyDoc_STRVAR(isalnum__doc__,\r | |
3354 | "S.isalnum() -> bool\n\\r | |
3355 | \n\\r | |
3356 | Return True if all characters in S are alphanumeric\n\\r | |
3357 | and there is at least one character in S, False otherwise.");\r | |
3358 | \r | |
3359 | static PyObject*\r | |
3360 | string_isalnum(PyStringObject *self)\r | |
3361 | {\r | |
3362 | register const unsigned char *p\r | |
3363 | = (unsigned char *) PyString_AS_STRING(self);\r | |
3364 | register const unsigned char *e;\r | |
3365 | \r | |
3366 | /* Shortcut for single character strings */\r | |
3367 | if (PyString_GET_SIZE(self) == 1 &&\r | |
3368 | isalnum(*p))\r | |
3369 | return PyBool_FromLong(1);\r | |
3370 | \r | |
3371 | /* Special case for empty strings */\r | |
3372 | if (PyString_GET_SIZE(self) == 0)\r | |
3373 | return PyBool_FromLong(0);\r | |
3374 | \r | |
3375 | e = p + PyString_GET_SIZE(self);\r | |
3376 | for (; p < e; p++) {\r | |
3377 | if (!isalnum(*p))\r | |
3378 | return PyBool_FromLong(0);\r | |
3379 | }\r | |
3380 | return PyBool_FromLong(1);\r | |
3381 | }\r | |
3382 | \r | |
3383 | \r | |
3384 | PyDoc_STRVAR(isdigit__doc__,\r | |
3385 | "S.isdigit() -> bool\n\\r | |
3386 | \n\\r | |
3387 | Return True if all characters in S are digits\n\\r | |
3388 | and there is at least one character in S, False otherwise.");\r | |
3389 | \r | |
3390 | static PyObject*\r | |
3391 | string_isdigit(PyStringObject *self)\r | |
3392 | {\r | |
3393 | register const unsigned char *p\r | |
3394 | = (unsigned char *) PyString_AS_STRING(self);\r | |
3395 | register const unsigned char *e;\r | |
3396 | \r | |
3397 | /* Shortcut for single character strings */\r | |
3398 | if (PyString_GET_SIZE(self) == 1 &&\r | |
3399 | isdigit(*p))\r | |
3400 | return PyBool_FromLong(1);\r | |
3401 | \r | |
3402 | /* Special case for empty strings */\r | |
3403 | if (PyString_GET_SIZE(self) == 0)\r | |
3404 | return PyBool_FromLong(0);\r | |
3405 | \r | |
3406 | e = p + PyString_GET_SIZE(self);\r | |
3407 | for (; p < e; p++) {\r | |
3408 | if (!isdigit(*p))\r | |
3409 | return PyBool_FromLong(0);\r | |
3410 | }\r | |
3411 | return PyBool_FromLong(1);\r | |
3412 | }\r | |
3413 | \r | |
3414 | \r | |
3415 | PyDoc_STRVAR(islower__doc__,\r | |
3416 | "S.islower() -> bool\n\\r | |
3417 | \n\\r | |
3418 | Return True if all cased characters in S are lowercase and there is\n\\r | |
3419 | at least one cased character in S, False otherwise.");\r | |
3420 | \r | |
3421 | static PyObject*\r | |
3422 | string_islower(PyStringObject *self)\r | |
3423 | {\r | |
3424 | register const unsigned char *p\r | |
3425 | = (unsigned char *) PyString_AS_STRING(self);\r | |
3426 | register const unsigned char *e;\r | |
3427 | int cased;\r | |
3428 | \r | |
3429 | /* Shortcut for single character strings */\r | |
3430 | if (PyString_GET_SIZE(self) == 1)\r | |
3431 | return PyBool_FromLong(islower(*p) != 0);\r | |
3432 | \r | |
3433 | /* Special case for empty strings */\r | |
3434 | if (PyString_GET_SIZE(self) == 0)\r | |
3435 | return PyBool_FromLong(0);\r | |
3436 | \r | |
3437 | e = p + PyString_GET_SIZE(self);\r | |
3438 | cased = 0;\r | |
3439 | for (; p < e; p++) {\r | |
3440 | if (isupper(*p))\r | |
3441 | return PyBool_FromLong(0);\r | |
3442 | else if (!cased && islower(*p))\r | |
3443 | cased = 1;\r | |
3444 | }\r | |
3445 | return PyBool_FromLong(cased);\r | |
3446 | }\r | |
3447 | \r | |
3448 | \r | |
3449 | PyDoc_STRVAR(isupper__doc__,\r | |
3450 | "S.isupper() -> bool\n\\r | |
3451 | \n\\r | |
3452 | Return True if all cased characters in S are uppercase and there is\n\\r | |
3453 | at least one cased character in S, False otherwise.");\r | |
3454 | \r | |
3455 | static PyObject*\r | |
3456 | string_isupper(PyStringObject *self)\r | |
3457 | {\r | |
3458 | register const unsigned char *p\r | |
3459 | = (unsigned char *) PyString_AS_STRING(self);\r | |
3460 | register const unsigned char *e;\r | |
3461 | int cased;\r | |
3462 | \r | |
3463 | /* Shortcut for single character strings */\r | |
3464 | if (PyString_GET_SIZE(self) == 1)\r | |
3465 | return PyBool_FromLong(isupper(*p) != 0);\r | |
3466 | \r | |
3467 | /* Special case for empty strings */\r | |
3468 | if (PyString_GET_SIZE(self) == 0)\r | |
3469 | return PyBool_FromLong(0);\r | |
3470 | \r | |
3471 | e = p + PyString_GET_SIZE(self);\r | |
3472 | cased = 0;\r | |
3473 | for (; p < e; p++) {\r | |
3474 | if (islower(*p))\r | |
3475 | return PyBool_FromLong(0);\r | |
3476 | else if (!cased && isupper(*p))\r | |
3477 | cased = 1;\r | |
3478 | }\r | |
3479 | return PyBool_FromLong(cased);\r | |
3480 | }\r | |
3481 | \r | |
3482 | \r | |
3483 | PyDoc_STRVAR(istitle__doc__,\r | |
3484 | "S.istitle() -> bool\n\\r | |
3485 | \n\\r | |
3486 | Return True if S is a titlecased string and there is at least one\n\\r | |
3487 | character in S, i.e. uppercase characters may only follow uncased\n\\r | |
3488 | characters and lowercase characters only cased ones. Return False\n\\r | |
3489 | otherwise.");\r | |
3490 | \r | |
3491 | static PyObject*\r | |
3492 | string_istitle(PyStringObject *self, PyObject *uncased)\r | |
3493 | {\r | |
3494 | register const unsigned char *p\r | |
3495 | = (unsigned char *) PyString_AS_STRING(self);\r | |
3496 | register const unsigned char *e;\r | |
3497 | int cased, previous_is_cased;\r | |
3498 | \r | |
3499 | /* Shortcut for single character strings */\r | |
3500 | if (PyString_GET_SIZE(self) == 1)\r | |
3501 | return PyBool_FromLong(isupper(*p) != 0);\r | |
3502 | \r | |
3503 | /* Special case for empty strings */\r | |
3504 | if (PyString_GET_SIZE(self) == 0)\r | |
3505 | return PyBool_FromLong(0);\r | |
3506 | \r | |
3507 | e = p + PyString_GET_SIZE(self);\r | |
3508 | cased = 0;\r | |
3509 | previous_is_cased = 0;\r | |
3510 | for (; p < e; p++) {\r | |
3511 | register const unsigned char ch = *p;\r | |
3512 | \r | |
3513 | if (isupper(ch)) {\r | |
3514 | if (previous_is_cased)\r | |
3515 | return PyBool_FromLong(0);\r | |
3516 | previous_is_cased = 1;\r | |
3517 | cased = 1;\r | |
3518 | }\r | |
3519 | else if (islower(ch)) {\r | |
3520 | if (!previous_is_cased)\r | |
3521 | return PyBool_FromLong(0);\r | |
3522 | previous_is_cased = 1;\r | |
3523 | cased = 1;\r | |
3524 | }\r | |
3525 | else\r | |
3526 | previous_is_cased = 0;\r | |
3527 | }\r | |
3528 | return PyBool_FromLong(cased);\r | |
3529 | }\r | |
3530 | \r | |
3531 | \r | |
3532 | PyDoc_STRVAR(splitlines__doc__,\r | |
3533 | "S.splitlines([keepends]) -> list of strings\n\\r | |
3534 | \n\\r | |
3535 | Return a list of the lines in S, breaking at line boundaries.\n\\r | |
3536 | Line breaks are not included in the resulting list unless keepends\n\\r | |
3537 | is given and true.");\r | |
3538 | \r | |
3539 | static PyObject*\r | |
3540 | string_splitlines(PyStringObject *self, PyObject *args)\r | |
3541 | {\r | |
3542 | int keepends = 0;\r | |
3543 | \r | |
3544 | if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))\r | |
3545 | return NULL;\r | |
3546 | \r | |
3547 | return stringlib_splitlines(\r | |
3548 | (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),\r | |
3549 | keepends\r | |
3550 | );\r | |
3551 | }\r | |
3552 | \r | |
3553 | PyDoc_STRVAR(sizeof__doc__,\r | |
3554 | "S.__sizeof__() -> size of S in memory, in bytes");\r | |
3555 | \r | |
3556 | static PyObject *\r | |
3557 | string_sizeof(PyStringObject *v)\r | |
3558 | {\r | |
3559 | Py_ssize_t res;\r | |
3560 | res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;\r | |
3561 | return PyInt_FromSsize_t(res);\r | |
3562 | }\r | |
3563 | \r | |
3564 | static PyObject *\r | |
3565 | string_getnewargs(PyStringObject *v)\r | |
3566 | {\r | |
3567 | return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));\r | |
3568 | }\r | |
3569 | \r | |
3570 | \r | |
3571 | #include "stringlib/string_format.h"\r | |
3572 | \r | |
3573 | PyDoc_STRVAR(format__doc__,\r | |
3574 | "S.format(*args, **kwargs) -> string\n\\r | |
3575 | \n\\r | |
3576 | Return a formatted version of S, using substitutions from args and kwargs.\n\\r | |
3577 | The substitutions are identified by braces ('{' and '}').");\r | |
3578 | \r | |
3579 | static PyObject *\r | |
3580 | string__format__(PyObject* self, PyObject* args)\r | |
3581 | {\r | |
3582 | PyObject *format_spec;\r | |
3583 | PyObject *result = NULL;\r | |
3584 | PyObject *tmp = NULL;\r | |
3585 | \r | |
3586 | /* If 2.x, convert format_spec to the same type as value */\r | |
3587 | /* This is to allow things like u''.format('') */\r | |
3588 | if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))\r | |
3589 | goto done;\r | |
3590 | if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {\r | |
3591 | PyErr_Format(PyExc_TypeError, "__format__ arg must be str "\r | |
3592 | "or unicode, not %s", Py_TYPE(format_spec)->tp_name);\r | |
3593 | goto done;\r | |
3594 | }\r | |
3595 | tmp = PyObject_Str(format_spec);\r | |
3596 | if (tmp == NULL)\r | |
3597 | goto done;\r | |
3598 | format_spec = tmp;\r | |
3599 | \r | |
3600 | result = _PyBytes_FormatAdvanced(self,\r | |
3601 | PyString_AS_STRING(format_spec),\r | |
3602 | PyString_GET_SIZE(format_spec));\r | |
3603 | done:\r | |
3604 | Py_XDECREF(tmp);\r | |
3605 | return result;\r | |
3606 | }\r | |
3607 | \r | |
3608 | PyDoc_STRVAR(p_format__doc__,\r | |
3609 | "S.__format__(format_spec) -> string\n\\r | |
3610 | \n\\r | |
3611 | Return a formatted version of S as described by format_spec.");\r | |
3612 | \r | |
3613 | \r | |
/* Method table for the str type; PyString_Type installs it as tp_methods.
 * Each entry is {Python-visible name, C implementation, calling-convention
 * flags, docstring}.  Entries that omit the fourth field ("__getnewargs__"
 * and the two "_formatter_*" helpers) carry no docstring.  The {NULL, NULL}
 * entry at the end terminates the table.
 */
3614 | static PyMethodDef\r | |
3615 | string_methods[] = {\r | |
3616 | /* Counterparts of the obsolete stropmodule functions; except\r | |
3617 | string.maketrans(). */\r | |
3618 | {"join", (PyCFunction)string_join, METH_O, join__doc__},\r | |
3619 | {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},\r | |
3620 | {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},\r | |
3621 | {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},\r | |
3622 | {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},\r | |
3623 | {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},\r | |
3624 | {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},\r | |
3625 | {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},\r | |
3626 | {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},\r | |
3627 | {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},\r | |
3628 | {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},\r | |
3629 | {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},\r | |
3630 | {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,\r | |
3631 | capitalize__doc__},\r | |
3632 | {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},\r | |
3633 | {"endswith", (PyCFunction)string_endswith, METH_VARARGS,\r | |
3634 | endswith__doc__},\r | |
3635 | {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},\r | |
3636 | {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},\r | |
3637 | {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},\r | |
3638 | {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},\r | |
3639 | {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},\r | |
3640 | {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},\r | |
3641 | {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},\r | |
3642 | {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},\r | |
3643 | {"rpartition", (PyCFunction)string_rpartition, METH_O,\r | |
3644 | rpartition__doc__},\r | |
3645 | {"startswith", (PyCFunction)string_startswith, METH_VARARGS,\r | |
3646 | startswith__doc__},\r | |
3647 | {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},\r | |
3648 | {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,\r | |
3649 | swapcase__doc__},\r | |
3650 | {"translate", (PyCFunction)string_translate, METH_VARARGS,\r | |
3651 | translate__doc__},\r | |
3652 | {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},\r | |
3653 | {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},\r | |
3654 | {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},\r | |
3655 | {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},\r | |
3656 | {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},\r | |
3657 | {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},\r | |
3658 | {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},\r | |
3659 | {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},\r | |
3660 | {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},\r | |
3661 | {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},\r | |
3662 | {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},\r | |
3663 | {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,\r | |
3664 | expandtabs__doc__},\r | |
3665 | {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,\r | |
3666 | splitlines__doc__},\r | |
3667 | {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,\r | |
3668 | sizeof__doc__},\r | |
3669 | {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},\r | |
3670 | {NULL, NULL} /* sentinel */\r | |
3671 | };\r | |
3672 | \r | |
3673 | static PyObject *\r | |
3674 | str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);\r | |
3675 | \r | |
3676 | static PyObject *\r | |
3677 | string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)\r | |
3678 | {\r | |
3679 | PyObject *x = NULL;\r | |
3680 | static char *kwlist[] = {"object", 0};\r | |
3681 | \r | |
3682 | if (type != &PyString_Type)\r | |
3683 | return str_subtype_new(type, args, kwds);\r | |
3684 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))\r | |
3685 | return NULL;\r | |
3686 | if (x == NULL)\r | |
3687 | return PyString_FromString("");\r | |
3688 | return PyObject_Str(x);\r | |
3689 | }\r | |
3690 | \r | |
3691 | static PyObject *\r | |
3692 | str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)\r | |
3693 | {\r | |
3694 | PyObject *tmp, *pnew;\r | |
3695 | Py_ssize_t n;\r | |
3696 | \r | |
3697 | assert(PyType_IsSubtype(type, &PyString_Type));\r | |
3698 | tmp = string_new(&PyString_Type, args, kwds);\r | |
3699 | if (tmp == NULL)\r | |
3700 | return NULL;\r | |
3701 | assert(PyString_CheckExact(tmp));\r | |
3702 | n = PyString_GET_SIZE(tmp);\r | |
3703 | pnew = type->tp_alloc(type, n);\r | |
3704 | if (pnew != NULL) {\r | |
3705 | Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);\r | |
3706 | ((PyStringObject *)pnew)->ob_shash =\r | |
3707 | ((PyStringObject *)tmp)->ob_shash;\r | |
3708 | ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;\r | |
3709 | }\r | |
3710 | Py_DECREF(tmp);\r | |
3711 | return pnew;\r | |
3712 | }\r | |
3713 | \r | |
3714 | static PyObject *\r | |
3715 | basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)\r | |
3716 | {\r | |
3717 | PyErr_SetString(PyExc_TypeError,\r | |
3718 | "The basestring type cannot be instantiated");\r | |
3719 | return NULL;\r | |
3720 | }\r | |
3721 | \r | |
3722 | static PyObject *\r | |
3723 | string_mod(PyObject *v, PyObject *w)\r | |
3724 | {\r | |
3725 | if (!PyString_Check(v)) {\r | |
3726 | Py_INCREF(Py_NotImplemented);\r | |
3727 | return Py_NotImplemented;\r | |
3728 | }\r | |
3729 | return PyString_Format(v, w);\r | |
3730 | }\r | |
3731 | \r | |
/* Docstring used as tp_doc of PyBaseString_Type. */
3732 | PyDoc_STRVAR(basestring_doc,\r | |
3733 | "Type basestring cannot be instantiated; it is the base for str and unicode.");\r | |
3734 | \r | |
/* Number-protocol slots for str.  Only nb_remainder is populated (with
 * string_mod), which is what routes the `%` operator to string formatting.
 * The slots after nb_remainder are not listed and are therefore
 * zero-initialized, as for any partially initialized static aggregate.
 */
3735 | static PyNumberMethods string_as_number = {\r | |
3736 | 0, /*nb_add*/\r | |
3737 | 0, /*nb_subtract*/\r | |
3738 | 0, /*nb_multiply*/\r | |
3739 | 0, /*nb_divide*/\r | |
3740 | string_mod, /*nb_remainder*/\r | |
3741 | };\r | |
3742 | \r | |
3743 | \r | |
/* Type object for "basestring".
 * Only tp_flags, tp_doc, tp_base and tp_new are filled in.  tp_new is
 * basestring_new, which always raises TypeError, so the type itself cannot
 * be instantiated; PyString_Type names this object as its tp_base.
 * NOTE(review): the two unlabeled zeros after the name line up with the
 * size fields that PyString_Type sets to PyStringObject_SIZE and
 * sizeof(char) -- presumably tp_basicsize and tp_itemsize.
 */
3744 | PyTypeObject PyBaseString_Type = {\r | |
3745 | PyVarObject_HEAD_INIT(&PyType_Type, 0)\r | |
3746 | "basestring",\r | |
3747 | 0,\r | |
3748 | 0,\r | |
3749 | 0, /* tp_dealloc */\r | |
3750 | 0, /* tp_print */\r | |
3751 | 0, /* tp_getattr */\r | |
3752 | 0, /* tp_setattr */\r | |
3753 | 0, /* tp_compare */\r | |
3754 | 0, /* tp_repr */\r | |
3755 | 0, /* tp_as_number */\r | |
3756 | 0, /* tp_as_sequence */\r | |
3757 | 0, /* tp_as_mapping */\r | |
3758 | 0, /* tp_hash */\r | |
3759 | 0, /* tp_call */\r | |
3760 | 0, /* tp_str */\r | |
3761 | 0, /* tp_getattro */\r | |
3762 | 0, /* tp_setattro */\r | |
3763 | 0, /* tp_as_buffer */\r | |
3764 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */\r | |
3765 | basestring_doc, /* tp_doc */\r | |
3766 | 0, /* tp_traverse */\r | |
3767 | 0, /* tp_clear */\r | |
3768 | 0, /* tp_richcompare */\r | |
3769 | 0, /* tp_weaklistoffset */\r | |
3770 | 0, /* tp_iter */\r | |
3771 | 0, /* tp_iternext */\r | |
3772 | 0, /* tp_methods */\r | |
3773 | 0, /* tp_members */\r | |
3774 | 0, /* tp_getset */\r | |
3775 | &PyBaseObject_Type, /* tp_base */\r | |
3776 | 0, /* tp_dict */\r | |
3777 | 0, /* tp_descr_get */\r | |
3778 | 0, /* tp_descr_set */\r | |
3779 | 0, /* tp_dictoffset */\r | |
3780 | 0, /* tp_init */\r | |
3781 | 0, /* tp_alloc */\r | |
3782 | basestring_new, /* tp_new */\r | |
3783 | 0, /* tp_free */\r | |
3784 | };\r | |
3785 | \r | |
/* Docstring used as tp_doc of PyString_Type. */
3786 | PyDoc_STRVAR(string_doc,\r | |
3787 | "str(object) -> string\n\\r | |
3788 | \n\\r | |
3789 | Return a nice string representation of the object.\n\\r | |
3790 | If the argument is a string, the return value is the same object.");\r | |
3791 | \r | |
/* Type object for "str".
 * Instances are variable-sized: the base size is PyStringObject_SIZE and
 * each item is one char.  The type derives from PyBaseString_Type, is
 * created through string_new, freed with PyObject_Del, and exposes the
 * methods in string_methods.  `%` formatting is reached through
 * string_as_number; sequence, mapping and buffer behaviour come from the
 * string_as_sequence / string_as_mapping / string_as_buffer tables defined
 * elsewhere in this file.
 */
3792 | PyTypeObject PyString_Type = {\r | |
3793 | PyVarObject_HEAD_INIT(&PyType_Type, 0)\r | |
3794 | "str",\r | |
3795 | PyStringObject_SIZE,\r | |
3796 | sizeof(char),\r | |
3797 | string_dealloc, /* tp_dealloc */\r | |
3798 | (printfunc)string_print, /* tp_print */\r | |
3799 | 0, /* tp_getattr */\r | |
3800 | 0, /* tp_setattr */\r | |
3801 | 0, /* tp_compare */\r | |
3802 | string_repr, /* tp_repr */\r | |
3803 | &string_as_number, /* tp_as_number */\r | |
3804 | &string_as_sequence, /* tp_as_sequence */\r | |
3805 | &string_as_mapping, /* tp_as_mapping */\r | |
3806 | (hashfunc)string_hash, /* tp_hash */\r | |
3807 | 0, /* tp_call */\r | |
3808 | string_str, /* tp_str */\r | |
3809 | PyObject_GenericGetAttr, /* tp_getattro */\r | |
3810 | 0, /* tp_setattro */\r | |
3811 | &string_as_buffer, /* tp_as_buffer */\r | |
3812 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |\r | |
3813 | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |\r | |
3814 | Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */\r | |
3815 | string_doc, /* tp_doc */\r | |
3816 | 0, /* tp_traverse */\r | |
3817 | 0, /* tp_clear */\r | |
3818 | (richcmpfunc)string_richcompare, /* tp_richcompare */\r | |
3819 | 0, /* tp_weaklistoffset */\r | |
3820 | 0, /* tp_iter */\r | |
3821 | 0, /* tp_iternext */\r | |
3822 | string_methods, /* tp_methods */\r | |
3823 | 0, /* tp_members */\r | |
3824 | 0, /* tp_getset */\r | |
3825 | &PyBaseString_Type, /* tp_base */\r | |
3826 | 0, /* tp_dict */\r | |
3827 | 0, /* tp_descr_get */\r | |
3828 | 0, /* tp_descr_set */\r | |
3829 | 0, /* tp_dictoffset */\r | |
3830 | 0, /* tp_init */\r | |
3831 | 0, /* tp_alloc */\r | |
3832 | string_new, /* tp_new */\r | |
3833 | PyObject_Del, /* tp_free */\r | |
3834 | };\r | |
3835 | \r | |
3836 | void\r | |
3837 | PyString_Concat(register PyObject **pv, register PyObject *w)\r | |
3838 | {\r | |
3839 | register PyObject *v;\r | |
3840 | if (*pv == NULL)\r | |
3841 | return;\r | |
3842 | if (w == NULL || !PyString_Check(*pv)) {\r | |
3843 | Py_DECREF(*pv);\r | |
3844 | *pv = NULL;\r | |
3845 | return;\r | |
3846 | }\r | |
3847 | v = string_concat((PyStringObject *) *pv, w);\r | |
3848 | Py_DECREF(*pv);\r | |
3849 | *pv = v;\r | |
3850 | }\r | |
3851 | \r | |
3852 | void\r | |
3853 | PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)\r | |
3854 | {\r | |
3855 | PyString_Concat(pv, w);\r | |
3856 | Py_XDECREF(w);\r | |
3857 | }\r | |
3858 | \r | |
3859 | \r | |
3860 | /* The following function breaks the notion that strings are immutable:\r | |
3861 | it changes the size of a string. We get away with this only if there\r | |
3862 | is only one module referencing the object. You can also think of it\r | |
3863 | as creating a new string object and destroying the old one, only\r | |
3864 | more efficiently. In any case, don't use this if the string may\r | |
3865 | already be known to some other part of the code...\r | |
3866 | Note that if there's not enough memory to resize the string, the original\r | |
3867 | string object at *pv is deallocated, *pv is set to NULL, an "out of\r | |
3868 | memory" exception is set, and -1 is returned. Else (on success) 0 is\r | |
3869 | returned, and the value in *pv may or may not be the same as on input.\r | |
3870 | As always, an extra byte is allocated for a trailing \0 byte (newsize\r | |
3871 | does *not* include that), and a trailing \0 byte is stored.\r | |
3872 | */\r | |
3873 | \r | |
3874 | int\r | |
3875 | _PyString_Resize(PyObject **pv, Py_ssize_t newsize)\r | |
3876 | {\r | |
3877 | register PyObject *v;\r | |
3878 | register PyStringObject *sv;\r | |
3879 | v = *pv;\r | |
3880 | if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||\r | |
3881 | PyString_CHECK_INTERNED(v)) {\r | |
3882 | *pv = 0;\r | |
3883 | Py_DECREF(v);\r | |
3884 | PyErr_BadInternalCall();\r | |
3885 | return -1;\r | |
3886 | }\r | |
3887 | /* XXX UNREF/NEWREF interface should be more symmetrical */\r | |
3888 | _Py_DEC_REFTOTAL;\r | |
3889 | _Py_ForgetReference(v);\r | |
3890 | *pv = (PyObject *)\r | |
3891 | PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);\r | |
3892 | if (*pv == NULL) {\r | |
3893 | PyObject_Del(v);\r | |
3894 | PyErr_NoMemory();\r | |
3895 | return -1;\r | |
3896 | }\r | |
3897 | _Py_NewReference(*pv);\r | |
3898 | sv = (PyStringObject *) *pv;\r | |
3899 | Py_SIZE(sv) = newsize;\r | |
3900 | sv->ob_sval[newsize] = '\0';\r | |
3901 | sv->ob_shash = -1; /* invalidate cached hash value */\r | |
3902 | return 0;\r | |
3903 | }\r | |
3904 | \r | |
3905 | /* Helpers for formatstring */\r | |
3906 | \r | |
3907 | Py_LOCAL_INLINE(PyObject *)\r | |
3908 | getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)\r | |
3909 | {\r | |
3910 | Py_ssize_t argidx = *p_argidx;\r | |
3911 | if (argidx < arglen) {\r | |
3912 | (*p_argidx)++;\r | |
3913 | if (arglen < 0)\r | |
3914 | return args;\r | |
3915 | else\r | |
3916 | return PyTuple_GetItem(args, argidx);\r | |
3917 | }\r | |
3918 | PyErr_SetString(PyExc_TypeError,\r | |
3919 | "not enough arguments for format string");\r | |
3920 | return NULL;\r | |
3921 | }\r | |
3922 | \r | |
/* Bit flags for '%' conversion specifiers.  PyString_Format ORs one of
 * these into its local `flags` for each flag character it reads; the table
 * below gives the character that sets each bit.
 */
3923 | /* Format codes\r | |
3924 | * F_LJUST '-'\r | |
3925 | * F_SIGN '+'\r | |
3926 | * F_BLANK ' '\r | |
3927 | * F_ALT '#'\r | |
3928 | * F_ZERO '0'\r | |
3929 | */\r | |
3930 | #define F_LJUST (1<<0)\r | |
3931 | #define F_SIGN (1<<1)\r | |
3932 | #define F_BLANK (1<<2)\r | |
3933 | #define F_ALT (1<<3)\r | |
3934 | #define F_ZERO (1<<4)\r | |
3935 | \r | |
3936 | /* Returns a new reference to a PyString object, or NULL on failure. */\r | |
3937 | \r | |
3938 | static PyObject *\r | |
3939 | formatfloat(PyObject *v, int flags, int prec, int type)\r | |
3940 | {\r | |
3941 | char *p;\r | |
3942 | PyObject *result;\r | |
3943 | double x;\r | |
3944 | \r | |
3945 | x = PyFloat_AsDouble(v);\r | |
3946 | if (x == -1.0 && PyErr_Occurred()) {\r | |
3947 | PyErr_Format(PyExc_TypeError, "float argument required, "\r | |
3948 | "not %.200s", Py_TYPE(v)->tp_name);\r | |
3949 | return NULL;\r | |
3950 | }\r | |
3951 | \r | |
3952 | if (prec < 0)\r | |
3953 | prec = 6;\r | |
3954 | \r | |
3955 | p = PyOS_double_to_string(x, type, prec,\r | |
3956 | (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);\r | |
3957 | \r | |
3958 | if (p == NULL)\r | |
3959 | return NULL;\r | |
3960 | result = PyString_FromStringAndSize(p, strlen(p));\r | |
3961 | PyMem_Free(p);\r | |
3962 | return result;\r | |
3963 | }\r | |
3964 | \r | |
3965 | /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and\r | |
3966 | * the F_ALT flag, for Python's long (unbounded) ints. It's not used for\r | |
3967 | * Python's regular ints.\r | |
3968 | * Return value: a new PyString*, or NULL if error.\r | |
3969 | * . *pbuf is set to point into it,\r | |
3970 | * *plen set to the # of chars following that.\r | |
3971 | * Caller must decref it when done using pbuf.\r | |
3972 | * The string starting at *pbuf is of the form\r | |
3973 | * "-"? ("0x" | "0X")? digit+\r | |
3974 | * "0x"/"0X" are present only for x and X conversions, with F_ALT\r | |
3975 | * set in flags. The case of hex digits will be correct,\r | |
3976 | * There will be at least prec digits, zero-filled on the left if\r | |
3977 | * necessary to get that many.\r | |
3978 | * val object to be converted\r | |
3979 | * flags bitmask of format flags; only F_ALT is looked at\r | |
3980 | * prec minimum number of digits; 0-fill on left if needed\r | |
3981 | * type a character in [duoxX]; u acts the same as d\r | |
3982 | *\r | |
3983 | * CAUTION: o, x and X conversions on regular ints can never\r | |
3984 | * produce a '-' sign, but can for Python's unbounded ints.\r | |
3985 | */\r | |
3986 | PyObject*\r | |
3987 | _PyString_FormatLong(PyObject *val, int flags, int prec, int type,\r | |
3988 | char **pbuf, int *plen)\r | |
3989 | {\r | |
3990 | PyObject *result = NULL;\r | |
3991 | char *buf;\r | |
3992 | Py_ssize_t i;\r | |
3993 | int sign; /* 1 if '-', else 0 */\r | |
3994 | int len; /* number of characters */\r | |
3995 | Py_ssize_t llen;\r | |
3996 | int numdigits; /* len == numnondigits + numdigits */\r | |
3997 | int numnondigits = 0;\r | |
3998 | \r | |
3999 | switch (type) {\r | |
4000 | case 'd':\r | |
4001 | case 'u':\r | |
4002 | result = Py_TYPE(val)->tp_str(val);\r | |
4003 | break;\r | |
4004 | case 'o':\r | |
4005 | result = Py_TYPE(val)->tp_as_number->nb_oct(val);\r | |
4006 | break;\r | |
4007 | case 'x':\r | |
4008 | case 'X':\r | |
4009 | numnondigits = 2;\r | |
4010 | result = Py_TYPE(val)->tp_as_number->nb_hex(val);\r | |
4011 | break;\r | |
4012 | default:\r | |
4013 | assert(!"'type' not in [duoxX]");\r | |
4014 | }\r | |
4015 | if (!result)\r | |
4016 | return NULL;\r | |
4017 | \r | |
4018 | buf = PyString_AsString(result);\r | |
4019 | if (!buf) {\r | |
4020 | Py_DECREF(result);\r | |
4021 | return NULL;\r | |
4022 | }\r | |
4023 | \r | |
4024 | /* To modify the string in-place, there can only be one reference. */\r | |
4025 | if (Py_REFCNT(result) != 1) {\r | |
4026 | PyErr_BadInternalCall();\r | |
4027 | return NULL;\r | |
4028 | }\r | |
4029 | llen = PyString_Size(result);\r | |
4030 | if (llen > INT_MAX) {\r | |
4031 | PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");\r | |
4032 | return NULL;\r | |
4033 | }\r | |
4034 | len = (int)llen;\r | |
4035 | if (buf[len-1] == 'L') {\r | |
4036 | --len;\r | |
4037 | buf[len] = '\0';\r | |
4038 | }\r | |
4039 | sign = buf[0] == '-';\r | |
4040 | numnondigits += sign;\r | |
4041 | numdigits = len - numnondigits;\r | |
4042 | assert(numdigits > 0);\r | |
4043 | \r | |
4044 | /* Get rid of base marker unless F_ALT */\r | |
4045 | if ((flags & F_ALT) == 0) {\r | |
4046 | /* Need to skip 0x, 0X or 0. */\r | |
4047 | int skipped = 0;\r | |
4048 | switch (type) {\r | |
4049 | case 'o':\r | |
4050 | assert(buf[sign] == '0');\r | |
4051 | /* If 0 is only digit, leave it alone. */\r | |
4052 | if (numdigits > 1) {\r | |
4053 | skipped = 1;\r | |
4054 | --numdigits;\r | |
4055 | }\r | |
4056 | break;\r | |
4057 | case 'x':\r | |
4058 | case 'X':\r | |
4059 | assert(buf[sign] == '0');\r | |
4060 | assert(buf[sign + 1] == 'x');\r | |
4061 | skipped = 2;\r | |
4062 | numnondigits -= 2;\r | |
4063 | break;\r | |
4064 | }\r | |
4065 | if (skipped) {\r | |
4066 | buf += skipped;\r | |
4067 | len -= skipped;\r | |
4068 | if (sign)\r | |
4069 | buf[0] = '-';\r | |
4070 | }\r | |
4071 | assert(len == numnondigits + numdigits);\r | |
4072 | assert(numdigits > 0);\r | |
4073 | }\r | |
4074 | \r | |
4075 | /* Fill with leading zeroes to meet minimum width. */\r | |
4076 | if (prec > numdigits) {\r | |
4077 | PyObject *r1 = PyString_FromStringAndSize(NULL,\r | |
4078 | numnondigits + prec);\r | |
4079 | char *b1;\r | |
4080 | if (!r1) {\r | |
4081 | Py_DECREF(result);\r | |
4082 | return NULL;\r | |
4083 | }\r | |
4084 | b1 = PyString_AS_STRING(r1);\r | |
4085 | for (i = 0; i < numnondigits; ++i)\r | |
4086 | *b1++ = *buf++;\r | |
4087 | for (i = 0; i < prec - numdigits; i++)\r | |
4088 | *b1++ = '0';\r | |
4089 | for (i = 0; i < numdigits; i++)\r | |
4090 | *b1++ = *buf++;\r | |
4091 | *b1 = '\0';\r | |
4092 | Py_DECREF(result);\r | |
4093 | result = r1;\r | |
4094 | buf = PyString_AS_STRING(result);\r | |
4095 | len = numnondigits + prec;\r | |
4096 | }\r | |
4097 | \r | |
4098 | /* Fix up case for hex conversions. */\r | |
4099 | if (type == 'X') {\r | |
4100 | /* Need to convert all lower case letters to upper case.\r | |
4101 | and need to convert 0x to 0X (and -0x to -0X). */\r | |
4102 | for (i = 0; i < len; i++)\r | |
4103 | if (buf[i] >= 'a' && buf[i] <= 'x')\r | |
4104 | buf[i] -= 'a'-'A';\r | |
4105 | }\r | |
4106 | *pbuf = buf;\r | |
4107 | *plen = len;\r | |
4108 | return result;\r | |
4109 | }\r | |
4110 | \r | |
4111 | Py_LOCAL_INLINE(int)\r | |
4112 | formatint(char *buf, size_t buflen, int flags,\r | |
4113 | int prec, int type, PyObject *v)\r | |
4114 | {\r | |
4115 | /* fmt = '%#.' + `prec` + 'l' + `type`\r | |
4116 | worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)\r | |
4117 | + 1 + 1 = 24 */\r | |
4118 | char fmt[64]; /* plenty big enough! */\r | |
4119 | char *sign;\r | |
4120 | long x;\r | |
4121 | \r | |
4122 | x = PyInt_AsLong(v);\r | |
4123 | if (x == -1 && PyErr_Occurred()) {\r | |
4124 | PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",\r | |
4125 | Py_TYPE(v)->tp_name);\r | |
4126 | return -1;\r | |
4127 | }\r | |
4128 | if (x < 0 && type == 'u') {\r | |
4129 | type = 'd';\r | |
4130 | }\r | |
4131 | if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))\r | |
4132 | sign = "-";\r | |
4133 | else\r | |
4134 | sign = "";\r | |
4135 | if (prec < 0)\r | |
4136 | prec = 1;\r | |
4137 | \r | |
4138 | if ((flags & F_ALT) &&\r | |
4139 | (type == 'x' || type == 'X')) {\r | |
4140 | /* When converting under %#x or %#X, there are a number\r | |
4141 | * of issues that cause pain:\r | |
4142 | * - when 0 is being converted, the C standard leaves off\r | |
4143 | * the '0x' or '0X', which is inconsistent with other\r | |
4144 | * %#x/%#X conversions and inconsistent with Python's\r | |
4145 | * hex() function\r | |
4146 | * - there are platforms that violate the standard and\r | |
4147 | * convert 0 with the '0x' or '0X'\r | |
4148 | * (Metrowerks, Compaq Tru64)\r | |
4149 | * - there are platforms that give '0x' when converting\r | |
4150 | * under %#X, but convert 0 in accordance with the\r | |
4151 | * standard (OS/2 EMX)\r | |
4152 | *\r | |
4153 | * We can achieve the desired consistency by inserting our\r | |
4154 | * own '0x' or '0X' prefix, and substituting %x/%X in place\r | |
4155 | * of %#x/%#X.\r | |
4156 | *\r | |
4157 | * Note that this is the same approach as used in\r | |
4158 | * formatint() in unicodeobject.c\r | |
4159 | */\r | |
4160 | PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",\r | |
4161 | sign, type, prec, type);\r | |
4162 | }\r | |
4163 | else {\r | |
4164 | PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",\r | |
4165 | sign, (flags&F_ALT) ? "#" : "",\r | |
4166 | prec, type);\r | |
4167 | }\r | |
4168 | \r | |
4169 | /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))\r | |
4170 | * worst case buf = '-0x' + [0-9]*prec, where prec >= 11\r | |
4171 | */\r | |
4172 | if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {\r | |
4173 | PyErr_SetString(PyExc_OverflowError,\r | |
4174 | "formatted integer is too long (precision too large?)");\r | |
4175 | return -1;\r | |
4176 | }\r | |
4177 | if (sign[0])\r | |
4178 | PyOS_snprintf(buf, buflen, fmt, -x);\r | |
4179 | else\r | |
4180 | PyOS_snprintf(buf, buflen, fmt, x);\r | |
4181 | return (int)strlen(buf);\r | |
4182 | }\r | |
4183 | \r | |
4184 | Py_LOCAL_INLINE(int)\r | |
4185 | formatchar(char *buf, size_t buflen, PyObject *v)\r | |
4186 | {\r | |
4187 | /* presume that the buffer is at least 2 characters long */\r | |
4188 | if (PyString_Check(v)) {\r | |
4189 | if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))\r | |
4190 | return -1;\r | |
4191 | }\r | |
4192 | else {\r | |
4193 | if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))\r | |
4194 | return -1;\r | |
4195 | }\r | |
4196 | buf[1] = '\0';\r | |
4197 | return 1;\r | |
4198 | }\r | |
4199 | \r | |
4200 | /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)\r | |
4201 | \r | |
4202 | FORMATBUFLEN is the length of the buffer in which the ints &\r | |
4203 | chars are formatted. XXX This is a magic number. Each formatting\r | |
4204 | routine does bounds checking to ensure no overflow, but a better\r | |
4205 | solution may be to malloc a buffer of appropriate size for each\r | |
4206 | format. For now, the current solution is sufficient.\r | |
4207 | */\r | |
/* Size in bytes of the `formatbuf` scratch array that PyString_Format
   declares for each conversion and hands to formatint(). */
4208 | #define FORMATBUFLEN (size_t)120\r | |
4209 | \r | |
4210 | PyObject *\r | |
4211 | PyString_Format(PyObject *format, PyObject *args)\r | |
4212 | {\r | |
4213 | char *fmt, *res;\r | |
4214 | Py_ssize_t arglen, argidx;\r | |
4215 | Py_ssize_t reslen, rescnt, fmtcnt;\r | |
4216 | int args_owned = 0;\r | |
4217 | PyObject *result, *orig_args;\r | |
4218 | #ifdef Py_USING_UNICODE\r | |
4219 | PyObject *v, *w;\r | |
4220 | #endif\r | |
4221 | PyObject *dict = NULL;\r | |
4222 | if (format == NULL || !PyString_Check(format) || args == NULL) {\r | |
4223 | PyErr_BadInternalCall();\r | |
4224 | return NULL;\r | |
4225 | }\r | |
4226 | orig_args = args;\r | |
4227 | fmt = PyString_AS_STRING(format);\r | |
4228 | fmtcnt = PyString_GET_SIZE(format);\r | |
4229 | reslen = rescnt = fmtcnt + 100;\r | |
4230 | result = PyString_FromStringAndSize((char *)NULL, reslen);\r | |
4231 | if (result == NULL)\r | |
4232 | return NULL;\r | |
4233 | res = PyString_AsString(result);\r | |
4234 | if (PyTuple_Check(args)) {\r | |
4235 | arglen = PyTuple_GET_SIZE(args);\r | |
4236 | argidx = 0;\r | |
4237 | }\r | |
4238 | else {\r | |
4239 | arglen = -1;\r | |
4240 | argidx = -2;\r | |
4241 | }\r | |
4242 | if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&\r | |
4243 | !PyObject_TypeCheck(args, &PyBaseString_Type))\r | |
4244 | dict = args;\r | |
4245 | while (--fmtcnt >= 0) {\r | |
4246 | if (*fmt != '%') {\r | |
4247 | if (--rescnt < 0) {\r | |
4248 | rescnt = fmtcnt + 100;\r | |
4249 | reslen += rescnt;\r | |
4250 | if (_PyString_Resize(&result, reslen))\r | |
4251 | return NULL;\r | |
4252 | res = PyString_AS_STRING(result)\r | |
4253 | + reslen - rescnt;\r | |
4254 | --rescnt;\r | |
4255 | }\r | |
4256 | *res++ = *fmt++;\r | |
4257 | }\r | |
4258 | else {\r | |
4259 | /* Got a format specifier */\r | |
4260 | int flags = 0;\r | |
4261 | Py_ssize_t width = -1;\r | |
4262 | int prec = -1;\r | |
4263 | int c = '\0';\r | |
4264 | int fill;\r | |
4265 | int isnumok;\r | |
de08c53b DM |
4266 | PyObject *v = NULL;\r |
4267 | PyObject *temp = NULL;\r | |
4268 | char *pbuf = NULL;\r | |
4710c53d | 4269 | int sign;\r |
4270 | Py_ssize_t len;\r | |
4271 | char formatbuf[FORMATBUFLEN];\r | |
4272 | /* For format{int,char}() */\r | |
4273 | #ifdef Py_USING_UNICODE\r | |
4274 | char *fmt_start = fmt;\r | |
4275 | Py_ssize_t argidx_start = argidx;\r | |
4276 | #endif\r | |
4277 | \r | |
4278 | fmt++;\r | |
4279 | if (*fmt == '(') {\r | |
4280 | char *keystart;\r | |
4281 | Py_ssize_t keylen;\r | |
4282 | PyObject *key;\r | |
4283 | int pcount = 1;\r | |
4284 | \r | |
4285 | if (dict == NULL) {\r | |
4286 | PyErr_SetString(PyExc_TypeError,\r | |
4287 | "format requires a mapping");\r | |
4288 | goto error;\r | |
4289 | }\r | |
4290 | ++fmt;\r | |
4291 | --fmtcnt;\r | |
4292 | keystart = fmt;\r | |
4293 | /* Skip over balanced parentheses */\r | |
4294 | while (pcount > 0 && --fmtcnt >= 0) {\r | |
4295 | if (*fmt == ')')\r | |
4296 | --pcount;\r | |
4297 | else if (*fmt == '(')\r | |
4298 | ++pcount;\r | |
4299 | fmt++;\r | |
4300 | }\r | |
4301 | keylen = fmt - keystart - 1;\r | |
4302 | if (fmtcnt < 0 || pcount > 0) {\r | |
4303 | PyErr_SetString(PyExc_ValueError,\r | |
4304 | "incomplete format key");\r | |
4305 | goto error;\r | |
4306 | }\r | |
4307 | key = PyString_FromStringAndSize(keystart,\r | |
4308 | keylen);\r | |
4309 | if (key == NULL)\r | |
4310 | goto error;\r | |
4311 | if (args_owned) {\r | |
4312 | Py_DECREF(args);\r | |
4313 | args_owned = 0;\r | |
4314 | }\r | |
4315 | args = PyObject_GetItem(dict, key);\r | |
4316 | Py_DECREF(key);\r | |
4317 | if (args == NULL) {\r | |
4318 | goto error;\r | |
4319 | }\r | |
4320 | args_owned = 1;\r | |
4321 | arglen = -1;\r | |
4322 | argidx = -2;\r | |
4323 | }\r | |
4324 | while (--fmtcnt >= 0) {\r | |
4325 | switch (c = *fmt++) {\r | |
4326 | case '-': flags |= F_LJUST; continue;\r | |
4327 | case '+': flags |= F_SIGN; continue;\r | |
4328 | case ' ': flags |= F_BLANK; continue;\r | |
4329 | case '#': flags |= F_ALT; continue;\r | |
4330 | case '0': flags |= F_ZERO; continue;\r | |
4331 | }\r | |
4332 | break;\r | |
4333 | }\r | |
4334 | if (c == '*') {\r | |
4335 | v = getnextarg(args, arglen, &argidx);\r | |
4336 | if (v == NULL)\r | |
4337 | goto error;\r | |
4338 | if (!PyInt_Check(v)) {\r | |
4339 | PyErr_SetString(PyExc_TypeError,\r | |
4340 | "* wants int");\r | |
4341 | goto error;\r | |
4342 | }\r | |
4343 | width = PyInt_AsLong(v);\r | |
4344 | if (width < 0) {\r | |
4345 | flags |= F_LJUST;\r | |
4346 | width = -width;\r | |
4347 | }\r | |
4348 | if (--fmtcnt >= 0)\r | |
4349 | c = *fmt++;\r | |
4350 | }\r | |
4351 | else if (c >= 0 && isdigit(c)) {\r | |
4352 | width = c - '0';\r | |
4353 | while (--fmtcnt >= 0) {\r | |
4354 | c = Py_CHARMASK(*fmt++);\r | |
4355 | if (!isdigit(c))\r | |
4356 | break;\r | |
4357 | if ((width*10) / 10 != width) {\r | |
4358 | PyErr_SetString(\r | |
4359 | PyExc_ValueError,\r | |
4360 | "width too big");\r | |
4361 | goto error;\r | |
4362 | }\r | |
4363 | width = width*10 + (c - '0');\r | |
4364 | }\r | |
4365 | }\r | |
4366 | if (c == '.') {\r | |
4367 | prec = 0;\r | |
4368 | if (--fmtcnt >= 0)\r | |
4369 | c = *fmt++;\r | |
4370 | if (c == '*') {\r | |
4371 | v = getnextarg(args, arglen, &argidx);\r | |
4372 | if (v == NULL)\r | |
4373 | goto error;\r | |
4374 | if (!PyInt_Check(v)) {\r | |
4375 | PyErr_SetString(\r | |
4376 | PyExc_TypeError,\r | |
4377 | "* wants int");\r | |
4378 | goto error;\r | |
4379 | }\r | |
4380 | prec = PyInt_AsLong(v);\r | |
4381 | if (prec < 0)\r | |
4382 | prec = 0;\r | |
4383 | if (--fmtcnt >= 0)\r | |
4384 | c = *fmt++;\r | |
4385 | }\r | |
4386 | else if (c >= 0 && isdigit(c)) {\r | |
4387 | prec = c - '0';\r | |
4388 | while (--fmtcnt >= 0) {\r | |
4389 | c = Py_CHARMASK(*fmt++);\r | |
4390 | if (!isdigit(c))\r | |
4391 | break;\r | |
4392 | if ((prec*10) / 10 != prec) {\r | |
4393 | PyErr_SetString(\r | |
4394 | PyExc_ValueError,\r | |
4395 | "prec too big");\r | |
4396 | goto error;\r | |
4397 | }\r | |
4398 | prec = prec*10 + (c - '0');\r | |
4399 | }\r | |
4400 | }\r | |
4401 | } /* prec */\r | |
4402 | if (fmtcnt >= 0) {\r | |
4403 | if (c == 'h' || c == 'l' || c == 'L') {\r | |
4404 | if (--fmtcnt >= 0)\r | |
4405 | c = *fmt++;\r | |
4406 | }\r | |
4407 | }\r | |
4408 | if (fmtcnt < 0) {\r | |
4409 | PyErr_SetString(PyExc_ValueError,\r | |
4410 | "incomplete format");\r | |
4411 | goto error;\r | |
4412 | }\r | |
4413 | if (c != '%') {\r | |
4414 | v = getnextarg(args, arglen, &argidx);\r | |
4415 | if (v == NULL)\r | |
4416 | goto error;\r | |
4417 | }\r | |
4418 | sign = 0;\r | |
4419 | fill = ' ';\r | |
4420 | switch (c) {\r | |
4421 | case '%':\r | |
4422 | pbuf = "%";\r | |
4423 | len = 1;\r | |
4424 | break;\r | |
4425 | case 's':\r | |
4426 | #ifdef Py_USING_UNICODE\r | |
4427 | if (PyUnicode_Check(v)) {\r | |
4428 | fmt = fmt_start;\r | |
4429 | argidx = argidx_start;\r | |
4430 | goto unicode;\r | |
4431 | }\r | |
4432 | #endif\r | |
4433 | temp = _PyObject_Str(v);\r | |
4434 | #ifdef Py_USING_UNICODE\r | |
4435 | if (temp != NULL && PyUnicode_Check(temp)) {\r | |
4436 | Py_DECREF(temp);\r | |
4437 | fmt = fmt_start;\r | |
4438 | argidx = argidx_start;\r | |
4439 | goto unicode;\r | |
4440 | }\r | |
4441 | #endif\r | |
4442 | /* Fall through */\r | |
4443 | case 'r':\r | |
4444 | if (c == 'r')\r | |
4445 | temp = PyObject_Repr(v);\r | |
4446 | if (temp == NULL)\r | |
4447 | goto error;\r | |
4448 | if (!PyString_Check(temp)) {\r | |
4449 | PyErr_SetString(PyExc_TypeError,\r | |
4450 | "%s argument has non-string str()");\r | |
4451 | Py_DECREF(temp);\r | |
4452 | goto error;\r | |
4453 | }\r | |
4454 | pbuf = PyString_AS_STRING(temp);\r | |
4455 | len = PyString_GET_SIZE(temp);\r | |
4456 | if (prec >= 0 && len > prec)\r | |
4457 | len = prec;\r | |
4458 | break;\r | |
4459 | case 'i':\r | |
4460 | case 'd':\r | |
4461 | case 'u':\r | |
4462 | case 'o':\r | |
4463 | case 'x':\r | |
4464 | case 'X':\r | |
4465 | if (c == 'i')\r | |
4466 | c = 'd';\r | |
4467 | isnumok = 0;\r | |
4468 | if (PyNumber_Check(v)) {\r | |
4469 | PyObject *iobj=NULL;\r | |
4470 | \r | |
4471 | if (PyInt_Check(v) || (PyLong_Check(v))) {\r | |
4472 | iobj = v;\r | |
4473 | Py_INCREF(iobj);\r | |
4474 | }\r | |
4475 | else {\r | |
4476 | iobj = PyNumber_Int(v);\r | |
4477 | if (iobj==NULL) iobj = PyNumber_Long(v);\r | |
4478 | }\r | |
4479 | if (iobj!=NULL) {\r | |
4480 | if (PyInt_Check(iobj)) {\r | |
4481 | isnumok = 1;\r | |
4482 | pbuf = formatbuf;\r | |
4483 | len = formatint(pbuf,\r | |
4484 | sizeof(formatbuf),\r | |
4485 | flags, prec, c, iobj);\r | |
4486 | Py_DECREF(iobj);\r | |
4487 | if (len < 0)\r | |
4488 | goto error;\r | |
4489 | sign = 1;\r | |
4490 | }\r | |
4491 | else if (PyLong_Check(iobj)) {\r | |
4492 | int ilen;\r | |
4493 | \r | |
4494 | isnumok = 1;\r | |
4495 | temp = _PyString_FormatLong(iobj, flags,\r | |
4496 | prec, c, &pbuf, &ilen);\r | |
4497 | Py_DECREF(iobj);\r | |
4498 | len = ilen;\r | |
4499 | if (!temp)\r | |
4500 | goto error;\r | |
4501 | sign = 1;\r | |
4502 | }\r | |
4503 | else {\r | |
4504 | Py_DECREF(iobj);\r | |
4505 | }\r | |
4506 | }\r | |
4507 | }\r | |
4508 | if (!isnumok) {\r | |
4509 | PyErr_Format(PyExc_TypeError,\r | |
4510 | "%%%c format: a number is required, "\r | |
4511 | "not %.200s", c, Py_TYPE(v)->tp_name);\r | |
4512 | goto error;\r | |
4513 | }\r | |
4514 | if (flags & F_ZERO)\r | |
4515 | fill = '0';\r | |
4516 | break;\r | |
4517 | case 'e':\r | |
4518 | case 'E':\r | |
4519 | case 'f':\r | |
4520 | case 'F':\r | |
4521 | case 'g':\r | |
4522 | case 'G':\r | |
4523 | temp = formatfloat(v, flags, prec, c);\r | |
4524 | if (temp == NULL)\r | |
4525 | goto error;\r | |
4526 | pbuf = PyString_AS_STRING(temp);\r | |
4527 | len = PyString_GET_SIZE(temp);\r | |
4528 | sign = 1;\r | |
4529 | if (flags & F_ZERO)\r | |
4530 | fill = '0';\r | |
4531 | break;\r | |
4532 | case 'c':\r | |
4533 | #ifdef Py_USING_UNICODE\r | |
4534 | if (PyUnicode_Check(v)) {\r | |
4535 | fmt = fmt_start;\r | |
4536 | argidx = argidx_start;\r | |
4537 | goto unicode;\r | |
4538 | }\r | |
4539 | #endif\r | |
4540 | pbuf = formatbuf;\r | |
4541 | len = formatchar(pbuf, sizeof(formatbuf), v);\r | |
4542 | if (len < 0)\r | |
4543 | goto error;\r | |
4544 | break;\r | |
4545 | default:\r | |
4546 | PyErr_Format(PyExc_ValueError,\r | |
4547 | "unsupported format character '%c' (0x%x) "\r | |
4548 | "at index %zd",\r | |
4549 | c, c,\r | |
4550 | (Py_ssize_t)(fmt - 1 -\r | |
4551 | PyString_AsString(format)));\r | |
4552 | goto error;\r | |
4553 | }\r | |
4554 | if (sign) {\r | |
4555 | if (*pbuf == '-' || *pbuf == '+') {\r | |
4556 | sign = *pbuf++;\r | |
4557 | len--;\r | |
4558 | }\r | |
4559 | else if (flags & F_SIGN)\r | |
4560 | sign = '+';\r | |
4561 | else if (flags & F_BLANK)\r | |
4562 | sign = ' ';\r | |
4563 | else\r | |
4564 | sign = 0;\r | |
4565 | }\r | |
4566 | if (width < len)\r | |
4567 | width = len;\r | |
4568 | if (rescnt - (sign != 0) < width) {\r | |
4569 | reslen -= rescnt;\r | |
4570 | rescnt = width + fmtcnt + 100;\r | |
4571 | reslen += rescnt;\r | |
4572 | if (reslen < 0) {\r | |
4573 | Py_DECREF(result);\r | |
4574 | Py_XDECREF(temp);\r | |
4575 | return PyErr_NoMemory();\r | |
4576 | }\r | |
4577 | if (_PyString_Resize(&result, reslen)) {\r | |
4578 | Py_XDECREF(temp);\r | |
4579 | return NULL;\r | |
4580 | }\r | |
4581 | res = PyString_AS_STRING(result)\r | |
4582 | + reslen - rescnt;\r | |
4583 | }\r | |
4584 | if (sign) {\r | |
4585 | if (fill != ' ')\r | |
4586 | *res++ = sign;\r | |
4587 | rescnt--;\r | |
4588 | if (width > len)\r | |
4589 | width--;\r | |
4590 | }\r | |
4591 | if ((flags & F_ALT) && (c == 'x' || c == 'X')) {\r | |
4592 | assert(pbuf[0] == '0');\r | |
4593 | assert(pbuf[1] == c);\r | |
4594 | if (fill != ' ') {\r | |
4595 | *res++ = *pbuf++;\r | |
4596 | *res++ = *pbuf++;\r | |
4597 | }\r | |
4598 | rescnt -= 2;\r | |
4599 | width -= 2;\r | |
4600 | if (width < 0)\r | |
4601 | width = 0;\r | |
4602 | len -= 2;\r | |
4603 | }\r | |
4604 | if (width > len && !(flags & F_LJUST)) {\r | |
4605 | do {\r | |
4606 | --rescnt;\r | |
4607 | *res++ = fill;\r | |
4608 | } while (--width > len);\r | |
4609 | }\r | |
4610 | if (fill == ' ') {\r | |
4611 | if (sign)\r | |
4612 | *res++ = sign;\r | |
4613 | if ((flags & F_ALT) &&\r | |
4614 | (c == 'x' || c == 'X')) {\r | |
4615 | assert(pbuf[0] == '0');\r | |
4616 | assert(pbuf[1] == c);\r | |
4617 | *res++ = *pbuf++;\r | |
4618 | *res++ = *pbuf++;\r | |
4619 | }\r | |
4620 | }\r | |
4621 | Py_MEMCPY(res, pbuf, len);\r | |
4622 | res += len;\r | |
4623 | rescnt -= len;\r | |
4624 | while (--width >= len) {\r | |
4625 | --rescnt;\r | |
4626 | *res++ = ' ';\r | |
4627 | }\r | |
4628 | if (dict && (argidx < arglen) && c != '%') {\r | |
4629 | PyErr_SetString(PyExc_TypeError,\r | |
4630 | "not all arguments converted during string formatting");\r | |
4631 | Py_XDECREF(temp);\r | |
4632 | goto error;\r | |
4633 | }\r | |
4634 | Py_XDECREF(temp);\r | |
4635 | } /* '%' */\r | |
4636 | } /* until end */\r | |
4637 | if (argidx < arglen && !dict) {\r | |
4638 | PyErr_SetString(PyExc_TypeError,\r | |
4639 | "not all arguments converted during string formatting");\r | |
4640 | goto error;\r | |
4641 | }\r | |
4642 | if (args_owned) {\r | |
4643 | Py_DECREF(args);\r | |
4644 | }\r | |
4645 | if (_PyString_Resize(&result, reslen - rescnt))\r | |
4646 | return NULL;\r | |
4647 | return result;\r | |
4648 | \r | |
4649 | #ifdef Py_USING_UNICODE\r | |
4650 | unicode:\r | |
4651 | if (args_owned) {\r | |
4652 | Py_DECREF(args);\r | |
4653 | args_owned = 0;\r | |
4654 | }\r | |
4655 | /* Fiddle args right (remove the first argidx arguments) */\r | |
4656 | if (PyTuple_Check(orig_args) && argidx > 0) {\r | |
4657 | PyObject *v;\r | |
4658 | Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;\r | |
4659 | v = PyTuple_New(n);\r | |
4660 | if (v == NULL)\r | |
4661 | goto error;\r | |
4662 | while (--n >= 0) {\r | |
4663 | PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);\r | |
4664 | Py_INCREF(w);\r | |
4665 | PyTuple_SET_ITEM(v, n, w);\r | |
4666 | }\r | |
4667 | args = v;\r | |
4668 | } else {\r | |
4669 | Py_INCREF(orig_args);\r | |
4670 | args = orig_args;\r | |
4671 | }\r | |
4672 | args_owned = 1;\r | |
4673 | /* Take what we have of the result and let the Unicode formatting\r | |
4674 | function format the rest of the input. */\r | |
4675 | rescnt = res - PyString_AS_STRING(result);\r | |
4676 | if (_PyString_Resize(&result, rescnt))\r | |
4677 | goto error;\r | |
4678 | fmtcnt = PyString_GET_SIZE(format) - \\r | |
4679 | (fmt - PyString_AS_STRING(format));\r | |
4680 | format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);\r | |
4681 | if (format == NULL)\r | |
4682 | goto error;\r | |
4683 | v = PyUnicode_Format(format, args);\r | |
4684 | Py_DECREF(format);\r | |
4685 | if (v == NULL)\r | |
4686 | goto error;\r | |
4687 | /* Paste what we have (result) to what the Unicode formatting\r | |
4688 | function returned (v) and return the result (or error) */\r | |
4689 | w = PyUnicode_Concat(result, v);\r | |
4690 | Py_DECREF(result);\r | |
4691 | Py_DECREF(v);\r | |
4692 | Py_DECREF(args);\r | |
4693 | return w;\r | |
4694 | #endif /* Py_USING_UNICODE */\r | |
4695 | \r | |
4696 | error:\r | |
4697 | Py_DECREF(result);\r | |
4698 | if (args_owned) {\r | |
4699 | Py_DECREF(args);\r | |
4700 | }\r | |
4701 | return NULL;\r | |
4702 | }\r | |
4703 | \r | |
4704 | void\r | |
4705 | PyString_InternInPlace(PyObject **p)\r | |
4706 | {\r | |
4707 | register PyStringObject *s = (PyStringObject *)(*p);\r | |
4708 | PyObject *t;\r | |
4709 | if (s == NULL || !PyString_Check(s))\r | |
4710 | Py_FatalError("PyString_InternInPlace: strings only please!");\r | |
4711 | /* If it's a string subclass, we don't really know what putting\r | |
4712 | it in the interned dict might do. */\r | |
4713 | if (!PyString_CheckExact(s))\r | |
4714 | return;\r | |
4715 | if (PyString_CHECK_INTERNED(s))\r | |
4716 | return;\r | |
4717 | if (interned == NULL) {\r | |
4718 | interned = PyDict_New();\r | |
4719 | if (interned == NULL) {\r | |
4720 | PyErr_Clear(); /* Don't leave an exception */\r | |
4721 | return;\r | |
4722 | }\r | |
4723 | }\r | |
4724 | t = PyDict_GetItem(interned, (PyObject *)s);\r | |
4725 | if (t) {\r | |
4726 | Py_INCREF(t);\r | |
4727 | Py_DECREF(*p);\r | |
4728 | *p = t;\r | |
4729 | return;\r | |
4730 | }\r | |
4731 | \r | |
4732 | if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {\r | |
4733 | PyErr_Clear();\r | |
4734 | return;\r | |
4735 | }\r | |
4736 | /* The two references in interned are not counted by refcnt.\r | |
4737 | The string deallocator will take care of this */\r | |
4738 | Py_REFCNT(s) -= 2;\r | |
4739 | PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;\r | |
4740 | }\r | |
4741 | \r | |
4742 | void\r | |
4743 | PyString_InternImmortal(PyObject **p)\r | |
4744 | {\r | |
4745 | PyString_InternInPlace(p);\r | |
4746 | if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {\r | |
4747 | PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;\r | |
4748 | Py_INCREF(*p);\r | |
4749 | }\r | |
4750 | }\r | |
4751 | \r | |
4752 | \r | |
4753 | PyObject *\r | |
4754 | PyString_InternFromString(const char *cp)\r | |
4755 | {\r | |
4756 | PyObject *s = PyString_FromString(cp);\r | |
4757 | if (s == NULL)\r | |
4758 | return NULL;\r | |
4759 | PyString_InternInPlace(&s);\r | |
4760 | return s;\r | |
4761 | }\r | |
4762 | \r | |
4763 | void\r | |
4764 | PyString_Fini(void)\r | |
4765 | {\r | |
4766 | int i;\r | |
4767 | for (i = 0; i < UCHAR_MAX + 1; i++) {\r | |
4768 | Py_XDECREF(characters[i]);\r | |
4769 | characters[i] = NULL;\r | |
4770 | }\r | |
4771 | Py_XDECREF(nullstring);\r | |
4772 | nullstring = NULL;\r | |
4773 | }\r | |
4774 | \r | |
4775 | void _Py_ReleaseInternedStrings(void)\r | |
4776 | {\r | |
4777 | PyObject *keys;\r | |
4778 | PyStringObject *s;\r | |
4779 | Py_ssize_t i, n;\r | |
4780 | Py_ssize_t immortal_size = 0, mortal_size = 0;\r | |
4781 | \r | |
4782 | if (interned == NULL || !PyDict_Check(interned))\r | |
4783 | return;\r | |
4784 | keys = PyDict_Keys(interned);\r | |
4785 | if (keys == NULL || !PyList_Check(keys)) {\r | |
4786 | PyErr_Clear();\r | |
4787 | return;\r | |
4788 | }\r | |
4789 | \r | |
4790 | /* Since _Py_ReleaseInternedStrings() is intended to help a leak\r | |
4791 | detector, interned strings are not forcibly deallocated; rather, we\r | |
4792 | give them their stolen references back, and then clear and DECREF\r | |
4793 | the interned dict. */\r | |
4794 | \r | |
4795 | n = PyList_GET_SIZE(keys);\r | |
4796 | fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",\r | |
4797 | n);\r | |
4798 | for (i = 0; i < n; i++) {\r | |
4799 | s = (PyStringObject *) PyList_GET_ITEM(keys, i);\r | |
4800 | switch (s->ob_sstate) {\r | |
4801 | case SSTATE_NOT_INTERNED:\r | |
4802 | /* XXX Shouldn't happen */\r | |
4803 | break;\r | |
4804 | case SSTATE_INTERNED_IMMORTAL:\r | |
4805 | Py_REFCNT(s) += 1;\r | |
4806 | immortal_size += Py_SIZE(s);\r | |
4807 | break;\r | |
4808 | case SSTATE_INTERNED_MORTAL:\r | |
4809 | Py_REFCNT(s) += 2;\r | |
4810 | mortal_size += Py_SIZE(s);\r | |
4811 | break;\r | |
4812 | default:\r | |
4813 | Py_FatalError("Inconsistent interned string state.");\r | |
4814 | }\r | |
4815 | s->ob_sstate = SSTATE_NOT_INTERNED;\r | |
4816 | }\r | |
4817 | fprintf(stderr, "total size of all interned strings: "\r | |
4818 | "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "\r | |
4819 | "mortal/immortal\n", mortal_size, immortal_size);\r | |
4820 | Py_DECREF(keys);\r | |
4821 | PyDict_Clear(interned);\r | |
4822 | Py_DECREF(interned);\r | |
4823 | interned = NULL;\r | |
4824 | }\r |