1 /* String (str/bytes) object implementation */
3 #define PY_SSIZE_T_CLEAN
10 Py_ssize_t null_strings
, one_strings
;
13 static PyStringObject
*characters
[UCHAR_MAX
+ 1];
14 static PyStringObject
*nullstring
;
16 /* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
21 Another way to look at this is to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
24 static PyObject
*interned
;
26 /* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
32 #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
35 For PyString_FromString(), the parameter `str' points to a null-terminated
36 string containing exactly `size' bytes.
38 For PyString_FromStringAndSize(), the parameter `str' is
39 either NULL or else points to a string containing at least `size' bytes.
40 For PyString_FromStringAndSize(), the string in the `str' parameter does
41 not have to be null-terminated. (Therefore it is safe to construct a
42 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
44 bytes (setting the last byte to the null terminating character) and you can
45 fill in the data yourself. If `str' is non-NULL then the resulting
46 PyString object must be treated as immutable and you must not fill in nor
47 alter the data yourself, since the strings may be shared.
49 The PyObject member `op->ob_size', which denotes the number of "extra
50 items" in a variable-size object, will contain the number of bytes
51 allocated for string data, not counting the null terminating character.
52 It is therefore equal to the `size' parameter (for
53 PyString_FromStringAndSize()) or the length of the string in the `str'
54 parameter (for PyString_FromString()).
57 PyString_FromStringAndSize(const char *str
, Py_ssize_t size
)
59 register PyStringObject
*op
;
61 PyErr_SetString(PyExc_SystemError
,
62 "Negative size passed to PyString_FromStringAndSize");
65 if (size
== 0 && (op
= nullstring
) != NULL
) {
70 return (PyObject
*)op
;
72 if (size
== 1 && str
!= NULL
&&
73 (op
= characters
[*str
& UCHAR_MAX
]) != NULL
)
79 return (PyObject
*)op
;
82 if (size
> PY_SSIZE_T_MAX
- PyStringObject_SIZE
) {
83 PyErr_SetString(PyExc_OverflowError
, "string is too large");
87 /* Inline PyObject_NewVar */
88 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ size
);
90 return PyErr_NoMemory();
91 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
93 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
95 Py_MEMCPY(op
->ob_sval
, str
, size
);
96 op
->ob_sval
[size
] = '\0';
97 /* share short strings */
99 PyObject
*t
= (PyObject
*)op
;
100 PyString_InternInPlace(&t
);
101 op
= (PyStringObject
*)t
;
104 } else if (size
== 1 && str
!= NULL
) {
105 PyObject
*t
= (PyObject
*)op
;
106 PyString_InternInPlace(&t
);
107 op
= (PyStringObject
*)t
;
108 characters
[*str
& UCHAR_MAX
] = op
;
111 return (PyObject
*) op
;
115 PyString_FromString(const char *str
)
117 register size_t size
;
118 register PyStringObject
*op
;
122 if (size
> PY_SSIZE_T_MAX
- PyStringObject_SIZE
) {
123 PyErr_SetString(PyExc_OverflowError
,
124 "string is too long for a Python string");
127 if (size
== 0 && (op
= nullstring
) != NULL
) {
132 return (PyObject
*)op
;
134 if (size
== 1 && (op
= characters
[*str
& UCHAR_MAX
]) != NULL
) {
139 return (PyObject
*)op
;
142 /* Inline PyObject_NewVar */
143 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ size
);
145 return PyErr_NoMemory();
146 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
148 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
149 Py_MEMCPY(op
->ob_sval
, str
, size
+1);
150 /* share short strings */
152 PyObject
*t
= (PyObject
*)op
;
153 PyString_InternInPlace(&t
);
154 op
= (PyStringObject
*)t
;
157 } else if (size
== 1) {
158 PyObject
*t
= (PyObject
*)op
;
159 PyString_InternInPlace(&t
);
160 op
= (PyStringObject
*)t
;
161 characters
[*str
& UCHAR_MAX
] = op
;
164 return (PyObject
*) op
;
168 PyString_FromFormatV(const char *format
, va_list vargs
)
176 #ifdef VA_LIST_IS_ARRAY
177 Py_MEMCPY(count
, vargs
, sizeof(va_list));
180 __va_copy(count
, vargs
);
185 /* step 1: figure out how large a buffer we need */
186 for (f
= format
; *f
; f
++) {
188 #ifdef HAVE_LONG_LONG
189 int longlongflag
= 0;
192 while (*++f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
195 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196 * they don't affect the amount of space we reserve.
199 if (f
[1] == 'd' || f
[1] == 'u') {
202 #ifdef HAVE_LONG_LONG
203 else if (f
[1] == 'l' &&
204 (f
[2] == 'd' || f
[2] == 'u')) {
210 else if (*f
== 'z' && (f
[1] == 'd' || f
[1] == 'u')) {
216 (void)va_arg(count
, int);
217 /* fall through... */
221 case 'd': case 'u': case 'i': case 'x':
222 (void) va_arg(count
, int);
223 #ifdef HAVE_LONG_LONG
225 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226 plus 1 for the sign. 53/22 is an upper
227 bound for log10(256). */
229 n
+= 2 + (SIZEOF_LONG_LONG
*53-1) / 22;
232 /* 20 bytes is enough to hold a 64-bit
233 integer. Decimal takes the most
234 space. This isn't enough for
240 s
= va_arg(count
, char*);
244 (void) va_arg(count
, int);
245 /* maximum 64-bit pointer representation:
247 * so 19 characters is enough.
248 * XXX I count 18 -- what's the extra for?
253 /* if we stumble upon an unknown
254 formatting code, copy the rest of
255 the format string to the output
256 string. (we cannot just skip the
257 code, since there's no way to know
258 what's in the argument list) */
266 /* step 2: fill the buffer */
267 /* Since we've analyzed how much space we need for the worst case,
268 use sprintf directly instead of the slower PyOS_snprintf. */
269 string
= PyString_FromStringAndSize(NULL
, n
);
273 s
= PyString_AsString(string
);
275 for (f
= format
; *f
; f
++) {
280 #ifdef HAVE_LONG_LONG
281 int longlongflag
= 0;
284 /* parse the width.precision part (we're only
285 interested in the precision value, if any) */
287 while (isdigit(Py_CHARMASK(*f
)))
288 n
= (n
*10) + *f
++ - '0';
292 while (isdigit(Py_CHARMASK(*f
)))
293 n
= (n
*10) + *f
++ - '0';
295 while (*f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
297 /* Handle %ld, %lu, %lld and %llu. */
299 if (f
[1] == 'd' || f
[1] == 'u') {
303 #ifdef HAVE_LONG_LONG
304 else if (f
[1] == 'l' &&
305 (f
[2] == 'd' || f
[2] == 'u')) {
311 /* handle the size_t flag. */
312 else if (*f
== 'z' && (f
[1] == 'd' || f
[1] == 'u')) {
319 *s
++ = va_arg(vargs
, int);
323 sprintf(s
, "%ld", va_arg(vargs
, long));
324 #ifdef HAVE_LONG_LONG
325 else if (longlongflag
)
326 sprintf(s
, "%" PY_FORMAT_LONG_LONG
"d",
327 va_arg(vargs
, PY_LONG_LONG
));
330 sprintf(s
, "%" PY_FORMAT_SIZE_T
"d",
331 va_arg(vargs
, Py_ssize_t
));
333 sprintf(s
, "%d", va_arg(vargs
, int));
339 va_arg(vargs
, unsigned long));
340 #ifdef HAVE_LONG_LONG
341 else if (longlongflag
)
342 sprintf(s
, "%" PY_FORMAT_LONG_LONG
"u",
343 va_arg(vargs
, PY_LONG_LONG
));
346 sprintf(s
, "%" PY_FORMAT_SIZE_T
"u",
347 va_arg(vargs
, size_t));
350 va_arg(vargs
, unsigned int));
354 sprintf(s
, "%i", va_arg(vargs
, int));
358 sprintf(s
, "%x", va_arg(vargs
, int));
362 p
= va_arg(vargs
, char*);
370 sprintf(s
, "%p", va_arg(vargs
, void*));
371 /* %p is ill-defined: ensure leading 0x. */
374 else if (s
[1] != 'x') {
375 memmove(s
+2, s
, strlen(s
)+1);
394 if (_PyString_Resize(&string
, s
- PyString_AS_STRING(string
)))
400 PyString_FromFormat(const char *format
, ...)
405 #ifdef HAVE_STDARG_PROTOTYPES
406 va_start(vargs
, format
);
410 ret
= PyString_FromFormatV(format
, vargs
);
416 PyObject
*PyString_Decode(const char *s
,
418 const char *encoding
,
423 str
= PyString_FromStringAndSize(s
, size
);
426 v
= PyString_AsDecodedString(str
, encoding
, errors
);
431 PyObject
*PyString_AsDecodedObject(PyObject
*str
,
432 const char *encoding
,
437 if (!PyString_Check(str
)) {
442 if (encoding
== NULL
) {
443 #ifdef Py_USING_UNICODE
444 encoding
= PyUnicode_GetDefaultEncoding();
446 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
451 /* Decode via the codec registry */
452 v
= PyCodec_Decode(str
, encoding
, errors
);
462 PyObject
*PyString_AsDecodedString(PyObject
*str
,
463 const char *encoding
,
468 v
= PyString_AsDecodedObject(str
, encoding
, errors
);
472 #ifdef Py_USING_UNICODE
473 /* Convert Unicode to a string using the default encoding */
474 if (PyUnicode_Check(v
)) {
476 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
482 if (!PyString_Check(v
)) {
483 PyErr_Format(PyExc_TypeError
,
484 "decoder did not return a string object (type=%.400s)",
485 Py_TYPE(v
)->tp_name
);
496 PyObject
*PyString_Encode(const char *s
,
498 const char *encoding
,
503 str
= PyString_FromStringAndSize(s
, size
);
506 v
= PyString_AsEncodedString(str
, encoding
, errors
);
511 PyObject
*PyString_AsEncodedObject(PyObject
*str
,
512 const char *encoding
,
517 if (!PyString_Check(str
)) {
522 if (encoding
== NULL
) {
523 #ifdef Py_USING_UNICODE
524 encoding
= PyUnicode_GetDefaultEncoding();
526 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
531 /* Encode via the codec registry */
532 v
= PyCodec_Encode(str
, encoding
, errors
);
542 PyObject
*PyString_AsEncodedString(PyObject
*str
,
543 const char *encoding
,
548 v
= PyString_AsEncodedObject(str
, encoding
, errors
);
552 #ifdef Py_USING_UNICODE
553 /* Convert Unicode to a string using the default encoding */
554 if (PyUnicode_Check(v
)) {
556 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
562 if (!PyString_Check(v
)) {
563 PyErr_Format(PyExc_TypeError
,
564 "encoder did not return a string object (type=%.400s)",
565 Py_TYPE(v
)->tp_name
);
577 string_dealloc(PyObject
*op
)
579 switch (PyString_CHECK_INTERNED(op
)) {
580 case SSTATE_NOT_INTERNED
:
583 case SSTATE_INTERNED_MORTAL
:
584 /* revive dead object temporarily for DelItem */
586 if (PyDict_DelItem(interned
, op
) != 0)
588 "deletion of interned string failed");
591 case SSTATE_INTERNED_IMMORTAL
:
592 Py_FatalError("Immortal interned string died.");
595 Py_FatalError("Inconsistent interned string state.");
597 Py_TYPE(op
)->tp_free(op
);
600 /* Unescape a backslash-escaped string. If unicode is non-zero,
601 the string is a u-literal. If recode_encoding is non-zero,
602 the string is UTF-8 encoded and should be re-encoded in the
603 specified encoding. */
605 PyObject
*PyString_DecodeEscape(const char *s
,
609 const char *recode_encoding
)
615 Py_ssize_t newlen
= recode_encoding
? 4*len
:len
;
616 v
= PyString_FromStringAndSize((char *)NULL
, newlen
);
619 p
= buf
= PyString_AsString(v
);
624 #ifdef Py_USING_UNICODE
625 if (recode_encoding
&& (*s
& 0x80)) {
631 /* Decode non-ASCII bytes as UTF-8. */
632 while (t
< end
&& (*t
& 0x80)) t
++;
633 u
= PyUnicode_DecodeUTF8(s
, t
- s
, errors
);
636 /* Recode them in target encoding. */
637 w
= PyUnicode_AsEncodedString(
638 u
, recode_encoding
, errors
);
642 /* Append bytes to output buffer. */
643 assert(PyString_Check(w
));
644 r
= PyString_AS_STRING(w
);
645 rn
= PyString_GET_SIZE(w
);
660 PyErr_SetString(PyExc_ValueError
,
661 "Trailing \\ in string");
665 /* XXX This assumes ASCII! */
667 case '\\': *p
++ = '\\'; break;
668 case '\'': *p
++ = '\''; break;
669 case '\"': *p
++ = '\"'; break;
670 case 'b': *p
++ = '\b'; break;
671 case 'f': *p
++ = '\014'; break; /* FF */
672 case 't': *p
++ = '\t'; break;
673 case 'n': *p
++ = '\n'; break;
674 case 'r': *p
++ = '\r'; break;
675 case 'v': *p
++ = '\013'; break; /* VT */
676 case 'a': *p
++ = '\007'; break; /* BEL, not classic C */
677 case '0': case '1': case '2': case '3':
678 case '4': case '5': case '6': case '7':
680 if (s
< end
&& '0' <= *s
&& *s
<= '7') {
681 c
= (c
<<3) + *s
++ - '0';
682 if (s
< end
&& '0' <= *s
&& *s
<= '7')
683 c
= (c
<<3) + *s
++ - '0';
689 isxdigit(Py_CHARMASK(s
[0])) &&
690 isxdigit(Py_CHARMASK(s
[1])))
713 if (!errors
|| strcmp(errors
, "strict") == 0) {
714 PyErr_SetString(PyExc_ValueError
,
715 "invalid \\x escape");
718 if (strcmp(errors
, "replace") == 0) {
720 } else if (strcmp(errors
, "ignore") == 0)
723 PyErr_Format(PyExc_ValueError
,
725 "unknown error handling code: %.400s",
729 #ifndef Py_USING_UNICODE
734 PyErr_SetString(PyExc_ValueError
,
735 "Unicode escapes not legal "
736 "when Unicode disabled");
743 goto non_esc
; /* an arbitrary number of unescaped
744 UTF-8 bytes may follow. */
747 if (p
-buf
< newlen
&& _PyString_Resize(&v
, p
- buf
))
755 /* -------------------------------------------------------------------- */
759 string_getsize(register PyObject
*op
)
763 if (PyString_AsStringAndSize(op
, &s
, &len
))
768 static /*const*/ char *
769 string_getbuffer(register PyObject
*op
)
773 if (PyString_AsStringAndSize(op
, &s
, &len
))
779 PyString_Size(register PyObject
*op
)
781 if (!PyString_Check(op
))
782 return string_getsize(op
);
787 PyString_AsString(register PyObject
*op
)
789 if (!PyString_Check(op
))
790 return string_getbuffer(op
);
791 return ((PyStringObject
*)op
) -> ob_sval
;
795 PyString_AsStringAndSize(register PyObject
*obj
,
797 register Py_ssize_t
*len
)
800 PyErr_BadInternalCall();
804 if (!PyString_Check(obj
)) {
805 #ifdef Py_USING_UNICODE
806 if (PyUnicode_Check(obj
)) {
807 obj
= _PyUnicode_AsDefaultEncodedString(obj
, NULL
);
814 PyErr_Format(PyExc_TypeError
,
815 "expected string or Unicode object, "
816 "%.200s found", Py_TYPE(obj
)->tp_name
);
821 *s
= PyString_AS_STRING(obj
);
823 *len
= PyString_GET_SIZE(obj
);
824 else if (strlen(*s
) != (size_t)PyString_GET_SIZE(obj
)) {
825 PyErr_SetString(PyExc_TypeError
,
826 "expected string without null bytes");
832 /* -------------------------------------------------------------------- */
835 #include "stringlib/stringdefs.h"
836 #include "stringlib/fastsearch.h"
838 #include "stringlib/count.h"
839 #include "stringlib/find.h"
840 #include "stringlib/partition.h"
841 #include "stringlib/split.h"
843 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
844 #include "stringlib/localeutil.h"
849 string_print(PyStringObject
*op
, FILE *fp
, int flags
)
851 Py_ssize_t i
, str_len
;
855 /* XXX Ought to check for interrupts when writing long strings */
856 if (! PyString_CheckExact(op
)) {
858 /* A str subclass may have its own __str__ method. */
859 op
= (PyStringObject
*) PyObject_Str((PyObject
*)op
);
862 ret
= string_print(op
, fp
, flags
);
866 if (flags
& Py_PRINT_RAW
) {
867 char *data
= op
->ob_sval
;
868 Py_ssize_t size
= Py_SIZE(op
);
869 Py_BEGIN_ALLOW_THREADS
870 while (size
> INT_MAX
) {
871 /* Very long strings cannot be written atomically.
872 * But don't write exactly INT_MAX bytes at a time
873 * to avoid memory alignment issues.
875 const int chunk_size
= INT_MAX
& ~0x3FFF;
876 fwrite(data
, 1, chunk_size
, fp
);
881 if (size
) fwrite(data
, (int)size
, 1, fp
);
883 fwrite(data
, 1, (int)size
, fp
);
889 /* figure out which quote to use; single is preferred */
891 if (memchr(op
->ob_sval
, '\'', Py_SIZE(op
)) &&
892 !memchr(op
->ob_sval
, '"', Py_SIZE(op
)))
895 str_len
= Py_SIZE(op
);
896 Py_BEGIN_ALLOW_THREADS
898 for (i
= 0; i
< str_len
; i
++) {
899 /* Since strings are immutable and the caller should have a
900 reference, accessing the internal buffer should not be an issue
901 with the GIL released. */
903 if (c
== quote
|| c
== '\\')
904 fprintf(fp
, "\\%c", c
);
911 else if (c
< ' ' || c
>= 0x7f)
912 fprintf(fp
, "\\x%02x", c
& 0xff);
922 PyString_Repr(PyObject
*obj
, int smartquotes
)
924 register PyStringObject
* op
= (PyStringObject
*) obj
;
925 size_t newsize
= 2 + 4 * Py_SIZE(op
);
927 if (newsize
> PY_SSIZE_T_MAX
|| newsize
/ 4 != Py_SIZE(op
)) {
928 PyErr_SetString(PyExc_OverflowError
,
929 "string is too large to make repr");
932 v
= PyString_FromStringAndSize((char *)NULL
, newsize
);
937 register Py_ssize_t i
;
942 /* figure out which quote to use; single is preferred */
945 memchr(op
->ob_sval
, '\'', Py_SIZE(op
)) &&
946 !memchr(op
->ob_sval
, '"', Py_SIZE(op
)))
949 p
= PyString_AS_STRING(v
);
951 for (i
= 0; i
< Py_SIZE(op
); i
++) {
952 /* There's at least enough room for a hex escape
953 and a closing quote. */
954 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 5);
956 if (c
== quote
|| c
== '\\')
957 *p
++ = '\\', *p
++ = c
;
959 *p
++ = '\\', *p
++ = 't';
961 *p
++ = '\\', *p
++ = 'n';
963 *p
++ = '\\', *p
++ = 'r';
964 else if (c
< ' ' || c
>= 0x7f) {
965 /* For performance, we don't want to call
966 PyOS_snprintf here (extra layers of
968 sprintf(p
, "\\x%02x", c
& 0xff);
974 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 1);
977 if (_PyString_Resize(&v
, (p
- PyString_AS_STRING(v
))))
984 string_repr(PyObject
*op
)
986 return PyString_Repr(op
, 1);
990 string_str(PyObject
*s
)
992 assert(PyString_Check(s
));
993 if (PyString_CheckExact(s
)) {
998 /* Subtype -- return genuine string with the same value. */
999 PyStringObject
*t
= (PyStringObject
*) s
;
1000 return PyString_FromStringAndSize(t
->ob_sval
, Py_SIZE(t
));
1005 string_length(PyStringObject
*a
)
1011 string_concat(register PyStringObject
*a
, register PyObject
*bb
)
1013 register Py_ssize_t size
;
1014 register PyStringObject
*op
;
1015 if (!PyString_Check(bb
)) {
1016 #ifdef Py_USING_UNICODE
1017 if (PyUnicode_Check(bb
))
1018 return PyUnicode_Concat((PyObject
*)a
, bb
);
1020 if (PyByteArray_Check(bb
))
1021 return PyByteArray_Concat((PyObject
*)a
, bb
);
1022 PyErr_Format(PyExc_TypeError
,
1023 "cannot concatenate 'str' and '%.200s' objects",
1024 Py_TYPE(bb
)->tp_name
);
1027 #define b ((PyStringObject *)bb)
1028 /* Optimize cases with empty left or right operand */
1029 if ((Py_SIZE(a
) == 0 || Py_SIZE(b
) == 0) &&
1030 PyString_CheckExact(a
) && PyString_CheckExact(b
)) {
1031 if (Py_SIZE(a
) == 0) {
1036 return (PyObject
*)a
;
1038 size
= Py_SIZE(a
) + Py_SIZE(b
);
1039 /* Check that string sizes are not negative, to prevent an
1040 overflow in cases where we are passed incorrectly-created
1041 strings with negative lengths (due to a bug in other code).
1043 if (Py_SIZE(a
) < 0 || Py_SIZE(b
) < 0 ||
1044 Py_SIZE(a
) > PY_SSIZE_T_MAX
- Py_SIZE(b
)) {
1045 PyErr_SetString(PyExc_OverflowError
,
1046 "strings are too large to concat");
1050 /* Inline PyObject_NewVar */
1051 if (size
> PY_SSIZE_T_MAX
- PyStringObject_SIZE
) {
1052 PyErr_SetString(PyExc_OverflowError
,
1053 "strings are too large to concat");
1056 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ size
);
1058 return PyErr_NoMemory();
1059 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
1061 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
1062 Py_MEMCPY(op
->ob_sval
, a
->ob_sval
, Py_SIZE(a
));
1063 Py_MEMCPY(op
->ob_sval
+ Py_SIZE(a
), b
->ob_sval
, Py_SIZE(b
));
1064 op
->ob_sval
[size
] = '\0';
1065 return (PyObject
*) op
;
1070 string_repeat(register PyStringObject
*a
, register Py_ssize_t n
)
1072 register Py_ssize_t i
;
1073 register Py_ssize_t j
;
1074 register Py_ssize_t size
;
1075 register PyStringObject
*op
;
1079 /* watch out for overflows: the size can overflow int,
1080 * and the # of bytes needed can overflow size_t
1082 size
= Py_SIZE(a
) * n
;
1083 if (n
&& size
/ n
!= Py_SIZE(a
)) {
1084 PyErr_SetString(PyExc_OverflowError
,
1085 "repeated string is too long");
1088 if (size
== Py_SIZE(a
) && PyString_CheckExact(a
)) {
1090 return (PyObject
*)a
;
1092 nbytes
= (size_t)size
;
1093 if (nbytes
+ PyStringObject_SIZE
<= nbytes
) {
1094 PyErr_SetString(PyExc_OverflowError
,
1095 "repeated string is too long");
1098 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ nbytes
);
1100 return PyErr_NoMemory();
1101 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
1103 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
1104 op
->ob_sval
[size
] = '\0';
1105 if (Py_SIZE(a
) == 1 && n
> 0) {
1106 memset(op
->ob_sval
, a
->ob_sval
[0] , n
);
1107 return (PyObject
*) op
;
1111 Py_MEMCPY(op
->ob_sval
, a
->ob_sval
, Py_SIZE(a
));
1115 j
= (i
<= size
-i
) ? i
: size
-i
;
1116 Py_MEMCPY(op
->ob_sval
+i
, op
->ob_sval
, j
);
1119 return (PyObject
*) op
;
1122 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1125 string_slice(register PyStringObject
*a
, register Py_ssize_t i
,
1126 register Py_ssize_t j
)
1127 /* j -- may be negative! */
1132 j
= 0; /* Avoid signed/unsigned bug in next line */
1135 if (i
== 0 && j
== Py_SIZE(a
) && PyString_CheckExact(a
)) {
1136 /* It's the same as a */
1138 return (PyObject
*)a
;
1142 return PyString_FromStringAndSize(a
->ob_sval
+ i
, j
-i
);
1146 string_contains(PyObject
*str_obj
, PyObject
*sub_obj
)
1148 if (!PyString_CheckExact(sub_obj
)) {
1149 #ifdef Py_USING_UNICODE
1150 if (PyUnicode_Check(sub_obj
))
1151 return PyUnicode_Contains(str_obj
, sub_obj
);
1153 if (!PyString_Check(sub_obj
)) {
1154 PyErr_Format(PyExc_TypeError
,
1155 "'in <string>' requires string as left operand, "
1156 "not %.200s", Py_TYPE(sub_obj
)->tp_name
);
1161 return stringlib_contains_obj(str_obj
, sub_obj
);
1165 string_item(PyStringObject
*a
, register Py_ssize_t i
)
1169 if (i
< 0 || i
>= Py_SIZE(a
)) {
1170 PyErr_SetString(PyExc_IndexError
, "string index out of range");
1173 pchar
= a
->ob_sval
[i
];
1174 v
= (PyObject
*)characters
[pchar
& UCHAR_MAX
];
1176 v
= PyString_FromStringAndSize(&pchar
, 1);
1187 string_richcompare(PyStringObject
*a
, PyStringObject
*b
, int op
)
1190 Py_ssize_t len_a
, len_b
;
1194 /* Make sure both arguments are strings. */
1195 if (!(PyString_Check(a
) && PyString_Check(b
))) {
1196 result
= Py_NotImplemented
;
1201 case Py_EQ
:case Py_LE
:case Py_GE
:
1204 case Py_NE
:case Py_LT
:case Py_GT
:
1210 /* Supporting Py_NE here as well does not save
1211 much time, since Py_NE is rarely used. */
1212 if (Py_SIZE(a
) == Py_SIZE(b
)
1213 && (a
->ob_sval
[0] == b
->ob_sval
[0]
1214 && memcmp(a
->ob_sval
, b
->ob_sval
, Py_SIZE(a
)) == 0)) {
1221 len_a
= Py_SIZE(a
); len_b
= Py_SIZE(b
);
1222 min_len
= (len_a
< len_b
) ? len_a
: len_b
;
1224 c
= Py_CHARMASK(*a
->ob_sval
) - Py_CHARMASK(*b
->ob_sval
);
1226 c
= memcmp(a
->ob_sval
, b
->ob_sval
, min_len
);
1230 c
= (len_a
< len_b
) ? -1 : (len_a
> len_b
) ? 1 : 0;
1232 case Py_LT
: c
= c
< 0; break;
1233 case Py_LE
: c
= c
<= 0; break;
1234 case Py_EQ
: assert(0); break; /* unreachable */
1235 case Py_NE
: c
= c
!= 0; break;
1236 case Py_GT
: c
= c
> 0; break;
1237 case Py_GE
: c
= c
>= 0; break;
1239 result
= Py_NotImplemented
;
1242 result
= c
? Py_True
: Py_False
;
1249 _PyString_Eq(PyObject
*o1
, PyObject
*o2
)
1251 PyStringObject
*a
= (PyStringObject
*) o1
;
1252 PyStringObject
*b
= (PyStringObject
*) o2
;
1253 return Py_SIZE(a
) == Py_SIZE(b
)
1254 && *a
->ob_sval
== *b
->ob_sval
1255 && memcmp(a
->ob_sval
, b
->ob_sval
, Py_SIZE(a
)) == 0;
1259 string_hash(PyStringObject
*a
)
1261 register Py_ssize_t len
;
1262 register unsigned char *p
;
1265 if (a
->ob_shash
!= -1)
1268 p
= (unsigned char *) a
->ob_sval
;
1271 x
= (1000003*x
) ^ *p
++;
1280 string_subscript(PyStringObject
* self
, PyObject
* item
)
1282 if (PyIndex_Check(item
)) {
1283 Py_ssize_t i
= PyNumber_AsSsize_t(item
, PyExc_IndexError
);
1284 if (i
== -1 && PyErr_Occurred())
1287 i
+= PyString_GET_SIZE(self
);
1288 return string_item(self
, i
);
1290 else if (PySlice_Check(item
)) {
1291 Py_ssize_t start
, stop
, step
, slicelength
, cur
, i
;
1296 if (PySlice_GetIndicesEx((PySliceObject
*)item
,
1297 PyString_GET_SIZE(self
),
1298 &start
, &stop
, &step
, &slicelength
) < 0) {
1302 if (slicelength
<= 0) {
1303 return PyString_FromStringAndSize("", 0);
1305 else if (start
== 0 && step
== 1 &&
1306 slicelength
== PyString_GET_SIZE(self
) &&
1307 PyString_CheckExact(self
)) {
1309 return (PyObject
*)self
;
1311 else if (step
== 1) {
1312 return PyString_FromStringAndSize(
1313 PyString_AS_STRING(self
) + start
,
1317 source_buf
= PyString_AsString((PyObject
*)self
);
1318 result_buf
= (char *)PyMem_Malloc(slicelength
);
1319 if (result_buf
== NULL
)
1320 return PyErr_NoMemory();
1322 for (cur
= start
, i
= 0; i
< slicelength
;
1324 result_buf
[i
] = source_buf
[cur
];
1327 result
= PyString_FromStringAndSize(result_buf
,
1329 PyMem_Free(result_buf
);
1334 PyErr_Format(PyExc_TypeError
,
1335 "string indices must be integers, not %.200s",
1336 Py_TYPE(item
)->tp_name
);
1342 string_buffer_getreadbuf(PyStringObject
*self
, Py_ssize_t index
, const void **ptr
)
1345 PyErr_SetString(PyExc_SystemError
,
1346 "accessing non-existent string segment");
1349 *ptr
= (void *)self
->ob_sval
;
1350 return Py_SIZE(self
);
1354 string_buffer_getwritebuf(PyStringObject
*self
, Py_ssize_t index
, const void **ptr
)
1356 PyErr_SetString(PyExc_TypeError
,
1357 "Cannot use string as modifiable buffer");
1362 string_buffer_getsegcount(PyStringObject
*self
, Py_ssize_t
*lenp
)
1365 *lenp
= Py_SIZE(self
);
1370 string_buffer_getcharbuf(PyStringObject
*self
, Py_ssize_t index
, const char **ptr
)
1373 PyErr_SetString(PyExc_SystemError
,
1374 "accessing non-existent string segment");
1377 *ptr
= self
->ob_sval
;
1378 return Py_SIZE(self
);
1382 string_buffer_getbuffer(PyStringObject
*self
, Py_buffer
*view
, int flags
)
1384 return PyBuffer_FillInfo(view
, (PyObject
*)self
,
1385 (void *)self
->ob_sval
, Py_SIZE(self
),
1389 static PySequenceMethods string_as_sequence
= {
1390 (lenfunc
)string_length
, /*sq_length*/
1391 (binaryfunc
)string_concat
, /*sq_concat*/
1392 (ssizeargfunc
)string_repeat
, /*sq_repeat*/
1393 (ssizeargfunc
)string_item
, /*sq_item*/
1394 (ssizessizeargfunc
)string_slice
, /*sq_slice*/
1397 (objobjproc
)string_contains
/*sq_contains*/
1400 static PyMappingMethods string_as_mapping
= {
1401 (lenfunc
)string_length
,
1402 (binaryfunc
)string_subscript
,
1406 static PyBufferProcs string_as_buffer
= {
1407 (readbufferproc
)string_buffer_getreadbuf
,
1408 (writebufferproc
)string_buffer_getwritebuf
,
1409 (segcountproc
)string_buffer_getsegcount
,
1410 (charbufferproc
)string_buffer_getcharbuf
,
1411 (getbufferproc
)string_buffer_getbuffer
,
1418 #define RIGHTSTRIP 1
1421 /* Arrays indexed by above */
1422 static const char *stripformat
[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1424 #define STRIPNAME(i) (stripformat[i]+3)
1426 PyDoc_STRVAR(split__doc__
,
1427 "S.split([sep [,maxsplit]]) -> list of strings\n\
1429 Return a list of the words in the string S, using sep as the\n\
1430 delimiter string. If maxsplit is given, at most maxsplit\n\
1431 splits are done. If sep is not specified or is None, any\n\
1432 whitespace string is a separator and empty strings are removed\n\
1436 string_split(PyStringObject
*self
, PyObject
*args
)
1438 Py_ssize_t len
= PyString_GET_SIZE(self
), n
;
1439 Py_ssize_t maxsplit
= -1;
1440 const char *s
= PyString_AS_STRING(self
), *sub
;
1441 PyObject
*subobj
= Py_None
;
1443 if (!PyArg_ParseTuple(args
, "|On:split", &subobj
, &maxsplit
))
1446 maxsplit
= PY_SSIZE_T_MAX
;
1447 if (subobj
== Py_None
)
1448 return stringlib_split_whitespace((PyObject
*) self
, s
, len
, maxsplit
);
1449 if (PyString_Check(subobj
)) {
1450 sub
= PyString_AS_STRING(subobj
);
1451 n
= PyString_GET_SIZE(subobj
);
1453 #ifdef Py_USING_UNICODE
1454 else if (PyUnicode_Check(subobj
))
1455 return PyUnicode_Split((PyObject
*)self
, subobj
, maxsplit
);
1457 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1460 return stringlib_split((PyObject
*) self
, s
, len
, sub
, n
, maxsplit
);
1463 PyDoc_STRVAR(partition__doc__
,
1464 "S.partition(sep) -> (head, sep, tail)\n\
1466 Search for the separator sep in S, and return the part before it,\n\
1467 the separator itself, and the part after it. If the separator is not\n\
1468 found, return S and two empty strings.");
1471 string_partition(PyStringObject
*self
, PyObject
*sep_obj
)
1476 if (PyString_Check(sep_obj
)) {
1477 sep
= PyString_AS_STRING(sep_obj
);
1478 sep_len
= PyString_GET_SIZE(sep_obj
);
1480 #ifdef Py_USING_UNICODE
1481 else if (PyUnicode_Check(sep_obj
))
1482 return PyUnicode_Partition((PyObject
*) self
, sep_obj
);
1484 else if (PyObject_AsCharBuffer(sep_obj
, &sep
, &sep_len
))
1487 return stringlib_partition(
1489 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1490 sep_obj
, sep
, sep_len
1494 PyDoc_STRVAR(rpartition__doc__
,
1495 "S.rpartition(sep) -> (head, sep, tail)\n\
1497 Search for the separator sep in S, starting at the end of S, and return\n\
1498 the part before it, the separator itself, and the part after it. If the\n\
1499 separator is not found, return two empty strings and S.");
1502 string_rpartition(PyStringObject
*self
, PyObject
*sep_obj
)
1507 if (PyString_Check(sep_obj
)) {
1508 sep
= PyString_AS_STRING(sep_obj
);
1509 sep_len
= PyString_GET_SIZE(sep_obj
);
1511 #ifdef Py_USING_UNICODE
1512 else if (PyUnicode_Check(sep_obj
))
1513 return PyUnicode_RPartition((PyObject
*) self
, sep_obj
);
1515 else if (PyObject_AsCharBuffer(sep_obj
, &sep
, &sep_len
))
1518 return stringlib_rpartition(
1520 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1521 sep_obj
, sep
, sep_len
1525 PyDoc_STRVAR(rsplit__doc__
,
1526 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1528 Return a list of the words in the string S, using sep as the\n\
1529 delimiter string, starting at the end of the string and working\n\
1530 to the front. If maxsplit is given, at most maxsplit splits are\n\
1531 done. If sep is not specified or is None, any whitespace string\n\
1535 string_rsplit(PyStringObject
*self
, PyObject
*args
)
1537 Py_ssize_t len
= PyString_GET_SIZE(self
), n
;
1538 Py_ssize_t maxsplit
= -1;
1539 const char *s
= PyString_AS_STRING(self
), *sub
;
1540 PyObject
*subobj
= Py_None
;
1542 if (!PyArg_ParseTuple(args
, "|On:rsplit", &subobj
, &maxsplit
))
1545 maxsplit
= PY_SSIZE_T_MAX
;
1546 if (subobj
== Py_None
)
1547 return stringlib_rsplit_whitespace((PyObject
*) self
, s
, len
, maxsplit
);
1548 if (PyString_Check(subobj
)) {
1549 sub
= PyString_AS_STRING(subobj
);
1550 n
= PyString_GET_SIZE(subobj
);
1552 #ifdef Py_USING_UNICODE
1553 else if (PyUnicode_Check(subobj
))
1554 return PyUnicode_RSplit((PyObject
*)self
, subobj
, maxsplit
);
1556 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1559 return stringlib_rsplit((PyObject
*) self
, s
, len
, sub
, n
, maxsplit
);
1563 PyDoc_STRVAR(join__doc__
,
1564 "S.join(iterable) -> string\n\
1566 Return a string which is the concatenation of the strings in the\n\
1567 iterable. The separator between elements is S.");
1570 string_join(PyStringObject
*self
, PyObject
*orig
)
1572 char *sep
= PyString_AS_STRING(self
);
1573 const Py_ssize_t seplen
= PyString_GET_SIZE(self
);
1574 PyObject
*res
= NULL
;
1576 Py_ssize_t seqlen
= 0;
1579 PyObject
*seq
, *item
;
1581 seq
= PySequence_Fast(orig
, "");
1586 seqlen
= PySequence_Size(seq
);
1589 return PyString_FromString("");
1592 item
= PySequence_Fast_GET_ITEM(seq
, 0);
1593 if (PyString_CheckExact(item
) || PyUnicode_CheckExact(item
)) {
1600 /* There are at least two things to join, or else we have a subclass
1601 * of the builtin types in the sequence.
1602 * Do a pre-pass to figure out the total amount of space we'll
1603 * need (sz), see whether any argument is absurd, and defer to
1604 * the Unicode join if appropriate.
1606 for (i
= 0; i
< seqlen
; i
++) {
1607 const size_t old_sz
= sz
;
1608 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1609 if (!PyString_Check(item
)){
1610 #ifdef Py_USING_UNICODE
1611 if (PyUnicode_Check(item
)) {
1612 /* Defer to Unicode join.
1613 * CAUTION: There's no guarantee that the
1614 * original sequence can be iterated over
1615 * again, so we must pass seq here.
1618 result
= PyUnicode_Join((PyObject
*)self
, seq
);
1623 PyErr_Format(PyExc_TypeError
,
1624 "sequence item %zd: expected string,"
1626 i
, Py_TYPE(item
)->tp_name
);
1630 sz
+= PyString_GET_SIZE(item
);
1633 if (sz
< old_sz
|| sz
> PY_SSIZE_T_MAX
) {
1634 PyErr_SetString(PyExc_OverflowError
,
1635 "join() result is too long for a Python string");
1641 /* Allocate result space. */
1642 res
= PyString_FromStringAndSize((char*)NULL
, sz
);
1648 /* Catenate everything. */
1649 p
= PyString_AS_STRING(res
);
1650 for (i
= 0; i
< seqlen
; ++i
) {
1652 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1653 n
= PyString_GET_SIZE(item
);
1654 Py_MEMCPY(p
, PyString_AS_STRING(item
), n
);
1656 if (i
< seqlen
- 1) {
1657 Py_MEMCPY(p
, sep
, seplen
);
1667 _PyString_Join(PyObject
*sep
, PyObject
*x
)
1669 assert(sep
!= NULL
&& PyString_Check(sep
));
1671 return string_join((PyStringObject
*)sep
, x
);
1674 /* helper macro to fixup start/end slice values */
1675 #define ADJUST_INDICES(start, end, len) \
1678 else if (end < 0) { \
1689 Py_LOCAL_INLINE(Py_ssize_t
)
1690 string_find_internal(PyStringObject
*self
, PyObject
*args
, int dir
)
1695 Py_ssize_t start
=0, end
=PY_SSIZE_T_MAX
;
1697 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1698 args
, &subobj
, &start
, &end
))
1701 if (PyString_Check(subobj
)) {
1702 sub
= PyString_AS_STRING(subobj
);
1703 sub_len
= PyString_GET_SIZE(subobj
);
1705 #ifdef Py_USING_UNICODE
1706 else if (PyUnicode_Check(subobj
))
1707 return PyUnicode_Find(
1708 (PyObject
*)self
, subobj
, start
, end
, dir
);
1710 else if (PyObject_AsCharBuffer(subobj
, &sub
, &sub_len
))
1711 /* XXX - the "expected a character buffer object" is pretty
1712 confusing for a non-expert. remap to something else ? */
1716 return stringlib_find_slice(
1717 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1718 sub
, sub_len
, start
, end
);
1720 return stringlib_rfind_slice(
1721 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1722 sub
, sub_len
, start
, end
);
1726 PyDoc_STRVAR(find__doc__
,
1727 "S.find(sub [,start [,end]]) -> int\n\
1729 Return the lowest index in S where substring sub is found,\n\
1730 such that sub is contained within s[start:end]. Optional\n\
1731 arguments start and end are interpreted as in slice notation.\n\
1733 Return -1 on failure.");
1736 string_find(PyStringObject
*self
, PyObject
*args
)
1738 Py_ssize_t result
= string_find_internal(self
, args
, +1);
1741 return PyInt_FromSsize_t(result
);
1745 PyDoc_STRVAR(index__doc__
,
1746 "S.index(sub [,start [,end]]) -> int\n\
1748 Like S.find() but raise ValueError when the substring is not found.");
1751 string_index(PyStringObject
*self
, PyObject
*args
)
1753 Py_ssize_t result
= string_find_internal(self
, args
, +1);
1757 PyErr_SetString(PyExc_ValueError
,
1758 "substring not found");
1761 return PyInt_FromSsize_t(result
);
1765 PyDoc_STRVAR(rfind__doc__
,
1766 "S.rfind(sub [,start [,end]]) -> int\n\
1768 Return the highest index in S where substring sub is found,\n\
1769 such that sub is contained within s[start:end]. Optional\n\
1770 arguments start and end are interpreted as in slice notation.\n\
1772 Return -1 on failure.");
1775 string_rfind(PyStringObject
*self
, PyObject
*args
)
1777 Py_ssize_t result
= string_find_internal(self
, args
, -1);
1780 return PyInt_FromSsize_t(result
);
1784 PyDoc_STRVAR(rindex__doc__
,
1785 "S.rindex(sub [,start [,end]]) -> int\n\
1787 Like S.rfind() but raise ValueError when the substring is not found.");
1790 string_rindex(PyStringObject
*self
, PyObject
*args
)
1792 Py_ssize_t result
= string_find_internal(self
, args
, -1);
1796 PyErr_SetString(PyExc_ValueError
,
1797 "substring not found");
1800 return PyInt_FromSsize_t(result
);
1804 Py_LOCAL_INLINE(PyObject
*)
1805 do_xstrip(PyStringObject
*self
, int striptype
, PyObject
*sepobj
)
1807 char *s
= PyString_AS_STRING(self
);
1808 Py_ssize_t len
= PyString_GET_SIZE(self
);
1809 char *sep
= PyString_AS_STRING(sepobj
);
1810 Py_ssize_t seplen
= PyString_GET_SIZE(sepobj
);
1814 if (striptype
!= RIGHTSTRIP
) {
1815 while (i
< len
&& memchr(sep
, Py_CHARMASK(s
[i
]), seplen
)) {
1821 if (striptype
!= LEFTSTRIP
) {
1824 } while (j
>= i
&& memchr(sep
, Py_CHARMASK(s
[j
]), seplen
));
1828 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
1830 return (PyObject
*)self
;
1833 return PyString_FromStringAndSize(s
+i
, j
-i
);
1837 Py_LOCAL_INLINE(PyObject
*)
1838 do_strip(PyStringObject
*self
, int striptype
)
1840 char *s
= PyString_AS_STRING(self
);
1841 Py_ssize_t len
= PyString_GET_SIZE(self
), i
, j
;
1844 if (striptype
!= RIGHTSTRIP
) {
1845 while (i
< len
&& isspace(Py_CHARMASK(s
[i
]))) {
1851 if (striptype
!= LEFTSTRIP
) {
1854 } while (j
>= i
&& isspace(Py_CHARMASK(s
[j
])));
1858 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
1860 return (PyObject
*)self
;
1863 return PyString_FromStringAndSize(s
+i
, j
-i
);
1867 Py_LOCAL_INLINE(PyObject
*)
1868 do_argstrip(PyStringObject
*self
, int striptype
, PyObject
*args
)
1870 PyObject
*sep
= NULL
;
1872 if (!PyArg_ParseTuple(args
, (char *)stripformat
[striptype
], &sep
))
1875 if (sep
!= NULL
&& sep
!= Py_None
) {
1876 if (PyString_Check(sep
))
1877 return do_xstrip(self
, striptype
, sep
);
1878 #ifdef Py_USING_UNICODE
1879 else if (PyUnicode_Check(sep
)) {
1880 PyObject
*uniself
= PyUnicode_FromObject((PyObject
*)self
);
1884 res
= _PyUnicode_XStrip((PyUnicodeObject
*)uniself
,
1890 PyErr_Format(PyExc_TypeError
,
1891 #ifdef Py_USING_UNICODE
1892 "%s arg must be None, str or unicode",
1894 "%s arg must be None or str",
1896 STRIPNAME(striptype
));
1900 return do_strip(self
, striptype
);
1904 PyDoc_STRVAR(strip__doc__
,
1905 "S.strip([chars]) -> string or unicode\n\
1907 Return a copy of the string S with leading and trailing\n\
1908 whitespace removed.\n\
1909 If chars is given and not None, remove characters in chars instead.\n\
1910 If chars is unicode, S will be converted to unicode before stripping");
1913 string_strip(PyStringObject
*self
, PyObject
*args
)
1915 if (PyTuple_GET_SIZE(args
) == 0)
1916 return do_strip(self
, BOTHSTRIP
); /* Common case */
1918 return do_argstrip(self
, BOTHSTRIP
, args
);
1922 PyDoc_STRVAR(lstrip__doc__
,
1923 "S.lstrip([chars]) -> string or unicode\n\
1925 Return a copy of the string S with leading whitespace removed.\n\
1926 If chars is given and not None, remove characters in chars instead.\n\
1927 If chars is unicode, S will be converted to unicode before stripping");
1930 string_lstrip(PyStringObject
*self
, PyObject
*args
)
1932 if (PyTuple_GET_SIZE(args
) == 0)
1933 return do_strip(self
, LEFTSTRIP
); /* Common case */
1935 return do_argstrip(self
, LEFTSTRIP
, args
);
1939 PyDoc_STRVAR(rstrip__doc__
,
1940 "S.rstrip([chars]) -> string or unicode\n\
1942 Return a copy of the string S with trailing whitespace removed.\n\
1943 If chars is given and not None, remove characters in chars instead.\n\
1944 If chars is unicode, S will be converted to unicode before stripping");
1947 string_rstrip(PyStringObject
*self
, PyObject
*args
)
1949 if (PyTuple_GET_SIZE(args
) == 0)
1950 return do_strip(self
, RIGHTSTRIP
); /* Common case */
1952 return do_argstrip(self
, RIGHTSTRIP
, args
);
1956 PyDoc_STRVAR(lower__doc__
,
1957 "S.lower() -> string\n\
1959 Return a copy of the string S converted to lowercase.");
1961 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1963 #define _tolower tolower
1967 string_lower(PyStringObject
*self
)
1970 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
1973 newobj
= PyString_FromStringAndSize(NULL
, n
);
1977 s
= PyString_AS_STRING(newobj
);
1979 Py_MEMCPY(s
, PyString_AS_STRING(self
), n
);
1981 for (i
= 0; i
< n
; i
++) {
1982 int c
= Py_CHARMASK(s
[i
]);
1990 PyDoc_STRVAR(upper__doc__
,
1991 "S.upper() -> string\n\
1993 Return a copy of the string S converted to uppercase.");
1996 #define _toupper toupper
2000 string_upper(PyStringObject
*self
)
2003 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2006 newobj
= PyString_FromStringAndSize(NULL
, n
);
2010 s
= PyString_AS_STRING(newobj
);
2012 Py_MEMCPY(s
, PyString_AS_STRING(self
), n
);
2014 for (i
= 0; i
< n
; i
++) {
2015 int c
= Py_CHARMASK(s
[i
]);
2023 PyDoc_STRVAR(title__doc__
,
2024 "S.title() -> string\n\
2026 Return a titlecased version of S, i.e. words start with uppercase\n\
2027 characters, all remaining cased characters have lowercase.");
2030 string_title(PyStringObject
*self
)
2032 char *s
= PyString_AS_STRING(self
), *s_new
;
2033 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2034 int previous_is_cased
= 0;
2037 newobj
= PyString_FromStringAndSize(NULL
, n
);
2040 s_new
= PyString_AsString(newobj
);
2041 for (i
= 0; i
< n
; i
++) {
2042 int c
= Py_CHARMASK(*s
++);
2044 if (!previous_is_cased
)
2046 previous_is_cased
= 1;
2047 } else if (isupper(c
)) {
2048 if (previous_is_cased
)
2050 previous_is_cased
= 1;
2052 previous_is_cased
= 0;
2058 PyDoc_STRVAR(capitalize__doc__
,
2059 "S.capitalize() -> string\n\
2061 Return a copy of the string S with only its first character\n\
2065 string_capitalize(PyStringObject
*self
)
2067 char *s
= PyString_AS_STRING(self
), *s_new
;
2068 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2071 newobj
= PyString_FromStringAndSize(NULL
, n
);
2074 s_new
= PyString_AsString(newobj
);
2076 int c
= Py_CHARMASK(*s
++);
2078 *s_new
= toupper(c
);
2083 for (i
= 1; i
< n
; i
++) {
2084 int c
= Py_CHARMASK(*s
++);
2086 *s_new
= tolower(c
);
2095 PyDoc_STRVAR(count__doc__
,
2096 "S.count(sub[, start[, end]]) -> int\n\
2098 Return the number of non-overlapping occurrences of substring sub in\n\
2099 string S[start:end]. Optional arguments start and end are interpreted\n\
2100 as in slice notation.");
2103 string_count(PyStringObject
*self
, PyObject
*args
)
2106 const char *str
= PyString_AS_STRING(self
), *sub
;
2108 Py_ssize_t start
= 0, end
= PY_SSIZE_T_MAX
;
2110 if (!stringlib_parse_args_finds("count", args
, &sub_obj
, &start
, &end
))
2113 if (PyString_Check(sub_obj
)) {
2114 sub
= PyString_AS_STRING(sub_obj
);
2115 sub_len
= PyString_GET_SIZE(sub_obj
);
2117 #ifdef Py_USING_UNICODE
2118 else if (PyUnicode_Check(sub_obj
)) {
2120 count
= PyUnicode_Count((PyObject
*)self
, sub_obj
, start
, end
);
2124 return PyInt_FromSsize_t(count
);
2127 else if (PyObject_AsCharBuffer(sub_obj
, &sub
, &sub_len
))
2130 ADJUST_INDICES(start
, end
, PyString_GET_SIZE(self
));
2132 return PyInt_FromSsize_t(
2133 stringlib_count(str
+ start
, end
- start
, sub
, sub_len
, PY_SSIZE_T_MAX
)
2137 PyDoc_STRVAR(swapcase__doc__
,
2138 "S.swapcase() -> string\n\
2140 Return a copy of the string S with uppercase characters\n\
2141 converted to lowercase and vice versa.");
2144 string_swapcase(PyStringObject
*self
)
2146 char *s
= PyString_AS_STRING(self
), *s_new
;
2147 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2150 newobj
= PyString_FromStringAndSize(NULL
, n
);
2153 s_new
= PyString_AsString(newobj
);
2154 for (i
= 0; i
< n
; i
++) {
2155 int c
= Py_CHARMASK(*s
++);
2157 *s_new
= toupper(c
);
2159 else if (isupper(c
)) {
2160 *s_new
= tolower(c
);
2170 PyDoc_STRVAR(translate__doc__
,
2171 "S.translate(table [,deletechars]) -> string\n\
2173 Return a copy of the string S, where all characters occurring\n\
2174 in the optional argument deletechars are removed, and the\n\
2175 remaining characters have been mapped through the given\n\
2176 translation table, which must be a string of length 256.");
2179 string_translate(PyStringObject
*self
, PyObject
*args
)
2181 register char *input
, *output
;
2183 register Py_ssize_t i
, c
, changed
= 0;
2184 PyObject
*input_obj
= (PyObject
*)self
;
2185 const char *output_start
, *del_table
=NULL
;
2186 Py_ssize_t inlen
, tablen
, dellen
= 0;
2188 int trans_table
[256];
2189 PyObject
*tableobj
, *delobj
= NULL
;
2191 if (!PyArg_UnpackTuple(args
, "translate", 1, 2,
2192 &tableobj
, &delobj
))
2195 if (PyString_Check(tableobj
)) {
2196 table
= PyString_AS_STRING(tableobj
);
2197 tablen
= PyString_GET_SIZE(tableobj
);
2199 else if (tableobj
== Py_None
) {
2203 #ifdef Py_USING_UNICODE
2204 else if (PyUnicode_Check(tableobj
)) {
2205 /* Unicode .translate() does not support the deletechars
2206 parameter; instead a mapping to None will cause characters
2208 if (delobj
!= NULL
) {
2209 PyErr_SetString(PyExc_TypeError
,
2210 "deletions are implemented differently for unicode");
2213 return PyUnicode_Translate((PyObject
*)self
, tableobj
, NULL
);
2216 else if (PyObject_AsCharBuffer(tableobj
, &table
, &tablen
))
2219 if (tablen
!= 256) {
2220 PyErr_SetString(PyExc_ValueError
,
2221 "translation table must be 256 characters long");
2225 if (delobj
!= NULL
) {
2226 if (PyString_Check(delobj
)) {
2227 del_table
= PyString_AS_STRING(delobj
);
2228 dellen
= PyString_GET_SIZE(delobj
);
2230 #ifdef Py_USING_UNICODE
2231 else if (PyUnicode_Check(delobj
)) {
2232 PyErr_SetString(PyExc_TypeError
,
2233 "deletions are implemented differently for unicode");
2237 else if (PyObject_AsCharBuffer(delobj
, &del_table
, &dellen
))
2245 inlen
= PyString_GET_SIZE(input_obj
);
2246 result
= PyString_FromStringAndSize((char *)NULL
, inlen
);
2249 output_start
= output
= PyString_AsString(result
);
2250 input
= PyString_AS_STRING(input_obj
);
2252 if (dellen
== 0 && table
!= NULL
) {
2253 /* If no deletions are required, use faster code */
2254 for (i
= inlen
; --i
>= 0; ) {
2255 c
= Py_CHARMASK(*input
++);
2256 if (Py_CHARMASK((*output
++ = table
[c
])) != c
)
2259 if (changed
|| !PyString_CheckExact(input_obj
))
2262 Py_INCREF(input_obj
);
2266 if (table
== NULL
) {
2267 for (i
= 0; i
< 256; i
++)
2268 trans_table
[i
] = Py_CHARMASK(i
);
2270 for (i
= 0; i
< 256; i
++)
2271 trans_table
[i
] = Py_CHARMASK(table
[i
]);
2274 for (i
= 0; i
< dellen
; i
++)
2275 trans_table
[(int) Py_CHARMASK(del_table
[i
])] = -1;
2277 for (i
= inlen
; --i
>= 0; ) {
2278 c
= Py_CHARMASK(*input
++);
2279 if (trans_table
[c
] != -1)
2280 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
2284 if (!changed
&& PyString_CheckExact(input_obj
)) {
2286 Py_INCREF(input_obj
);
2289 /* Fix the size of the resulting string */
2290 if (inlen
> 0 && _PyString_Resize(&result
, output
- output_start
))
2296 /* find and count characters and substrings */
2298 #define findchar(target, target_len, c) \
2299 ((char *)memchr((const void *)(target), c, target_len))
2301 /* String ops must return a string. */
2302 /* If the object is subclass of string, create a copy */
2303 Py_LOCAL(PyStringObject
*)
2304 return_self(PyStringObject
*self
)
2306 if (PyString_CheckExact(self
)) {
2310 return (PyStringObject
*)PyString_FromStringAndSize(
2311 PyString_AS_STRING(self
),
2312 PyString_GET_SIZE(self
));
2315 Py_LOCAL_INLINE(Py_ssize_t
)
2316 countchar(const char *target
, int target_len
, char c
, Py_ssize_t maxcount
)
2319 const char *start
=target
;
2320 const char *end
=target
+target_len
;
2322 while ( (start
=findchar(start
, end
-start
, c
)) != NULL
) {
2324 if (count
>= maxcount
)
2332 /* Algorithms for different cases of string replacement */
2334 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2335 Py_LOCAL(PyStringObject
*)
2336 replace_interleave(PyStringObject
*self
,
2337 const char *to_s
, Py_ssize_t to_len
,
2338 Py_ssize_t maxcount
)
2340 char *self_s
, *result_s
;
2341 Py_ssize_t self_len
, result_len
;
2342 Py_ssize_t count
, i
, product
;
2343 PyStringObject
*result
;
2345 self_len
= PyString_GET_SIZE(self
);
2347 /* 1 at the end plus 1 after every character */
2349 if (maxcount
< count
)
2352 /* Check for overflow */
2353 /* result_len = count * to_len + self_len; */
2354 product
= count
* to_len
;
2355 if (product
/ to_len
!= count
) {
2356 PyErr_SetString(PyExc_OverflowError
,
2357 "replace string is too long");
2360 result_len
= product
+ self_len
;
2361 if (result_len
< 0) {
2362 PyErr_SetString(PyExc_OverflowError
,
2363 "replace string is too long");
2367 if (! (result
= (PyStringObject
*)
2368 PyString_FromStringAndSize(NULL
, result_len
)) )
2371 self_s
= PyString_AS_STRING(self
);
2372 result_s
= PyString_AS_STRING(result
);
2374 /* TODO: special case single character, which doesn't need memcpy */
2376 /* Lay the first one down (guaranteed this will occur) */
2377 Py_MEMCPY(result_s
, to_s
, to_len
);
2381 for (i
=0; i
<count
; i
++) {
2382 *result_s
++ = *self_s
++;
2383 Py_MEMCPY(result_s
, to_s
, to_len
);
2387 /* Copy the rest of the original string */
2388 Py_MEMCPY(result_s
, self_s
, self_len
-i
);
2393 /* Special case for deleting a single character */
2394 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2395 Py_LOCAL(PyStringObject
*)
2396 replace_delete_single_character(PyStringObject
*self
,
2397 char from_c
, Py_ssize_t maxcount
)
2399 char *self_s
, *result_s
;
2400 char *start
, *next
, *end
;
2401 Py_ssize_t self_len
, result_len
;
2403 PyStringObject
*result
;
2405 self_len
= PyString_GET_SIZE(self
);
2406 self_s
= PyString_AS_STRING(self
);
2408 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
2410 return return_self(self
);
2413 result_len
= self_len
- count
; /* from_len == 1 */
2414 assert(result_len
>=0);
2416 if ( (result
= (PyStringObject
*)
2417 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2419 result_s
= PyString_AS_STRING(result
);
2422 end
= self_s
+ self_len
;
2423 while (count
-- > 0) {
2424 next
= findchar(start
, end
-start
, from_c
);
2427 Py_MEMCPY(result_s
, start
, next
-start
);
2428 result_s
+= (next
-start
);
2431 Py_MEMCPY(result_s
, start
, end
-start
);
2436 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2438 Py_LOCAL(PyStringObject
*)
2439 replace_delete_substring(PyStringObject
*self
,
2440 const char *from_s
, Py_ssize_t from_len
,
2441 Py_ssize_t maxcount
) {
2442 char *self_s
, *result_s
;
2443 char *start
, *next
, *end
;
2444 Py_ssize_t self_len
, result_len
;
2445 Py_ssize_t count
, offset
;
2446 PyStringObject
*result
;
2448 self_len
= PyString_GET_SIZE(self
);
2449 self_s
= PyString_AS_STRING(self
);
2451 count
= stringlib_count(self_s
, self_len
,
2457 return return_self(self
);
2460 result_len
= self_len
- (count
* from_len
);
2461 assert (result_len
>=0);
2463 if ( (result
= (PyStringObject
*)
2464 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2467 result_s
= PyString_AS_STRING(result
);
2470 end
= self_s
+ self_len
;
2471 while (count
-- > 0) {
2472 offset
= stringlib_find(start
, end
-start
,
2477 next
= start
+ offset
;
2479 Py_MEMCPY(result_s
, start
, next
-start
);
2481 result_s
+= (next
-start
);
2482 start
= next
+from_len
;
2484 Py_MEMCPY(result_s
, start
, end
-start
);
2488 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2489 Py_LOCAL(PyStringObject
*)
2490 replace_single_character_in_place(PyStringObject
*self
,
2491 char from_c
, char to_c
,
2492 Py_ssize_t maxcount
)
2494 char *self_s
, *result_s
, *start
, *end
, *next
;
2495 Py_ssize_t self_len
;
2496 PyStringObject
*result
;
2498 /* The result string will be the same size */
2499 self_s
= PyString_AS_STRING(self
);
2500 self_len
= PyString_GET_SIZE(self
);
2502 next
= findchar(self_s
, self_len
, from_c
);
2505 /* No matches; return the original string */
2506 return return_self(self
);
2509 /* Need to make a new string */
2510 result
= (PyStringObject
*) PyString_FromStringAndSize(NULL
, self_len
);
2513 result_s
= PyString_AS_STRING(result
);
2514 Py_MEMCPY(result_s
, self_s
, self_len
);
2516 /* change everything in-place, starting with this one */
2517 start
= result_s
+ (next
-self_s
);
2520 end
= result_s
+ self_len
;
2522 while (--maxcount
> 0) {
2523 next
= findchar(start
, end
-start
, from_c
);
2533 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2534 Py_LOCAL(PyStringObject
*)
2535 replace_substring_in_place(PyStringObject
*self
,
2536 const char *from_s
, Py_ssize_t from_len
,
2537 const char *to_s
, Py_ssize_t to_len
,
2538 Py_ssize_t maxcount
)
2540 char *result_s
, *start
, *end
;
2542 Py_ssize_t self_len
, offset
;
2543 PyStringObject
*result
;
2545 /* The result string will be the same size */
2547 self_s
= PyString_AS_STRING(self
);
2548 self_len
= PyString_GET_SIZE(self
);
2550 offset
= stringlib_find(self_s
, self_len
,
2554 /* No matches; return the original string */
2555 return return_self(self
);
2558 /* Need to make a new string */
2559 result
= (PyStringObject
*) PyString_FromStringAndSize(NULL
, self_len
);
2562 result_s
= PyString_AS_STRING(result
);
2563 Py_MEMCPY(result_s
, self_s
, self_len
);
2565 /* change everything in-place, starting with this one */
2566 start
= result_s
+ offset
;
2567 Py_MEMCPY(start
, to_s
, from_len
);
2569 end
= result_s
+ self_len
;
2571 while ( --maxcount
> 0) {
2572 offset
= stringlib_find(start
, end
-start
,
2577 Py_MEMCPY(start
+offset
, to_s
, from_len
);
2578 start
+= offset
+from_len
;
2584 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2585 Py_LOCAL(PyStringObject
*)
2586 replace_single_character(PyStringObject
*self
,
2588 const char *to_s
, Py_ssize_t to_len
,
2589 Py_ssize_t maxcount
)
2591 char *self_s
, *result_s
;
2592 char *start
, *next
, *end
;
2593 Py_ssize_t self_len
, result_len
;
2594 Py_ssize_t count
, product
;
2595 PyStringObject
*result
;
2597 self_s
= PyString_AS_STRING(self
);
2598 self_len
= PyString_GET_SIZE(self
);
2600 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
2602 /* no matches, return unchanged */
2603 return return_self(self
);
2606 /* use the difference between current and new, hence the "-1" */
2607 /* result_len = self_len + count * (to_len-1) */
2608 product
= count
* (to_len
-1);
2609 if (product
/ (to_len
-1) != count
) {
2610 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2613 result_len
= self_len
+ product
;
2614 if (result_len
< 0) {
2615 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2619 if ( (result
= (PyStringObject
*)
2620 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2622 result_s
= PyString_AS_STRING(result
);
2625 end
= self_s
+ self_len
;
2626 while (count
-- > 0) {
2627 next
= findchar(start
, end
-start
, from_c
);
2631 if (next
== start
) {
2632 /* replace with the 'to' */
2633 Py_MEMCPY(result_s
, to_s
, to_len
);
2637 /* copy the unchanged old then the 'to' */
2638 Py_MEMCPY(result_s
, start
, next
-start
);
2639 result_s
+= (next
-start
);
2640 Py_MEMCPY(result_s
, to_s
, to_len
);
2645 /* Copy the remainder of the remaining string */
2646 Py_MEMCPY(result_s
, start
, end
-start
);
2651 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2652 Py_LOCAL(PyStringObject
*)
2653 replace_substring(PyStringObject
*self
,
2654 const char *from_s
, Py_ssize_t from_len
,
2655 const char *to_s
, Py_ssize_t to_len
,
2656 Py_ssize_t maxcount
) {
2657 char *self_s
, *result_s
;
2658 char *start
, *next
, *end
;
2659 Py_ssize_t self_len
, result_len
;
2660 Py_ssize_t count
, offset
, product
;
2661 PyStringObject
*result
;
2663 self_s
= PyString_AS_STRING(self
);
2664 self_len
= PyString_GET_SIZE(self
);
2666 count
= stringlib_count(self_s
, self_len
,
2671 /* no matches, return unchanged */
2672 return return_self(self
);
2675 /* Check for overflow */
2676 /* result_len = self_len + count * (to_len-from_len) */
2677 product
= count
* (to_len
-from_len
);
2678 if (product
/ (to_len
-from_len
) != count
) {
2679 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2682 result_len
= self_len
+ product
;
2683 if (result_len
< 0) {
2684 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2688 if ( (result
= (PyStringObject
*)
2689 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2691 result_s
= PyString_AS_STRING(result
);
2694 end
= self_s
+ self_len
;
2695 while (count
-- > 0) {
2696 offset
= stringlib_find(start
, end
-start
,
2701 next
= start
+offset
;
2702 if (next
== start
) {
2703 /* replace with the 'to' */
2704 Py_MEMCPY(result_s
, to_s
, to_len
);
2708 /* copy the unchanged old then the 'to' */
2709 Py_MEMCPY(result_s
, start
, next
-start
);
2710 result_s
+= (next
-start
);
2711 Py_MEMCPY(result_s
, to_s
, to_len
);
2713 start
= next
+from_len
;
2716 /* Copy the remainder of the remaining string */
2717 Py_MEMCPY(result_s
, start
, end
-start
);
2723 Py_LOCAL(PyStringObject
*)
2724 replace(PyStringObject
*self
,
2725 const char *from_s
, Py_ssize_t from_len
,
2726 const char *to_s
, Py_ssize_t to_len
,
2727 Py_ssize_t maxcount
)
2730 maxcount
= PY_SSIZE_T_MAX
;
2731 } else if (maxcount
== 0 || PyString_GET_SIZE(self
) == 0) {
2732 /* nothing to do; return the original string */
2733 return return_self(self
);
2736 if (maxcount
== 0 ||
2737 (from_len
== 0 && to_len
== 0)) {
2738 /* nothing to do; return the original string */
2739 return return_self(self
);
2742 /* Handle zero-length special cases */
2744 if (from_len
== 0) {
2745 /* insert the 'to' string everywhere. */
2746 /* >>> "Python".replace("", ".") */
2747 /* '.P.y.t.h.o.n.' */
2748 return replace_interleave(self
, to_s
, to_len
, maxcount
);
2751 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2752 /* point for an empty self string to generate a non-empty string */
2753 /* Special case so the remaining code always gets a non-empty string */
2754 if (PyString_GET_SIZE(self
) == 0) {
2755 return return_self(self
);
2759 /* delete all occurances of 'from' string */
2760 if (from_len
== 1) {
2761 return replace_delete_single_character(
2762 self
, from_s
[0], maxcount
);
2764 return replace_delete_substring(self
, from_s
, from_len
, maxcount
);
2768 /* Handle special case where both strings have the same length */
2770 if (from_len
== to_len
) {
2771 if (from_len
== 1) {
2772 return replace_single_character_in_place(
2778 return replace_substring_in_place(
2779 self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2783 /* Otherwise use the more generic algorithms */
2784 if (from_len
== 1) {
2785 return replace_single_character(self
, from_s
[0],
2786 to_s
, to_len
, maxcount
);
2788 /* len('from')>=2, len('to')>=1 */
2789 return replace_substring(self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2793 PyDoc_STRVAR(replace__doc__
,
2794 "S.replace(old, new[, count]) -> string\n\
2796 Return a copy of string S with all occurrences of substring\n\
2797 old replaced by new. If the optional argument count is\n\
2798 given, only the first count occurrences are replaced.");
2801 string_replace(PyStringObject
*self
, PyObject
*args
)
2803 Py_ssize_t count
= -1;
2804 PyObject
*from
, *to
;
2805 const char *from_s
, *to_s
;
2806 Py_ssize_t from_len
, to_len
;
2808 if (!PyArg_ParseTuple(args
, "OO|n:replace", &from
, &to
, &count
))
2811 if (PyString_Check(from
)) {
2812 from_s
= PyString_AS_STRING(from
);
2813 from_len
= PyString_GET_SIZE(from
);
2815 #ifdef Py_USING_UNICODE
2816 if (PyUnicode_Check(from
))
2817 return PyUnicode_Replace((PyObject
*)self
,
2820 else if (PyObject_AsCharBuffer(from
, &from_s
, &from_len
))
2823 if (PyString_Check(to
)) {
2824 to_s
= PyString_AS_STRING(to
);
2825 to_len
= PyString_GET_SIZE(to
);
2827 #ifdef Py_USING_UNICODE
2828 else if (PyUnicode_Check(to
))
2829 return PyUnicode_Replace((PyObject
*)self
,
2832 else if (PyObject_AsCharBuffer(to
, &to_s
, &to_len
))
2835 return (PyObject
*)replace((PyStringObject
*) self
,
2837 to_s
, to_len
, count
);
2842 /* Matches the end (direction >= 0) or start (direction < 0) of self
2843 * against substr, using the start and end arguments. Returns
2844 * -1 on error, 0 if not found and 1 if found.
2847 _string_tailmatch(PyStringObject
*self
, PyObject
*substr
, Py_ssize_t start
,
2848 Py_ssize_t end
, int direction
)
2850 Py_ssize_t len
= PyString_GET_SIZE(self
);
2855 if (PyString_Check(substr
)) {
2856 sub
= PyString_AS_STRING(substr
);
2857 slen
= PyString_GET_SIZE(substr
);
2859 #ifdef Py_USING_UNICODE
2860 else if (PyUnicode_Check(substr
))
2861 return PyUnicode_Tailmatch((PyObject
*)self
,
2862 substr
, start
, end
, direction
);
2864 else if (PyObject_AsCharBuffer(substr
, &sub
, &slen
))
2866 str
= PyString_AS_STRING(self
);
2868 ADJUST_INDICES(start
, end
, len
);
2870 if (direction
< 0) {
2872 if (start
+slen
> len
)
2876 if (end
-start
< slen
|| start
> len
)
2879 if (end
-slen
> start
)
2882 if (end
-start
>= slen
)
2883 return ! memcmp(str
+start
, sub
, slen
);
2888 PyDoc_STRVAR(startswith__doc__
,
2889 "S.startswith(prefix[, start[, end]]) -> bool\n\
2891 Return True if S starts with the specified prefix, False otherwise.\n\
2892 With optional start, test S beginning at that position.\n\
2893 With optional end, stop comparing S at that position.\n\
2894 prefix can also be a tuple of strings to try.");
2897 string_startswith(PyStringObject
*self
, PyObject
*args
)
2899 Py_ssize_t start
= 0;
2900 Py_ssize_t end
= PY_SSIZE_T_MAX
;
2904 if (!stringlib_parse_args_finds("startswith", args
, &subobj
, &start
, &end
))
2906 if (PyTuple_Check(subobj
)) {
2908 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
2909 result
= _string_tailmatch(self
,
2910 PyTuple_GET_ITEM(subobj
, i
),
2920 result
= _string_tailmatch(self
, subobj
, start
, end
, -1);
2922 if (PyErr_ExceptionMatches(PyExc_TypeError
))
2923 PyErr_Format(PyExc_TypeError
, "startswith first arg must be str, "
2924 "unicode, or tuple, not %s", Py_TYPE(subobj
)->tp_name
);
2928 return PyBool_FromLong(result
);
2932 PyDoc_STRVAR(endswith__doc__
,
2933 "S.endswith(suffix[, start[, end]]) -> bool\n\
2935 Return True if S ends with the specified suffix, False otherwise.\n\
2936 With optional start, test S beginning at that position.\n\
2937 With optional end, stop comparing S at that position.\n\
2938 suffix can also be a tuple of strings to try.");
2941 string_endswith(PyStringObject
*self
, PyObject
*args
)
2943 Py_ssize_t start
= 0;
2944 Py_ssize_t end
= PY_SSIZE_T_MAX
;
2948 if (!stringlib_parse_args_finds("endswith", args
, &subobj
, &start
, &end
))
2950 if (PyTuple_Check(subobj
)) {
2952 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
2953 result
= _string_tailmatch(self
,
2954 PyTuple_GET_ITEM(subobj
, i
),
2964 result
= _string_tailmatch(self
, subobj
, start
, end
, +1);
2966 if (PyErr_ExceptionMatches(PyExc_TypeError
))
2967 PyErr_Format(PyExc_TypeError
, "endswith first arg must be str, "
2968 "unicode, or tuple, not %s", Py_TYPE(subobj
)->tp_name
);
2972 return PyBool_FromLong(result
);
2976 PyDoc_STRVAR(encode__doc__
,
2977 "S.encode([encoding[,errors]]) -> object\n\
2979 Encodes S using the codec registered for encoding. encoding defaults\n\
2980 to the default encoding. errors may be given to set a different error\n\
2981 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2982 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2983 'xmlcharrefreplace' as well as any other name registered with\n\
2984 codecs.register_error that is able to handle UnicodeEncodeErrors.");
2987 string_encode(PyStringObject
*self
, PyObject
*args
, PyObject
*kwargs
)
2989 static char *kwlist
[] = {"encoding", "errors", 0};
2990 char *encoding
= NULL
;
2991 char *errors
= NULL
;
2994 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "|ss:encode",
2995 kwlist
, &encoding
, &errors
))
2997 v
= PyString_AsEncodedObject((PyObject
*)self
, encoding
, errors
);
3000 if (!PyString_Check(v
) && !PyUnicode_Check(v
)) {
3001 PyErr_Format(PyExc_TypeError
,
3002 "encoder did not return a string/unicode object "
3004 Py_TYPE(v
)->tp_name
);
3015 PyDoc_STRVAR(decode__doc__
,
3016 "S.decode([encoding[,errors]]) -> object\n\
3018 Decodes S using the codec registered for encoding. encoding defaults\n\
3019 to the default encoding. errors may be given to set a different error\n\
3020 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3021 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3022 as well as any other name registered with codecs.register_error that is\n\
3023 able to handle UnicodeDecodeErrors.");
3026 string_decode(PyStringObject
*self
, PyObject
*args
, PyObject
*kwargs
)
3028 static char *kwlist
[] = {"encoding", "errors", 0};
3029 char *encoding
= NULL
;
3030 char *errors
= NULL
;
3033 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "|ss:decode",
3034 kwlist
, &encoding
, &errors
))
3036 v
= PyString_AsDecodedObject((PyObject
*)self
, encoding
, errors
);
3039 if (!PyString_Check(v
) && !PyUnicode_Check(v
)) {
3040 PyErr_Format(PyExc_TypeError
,
3041 "decoder did not return a string/unicode object "
3043 Py_TYPE(v
)->tp_name
);
3054 PyDoc_STRVAR(expandtabs__doc__
,
3055 "S.expandtabs([tabsize]) -> string\n\
3057 Return a copy of S where all tab characters are expanded using spaces.\n\
3058 If tabsize is not given, a tab size of 8 characters is assumed.");
3061 string_expandtabs(PyStringObject
*self
, PyObject
*args
)
3063 const char *e
, *p
, *qe
;
3065 Py_ssize_t i
, j
, incr
;
3069 if (!PyArg_ParseTuple(args
, "|i:expandtabs", &tabsize
))
3072 /* First pass: determine size of output string */
3073 i
= 0; /* chars up to and including most recent \n or \r */
3074 j
= 0; /* chars since most recent \n or \r (use in tab calculations) */
3075 e
= PyString_AS_STRING(self
) + PyString_GET_SIZE(self
); /* end of input */
3076 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
3079 incr
= tabsize
- (j
% tabsize
);
3080 if (j
> PY_SSIZE_T_MAX
- incr
)
3086 if (j
> PY_SSIZE_T_MAX
- 1)
3089 if (*p
== '\n' || *p
== '\r') {
3090 if (i
> PY_SSIZE_T_MAX
- j
)
3097 if (i
> PY_SSIZE_T_MAX
- j
)
3100 /* Second pass: create output string and fill it */
3101 u
= PyString_FromStringAndSize(NULL
, i
+ j
);
3105 j
= 0; /* same as in first pass */
3106 q
= PyString_AS_STRING(u
); /* next output char */
3107 qe
= PyString_AS_STRING(u
) + PyString_GET_SIZE(u
); /* end of output */
3109 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
3112 i
= tabsize
- (j
% tabsize
);
3126 if (*p
== '\n' || *p
== '\r')
3135 PyErr_SetString(PyExc_OverflowError
, "new string is too long");
3139 Py_LOCAL_INLINE(PyObject
*)
3140 pad(PyStringObject
*self
, Py_ssize_t left
, Py_ssize_t right
, char fill
)
3149 if (left
== 0 && right
== 0 && PyString_CheckExact(self
)) {
3151 return (PyObject
*)self
;
3154 u
= PyString_FromStringAndSize(NULL
,
3155 left
+ PyString_GET_SIZE(self
) + right
);
3158 memset(PyString_AS_STRING(u
), fill
, left
);
3159 Py_MEMCPY(PyString_AS_STRING(u
) + left
,
3160 PyString_AS_STRING(self
),
3161 PyString_GET_SIZE(self
));
3163 memset(PyString_AS_STRING(u
) + left
+ PyString_GET_SIZE(self
),
3170 PyDoc_STRVAR(ljust__doc__
,
3171 "S.ljust(width[, fillchar]) -> string\n"
3173 "Return S left-justified in a string of length width. Padding is\n"
3174 "done using the specified fill character (default is a space).");
3177 string_ljust(PyStringObject
*self
, PyObject
*args
)
3180 char fillchar
= ' ';
3182 if (!PyArg_ParseTuple(args
, "n|c:ljust", &width
, &fillchar
))
3185 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3187 return (PyObject
*) self
;
3190 return pad(self
, 0, width
- PyString_GET_SIZE(self
), fillchar
);
3194 PyDoc_STRVAR(rjust__doc__
,
3195 "S.rjust(width[, fillchar]) -> string\n"
3197 "Return S right-justified in a string of length width. Padding is\n"
3198 "done using the specified fill character (default is a space)");
3201 string_rjust(PyStringObject
*self
, PyObject
*args
)
3204 char fillchar
= ' ';
3206 if (!PyArg_ParseTuple(args
, "n|c:rjust", &width
, &fillchar
))
3209 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3211 return (PyObject
*) self
;
3214 return pad(self
, width
- PyString_GET_SIZE(self
), 0, fillchar
);
3218 PyDoc_STRVAR(center__doc__
,
3219 "S.center(width[, fillchar]) -> string\n"
3221 "Return S centered in a string of length width. Padding is\n"
3222 "done using the specified fill character (default is a space)");
3225 string_center(PyStringObject
*self
, PyObject
*args
)
3227 Py_ssize_t marg
, left
;
3229 char fillchar
= ' ';
3231 if (!PyArg_ParseTuple(args
, "n|c:center", &width
, &fillchar
))
3234 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3236 return (PyObject
*) self
;
3239 marg
= width
- PyString_GET_SIZE(self
);
3240 left
= marg
/ 2 + (marg
& width
& 1);
3242 return pad(self
, left
, marg
- left
, fillchar
);
3245 PyDoc_STRVAR(zfill__doc__
,
3246 "S.zfill(width) -> string\n"
3248 "Pad a numeric string S with zeros on the left, to fill a field\n"
3249 "of the specified width. The string S is never truncated.");
3252 string_zfill(PyStringObject
*self
, PyObject
*args
)
3259 if (!PyArg_ParseTuple(args
, "n:zfill", &width
))
3262 if (PyString_GET_SIZE(self
) >= width
) {
3263 if (PyString_CheckExact(self
)) {
3265 return (PyObject
*) self
;
3268 return PyString_FromStringAndSize(
3269 PyString_AS_STRING(self
),
3270 PyString_GET_SIZE(self
)
3274 fill
= width
- PyString_GET_SIZE(self
);
3276 s
= pad(self
, fill
, 0, '0');
3281 p
= PyString_AS_STRING(s
);
3282 if (p
[fill
] == '+' || p
[fill
] == '-') {
3283 /* move sign to beginning of string */
3288 return (PyObject
*) s
;
3291 PyDoc_STRVAR(isspace__doc__
,
3292 "S.isspace() -> bool\n\
3294 Return True if all characters in S are whitespace\n\
3295 and there is at least one character in S, False otherwise.");
3298 string_isspace(PyStringObject
*self
)
3300 register const unsigned char *p
3301 = (unsigned char *) PyString_AS_STRING(self
);
3302 register const unsigned char *e
;
3304 /* Shortcut for single character strings */
3305 if (PyString_GET_SIZE(self
) == 1 &&
3307 return PyBool_FromLong(1);
3309 /* Special case for empty strings */
3310 if (PyString_GET_SIZE(self
) == 0)
3311 return PyBool_FromLong(0);
3313 e
= p
+ PyString_GET_SIZE(self
);
3314 for (; p
< e
; p
++) {
3316 return PyBool_FromLong(0);
3318 return PyBool_FromLong(1);
3322 PyDoc_STRVAR(isalpha__doc__
,
3323 "S.isalpha() -> bool\n\
3325 Return True if all characters in S are alphabetic\n\
3326 and there is at least one character in S, False otherwise.");
3329 string_isalpha(PyStringObject
*self
)
3331 register const unsigned char *p
3332 = (unsigned char *) PyString_AS_STRING(self
);
3333 register const unsigned char *e
;
3335 /* Shortcut for single character strings */
3336 if (PyString_GET_SIZE(self
) == 1 &&
3338 return PyBool_FromLong(1);
3340 /* Special case for empty strings */
3341 if (PyString_GET_SIZE(self
) == 0)
3342 return PyBool_FromLong(0);
3344 e
= p
+ PyString_GET_SIZE(self
);
3345 for (; p
< e
; p
++) {
3347 return PyBool_FromLong(0);
3349 return PyBool_FromLong(1);
3353 PyDoc_STRVAR(isalnum__doc__
,
3354 "S.isalnum() -> bool\n\
3356 Return True if all characters in S are alphanumeric\n\
3357 and there is at least one character in S, False otherwise.");
3360 string_isalnum(PyStringObject
*self
)
3362 register const unsigned char *p
3363 = (unsigned char *) PyString_AS_STRING(self
);
3364 register const unsigned char *e
;
3366 /* Shortcut for single character strings */
3367 if (PyString_GET_SIZE(self
) == 1 &&
3369 return PyBool_FromLong(1);
3371 /* Special case for empty strings */
3372 if (PyString_GET_SIZE(self
) == 0)
3373 return PyBool_FromLong(0);
3375 e
= p
+ PyString_GET_SIZE(self
);
3376 for (; p
< e
; p
++) {
3378 return PyBool_FromLong(0);
3380 return PyBool_FromLong(1);
3384 PyDoc_STRVAR(isdigit__doc__
,
3385 "S.isdigit() -> bool\n\
3387 Return True if all characters in S are digits\n\
3388 and there is at least one character in S, False otherwise.");
3391 string_isdigit(PyStringObject
*self
)
3393 register const unsigned char *p
3394 = (unsigned char *) PyString_AS_STRING(self
);
3395 register const unsigned char *e
;
3397 /* Shortcut for single character strings */
3398 if (PyString_GET_SIZE(self
) == 1 &&
3400 return PyBool_FromLong(1);
3402 /* Special case for empty strings */
3403 if (PyString_GET_SIZE(self
) == 0)
3404 return PyBool_FromLong(0);
3406 e
= p
+ PyString_GET_SIZE(self
);
3407 for (; p
< e
; p
++) {
3409 return PyBool_FromLong(0);
3411 return PyBool_FromLong(1);
3415 PyDoc_STRVAR(islower__doc__
,
3416 "S.islower() -> bool\n\
3418 Return True if all cased characters in S are lowercase and there is\n\
3419 at least one cased character in S, False otherwise.");
3422 string_islower(PyStringObject
*self
)
3424 register const unsigned char *p
3425 = (unsigned char *) PyString_AS_STRING(self
);
3426 register const unsigned char *e
;
3429 /* Shortcut for single character strings */
3430 if (PyString_GET_SIZE(self
) == 1)
3431 return PyBool_FromLong(islower(*p
) != 0);
3433 /* Special case for empty strings */
3434 if (PyString_GET_SIZE(self
) == 0)
3435 return PyBool_FromLong(0);
3437 e
= p
+ PyString_GET_SIZE(self
);
3439 for (; p
< e
; p
++) {
3441 return PyBool_FromLong(0);
3442 else if (!cased
&& islower(*p
))
3445 return PyBool_FromLong(cased
);
3449 PyDoc_STRVAR(isupper__doc__
,
3450 "S.isupper() -> bool\n\
3452 Return True if all cased characters in S are uppercase and there is\n\
3453 at least one cased character in S, False otherwise.");
3456 string_isupper(PyStringObject
*self
)
3458 register const unsigned char *p
3459 = (unsigned char *) PyString_AS_STRING(self
);
3460 register const unsigned char *e
;
3463 /* Shortcut for single character strings */
3464 if (PyString_GET_SIZE(self
) == 1)
3465 return PyBool_FromLong(isupper(*p
) != 0);
3467 /* Special case for empty strings */
3468 if (PyString_GET_SIZE(self
) == 0)
3469 return PyBool_FromLong(0);
3471 e
= p
+ PyString_GET_SIZE(self
);
3473 for (; p
< e
; p
++) {
3475 return PyBool_FromLong(0);
3476 else if (!cased
&& isupper(*p
))
3479 return PyBool_FromLong(cased
);
3483 PyDoc_STRVAR(istitle__doc__
,
3484 "S.istitle() -> bool\n\
3486 Return True if S is a titlecased string and there is at least one\n\
3487 character in S, i.e. uppercase characters may only follow uncased\n\
3488 characters and lowercase characters only cased ones. Return False\n\
3492 string_istitle(PyStringObject
*self
, PyObject
*uncased
)
3494 register const unsigned char *p
3495 = (unsigned char *) PyString_AS_STRING(self
);
3496 register const unsigned char *e
;
3497 int cased
, previous_is_cased
;
3499 /* Shortcut for single character strings */
3500 if (PyString_GET_SIZE(self
) == 1)
3501 return PyBool_FromLong(isupper(*p
) != 0);
3503 /* Special case for empty strings */
3504 if (PyString_GET_SIZE(self
) == 0)
3505 return PyBool_FromLong(0);
3507 e
= p
+ PyString_GET_SIZE(self
);
3509 previous_is_cased
= 0;
3510 for (; p
< e
; p
++) {
3511 register const unsigned char ch
= *p
;
3514 if (previous_is_cased
)
3515 return PyBool_FromLong(0);
3516 previous_is_cased
= 1;
3519 else if (islower(ch
)) {
3520 if (!previous_is_cased
)
3521 return PyBool_FromLong(0);
3522 previous_is_cased
= 1;
3526 previous_is_cased
= 0;
3528 return PyBool_FromLong(cased
);
3532 PyDoc_STRVAR(splitlines__doc__
,
3533 "S.splitlines([keepends]) -> list of strings\n\
3535 Return a list of the lines in S, breaking at line boundaries.\n\
3536 Line breaks are not included in the resulting list unless keepends\n\
3537 is given and true.");
3540 string_splitlines(PyStringObject
*self
, PyObject
*args
)
3544 if (!PyArg_ParseTuple(args
, "|i:splitlines", &keepends
))
3547 return stringlib_splitlines(
3548 (PyObject
*) self
, PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
3553 PyDoc_STRVAR(sizeof__doc__
,
3554 "S.__sizeof__() -> size of S in memory, in bytes");
3557 string_sizeof(PyStringObject
*v
)
3560 res
= PyStringObject_SIZE
+ PyString_GET_SIZE(v
) * Py_TYPE(v
)->tp_itemsize
;
3561 return PyInt_FromSsize_t(res
);
3565 string_getnewargs(PyStringObject
*v
)
3567 return Py_BuildValue("(s#)", v
->ob_sval
, Py_SIZE(v
));
3571 #include "stringlib/string_format.h"
3573 PyDoc_STRVAR(format__doc__
,
3574 "S.format(*args, **kwargs) -> string\n\
3576 Return a formatted version of S, using substitutions from args and kwargs.\n\
3577 The substitutions are identified by braces ('{' and '}').");
3580 string__format__(PyObject
* self
, PyObject
* args
)
3582 PyObject
*format_spec
;
3583 PyObject
*result
= NULL
;
3584 PyObject
*tmp
= NULL
;
3586 /* If 2.x, convert format_spec to the same type as value */
3587 /* This is to allow things like u''.format('') */
3588 if (!PyArg_ParseTuple(args
, "O:__format__", &format_spec
))
3590 if (!(PyString_Check(format_spec
) || PyUnicode_Check(format_spec
))) {
3591 PyErr_Format(PyExc_TypeError
, "__format__ arg must be str "
3592 "or unicode, not %s", Py_TYPE(format_spec
)->tp_name
);
3595 tmp
= PyObject_Str(format_spec
);
3600 result
= _PyBytes_FormatAdvanced(self
,
3601 PyString_AS_STRING(format_spec
),
3602 PyString_GET_SIZE(format_spec
));
3608 PyDoc_STRVAR(p_format__doc__
,
3609 "S.__format__(format_spec) -> string\n\
3611 Return a formatted version of S as described by format_spec.");
3615 string_methods
[] = {
3616 /* Counterparts of the obsolete stropmodule functions; except
3617 string.maketrans(). */
3618 {"join", (PyCFunction
)string_join
, METH_O
, join__doc__
},
3619 {"split", (PyCFunction
)string_split
, METH_VARARGS
, split__doc__
},
3620 {"rsplit", (PyCFunction
)string_rsplit
, METH_VARARGS
, rsplit__doc__
},
3621 {"lower", (PyCFunction
)string_lower
, METH_NOARGS
, lower__doc__
},
3622 {"upper", (PyCFunction
)string_upper
, METH_NOARGS
, upper__doc__
},
3623 {"islower", (PyCFunction
)string_islower
, METH_NOARGS
, islower__doc__
},
3624 {"isupper", (PyCFunction
)string_isupper
, METH_NOARGS
, isupper__doc__
},
3625 {"isspace", (PyCFunction
)string_isspace
, METH_NOARGS
, isspace__doc__
},
3626 {"isdigit", (PyCFunction
)string_isdigit
, METH_NOARGS
, isdigit__doc__
},
3627 {"istitle", (PyCFunction
)string_istitle
, METH_NOARGS
, istitle__doc__
},
3628 {"isalpha", (PyCFunction
)string_isalpha
, METH_NOARGS
, isalpha__doc__
},
3629 {"isalnum", (PyCFunction
)string_isalnum
, METH_NOARGS
, isalnum__doc__
},
3630 {"capitalize", (PyCFunction
)string_capitalize
, METH_NOARGS
,
3632 {"count", (PyCFunction
)string_count
, METH_VARARGS
, count__doc__
},
3633 {"endswith", (PyCFunction
)string_endswith
, METH_VARARGS
,
3635 {"partition", (PyCFunction
)string_partition
, METH_O
, partition__doc__
},
3636 {"find", (PyCFunction
)string_find
, METH_VARARGS
, find__doc__
},
3637 {"index", (PyCFunction
)string_index
, METH_VARARGS
, index__doc__
},
3638 {"lstrip", (PyCFunction
)string_lstrip
, METH_VARARGS
, lstrip__doc__
},
3639 {"replace", (PyCFunction
)string_replace
, METH_VARARGS
, replace__doc__
},
3640 {"rfind", (PyCFunction
)string_rfind
, METH_VARARGS
, rfind__doc__
},
3641 {"rindex", (PyCFunction
)string_rindex
, METH_VARARGS
, rindex__doc__
},
3642 {"rstrip", (PyCFunction
)string_rstrip
, METH_VARARGS
, rstrip__doc__
},
3643 {"rpartition", (PyCFunction
)string_rpartition
, METH_O
,
3645 {"startswith", (PyCFunction
)string_startswith
, METH_VARARGS
,
3647 {"strip", (PyCFunction
)string_strip
, METH_VARARGS
, strip__doc__
},
3648 {"swapcase", (PyCFunction
)string_swapcase
, METH_NOARGS
,
3650 {"translate", (PyCFunction
)string_translate
, METH_VARARGS
,
3652 {"title", (PyCFunction
)string_title
, METH_NOARGS
, title__doc__
},
3653 {"ljust", (PyCFunction
)string_ljust
, METH_VARARGS
, ljust__doc__
},
3654 {"rjust", (PyCFunction
)string_rjust
, METH_VARARGS
, rjust__doc__
},
3655 {"center", (PyCFunction
)string_center
, METH_VARARGS
, center__doc__
},
3656 {"zfill", (PyCFunction
)string_zfill
, METH_VARARGS
, zfill__doc__
},
3657 {"format", (PyCFunction
) do_string_format
, METH_VARARGS
| METH_KEYWORDS
, format__doc__
},
3658 {"__format__", (PyCFunction
) string__format__
, METH_VARARGS
, p_format__doc__
},
3659 {"_formatter_field_name_split", (PyCFunction
) formatter_field_name_split
, METH_NOARGS
},
3660 {"_formatter_parser", (PyCFunction
) formatter_parser
, METH_NOARGS
},
3661 {"encode", (PyCFunction
)string_encode
, METH_VARARGS
| METH_KEYWORDS
, encode__doc__
},
3662 {"decode", (PyCFunction
)string_decode
, METH_VARARGS
| METH_KEYWORDS
, decode__doc__
},
3663 {"expandtabs", (PyCFunction
)string_expandtabs
, METH_VARARGS
,
3665 {"splitlines", (PyCFunction
)string_splitlines
, METH_VARARGS
,
3667 {"__sizeof__", (PyCFunction
)string_sizeof
, METH_NOARGS
,
3669 {"__getnewargs__", (PyCFunction
)string_getnewargs
, METH_NOARGS
},
3670 {NULL
, NULL
} /* sentinel */
3674 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
);
3677 string_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
3680 static char *kwlist
[] = {"object", 0};
3682 if (type
!= &PyString_Type
)
3683 return str_subtype_new(type
, args
, kwds
);
3684 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|O:str", kwlist
, &x
))
3687 return PyString_FromString("");
3688 return PyObject_Str(x
);
3692 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
3694 PyObject
*tmp
, *pnew
;
3697 assert(PyType_IsSubtype(type
, &PyString_Type
));
3698 tmp
= string_new(&PyString_Type
, args
, kwds
);
3701 assert(PyString_CheckExact(tmp
));
3702 n
= PyString_GET_SIZE(tmp
);
3703 pnew
= type
->tp_alloc(type
, n
);
3705 Py_MEMCPY(PyString_AS_STRING(pnew
), PyString_AS_STRING(tmp
), n
+1);
3706 ((PyStringObject
*)pnew
)->ob_shash
=
3707 ((PyStringObject
*)tmp
)->ob_shash
;
3708 ((PyStringObject
*)pnew
)->ob_sstate
= SSTATE_NOT_INTERNED
;
3715 basestring_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
3717 PyErr_SetString(PyExc_TypeError
,
3718 "The basestring type cannot be instantiated");
3723 string_mod(PyObject
*v
, PyObject
*w
)
3725 if (!PyString_Check(v
)) {
3726 Py_INCREF(Py_NotImplemented
);
3727 return Py_NotImplemented
;
3729 return PyString_Format(v
, w
);
3732 PyDoc_STRVAR(basestring_doc
,
3733 "Type basestring cannot be instantiated; it is the base for str and unicode.");
3735 static PyNumberMethods string_as_number
= {
3740 string_mod
, /*nb_remainder*/
3744 PyTypeObject PyBaseString_Type
= {
3745 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
3755 0, /* tp_as_number */
3756 0, /* tp_as_sequence */
3757 0, /* tp_as_mapping */
3761 0, /* tp_getattro */
3762 0, /* tp_setattro */
3763 0, /* tp_as_buffer */
3764 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /* tp_flags */
3765 basestring_doc
, /* tp_doc */
3766 0, /* tp_traverse */
3768 0, /* tp_richcompare */
3769 0, /* tp_weaklistoffset */
3771 0, /* tp_iternext */
3775 &PyBaseObject_Type
, /* tp_base */
3777 0, /* tp_descr_get */
3778 0, /* tp_descr_set */
3779 0, /* tp_dictoffset */
3782 basestring_new
, /* tp_new */
3786 PyDoc_STRVAR(string_doc
,
3787 "str(object) -> string\n\
3789 Return a nice string representation of the object.\n\
3790 If the argument is a string, the return value is the same object.");
3792 PyTypeObject PyString_Type
= {
3793 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
3795 PyStringObject_SIZE
,
3797 string_dealloc
, /* tp_dealloc */
3798 (printfunc
)string_print
, /* tp_print */
3802 string_repr
, /* tp_repr */
3803 &string_as_number
, /* tp_as_number */
3804 &string_as_sequence
, /* tp_as_sequence */
3805 &string_as_mapping
, /* tp_as_mapping */
3806 (hashfunc
)string_hash
, /* tp_hash */
3808 string_str
, /* tp_str */
3809 PyObject_GenericGetAttr
, /* tp_getattro */
3810 0, /* tp_setattro */
3811 &string_as_buffer
, /* tp_as_buffer */
3812 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_CHECKTYPES
|
3813 Py_TPFLAGS_BASETYPE
| Py_TPFLAGS_STRING_SUBCLASS
|
3814 Py_TPFLAGS_HAVE_NEWBUFFER
, /* tp_flags */
3815 string_doc
, /* tp_doc */
3816 0, /* tp_traverse */
3818 (richcmpfunc
)string_richcompare
, /* tp_richcompare */
3819 0, /* tp_weaklistoffset */
3821 0, /* tp_iternext */
3822 string_methods
, /* tp_methods */
3825 &PyBaseString_Type
, /* tp_base */
3827 0, /* tp_descr_get */
3828 0, /* tp_descr_set */
3829 0, /* tp_dictoffset */
3832 string_new
, /* tp_new */
3833 PyObject_Del
, /* tp_free */
3837 PyString_Concat(register PyObject
**pv
, register PyObject
*w
)
3839 register PyObject
*v
;
3842 if (w
== NULL
|| !PyString_Check(*pv
)) {
3847 v
= string_concat((PyStringObject
*) *pv
, w
);
3853 PyString_ConcatAndDel(register PyObject
**pv
, register PyObject
*w
)
3855 PyString_Concat(pv
, w
);
3860 /* The following function breaks the notion that strings are immutable:
3861 it changes the size of a string. We get away with this only if there
3862 is only one module referencing the object. You can also think of it
3863 as creating a new string object and destroying the old one, only
3864 more efficiently. In any case, don't use this if the string may
3865 already be known to some other part of the code...
3866 Note that if there's not enough memory to resize the string, the original
3867 string object at *pv is deallocated, *pv is set to NULL, an "out of
3868 memory" exception is set, and -1 is returned. Else (on success) 0 is
3869 returned, and the value in *pv may or may not be the same as on input.
3870 As always, an extra byte is allocated for a trailing \0 byte (newsize
3871 does *not* include that), and a trailing \0 byte is stored.
3875 _PyString_Resize(PyObject
**pv
, Py_ssize_t newsize
)
3877 register PyObject
*v
;
3878 register PyStringObject
*sv
;
3880 if (!PyString_Check(v
) || Py_REFCNT(v
) != 1 || newsize
< 0 ||
3881 PyString_CHECK_INTERNED(v
)) {
3884 PyErr_BadInternalCall();
3887 /* XXX UNREF/NEWREF interface should be more symmetrical */
3889 _Py_ForgetReference(v
);
3891 PyObject_REALLOC((char *)v
, PyStringObject_SIZE
+ newsize
);
3897 _Py_NewReference(*pv
);
3898 sv
= (PyStringObject
*) *pv
;
3899 Py_SIZE(sv
) = newsize
;
3900 sv
->ob_sval
[newsize
] = '\0';
3901 sv
->ob_shash
= -1; /* invalidate cached hash value */
3905 /* Helpers for formatstring */
3907 Py_LOCAL_INLINE(PyObject
*)
3908 getnextarg(PyObject
*args
, Py_ssize_t arglen
, Py_ssize_t
*p_argidx
)
3910 Py_ssize_t argidx
= *p_argidx
;
3911 if (argidx
< arglen
) {
3916 return PyTuple_GetItem(args
, argidx
);
3918 PyErr_SetString(PyExc_TypeError
,
3919 "not enough arguments for format string");
3930 #define F_LJUST (1<<0)
3931 #define F_SIGN (1<<1)
3932 #define F_BLANK (1<<2)
3933 #define F_ALT (1<<3)
3934 #define F_ZERO (1<<4)
3936 /* Returns a new reference to a PyString object, or NULL on failure. */
3939 formatfloat(PyObject
*v
, int flags
, int prec
, int type
)
3945 x
= PyFloat_AsDouble(v
);
3946 if (x
== -1.0 && PyErr_Occurred()) {
3947 PyErr_Format(PyExc_TypeError
, "float argument required, "
3948 "not %.200s", Py_TYPE(v
)->tp_name
);
3955 p
= PyOS_double_to_string(x
, type
, prec
,
3956 (flags
& F_ALT
) ? Py_DTSF_ALT
: 0, NULL
);
3960 result
= PyString_FromStringAndSize(p
, strlen(p
));
3965 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3966 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3967 * Python's regular ints.
3968 * Return value: a new PyString*, or NULL if error.
3969 * . *pbuf is set to point into it,
3970 * *plen set to the # of chars following that.
3971 * Caller must decref it when done using pbuf.
3972 * The string starting at *pbuf is of the form
3973 * "-"? ("0x" | "0X")? digit+
3974 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3975 * set in flags. The case of hex digits will be correct,
3976 * There will be at least prec digits, zero-filled on the left if
3977 * necessary to get that many.
3978 * val object to be converted
3979 * flags bitmask of format flags; only F_ALT is looked at
3980 * prec minimum number of digits; 0-fill on left if needed
3981 * type a character in [duoxX]; u acts the same as d
3983 * CAUTION: o, x and X conversions on regular ints can never
3984 * produce a '-' sign, but can for Python's unbounded ints.
3987 _PyString_FormatLong(PyObject
*val
, int flags
, int prec
, int type
,
3988 char **pbuf
, int *plen
)
3990 PyObject
*result
= NULL
;
3993 int sign
; /* 1 if '-', else 0 */
3994 int len
; /* number of characters */
3996 int numdigits
; /* len == numnondigits + numdigits */
3997 int numnondigits
= 0;
4002 result
= Py_TYPE(val
)->tp_str(val
);
4005 result
= Py_TYPE(val
)->tp_as_number
->nb_oct(val
);
4010 result
= Py_TYPE(val
)->tp_as_number
->nb_hex(val
);
4013 assert(!"'type' not in [duoxX]");
4018 buf
= PyString_AsString(result
);
4024 /* To modify the string in-place, there can only be one reference. */
4025 if (Py_REFCNT(result
) != 1) {
4026 PyErr_BadInternalCall();
4029 llen
= PyString_Size(result
);
4030 if (llen
> INT_MAX
) {
4031 PyErr_SetString(PyExc_ValueError
, "string too large in _PyString_FormatLong");
4035 if (buf
[len
-1] == 'L') {
4039 sign
= buf
[0] == '-';
4040 numnondigits
+= sign
;
4041 numdigits
= len
- numnondigits
;
4042 assert(numdigits
> 0);
4044 /* Get rid of base marker unless F_ALT */
4045 if ((flags
& F_ALT
) == 0) {
4046 /* Need to skip 0x, 0X or 0. */
4050 assert(buf
[sign
] == '0');
4051 /* If 0 is only digit, leave it alone. */
4052 if (numdigits
> 1) {
4059 assert(buf
[sign
] == '0');
4060 assert(buf
[sign
+ 1] == 'x');
4071 assert(len
== numnondigits
+ numdigits
);
4072 assert(numdigits
> 0);
4075 /* Fill with leading zeroes to meet minimum width. */
4076 if (prec
> numdigits
) {
4077 PyObject
*r1
= PyString_FromStringAndSize(NULL
,
4078 numnondigits
+ prec
);
4084 b1
= PyString_AS_STRING(r1
);
4085 for (i
= 0; i
< numnondigits
; ++i
)
4087 for (i
= 0; i
< prec
- numdigits
; i
++)
4089 for (i
= 0; i
< numdigits
; i
++)
4094 buf
= PyString_AS_STRING(result
);
4095 len
= numnondigits
+ prec
;
4098 /* Fix up case for hex conversions. */
4100 /* Need to convert all lower case letters to upper case.
4101 and need to convert 0x to 0X (and -0x to -0X). */
4102 for (i
= 0; i
< len
; i
++)
4103 if (buf
[i
] >= 'a' && buf
[i
] <= 'x')
4111 Py_LOCAL_INLINE(int)
4112 formatint(char *buf
, size_t buflen
, int flags
,
4113 int prec
, int type
, PyObject
*v
)
4115 /* fmt = '%#.' + `prec` + 'l' + `type`
4116 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4118 char fmt
[64]; /* plenty big enough! */
4122 x
= PyInt_AsLong(v
);
4123 if (x
== -1 && PyErr_Occurred()) {
4124 PyErr_Format(PyExc_TypeError
, "int argument required, not %.200s",
4125 Py_TYPE(v
)->tp_name
);
4128 if (x
< 0 && type
== 'u') {
4131 if (x
< 0 && (type
== 'x' || type
== 'X' || type
== 'o'))
4138 if ((flags
& F_ALT
) &&
4139 (type
== 'x' || type
== 'X')) {
4140 /* When converting under %#x or %#X, there are a number
4141 * of issues that cause pain:
4142 * - when 0 is being converted, the C standard leaves off
4143 * the '0x' or '0X', which is inconsistent with other
4144 * %#x/%#X conversions and inconsistent with Python's
4146 * - there are platforms that violate the standard and
4147 * convert 0 with the '0x' or '0X'
4148 * (Metrowerks, Compaq Tru64)
4149 * - there are platforms that give '0x' when converting
4150 * under %#X, but convert 0 in accordance with the
4151 * standard (OS/2 EMX)
4153 * We can achieve the desired consistency by inserting our
4154 * own '0x' or '0X' prefix, and substituting %x/%X in place
4157 * Note that this is the same approach as used in
4158 * formatint() in unicodeobject.c
4160 PyOS_snprintf(fmt
, sizeof(fmt
), "%s0%c%%.%dl%c",
4161 sign
, type
, prec
, type
);
4164 PyOS_snprintf(fmt
, sizeof(fmt
), "%s%%%s.%dl%c",
4165 sign
, (flags
&F_ALT
) ? "#" : "",
4169 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4170 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4172 if (buflen
<= 14 || buflen
<= (size_t)3 + (size_t)prec
) {
4173 PyErr_SetString(PyExc_OverflowError
,
4174 "formatted integer is too long (precision too large?)");
4178 PyOS_snprintf(buf
, buflen
, fmt
, -x
);
4180 PyOS_snprintf(buf
, buflen
, fmt
, x
);
4181 return (int)strlen(buf
);
4184 Py_LOCAL_INLINE(int)
4185 formatchar(char *buf
, size_t buflen
, PyObject
*v
)
4187 /* presume that the buffer is at least 2 characters long */
4188 if (PyString_Check(v
)) {
4189 if (!PyArg_Parse(v
, "c;%c requires int or char", &buf
[0]))
4193 if (!PyArg_Parse(v
, "b;%c requires int or char", &buf
[0]))
4200 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4202 FORMATBUFLEN is the length of the buffer in which the ints &
4203 chars are formatted. XXX This is a magic number. Each formatting
4204 routine does bounds checking to ensure no overflow, but a better
4205 solution may be to malloc a buffer of appropriate size for each
4206 format. For now, the current solution is sufficient.
4208 #define FORMATBUFLEN (size_t)120
4211 PyString_Format(PyObject
*format
, PyObject
*args
)
4214 Py_ssize_t arglen
, argidx
;
4215 Py_ssize_t reslen
, rescnt
, fmtcnt
;
4217 PyObject
*result
, *orig_args
;
4218 #ifdef Py_USING_UNICODE
4221 PyObject
*dict
= NULL
;
4222 if (format
== NULL
|| !PyString_Check(format
) || args
== NULL
) {
4223 PyErr_BadInternalCall();
4227 fmt
= PyString_AS_STRING(format
);
4228 fmtcnt
= PyString_GET_SIZE(format
);
4229 reslen
= rescnt
= fmtcnt
+ 100;
4230 result
= PyString_FromStringAndSize((char *)NULL
, reslen
);
4233 res
= PyString_AsString(result
);
4234 if (PyTuple_Check(args
)) {
4235 arglen
= PyTuple_GET_SIZE(args
);
4242 if (Py_TYPE(args
)->tp_as_mapping
&& !PyTuple_Check(args
) &&
4243 !PyObject_TypeCheck(args
, &PyBaseString_Type
))
4245 while (--fmtcnt
>= 0) {
4248 rescnt
= fmtcnt
+ 100;
4250 if (_PyString_Resize(&result
, reslen
))
4252 res
= PyString_AS_STRING(result
)
4259 /* Got a format specifier */
4261 Py_ssize_t width
= -1;
4267 PyObject
*temp
= NULL
;
4271 char formatbuf
[FORMATBUFLEN
];
4272 /* For format{int,char}() */
4273 #ifdef Py_USING_UNICODE
4274 char *fmt_start
= fmt
;
4275 Py_ssize_t argidx_start
= argidx
;
4286 PyErr_SetString(PyExc_TypeError
,
4287 "format requires a mapping");
4293 /* Skip over balanced parentheses */
4294 while (pcount
> 0 && --fmtcnt
>= 0) {
4297 else if (*fmt
== '(')
4301 keylen
= fmt
- keystart
- 1;
4302 if (fmtcnt
< 0 || pcount
> 0) {
4303 PyErr_SetString(PyExc_ValueError
,
4304 "incomplete format key");
4307 key
= PyString_FromStringAndSize(keystart
,
4315 args
= PyObject_GetItem(dict
, key
);
4324 while (--fmtcnt
>= 0) {
4325 switch (c
= *fmt
++) {
4326 case '-': flags
|= F_LJUST
; continue;
4327 case '+': flags
|= F_SIGN
; continue;
4328 case ' ': flags
|= F_BLANK
; continue;
4329 case '#': flags
|= F_ALT
; continue;
4330 case '0': flags
|= F_ZERO
; continue;
4335 v
= getnextarg(args
, arglen
, &argidx
);
4338 if (!PyInt_Check(v
)) {
4339 PyErr_SetString(PyExc_TypeError
,
4343 width
= PyInt_AsLong(v
);
4351 else if (c
>= 0 && isdigit(c
)) {
4353 while (--fmtcnt
>= 0) {
4354 c
= Py_CHARMASK(*fmt
++);
4357 if ((width
*10) / 10 != width
) {
4363 width
= width
*10 + (c
- '0');
4371 v
= getnextarg(args
, arglen
, &argidx
);
4374 if (!PyInt_Check(v
)) {
4380 prec
= PyInt_AsLong(v
);
4386 else if (c
>= 0 && isdigit(c
)) {
4388 while (--fmtcnt
>= 0) {
4389 c
= Py_CHARMASK(*fmt
++);
4392 if ((prec
*10) / 10 != prec
) {
4398 prec
= prec
*10 + (c
- '0');
4403 if (c
== 'h' || c
== 'l' || c
== 'L') {
4409 PyErr_SetString(PyExc_ValueError
,
4410 "incomplete format");
4414 v
= getnextarg(args
, arglen
, &argidx
);
4426 #ifdef Py_USING_UNICODE
4427 if (PyUnicode_Check(v
)) {
4429 argidx
= argidx_start
;
4433 temp
= _PyObject_Str(v
);
4434 #ifdef Py_USING_UNICODE
4435 if (temp
!= NULL
&& PyUnicode_Check(temp
)) {
4438 argidx
= argidx_start
;
4445 temp
= PyObject_Repr(v
);
4448 if (!PyString_Check(temp
)) {
4449 PyErr_SetString(PyExc_TypeError
,
4450 "%s argument has non-string str()");
4454 pbuf
= PyString_AS_STRING(temp
);
4455 len
= PyString_GET_SIZE(temp
);
4456 if (prec
>= 0 && len
> prec
)
4468 if (PyNumber_Check(v
)) {
4469 PyObject
*iobj
=NULL
;
4471 if (PyInt_Check(v
) || (PyLong_Check(v
))) {
4476 iobj
= PyNumber_Int(v
);
4477 if (iobj
==NULL
) iobj
= PyNumber_Long(v
);
4480 if (PyInt_Check(iobj
)) {
4483 len
= formatint(pbuf
,
4485 flags
, prec
, c
, iobj
);
4491 else if (PyLong_Check(iobj
)) {
4495 temp
= _PyString_FormatLong(iobj
, flags
,
4496 prec
, c
, &pbuf
, &ilen
);
4509 PyErr_Format(PyExc_TypeError
,
4510 "%%%c format: a number is required, "
4511 "not %.200s", c
, Py_TYPE(v
)->tp_name
);
4523 temp
= formatfloat(v
, flags
, prec
, c
);
4526 pbuf
= PyString_AS_STRING(temp
);
4527 len
= PyString_GET_SIZE(temp
);
4533 #ifdef Py_USING_UNICODE
4534 if (PyUnicode_Check(v
)) {
4536 argidx
= argidx_start
;
4541 len
= formatchar(pbuf
, sizeof(formatbuf
), v
);
4546 PyErr_Format(PyExc_ValueError
,
4547 "unsupported format character '%c' (0x%x) "
4550 (Py_ssize_t
)(fmt
- 1 -
4551 PyString_AsString(format
)));
4555 if (*pbuf
== '-' || *pbuf
== '+') {
4559 else if (flags
& F_SIGN
)
4561 else if (flags
& F_BLANK
)
4568 if (rescnt
- (sign
!= 0) < width
) {
4570 rescnt
= width
+ fmtcnt
+ 100;
4575 return PyErr_NoMemory();
4577 if (_PyString_Resize(&result
, reslen
)) {
4581 res
= PyString_AS_STRING(result
)
4591 if ((flags
& F_ALT
) && (c
== 'x' || c
== 'X')) {
4592 assert(pbuf
[0] == '0');
4593 assert(pbuf
[1] == c
);
4604 if (width
> len
&& !(flags
& F_LJUST
)) {
4608 } while (--width
> len
);
4613 if ((flags
& F_ALT
) &&
4614 (c
== 'x' || c
== 'X')) {
4615 assert(pbuf
[0] == '0');
4616 assert(pbuf
[1] == c
);
4621 Py_MEMCPY(res
, pbuf
, len
);
4624 while (--width
>= len
) {
4628 if (dict
&& (argidx
< arglen
) && c
!= '%') {
4629 PyErr_SetString(PyExc_TypeError
,
4630 "not all arguments converted during string formatting");
4637 if (argidx
< arglen
&& !dict
) {
4638 PyErr_SetString(PyExc_TypeError
,
4639 "not all arguments converted during string formatting");
4645 if (_PyString_Resize(&result
, reslen
- rescnt
))
4649 #ifdef Py_USING_UNICODE
4655 /* Fiddle args right (remove the first argidx arguments) */
4656 if (PyTuple_Check(orig_args
) && argidx
> 0) {
4658 Py_ssize_t n
= PyTuple_GET_SIZE(orig_args
) - argidx
;
4663 PyObject
*w
= PyTuple_GET_ITEM(orig_args
, n
+ argidx
);
4665 PyTuple_SET_ITEM(v
, n
, w
);
4669 Py_INCREF(orig_args
);
4673 /* Take what we have of the result and let the Unicode formatting
4674 function format the rest of the input. */
4675 rescnt
= res
- PyString_AS_STRING(result
);
4676 if (_PyString_Resize(&result
, rescnt
))
4678 fmtcnt
= PyString_GET_SIZE(format
) - \
4679 (fmt
- PyString_AS_STRING(format
));
4680 format
= PyUnicode_Decode(fmt
, fmtcnt
, NULL
, NULL
);
4683 v
= PyUnicode_Format(format
, args
);
4687 /* Paste what we have (result) to what the Unicode formatting
4688 function returned (v) and return the result (or error) */
4689 w
= PyUnicode_Concat(result
, v
);
4694 #endif /* Py_USING_UNICODE */
/* PyString_InternInPlace(p): intern the string object that *p refers to,
   using the file-level `interned` dictionary.

   NOTE(review): this listing is a lossy extract -- the return type, braces,
   the bodies of several `if` statements and some statements in between are
   not visible here.  The comments below describe only what is shown; verify
   the hidden parts against the full file before changing anything. */
4705 PyString_InternInPlace(PyObject
**p
)
4707 register PyStringObject
*s
= (PyStringObject
*)(*p
);
/* A NULL or non-string *p is treated as a fatal programming error. */
4709 if (s
== NULL
|| !PyString_Check(s
))
4710 Py_FatalError("PyString_InternInPlace: strings only please!");
4711 /* If it's a string subclass, we don't really know what putting
4712 it in the interned dict might do. */
4713 if (!PyString_CheckExact(s
))
/* Guard for strings whose interned state is already set; the guarded
   statement is not visible in this extract (presumably an early return --
   TODO confirm). */
4715 if (PyString_CHECK_INTERNED(s
))
/* The `interned` dictionary is created lazily on first use.  If creation
   fails, the pending exception is cleared. */
4717 if (interned
== NULL
) {
4718 interned
= PyDict_New();
4719 if (interned
== NULL
) {
4720 PyErr_Clear(); /* Don't leave an exception */
/* Look s up in the dictionary.  The statements that act on `t` are not
   visible in this extract. */
4724 t
= PyDict_GetItem(interned
, (PyObject
*)s
);
/* Insert s as both key and value.  The body of the failure branch (< 0) is
   not visible in this extract. */
4732 if (PyDict_SetItem(interned
, (PyObject
*)s
, (PyObject
*)s
) < 0) {
4736 /* The two references in interned are not counted by refcnt.
4737 The string deallocator will take care of this */
/* Finally mark s as interned (mortal). */
4739 PyString_CHECK_INTERNED(s
) = SSTATE_INTERNED_MORTAL
;
/* PyString_InternImmortal(p): intern *p via PyString_InternInPlace() and
   then force its interned state to SSTATE_INTERNED_IMMORTAL if it is not
   already.

   NOTE(review): lossy extract -- the return type, braces and any statement
   following the state assignment inside the `if` are not visible here
   (presumably a reference-count adjustment accompanies the state change --
   TODO confirm against the full file). */
4743 PyString_InternImmortal(PyObject
**p
)
4745 PyString_InternInPlace(p
);
/* Only touch the state when it is not already immortal. */
4746 if (PyString_CHECK_INTERNED(*p
) != SSTATE_INTERNED_IMMORTAL
) {
4747 PyString_CHECK_INTERNED(*p
) = SSTATE_INTERNED_IMMORTAL
;
/* PyString_InternFromString(cp): build a string object from the C string
   `cp` with PyString_FromString() and intern it with
   PyString_InternInPlace().

   NOTE(review): lossy extract -- the return type, any check of `s` between
   the two calls, and the return statement are not visible here.
   PyString_InternInPlace() calls Py_FatalError() on a NULL object, so
   confirm that a failed PyString_FromString() is handled before the
   intern call. */
4754 PyString_InternFromString(const char *cp
)
4756 PyObject
*s
= PyString_FromString(cp
);
/* &s is passed so the callee works on this function's local reference. */
4759 PyString_InternInPlace(&s
);
/* Cleanup of the file-level string caches: drop the reference held by every
   slot of the `characters` array (UCHAR_MAX + 1 entries) and reset the slot
   to NULL, then drop the reference held by `nullstring`.

   NOTE(review): lossy extract -- the header of the enclosing function, the
   declaration of `i`, the closing braces and anything following the final
   Py_XDECREF are not visible here. */
4767 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++) {
/* Py_XDECREF is used, so empty (NULL) slots are tolerated. */
4768 Py_XDECREF(characters
[i
]);
4769 characters
[i
] = NULL
;
4771 Py_XDECREF(nullstring
);
/* _Py_ReleaseInternedStrings(): walk every key of the `interned` dictionary,
   total up the sizes of mortal and immortal interned strings, reset each
   string's state to SSTATE_NOT_INTERNED, report the counts on stderr, and
   finally clear and release the dictionary.

   NOTE(review): lossy extract -- the declarations of keys/s/i/n, braces,
   `break` statements, the early-return bodies, any per-case statements other
   than the size accumulation, and whatever follows the final Py_DECREF are
   not visible here.  The order of statements inside the switch matters;
   check the full file before editing. */
4775 void _Py_ReleaseInternedStrings(void)
4780 Py_ssize_t immortal_size
= 0, mortal_size
= 0;
/* Guard: no dictionary, or something that is not a dict. */
4782 if (interned
== NULL
|| !PyDict_Check(interned
))
4784 keys
= PyDict_Keys(interned
);
/* Guard: the key list could not be produced. */
4785 if (keys
== NULL
|| !PyList_Check(keys
)) {
4790 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4791 detector, interned strings are not forcibly deallocated; rather, we
4792 give them their stolen references back, and then clear and DECREF
4793 the interned dict. */
4795 n
= PyList_GET_SIZE(keys
);
4796 fprintf(stderr
, "releasing %" PY_FORMAT_SIZE_T
"d interned strings\n",
/* Visit every key; each is cast to PyStringObject and dispatched on its
   interned state. */
4798 for (i
= 0; i
< n
; i
++) {
4799 s
= (PyStringObject
*) PyList_GET_ITEM(keys
, i
);
4800 switch (s
->ob_sstate
) {
4801 case SSTATE_NOT_INTERNED
:
4802 /* XXX Shouldn't happen */
4804 case SSTATE_INTERNED_IMMORTAL
:
4806 immortal_size
+= Py_SIZE(s
);
4808 case SSTATE_INTERNED_MORTAL
:
4810 mortal_size
+= Py_SIZE(s
);
/* Presumably the default case of the switch (its label is not visible):
   an unknown state value is fatal. */
4813 Py_FatalError("Inconsistent interned string state.");
/* After accounting, the string is marked as no longer interned. */
4815 s
->ob_sstate
= SSTATE_NOT_INTERNED
;
4817 fprintf(stderr
, "total size of all interned strings: "
4818 "%" PY_FORMAT_SIZE_T
"d/%" PY_FORMAT_SIZE_T
"d "
4819 "mortal/immortal\n", mortal_size
, immortal_size
);
/* Empty the dictionary, then drop this file's reference to it. */
4821 PyDict_Clear(interned
);
4822 Py_DECREF(interned
);