]>
Commit | Line | Data |
---|---|---|
c8042e10 DM |
1 | #ifndef Py_UNICODEOBJECT_H\r |
2 | #define Py_UNICODEOBJECT_H\r | |
3 | \r | |
4 | #include <stdarg.h>\r | |
5 | \r | |
6 | /*\r | |
7 | \r | |
8 | Unicode implementation based on original code by Fredrik Lundh,\r | |
9 | modified by Marc-Andre Lemburg (mal@lemburg.com) according to the\r | |
10 | Unicode Integration Proposal (see file Misc/unicode.txt).\r | |
11 | \r | |
12 | Copyright (c) Corporation for National Research Initiatives.\r | |
13 | \r | |
14 | \r | |
15 | Original header:\r | |
16 | --------------------------------------------------------------------\r | |
17 | \r | |
18 | * Yet another Unicode string type for Python. This type supports the\r | |
19 | * 16-bit Basic Multilingual Plane (BMP) only.\r | |
20 | *\r | |
21 | * Written by Fredrik Lundh, January 1999.\r | |
22 | *\r | |
23 | * Copyright (c) 1999 by Secret Labs AB.\r | |
24 | * Copyright (c) 1999 by Fredrik Lundh.\r | |
25 | *\r | |
26 | * fredrik@pythonware.com\r | |
27 | * http://www.pythonware.com\r | |
28 | *\r | |
29 | * --------------------------------------------------------------------\r | |
30 | * This Unicode String Type is\r | |
31 | *\r | |
32 | * Copyright (c) 1999 by Secret Labs AB\r | |
33 | * Copyright (c) 1999 by Fredrik Lundh\r | |
34 | *\r | |
35 | * By obtaining, using, and/or copying this software and/or its\r | |
36 | * associated documentation, you agree that you have read, understood,\r | |
37 | * and will comply with the following terms and conditions:\r | |
38 | *\r | |
39 | * Permission to use, copy, modify, and distribute this software and its\r | |
40 | * associated documentation for any purpose and without fee is hereby\r | |
41 | * granted, provided that the above copyright notice appears in all\r | |
42 | * copies, and that both that copyright notice and this permission notice\r | |
43 | * appear in supporting documentation, and that the name of Secret Labs\r | |
44 | * AB or the author not be used in advertising or publicity pertaining to\r | |
45 | * distribution of the software without specific, written prior\r | |
46 | * permission.\r | |
47 | *\r | |
48 | * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO\r | |
49 | * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND\r | |
50 | * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR\r | |
51 | * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES\r | |
52 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN\r | |
53 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT\r | |
54 | * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.\r | |
55 | * -------------------------------------------------------------------- */\r | |
56 | \r | |
57 | #include <ctype.h>\r | |
58 | \r | |
59 | /* === Internal API ======================================================= */\r | |
60 | \r | |
61 | /* --- Internal Unicode Format -------------------------------------------- */\r | |
62 | \r | |
63 | #ifndef Py_USING_UNICODE\r | |
64 | \r | |
65 | #define PyUnicode_Check(op) 0\r | |
66 | #define PyUnicode_CheckExact(op) 0\r | |
67 | \r | |
68 | #else\r | |
69 | \r | |
70 | /* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is\r | |
71 | properly set, but the default rules below don't set it. I'll\r | |
72 | sort this out some other day -- fredrik@pythonware.com */\r | |
73 | \r | |
74 | #ifndef Py_UNICODE_SIZE\r | |
75 | #error Must define Py_UNICODE_SIZE\r | |
76 | #endif\r | |
77 | \r | |
78 | /* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode\r | |
79 | strings are stored as UCS-2 (with limited support for UTF-16) */\r | |
80 | \r | |
81 | #if Py_UNICODE_SIZE >= 4\r | |
82 | #define Py_UNICODE_WIDE\r | |
83 | #endif\r | |
84 | \r | |
85 | /* Set these flags if the platform has "wchar.h", "wctype.h" and the\r | |
86 | wchar_t type is a 16-bit unsigned type */\r | |
87 | /* #define HAVE_WCHAR_H */\r | |
88 | /* #define HAVE_USABLE_WCHAR_T */\r | |
89 | \r | |
90 | /* Defaults for various platforms */\r | |
91 | #ifndef PY_UNICODE_TYPE\r | |
92 | \r | |
93 | /* Windows has a usable wchar_t type (unless we're using UCS-4) */\r | |
94 | # if defined(MS_WIN32) && Py_UNICODE_SIZE == 2\r | |
95 | # define HAVE_USABLE_WCHAR_T\r | |
96 | # define PY_UNICODE_TYPE wchar_t\r | |
97 | # endif\r | |
98 | \r | |
99 | # if defined(Py_UNICODE_WIDE)\r | |
100 | # define PY_UNICODE_TYPE Py_UCS4\r | |
101 | # endif\r | |
102 | \r | |
103 | #endif\r | |
104 | \r | |
105 | /* If the compiler provides a wchar_t type we try to support it\r | |
106 | through the interface functions PyUnicode_FromWideChar() and\r | |
107 | PyUnicode_AsWideChar(). */\r | |
108 | \r | |
109 | #ifdef HAVE_USABLE_WCHAR_T\r | |
110 | # ifndef HAVE_WCHAR_H\r | |
111 | # define HAVE_WCHAR_H\r | |
112 | # endif\r | |
113 | #endif\r | |
114 | \r | |
115 | #ifdef HAVE_WCHAR_H\r | |
116 | /* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */\r | |
117 | # ifdef _HAVE_BSDI\r | |
118 | # include <time.h>\r | |
119 | # endif\r | |
120 | # include <wchar.h>\r | |
121 | #endif\r | |
122 | \r | |
123 | /*\r | |
124 | * Use this typedef when you need to represent a UTF-16 surrogate pair\r | |
125 | * as a single unsigned integer.\r | |
126 | */\r | |
127 | #if SIZEOF_INT >= 4\r | |
128 | typedef unsigned int Py_UCS4;\r | |
129 | #elif SIZEOF_LONG >= 4\r | |
130 | typedef unsigned long Py_UCS4;\r | |
131 | #endif\r | |
132 | \r | |
133 | /* Py_UNICODE is the native Unicode storage format (code unit) used by\r | |
134 | Python and represents a single Unicode element in the Unicode\r | |
135 | type. */\r | |
136 | \r | |
137 | typedef PY_UNICODE_TYPE Py_UNICODE;\r | |
138 | \r | |
139 | /* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */\r | |
140 | \r | |
141 | /* Unicode API names are mangled to ensure that UCS-2 and UCS-4 builds\r | |
142 | produce different external names and thus cause import errors when\r | |
143 | Python interpreters and extensions compiled with different Unicode\r | |
144 | width assumptions are combined. */\r | |
145 | \r | |
146 | #ifndef Py_UNICODE_WIDE\r | |
147 | \r | |
148 | # define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString\r | |
149 | # define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString\r | |
150 | # define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject\r | |
151 | # define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString\r | |
152 | # define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String\r | |
153 | # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString\r | |
154 | # define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String\r | |
155 | # define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String\r | |
156 | # define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String\r | |
157 | # define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode\r | |
158 | # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString\r | |
159 | # define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar\r | |
160 | # define PyUnicode_ClearFreeList PyUnicodeUCS2_ClearFreelist\r | |
161 | # define PyUnicode_Compare PyUnicodeUCS2_Compare\r | |
162 | # define PyUnicode_Concat PyUnicodeUCS2_Concat\r | |
163 | # define PyUnicode_Contains PyUnicodeUCS2_Contains\r | |
164 | # define PyUnicode_Count PyUnicodeUCS2_Count\r | |
165 | # define PyUnicode_Decode PyUnicodeUCS2_Decode\r | |
166 | # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII\r | |
167 | # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap\r | |
168 | # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1\r | |
169 | # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape\r | |
170 | # define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32\r | |
171 | # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful\r | |
172 | # define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16\r | |
173 | # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful\r | |
174 | # define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8\r | |
175 | # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful\r | |
176 | # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape\r | |
177 | # define PyUnicode_Encode PyUnicodeUCS2_Encode\r | |
178 | # define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII\r | |
179 | # define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap\r | |
180 | # define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal\r | |
181 | # define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1\r | |
182 | # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape\r | |
183 | # define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32\r | |
184 | # define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16\r | |
185 | # define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8\r | |
186 | # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape\r | |
187 | # define PyUnicode_Find PyUnicodeUCS2_Find\r | |
188 | # define PyUnicode_Format PyUnicodeUCS2_Format\r | |
189 | # define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject\r | |
190 | # define PyUnicode_FromFormat PyUnicodeUCS2_FromFormat\r | |
191 | # define PyUnicode_FromFormatV PyUnicodeUCS2_FromFormatV\r | |
192 | # define PyUnicode_FromObject PyUnicodeUCS2_FromObject\r | |
193 | # define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal\r | |
194 | # define PyUnicode_FromString PyUnicodeUCS2_FromString\r | |
195 | # define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize\r | |
196 | # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode\r | |
197 | # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar\r | |
198 | # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding\r | |
199 | # define PyUnicode_GetMax PyUnicodeUCS2_GetMax\r | |
200 | # define PyUnicode_GetSize PyUnicodeUCS2_GetSize\r | |
201 | # define PyUnicode_Join PyUnicodeUCS2_Join\r | |
202 | # define PyUnicode_Partition PyUnicodeUCS2_Partition\r | |
203 | # define PyUnicode_RPartition PyUnicodeUCS2_RPartition\r | |
204 | # define PyUnicode_RSplit PyUnicodeUCS2_RSplit\r | |
205 | # define PyUnicode_Replace PyUnicodeUCS2_Replace\r | |
206 | # define PyUnicode_Resize PyUnicodeUCS2_Resize\r | |
207 | # define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare\r | |
208 | # define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding\r | |
209 | # define PyUnicode_Split PyUnicodeUCS2_Split\r | |
210 | # define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines\r | |
211 | # define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch\r | |
212 | # define PyUnicode_Translate PyUnicodeUCS2_Translate\r | |
213 | # define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap\r | |
214 | # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString\r | |
215 | # define _PyUnicode_Fini _PyUnicodeUCS2_Fini\r | |
216 | # define _PyUnicode_Init _PyUnicodeUCS2_Init\r | |
217 | # define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha\r | |
218 | # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit\r | |
219 | # define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit\r | |
220 | # define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak\r | |
221 | # define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase\r | |
222 | # define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric\r | |
223 | # define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase\r | |
224 | # define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase\r | |
225 | # define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace\r | |
226 | # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit\r | |
227 | # define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit\r | |
228 | # define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase\r | |
229 | # define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric\r | |
230 | # define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase\r | |
231 | # define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase\r | |
232 | \r | |
233 | #else\r | |
234 | \r | |
235 | # define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString\r | |
236 | # define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString\r | |
237 | # define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject\r | |
238 | # define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString\r | |
239 | # define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String\r | |
240 | # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString\r | |
241 | # define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String\r | |
242 | # define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String\r | |
243 | # define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String\r | |
244 | # define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode\r | |
245 | # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString\r | |
246 | # define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar\r | |
247 | # define PyUnicode_ClearFreeList PyUnicodeUCS4_ClearFreelist\r | |
248 | # define PyUnicode_Compare PyUnicodeUCS4_Compare\r | |
249 | # define PyUnicode_Concat PyUnicodeUCS4_Concat\r | |
250 | # define PyUnicode_Contains PyUnicodeUCS4_Contains\r | |
251 | # define PyUnicode_Count PyUnicodeUCS4_Count\r | |
252 | # define PyUnicode_Decode PyUnicodeUCS4_Decode\r | |
253 | # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII\r | |
254 | # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap\r | |
255 | # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1\r | |
256 | # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape\r | |
257 | # define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32\r | |
258 | # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful\r | |
259 | # define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16\r | |
260 | # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful\r | |
261 | # define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8\r | |
262 | # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful\r | |
263 | # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape\r | |
264 | # define PyUnicode_Encode PyUnicodeUCS4_Encode\r | |
265 | # define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII\r | |
266 | # define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap\r | |
267 | # define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal\r | |
268 | # define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1\r | |
269 | # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape\r | |
270 | # define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32\r | |
271 | # define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16\r | |
272 | # define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8\r | |
273 | # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape\r | |
274 | # define PyUnicode_Find PyUnicodeUCS4_Find\r | |
275 | # define PyUnicode_Format PyUnicodeUCS4_Format\r | |
276 | # define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject\r | |
277 | # define PyUnicode_FromFormat PyUnicodeUCS4_FromFormat\r | |
278 | # define PyUnicode_FromFormatV PyUnicodeUCS4_FromFormatV\r | |
279 | # define PyUnicode_FromObject PyUnicodeUCS4_FromObject\r | |
280 | # define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal\r | |
281 | # define PyUnicode_FromString PyUnicodeUCS4_FromString\r | |
282 | # define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize\r | |
283 | # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode\r | |
284 | # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar\r | |
285 | # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding\r | |
286 | # define PyUnicode_GetMax PyUnicodeUCS4_GetMax\r | |
287 | # define PyUnicode_GetSize PyUnicodeUCS4_GetSize\r | |
288 | # define PyUnicode_Join PyUnicodeUCS4_Join\r | |
289 | # define PyUnicode_Partition PyUnicodeUCS4_Partition\r | |
290 | # define PyUnicode_RPartition PyUnicodeUCS4_RPartition\r | |
291 | # define PyUnicode_RSplit PyUnicodeUCS4_RSplit\r | |
292 | # define PyUnicode_Replace PyUnicodeUCS4_Replace\r | |
293 | # define PyUnicode_Resize PyUnicodeUCS4_Resize\r | |
294 | # define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare\r | |
295 | # define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding\r | |
296 | # define PyUnicode_Split PyUnicodeUCS4_Split\r | |
297 | # define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines\r | |
298 | # define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch\r | |
299 | # define PyUnicode_Translate PyUnicodeUCS4_Translate\r | |
300 | # define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap\r | |
301 | # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString\r | |
302 | # define _PyUnicode_Fini _PyUnicodeUCS4_Fini\r | |
303 | # define _PyUnicode_Init _PyUnicodeUCS4_Init\r | |
304 | # define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha\r | |
305 | # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit\r | |
306 | # define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit\r | |
307 | # define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak\r | |
308 | # define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase\r | |
309 | # define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric\r | |
310 | # define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase\r | |
311 | # define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase\r | |
312 | # define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace\r | |
313 | # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit\r | |
314 | # define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit\r | |
315 | # define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase\r | |
316 | # define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric\r | |
317 | # define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase\r | |
318 | # define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase\r | |
319 | \r | |
320 | \r | |
321 | #endif\r | |
322 | \r | |
323 | /* --- Internal Unicode Operations ---------------------------------------- */\r | |
324 | \r | |
325 | /* If you want Python to use the compiler's wctype.h functions instead\r | |
326 | of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or\r | |
327 | configure Python using --with-wctype-functions. This reduces the\r | |
328 | interpreter's code size. */\r | |
329 | \r | |
330 | #if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)\r | |
331 | \r | |
332 | #include <wctype.h>\r | |
333 | \r | |
334 | #define Py_UNICODE_ISSPACE(ch) iswspace(ch)\r | |
335 | \r | |
336 | #define Py_UNICODE_ISLOWER(ch) iswlower(ch)\r | |
337 | #define Py_UNICODE_ISUPPER(ch) iswupper(ch)\r | |
338 | #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)\r | |
339 | #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)\r | |
340 | \r | |
341 | #define Py_UNICODE_TOLOWER(ch) towlower(ch)\r | |
342 | #define Py_UNICODE_TOUPPER(ch) towupper(ch)\r | |
343 | #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)\r | |
344 | \r | |
345 | #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)\r | |
346 | #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)\r | |
347 | #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)\r | |
348 | \r | |
349 | #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)\r | |
350 | #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)\r | |
351 | #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)\r | |
352 | \r | |
353 | #define Py_UNICODE_ISALPHA(ch) iswalpha(ch)\r | |
354 | \r | |
355 | #else\r | |
356 | \r | |
357 | /* Since splitting on whitespace is an important use case, and\r | |
358 | whitespace in most situations is solely ASCII whitespace, we\r | |
359 | optimize for the common case by using a quick look-up table\r | |
360 | _Py_ascii_whitespace (see below) with an inlined check.\r | |
361 | Note: the macro argument ch is evaluated more than once.\r | |
362 | */\r | |
363 | #define Py_UNICODE_ISSPACE(ch) \\r | |
364 | ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))\r | |
365 | \r | |
366 | #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)\r | |
367 | #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)\r | |
368 | #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)\r | |
369 | #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)\r | |
370 | \r | |
371 | #define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)\r | |
372 | #define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)\r | |
373 | #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)\r | |
374 | \r | |
375 | #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)\r | |
376 | #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)\r | |
377 | #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)\r | |
378 | \r | |
379 | #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)\r | |
380 | #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)\r | |
381 | #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)\r | |
382 | \r | |
383 | #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)\r | |
384 | \r | |
385 | #endif\r | |
386 | \r | |
387 | #define Py_UNICODE_ISALNUM(ch) \\r | |
388 | (Py_UNICODE_ISALPHA(ch) || \\r | |
389 | Py_UNICODE_ISDECIMAL(ch) || \\r | |
390 | Py_UNICODE_ISDIGIT(ch) || \\r | |
391 | Py_UNICODE_ISNUMERIC(ch))\r | |
392 | \r | |
393 | #define Py_UNICODE_COPY(target, source, length) \\r | |
394 | Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))\r | |
395 | \r | |
396 | #define Py_UNICODE_FILL(target, value, length) \\r | |
397 | do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\\r | |
398 | for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\\r | |
399 | } while (0)\r | |
400 | \r | |
401 | /* Check if substring matches at given offset. The offset must be\r | |
402 | valid, and the substring must not be empty (arguments are evaluated more than once). */\r | |
403 | \r | |
404 | #define Py_UNICODE_MATCH(string, offset, substring) \\r | |
405 | ((*((string)->str + (offset)) == *((substring)->str)) && \\r | |
406 | ((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \\r | |
407 | !memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE)))\r | |
408 | \r | |
409 | #ifdef __cplusplus\r | |
410 | extern "C" {\r | |
411 | #endif\r | |
412 | \r | |
413 | /* --- Unicode Type ------------------------------------------------------- */\r | |
414 | \r | |
415 | typedef struct {\r | |
416 | PyObject_HEAD\r | |
417 | Py_ssize_t length; /* Length of raw Unicode data in buffer */\r | |
418 | Py_UNICODE *str; /* Raw Unicode buffer */\r | |
419 | long hash; /* Hash value; -1 if not set */\r | |
420 | PyObject *defenc; /* (Default) Encoded version as Python\r | |
421 | string, or NULL; this is used for\r | |
422 | implementing the buffer protocol */\r | |
423 | } PyUnicodeObject;\r | |
424 | \r | |
425 | PyAPI_DATA(PyTypeObject) PyUnicode_Type;\r | |
426 | \r | |
427 | #define PyUnicode_Check(op) \\r | |
428 | PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)\r | |
429 | #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)\r | |
430 | \r | |
431 | /* Fast access macros */\r | |
432 | #define PyUnicode_GET_SIZE(op) \\r | |
433 | (((PyUnicodeObject *)(op))->length)\r | |
434 | #define PyUnicode_GET_DATA_SIZE(op) \\r | |
435 | (((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))\r | |
436 | #define PyUnicode_AS_UNICODE(op) \\r | |
437 | (((PyUnicodeObject *)(op))->str)\r | |
438 | #define PyUnicode_AS_DATA(op) \\r | |
439 | ((const char *)((PyUnicodeObject *)(op))->str)\r | |
440 | \r | |
441 | /* --- Constants ---------------------------------------------------------- */\r | |
442 | \r | |
443 | /* This Unicode character will be used as replacement character during\r | |
444 | decoding if the errors argument is set to "replace". Note: the\r | |
445 | Unicode character U+FFFD is the official REPLACEMENT CHARACTER in\r | |
446 | Unicode 3.0. */\r | |
447 | \r | |
448 | #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)\r | |
449 | \r | |
450 | /* === Public API ========================================================= */\r | |
451 | \r | |
452 | /* --- Plain Py_UNICODE --------------------------------------------------- */\r | |
453 | \r | |
454 | /* Create a Unicode Object from the Py_UNICODE buffer u of the given\r | |
455 | size.\r | |
456 | \r | |
457 | u may be NULL which causes the contents to be undefined. It is the\r | |
458 | user's responsibility to fill in the needed data afterwards. Note\r | |
459 | that modifying the Unicode object contents after construction is\r | |
460 | only allowed if u was set to NULL.\r | |
461 | \r | |
462 | The buffer is copied into the new object. */\r | |
463 | \r | |
464 | PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(\r | |
465 | const Py_UNICODE *u, /* Unicode buffer */\r | |
466 | Py_ssize_t size /* size of buffer */\r | |
467 | );\r | |
468 | \r | |
469 | /* Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes */\r | |
470 | PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(\r | |
471 | const char *u, /* char buffer */\r | |
472 | Py_ssize_t size /* size of buffer */\r | |
473 | );\r | |
474 | \r | |
475 | /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated\r | |
476 | Latin-1 encoded bytes */\r | |
477 | PyAPI_FUNC(PyObject*) PyUnicode_FromString(\r | |
478 | const char *u /* string */\r | |
479 | );\r | |
480 | \r | |
481 | /* Return a read-only pointer to the Unicode object's internal\r | |
482 | Py_UNICODE buffer. */\r | |
483 | \r | |
484 | PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(\r | |
485 | PyObject *unicode /* Unicode object */\r | |
486 | );\r | |
487 | \r | |
488 | /* Get the length of the Unicode object. */\r | |
489 | \r | |
490 | PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(\r | |
491 | PyObject *unicode /* Unicode object */\r | |
492 | );\r | |
493 | \r | |
494 | /* Get the maximum ordinal for a Unicode character. */\r | |
495 | PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);\r | |
496 | \r | |
497 | /* Resize an already allocated Unicode object to the new size length.\r | |
498 | \r | |
499 | *unicode is modified to point to the new (resized) object and 0\r | |
500 | returned on success.\r | |
501 | \r | |
502 | This API may only be called by the function which also called the\r | |
503 | Unicode constructor. The refcount on the object must be 1. Otherwise,\r | |
504 | an error is returned.\r | |
505 | \r | |
506 | Error handling is implemented as follows: an exception is set, -1\r | |
507 | is returned and *unicode left untouched.\r | |
508 | \r | |
509 | */\r | |
510 | \r | |
511 | PyAPI_FUNC(int) PyUnicode_Resize(\r | |
512 | PyObject **unicode, /* Pointer to the Unicode object */\r | |
513 | Py_ssize_t length /* New length */\r | |
514 | );\r | |
515 | \r | |
516 | /* Coerce obj to a Unicode object and return a reference with\r | |
517 | *incremented* refcount.\r | |
518 | \r | |
519 | Coercion is done in the following way:\r | |
520 | \r | |
521 | 1. String and other char buffer compatible objects are decoded\r | |
522 | under the assumptions that they contain data using the current\r | |
523 | default encoding. Decoding is done in "strict" mode.\r | |
524 | \r | |
525 | 2. All other objects (including Unicode objects) raise an\r | |
526 | exception.\r | |
527 | \r | |
528 | The API returns NULL in case of an error. The caller is responsible\r | |
529 | for decref'ing the returned objects.\r | |
530 | \r | |
531 | */\r | |
532 | \r | |
533 | PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(\r | |
534 | register PyObject *obj, /* Object */\r | |
535 | const char *encoding, /* encoding */\r | |
536 | const char *errors /* error handling */\r | |
537 | );\r | |
538 | \r | |
539 | /* Coerce obj to a Unicode object and return a reference with\r | |
540 | *incremented* refcount.\r | |
541 | \r | |
542 | Unicode objects are passed back as-is (subclasses are converted to\r | |
543 | true Unicode objects), all other objects are delegated to\r | |
544 | PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in\r | |
545 | using the default encoding as basis for decoding the object.\r | |
546 | \r | |
547 | The API returns NULL in case of an error. The caller is responsible\r | |
548 | for decref'ing the returned objects.\r | |
549 | \r | |
550 | */\r | |
551 | \r | |
552 | PyAPI_FUNC(PyObject*) PyUnicode_FromObject(\r | |
553 | register PyObject *obj /* Object */\r | |
554 | );\r | |
555 | \r | |
556 | PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char*, va_list);\r | |
557 | PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char*, ...);\r | |
558 | \r | |
559 | /* Format the object based on the format_spec, as defined in PEP 3101\r | |
560 | (Advanced String Formatting). */\r | |
561 | PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj,\r | |
562 | Py_UNICODE *format_spec,\r | |
563 | Py_ssize_t format_spec_len);\r | |
564 | \r | |
565 | /* --- wchar_t support for platforms which support it --------------------- */\r | |
566 | \r | |
567 | #ifdef HAVE_WCHAR_H\r | |
568 | \r | |
569 | /* Create a Unicode Object from the wchar_t buffer w of the given\r | |
570 | size.\r | |
571 | \r | |
572 | The buffer is copied into the new object. */\r | |
573 | \r | |
574 | PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(\r | |
575 | register const wchar_t *w, /* wchar_t buffer */\r | |
576 | Py_ssize_t size /* size of buffer */\r | |
577 | );\r | |
578 | \r | |
579 | /* Copies the Unicode Object contents into the wchar_t buffer w. At\r | |
580 | most size wchar_t characters are copied.\r | |
581 | \r | |
582 | Note that the resulting wchar_t string may or may not be\r | |
583 | 0-terminated. It is the responsibility of the caller to make sure\r | |
584 | that the wchar_t string is 0-terminated in case this is required by\r | |
585 | the application.\r | |
586 | \r | |
587 | Returns the number of wchar_t characters copied (excluding a\r | |
588 | possibly trailing 0-termination character) or -1 in case of an\r | |
589 | error. */\r | |
590 | \r | |
591 | PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(\r | |
592 | PyUnicodeObject *unicode, /* Unicode object */\r | |
593 | register wchar_t *w, /* wchar_t buffer */\r | |
594 | Py_ssize_t size /* size of buffer */\r | |
595 | );\r | |
596 | \r | |
597 | #endif\r | |
598 | \r | |
599 | /* --- Unicode ordinals --------------------------------------------------- */\r | |
600 | \r | |
601 | /* Create a Unicode Object from the given Unicode code point ordinal.\r | |
602 | \r | |
603 | The ordinal must be in range(0x10000) on narrow Python builds\r | |
604 | (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is\r | |
605 | raised in case it is not.\r | |
606 | \r | |
607 | */\r | |
608 | \r | |
609 | PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);\r | |
610 | \r | |
611 | /* --- Free-list management ----------------------------------------------- */\r | |
612 | \r | |
613 | /* Clear the free list used by the Unicode implementation.\r | |
614 | \r | |
615 | This can be used to release memory used for objects on the free\r | |
616 | list back to the Python memory allocator.\r | |
617 | \r | |
618 | */\r | |
619 | \r | |
620 | PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);\r | |
621 | \r | |
622 | /* === Builtin Codecs =====================================================\r | |
623 | \r | |
624 | Many of these APIs take two arguments encoding and errors. These\r | |
625 | parameters encoding and errors have the same semantics as the ones\r | |
626 | of the builtin unicode() API.\r | |
627 | \r | |
628 | Setting encoding to NULL causes the default encoding to be used.\r | |
629 | \r | |
630 | Error handling is set by errors which may also be set to NULL\r | |
631 | meaning to use the default handling defined for the codec. Default\r | |
632 | error handling for all builtin codecs is "strict" (ValueErrors are\r | |
633 | raised).\r | |
634 | \r | |
635 | The codecs all use a similar interface. Only deviations from the\r | |
636 | generic ones are documented.\r | |
637 | \r | |
638 | */\r | |
639 | \r | |
640 | /* --- Manage the default encoding ---------------------------------------- */\r | |
641 | \r | |
642 | /* Return a Python string holding the default encoded value of the\r | |
643 | Unicode object.\r | |
644 | \r | |
645 | The resulting string is cached in the Unicode object for subsequent\r | |
646 | usage by this function. The cached version is needed to implement\r | |
647 | the character buffer interface and will live (at least) as long as\r | |
648 | the Unicode object itself.\r | |
649 | \r | |
650 | The refcount of the string is *not* incremented.\r | |
651 | \r | |
652 | *** Exported for internal use by the interpreter only !!! ***\r | |
653 | \r | |
654 | */\r | |
655 | \r | |
656 | PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(\r | |
657 | PyObject *, const char *);\r | |
658 | \r | |
659 | /* Returns the currently active default encoding.\r | |
660 | \r | |
661 | The default encoding is currently implemented as a run-time settable\r | |
662 | process global. This may change in future versions of the\r | |
663 | interpreter to become a parameter which is managed on a per-thread\r | |
664 | basis.\r | |
665 | \r | |
666 | */\r | |
667 | \r | |
668 | PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);\r | |
669 | \r | |
670 | /* Sets the currently active default encoding.\r | |
671 | \r | |
672 | Returns 0 on success, -1 in case of an error.\r | |
673 | \r | |
674 | */\r | |
675 | \r | |
676 | PyAPI_FUNC(int) PyUnicode_SetDefaultEncoding(\r | |
677 | const char *encoding /* Encoding name in standard form */\r | |
678 | );\r | |
679 | \r | |
680 | /* --- Generic Codecs ----------------------------------------------------- */\r | |
681 | \r | |
682 | /* Create a Unicode object by decoding the encoded string s of the\r | |
683 | given size. */\r | |
684 | \r | |
685 | PyAPI_FUNC(PyObject*) PyUnicode_Decode(\r | |
686 | const char *s, /* encoded string */\r | |
687 | Py_ssize_t size, /* size of buffer */\r | |
688 | const char *encoding, /* encoding */\r | |
689 | const char *errors /* error handling */\r | |
690 | );\r | |
691 | \r | |
692 | /* Encodes a Py_UNICODE buffer of the given size and returns a\r | |
693 | Python string object. */\r | |
694 | \r | |
695 | PyAPI_FUNC(PyObject*) PyUnicode_Encode(\r | |
696 | const Py_UNICODE *s, /* Unicode char buffer */\r | |
697 | Py_ssize_t size, /* number of Py_UNICODE chars to encode */\r | |
698 | const char *encoding, /* encoding */\r | |
699 | const char *errors /* error handling */\r | |
700 | );\r | |
701 | \r | |
702 | /* Encodes a Unicode object and returns the result as Python\r | |
703 | object. */\r | |
704 | \r | |
705 | PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(\r | |
706 | PyObject *unicode, /* Unicode object */\r | |
707 | const char *encoding, /* encoding */\r | |
708 | const char *errors /* error handling */\r | |
709 | );\r | |
710 | \r | |
711 | /* Encodes a Unicode object and returns the result as Python string\r | |
712 | object. */\r | |
713 | \r | |
714 | PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(\r | |
715 | PyObject *unicode, /* Unicode object */\r | |
716 | const char *encoding, /* encoding */\r | |
717 | const char *errors /* error handling */\r | |
718 | );\r | |
719 | \r | |
720 | PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(\r | |
721 | PyObject* string /* 256 character map */\r | |
722 | );\r | |
723 | \r | |
724 | \r | |
725 | /* --- UTF-7 Codecs ------------------------------------------------------- */\r | |
726 | \r | |
727 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(\r | |
728 | const char *string, /* UTF-7 encoded string */\r | |
729 | Py_ssize_t length, /* size of string */\r | |
730 | const char *errors /* error handling */\r | |
731 | );\r | |
732 | \r | |
733 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(\r | |
734 | const char *string, /* UTF-7 encoded string */\r | |
735 | Py_ssize_t length, /* size of string */\r | |
736 | const char *errors, /* error handling */\r | |
737 | Py_ssize_t *consumed /* bytes consumed */\r | |
738 | );\r | |
739 | \r | |
740 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(\r | |
741 | const Py_UNICODE *data, /* Unicode char buffer */\r | |
742 | Py_ssize_t length, /* number of Py_UNICODE chars to encode */\r | |
743 | int base64SetO, /* Encode RFC2152 Set O characters in base64 */\r | |
744 | int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */\r | |
745 | const char *errors /* error handling */\r | |
746 | );\r | |
747 | \r | |
748 | /* --- UTF-8 Codecs ------------------------------------------------------- */\r | |
749 | \r | |
750 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(\r | |
751 | const char *string, /* UTF-8 encoded string */\r | |
752 | Py_ssize_t length, /* size of string */\r | |
753 | const char *errors /* error handling */\r | |
754 | );\r | |
755 | \r | |
756 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(\r | |
757 | const char *string, /* UTF-8 encoded string */\r | |
758 | Py_ssize_t length, /* size of string */\r | |
759 | const char *errors, /* error handling */\r | |
760 | Py_ssize_t *consumed /* bytes consumed */\r | |
761 | );\r | |
762 | \r | |
763 | PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(\r | |
764 | PyObject *unicode /* Unicode object */\r | |
765 | );\r | |
766 | \r | |
767 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(\r | |
768 | const Py_UNICODE *data, /* Unicode char buffer */\r | |
769 | Py_ssize_t length, /* number of Py_UNICODE chars to encode */\r | |
770 | const char *errors /* error handling */\r | |
771 | );\r | |
772 | \r | |
773 | /* --- UTF-32 Codecs ------------------------------------------------------ */\r | |
774 | \r | |
775 | /* Decodes length bytes from a UTF-32 encoded buffer string and returns\r | |
776 | the corresponding Unicode object.\r | |
777 | \r | |
778 | errors (if non-NULL) defines the error handling. It defaults\r | |
779 | to "strict".\r | |
780 | \r | |
781 | If byteorder is non-NULL, the decoder starts decoding using the\r | |
782 | given byte order:\r | |
783 | \r | |
784 | *byteorder == -1: little endian\r | |
785 | *byteorder == 0: native order\r | |
786 | *byteorder == 1: big endian\r | |
787 | \r | |
788 | In native mode, the first four bytes of the stream are checked for a\r | |
789 | BOM mark. If found, the BOM mark is analysed, the byte order\r | |
790 | adjusted and the BOM skipped. In the other modes, no BOM mark\r | |
791 | interpretation is done. After completion, *byteorder is set to the\r | |
792 | current byte order at the end of input data.\r | |
793 | \r | |
794 | If byteorder is NULL, the codec starts in native order mode.\r | |
795 | \r | |
796 | */\r | |
797 | \r | |
798 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(\r | |
799 | const char *string, /* UTF-32 encoded string */\r | |
800 | Py_ssize_t length, /* size of string */\r | |
801 | const char *errors, /* error handling */\r | |
802 | int *byteorder /* pointer to byteorder to use\r | |
803 | 0=native;-1=LE,1=BE; updated on\r | |
804 | exit */\r | |
805 | );\r | |
806 | \r | |
807 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(\r | |
808 | const char *string, /* UTF-32 encoded string */\r | |
809 | Py_ssize_t length, /* size of string */\r | |
810 | const char *errors, /* error handling */\r | |
811 | int *byteorder, /* pointer to byteorder to use\r | |
812 | 0=native;-1=LE,1=BE; updated on\r | |
813 | exit */\r | |
814 | Py_ssize_t *consumed /* bytes consumed */\r | |
815 | );\r | |
816 | \r | |
817 | /* Returns a Python string using the UTF-32 encoding in native byte\r | |
818 | order. The string always starts with a BOM mark. */\r | |
819 | \r | |
820 | PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(\r | |
821 | PyObject *unicode /* Unicode object */\r | |
822 | );\r | |
823 | \r | |
824 | /* Returns a Python string object holding the UTF-32 encoded value of\r | |
825 | the Unicode data.\r | |
826 | \r | |
827 | If byteorder is not 0, output is written according to the following\r | |
828 | byte order:\r | |
829 | \r | |
830 | byteorder == -1: little endian\r | |
831 | byteorder == 0: native byte order (writes a BOM mark)\r | |
832 | byteorder == 1: big endian\r | |
833 | \r | |
834 | If byteorder is 0, the output string will always start with the\r | |
835 | Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is\r | |
836 | prepended.\r | |
837 | \r | |
838 | */\r | |
839 | \r | |
840 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(\r | |
841 | const Py_UNICODE *data, /* Unicode char buffer */\r | |
842 | Py_ssize_t length, /* number of Py_UNICODE chars to encode */\r | |
843 | const char *errors, /* error handling */\r | |
844 | int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */\r | |
845 | );\r | |
846 | \r | |
847 | /* --- UTF-16 Codecs ------------------------------------------------------ */\r | |
848 | \r | |
849 | /* Decodes length bytes from a UTF-16 encoded buffer string and returns\r | |
850 | the corresponding Unicode object.\r | |
851 | \r | |
852 | errors (if non-NULL) defines the error handling. It defaults\r | |
853 | to "strict".\r | |
854 | \r | |
855 | If byteorder is non-NULL, the decoder starts decoding using the\r | |
856 | given byte order:\r | |
857 | \r | |
858 | *byteorder == -1: little endian\r | |
859 | *byteorder == 0: native order\r | |
860 | *byteorder == 1: big endian\r | |
861 | \r | |
862 | In native mode, the first two bytes of the stream are checked for a\r | |
863 | BOM mark. If found, the BOM mark is analysed, the byte order\r | |
864 | adjusted and the BOM skipped. In the other modes, no BOM mark\r | |
865 | interpretation is done. After completion, *byteorder is set to the\r | |
866 | current byte order at the end of input data.\r | |
867 | \r | |
868 | If byteorder is NULL, the codec starts in native order mode.\r | |
869 | \r | |
870 | */\r | |
871 | \r | |
872 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(\r | |
873 | const char *string, /* UTF-16 encoded string */\r | |
874 | Py_ssize_t length, /* size of string */\r | |
875 | const char *errors, /* error handling */\r | |
876 | int *byteorder /* pointer to byteorder to use\r | |
877 | 0=native;-1=LE,1=BE; updated on\r | |
878 | exit */\r | |
879 | );\r | |
880 | \r | |
881 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(\r | |
882 | const char *string, /* UTF-16 encoded string */\r | |
883 | Py_ssize_t length, /* size of string */\r | |
884 | const char *errors, /* error handling */\r | |
885 | int *byteorder, /* pointer to byteorder to use\r | |
886 | 0=native;-1=LE,1=BE; updated on\r | |
887 | exit */\r | |
888 | Py_ssize_t *consumed /* bytes consumed */\r | |
889 | );\r | |
890 | \r | |
891 | /* Returns a Python string using the UTF-16 encoding in native byte\r | |
892 | order. The string always starts with a BOM mark. */\r | |
893 | \r | |
894 | PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(\r | |
895 | PyObject *unicode /* Unicode object */\r | |
896 | );\r | |
897 | \r | |
898 | /* Returns a Python string object holding the UTF-16 encoded value of\r | |
899 | the Unicode data.\r | |
900 | \r | |
901 | If byteorder is not 0, output is written according to the following\r | |
902 | byte order:\r | |
903 | \r | |
904 | byteorder == -1: little endian\r | |
905 | byteorder == 0: native byte order (writes a BOM mark)\r | |
906 | byteorder == 1: big endian\r | |
907 | \r | |
908 | If byteorder is 0, the output string will always start with the\r | |
909 | Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is\r | |
910 | prepended.\r | |
911 | \r | |
912 | Note that Py_UNICODE data is being interpreted as UTF-16 reduced to\r | |
913 | UCS-2. This trick makes it possible to add full UTF-16 capabilities\r | |
914 | at a later point without compromising the APIs.\r | |
915 | \r | |
916 | */\r | |
917 | \r | |
918 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(\r | |
919 | const Py_UNICODE *data, /* Unicode char buffer */\r | |
920 | Py_ssize_t length, /* number of Py_UNICODE chars to encode */\r | |
921 | const char *errors, /* error handling */\r | |
922 | int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */\r | |
923 | );\r | |
924 | \r | |
925 | /* --- Unicode-Escape Codecs ---------------------------------------------- */\r | |
926 | \r | |
927 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(\r | |
928 | const char *string, /* Unicode-Escape encoded string */\r | |
929 | Py_ssize_t length, /* size of string */\r | |
930 | const char *errors /* error handling */\r | |
931 | );\r | |
932 | \r | |
933 | PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(\r | |
934 | PyObject *unicode /* Unicode object */\r | |
935 | );\r | |
936 | \r | |
937 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(\r | |
938 | const Py_UNICODE *data, /* Unicode char buffer */\r | |
939 | Py_ssize_t length /* Number of Py_UNICODE chars to encode */\r | |
940 | );\r | |
941 | \r | |
942 | /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */\r | |
943 | \r | |
944 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(\r | |
945 | const char *string, /* Raw-Unicode-Escape encoded string */\r | |
946 | Py_ssize_t length, /* size of string */\r | |
947 | const char *errors /* error handling */\r | |
948 | );\r | |
949 | \r | |
950 | PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(\r | |
951 | PyObject *unicode /* Unicode object */\r | |
952 | );\r | |
953 | \r | |
954 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(\r | |
955 | const Py_UNICODE *data, /* Unicode char buffer */\r | |
956 | Py_ssize_t length /* Number of Py_UNICODE chars to encode */\r | |
957 | );\r | |
958 | \r | |
959 | /* --- Unicode Internal Codec ---------------------------------------------\r | |
960 | \r | |
961 | Only for internal use in _codecsmodule.c */\r | |
962 | \r | |
963 | PyObject *_PyUnicode_DecodeUnicodeInternal(\r | |
964 | const char *string,\r | |
965 | Py_ssize_t length,\r | |
966 | const char *errors\r | |
967 | );\r | |
968 | \r | |
969 | /* --- Latin-1 Codecs -----------------------------------------------------\r | |
970 | \r | |
971 | Note: Latin-1 corresponds to the first 256 Unicode ordinals.\r | |
972 | \r | |
973 | */\r | |
974 | \r | |
975 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(\r | |
976 | const char *string, /* Latin-1 encoded string */\r | |
977 | Py_ssize_t length, /* size of string */\r | |
978 | const char *errors /* error handling */\r | |
979 | );\r | |
980 | \r | |
981 | PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(\r | |
982 | PyObject *unicode /* Unicode object */\r | |
983 | );\r | |
984 | \r | |
985 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(\r | |
986 | const Py_UNICODE *data, /* Unicode char buffer */\r | |
987 | Py_ssize_t length, /* Number of Py_UNICODE chars to encode */\r | |
988 | const char *errors /* error handling */\r | |
989 | );\r | |
990 | \r | |
991 | /* --- ASCII Codecs -------------------------------------------------------\r | |
992 | \r | |
993 | Only 7-bit ASCII data is accepted. All other codes generate errors.\r | |
994 | \r | |
995 | */\r | |
996 | \r | |
997 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(\r | |
998 | const char *string, /* ASCII encoded string */\r | |
999 | Py_ssize_t length, /* size of string */\r | |
1000 | const char *errors /* error handling */\r | |
1001 | );\r | |
1002 | \r | |
1003 | PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(\r | |
1004 | PyObject *unicode /* Unicode object */\r | |
1005 | );\r | |
1006 | \r | |
1007 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(\r | |
1008 | const Py_UNICODE *data, /* Unicode char buffer */\r | |
1009 | Py_ssize_t length, /* Number of Py_UNICODE chars to encode */\r | |
1010 | const char *errors /* error handling */\r | |
1011 | );\r | |
1012 | \r | |
1013 | /* --- Character Map Codecs -----------------------------------------------\r | |
1014 | \r | |
1015 | This codec uses mappings to encode and decode characters.\r | |
1016 | \r | |
1017 | Decoding mappings must map single string characters to single\r | |
1018 | Unicode characters, integers (which are then interpreted as Unicode\r | |
1019 | ordinals) or None (meaning "undefined mapping" and causing an\r | |
1020 | error).\r | |
1021 | \r | |
1022 | Encoding mappings must map single Unicode characters to single\r | |
1023 | string characters, integers (which are then interpreted as Latin-1\r | |
1024 | ordinals) or None (meaning "undefined mapping" and causing an\r | |
1025 | error).\r | |
1026 | \r | |
1027 | If a character lookup fails with a LookupError, the character is\r | |
1028 | copied as-is meaning that its ordinal value will be interpreted as\r | |
1029 | Unicode or Latin-1 ordinal resp. Because of this, mappings only need\r | |
1030 | to contain those mappings which map characters to different code\r | |
1031 | points.\r | |
1032 | \r | |
1033 | */\r | |
1034 | \r | |
1035 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(\r | |
1036 | const char *string, /* Encoded string */\r | |
1037 | Py_ssize_t length, /* size of string */\r | |
1038 | PyObject *mapping, /* character mapping\r | |
1039 | (char ordinal -> unicode ordinal) */\r | |
1040 | const char *errors /* error handling */\r | |
1041 | );\r | |
1042 | \r | |
1043 | PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(\r | |
1044 | PyObject *unicode, /* Unicode object */\r | |
1045 | PyObject *mapping /* character mapping\r | |
1046 | (unicode ordinal -> char ordinal) */\r | |
1047 | );\r | |
1048 | \r | |
1049 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(\r | |
1050 | const Py_UNICODE *data, /* Unicode char buffer */\r | |
1051 | Py_ssize_t length, /* Number of Py_UNICODE chars to encode */\r | |
1052 | PyObject *mapping, /* character mapping\r | |
1053 | (unicode ordinal -> char ordinal) */\r | |
1054 | const char *errors /* error handling */\r | |
1055 | );\r | |
1056 | \r | |
1057 | /* Translate a Py_UNICODE buffer of the given length by applying a\r | |
1058 | character mapping table to it and return the resulting Unicode\r | |
1059 | object.\r | |
1060 | \r | |
1061 | The mapping table must map Unicode ordinal integers to Unicode\r | |
1062 | ordinal integers or None (causing deletion of the character).\r | |
1063 | \r | |
1064 | Mapping tables may be dictionaries or sequences. Unmapped character\r | |
1065 | ordinals (ones which cause a LookupError) are left untouched and\r | |
1066 | are copied as-is.\r | |
1067 | \r | |
1068 | */\r | |
1069 | \r | |
1070 | PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(\r | |
1071 | const Py_UNICODE *data, /* Unicode char buffer */\r | |
1072 | Py_ssize_t length, /* Number of Py_UNICODE chars to encode */\r | |
1073 | PyObject *table, /* Translate table */\r | |
1074 | const char *errors /* error handling */\r | |
1075 | );\r | |
1076 | \r | |
1077 | #ifdef MS_WIN32\r | |
1078 | \r | |
1079 | /* --- MBCS codecs for Windows -------------------------------------------- */\r | |
1080 | \r | |
1081 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(\r | |
1082 | const char *string, /* MBCS encoded string */\r | |
1083 | Py_ssize_t length, /* size of string */\r | |
1084 | const char *errors /* error handling */\r | |
1085 | );\r | |
1086 | \r | |
1087 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(\r | |
1088 | const char *string, /* MBCS encoded string */\r | |
1089 | Py_ssize_t length, /* size of string */\r | |
1090 | const char *errors, /* error handling */\r | |
1091 | Py_ssize_t *consumed /* bytes consumed */\r | |
1092 | );\r | |
1093 | \r | |
1094 | PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(\r | |
1095 | PyObject *unicode /* Unicode object */\r | |
1096 | );\r | |
1097 | \r | |
1098 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(\r | |
1099 | const Py_UNICODE *data, /* Unicode char buffer */\r | |
1100 | Py_ssize_t length, /* Number of Py_UNICODE chars to encode */\r | |
1101 | const char *errors /* error handling */\r | |
1102 | );\r | |
1103 | \r | |
1104 | #endif /* MS_WIN32 */\r | |
1105 | \r | |
1106 | /* --- Decimal Encoder ---------------------------------------------------- */\r | |
1107 | \r | |
1108 | /* Takes a Unicode string holding a decimal value and writes it into\r | |
1109 | an output buffer using standard ASCII digit codes.\r | |
1110 | \r | |
1111 | The output buffer has to provide at least length+1 bytes of storage\r | |
1112 | area. The output string is 0-terminated.\r | |
1113 | \r | |
1114 | The encoder converts whitespace to ' ', decimal characters to their\r | |
1115 | corresponding ASCII digit and all other Latin-1 characters except\r | |
1116 | \0 as-is. Characters outside this range (Unicode ordinals 1-255)\r | |
1117 | are treated as errors. This includes embedded NULL bytes.\r | |
1118 | \r | |
1119 | Error handling is defined by the errors argument:\r | |
1120 | \r | |
1121 | NULL or "strict": raise a ValueError\r | |
1122 | "ignore": ignore the wrong characters (these are not copied to the\r | |
1123 | output buffer)\r | |
1124 | "replace": replaces illegal characters with '?'\r | |
1125 | \r | |
1126 | Returns 0 on success, -1 on failure.\r | |
1127 | \r | |
1128 | */\r | |
1129 | \r | |
1130 | PyAPI_FUNC(int) PyUnicode_EncodeDecimal(\r | |
1131 | Py_UNICODE *s, /* Unicode buffer */\r | |
1132 | Py_ssize_t length, /* Number of Py_UNICODE chars to encode */\r | |
1133 | char *output, /* Output buffer; must have size >= length+1 */\r | |
1134 | const char *errors /* error handling */\r | |
1135 | );\r | |
1136 | \r | |
1137 | /* --- Methods & Slots ----------------------------------------------------\r | |
1138 | \r | |
1139 | These are capable of handling Unicode objects and strings on input\r | |
1140 | (we refer to them as strings in the descriptions) and return\r | |
1141 | Unicode objects or integers as appropriate. */\r | |
1142 | \r | |
1143 | /* Concat two strings giving a new Unicode string. */\r | |
1144 | \r | |
1145 | PyAPI_FUNC(PyObject*) PyUnicode_Concat(\r | |
1146 | PyObject *left, /* Left string */\r | |
1147 | PyObject *right /* Right string */\r | |
1148 | );\r | |
1149 | \r | |
1150 | /* Split a string giving a list of Unicode strings.\r | |
1151 | \r | |
1152 | If sep is NULL, splitting will be done at all whitespace\r | |
1153 | substrings. Otherwise, splits occur at the given separator.\r | |
1154 | \r | |
1155 | At most maxsplit splits will be done. If negative, no limit is set.\r | |
1156 | \r | |
1157 | Separators are not included in the resulting list.\r | |
1158 | \r | |
1159 | */\r | |
1160 | \r | |
1161 | PyAPI_FUNC(PyObject*) PyUnicode_Split(\r | |
1162 | PyObject *s, /* String to split */\r | |
1163 | PyObject *sep, /* String separator */\r | |
1164 | Py_ssize_t maxsplit /* Maxsplit count */\r | |
1165 | );\r | |
1166 | \r | |
1167 | /* Ditto, but split at line breaks.\r | |
1168 | \r | |
1169 | CRLF is considered to be one line break. Line breaks are not\r | |
1170 | included in the resulting list unless keepends is true. */\r | |
1171 | \r | |
1172 | PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(\r | |
1173 | PyObject *s, /* String to split */\r | |
1174 | int keepends /* If true, line end markers are included */\r | |
1175 | );\r | |
1176 | \r | |
1177 | /* Partition a string using a given separator. */\r | |
1178 | \r | |
1179 | PyAPI_FUNC(PyObject*) PyUnicode_Partition(\r | |
1180 | PyObject *s, /* String to partition */\r | |
1181 | PyObject *sep /* String separator */\r | |
1182 | );\r | |
1183 | \r | |
1184 | /* Partition a string using a given separator, searching from the end of the\r | |
1185 | string. */\r | |
1186 | \r | |
1187 | PyAPI_FUNC(PyObject*) PyUnicode_RPartition(\r | |
1188 | PyObject *s, /* String to partition */\r | |
1189 | PyObject *sep /* String separator */\r | |
1190 | );\r | |
1191 | \r | |
1192 | /* Split a string giving a list of Unicode strings.\r | |
1193 | \r | |
1194 | If sep is NULL, splitting will be done at all whitespace\r | |
1195 | substrings. Otherwise, splits occur at the given separator.\r | |
1196 | \r | |
1197 | At most maxsplit splits will be done; if maxsplit is negative, no\r | |
1198 | limit is set. Unlike PyUnicode_Split, PyUnicode_RSplit splits from\r | |
1199 | the end of the string.\r | |
1200 | \r | |
1201 | Separators are not included in the resulting list.\r | |
1202 | \r | |
1203 | */\r | |
1204 | \r | |
1205 | PyAPI_FUNC(PyObject*) PyUnicode_RSplit(\r | |
1206 | PyObject *s, /* String to split */\r | |
1207 | PyObject *sep, /* String separator */\r | |
1208 | Py_ssize_t maxsplit /* Maxsplit count */\r | |
1209 | );\r | |
1210 | \r | |
1211 | /* Translate a string by applying a character mapping table to it and\r | |
1212 | return the resulting Unicode object.\r | |
1213 | \r | |
1214 | The mapping table must map Unicode ordinal integers to Unicode\r | |
1215 | ordinal integers or None (causing deletion of the character).\r | |
1216 | \r | |
1217 | Mapping tables may be dictionaries or sequences. Unmapped character\r | |
1218 | ordinals (ones which cause a LookupError) are left untouched and\r | |
1219 | are copied as-is.\r | |
1220 | \r | |
1221 | */\r | |
1222 | \r | |
1223 | PyAPI_FUNC(PyObject *) PyUnicode_Translate(\r | |
1224 | PyObject *str, /* String */\r | |
1225 | PyObject *table, /* Translate table */\r | |
1226 | const char *errors /* error handling */\r | |
1227 | );\r | |
1228 | \r | |
1229 | /* Join a sequence of strings using the given separator and return\r | |
1230 | the resulting Unicode string. */\r | |
1231 | \r | |
1232 | PyAPI_FUNC(PyObject*) PyUnicode_Join(\r | |
1233 | PyObject *separator, /* Separator string */\r | |
1234 | PyObject *seq /* Sequence object */\r | |
1235 | );\r | |
1236 | \r | |
1237 | /* Return 1 if substr matches str[start:end] at the given tail end, 0\r | |
1238 | otherwise. Return -1 if an error occurred. */\r | |
1239 | \r | |
1240 | PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(\r | |
1241 | PyObject *str, /* String */\r | |
1242 | PyObject *substr, /* Prefix or Suffix string */\r | |
1243 | Py_ssize_t start, /* Start index */\r | |
1244 | Py_ssize_t end, /* Stop index */\r | |
1245 | int direction /* Tail end: -1 prefix, +1 suffix */\r | |
1246 | );\r | |
1247 | \r | |
1248 | /* Return the first position of substr in str[start:end] using the\r | |
1249 | given search direction or -1 if not found. -2 is returned in case\r | |
1250 | an error occurred and an exception is set. */\r | |
1251 | \r | |
1252 | PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(\r | |
1253 | PyObject *str, /* String */\r | |
1254 | PyObject *substr, /* Substring to find */\r | |
1255 | Py_ssize_t start, /* Start index */\r | |
1256 | Py_ssize_t end, /* Stop index */\r | |
1257 | int direction /* Find direction: +1 forward, -1 backward */\r | |
1258 | );\r | |
1259 | \r | |
1260 | /* Count the number of occurrences of substr in str[start:end]. */\r | |
1261 | \r | |
1262 | PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(\r | |
1263 | PyObject *str, /* String */\r | |
1264 | PyObject *substr, /* Substring to count */\r | |
1265 | Py_ssize_t start, /* Start index */\r | |
1266 | Py_ssize_t end /* Stop index */\r | |
1267 | );\r | |
1268 | \r | |
1269 | /* Replace at most maxcount occurrences of substr in str with replstr\r | |
1270 | and return the resulting Unicode object. */\r | |
1271 | \r | |
1272 | PyAPI_FUNC(PyObject *) PyUnicode_Replace(\r | |
1273 | PyObject *str, /* String */\r | |
1274 | PyObject *substr, /* Substring to find */\r | |
1275 | PyObject *replstr, /* Substring to replace */\r | |
1276 | Py_ssize_t maxcount /* Max. number of replacements to apply;\r | |
1277 | -1 = all */\r | |
1278 | );\r | |
1279 | \r | |
1280 | /* Compare two strings and return -1, 0, 1 for less than, equal,\r | |
1281 | greater than resp. */\r | |
1282 | \r | |
1283 | PyAPI_FUNC(int) PyUnicode_Compare(\r | |
1284 | PyObject *left, /* Left string */\r | |
1285 | PyObject *right /* Right string */\r | |
1286 | );\r | |
1287 | \r | |
1288 | /* Rich compare two strings and return one of the following:\r | |
1289 | \r | |
1290 | - NULL in case an exception was raised\r | |
1291 | - Py_True or Py_False for successful comparisons\r | |
1292 | - Py_NotImplemented in case the type combination is unknown\r | |
1293 | \r | |
1294 | Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in\r | |
1295 | case the conversion of the arguments to Unicode fails with a\r | |
1296 | UnicodeDecodeError.\r | |
1297 | \r | |
1298 | Possible values for op:\r | |
1299 | \r | |
1300 | Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE\r | |
1301 | \r | |
1302 | */\r | |
1303 | \r | |
1304 | PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(\r | |
1305 | PyObject *left, /* Left string */\r | |
1306 | PyObject *right, /* Right string */\r | |
1307 | int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */\r | |
1308 | );\r | |
1309 | \r | |
1310 | /* Apply an argument tuple or dictionary to a format string and return\r | |
1311 | the resulting Unicode string. */\r | |
1312 | \r | |
1313 | PyAPI_FUNC(PyObject *) PyUnicode_Format(\r | |
1314 | PyObject *format, /* Format string */\r | |
1315 | PyObject *args /* Argument tuple or dictionary */\r | |
1316 | );\r | |
1317 | \r | |
1318 | /* Checks whether element is contained in container and return 1/0\r | |
1319 | accordingly.\r | |
1320 | \r | |
1321 | element has to coerce to a one-element Unicode string. -1 is\r | |
1322 | returned in case of an error. */\r | |
1323 | \r | |
1324 | PyAPI_FUNC(int) PyUnicode_Contains(\r | |
1325 | PyObject *container, /* Container string */\r | |
1326 | PyObject *element /* Element string */\r | |
1327 | );\r | |
1328 | \r | |
1329 | /* Externally visible for str.strip(unicode) */\r | |
1330 | PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(\r | |
1331 | PyUnicodeObject *self,\r | |
1332 | int striptype,\r | |
1333 | PyObject *sepobj\r | |
1334 | );\r | |
1335 | \r | |
1336 | /* === Characters Type APIs =============================================== */\r | |
1337 | \r | |
1338 | /* Helper array used by Py_UNICODE_ISSPACE(). */\r | |
1339 | \r | |
1340 | PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];\r | |
1341 | \r | |
1342 | /* These should not be used directly. Use the Py_UNICODE_IS* and\r | |
1343 | Py_UNICODE_TO* macros instead.\r | |
1344 | \r | |
1345 | These APIs are implemented in Objects/unicodectype.c.\r | |
1346 | \r | |
1347 | */\r | |
1348 | \r | |
1349 | PyAPI_FUNC(int) _PyUnicode_IsLowercase(\r | |
1350 | Py_UNICODE ch /* Unicode character */\r | |
1351 | );\r | |
1352 | \r | |
1353 | PyAPI_FUNC(int) _PyUnicode_IsUppercase(\r | |
1354 | Py_UNICODE ch /* Unicode character */\r | |
1355 | );\r | |
1356 | \r | |
1357 | PyAPI_FUNC(int) _PyUnicode_IsTitlecase(\r | |
1358 | Py_UNICODE ch /* Unicode character */\r | |
1359 | );\r | |
1360 | \r | |
1361 | PyAPI_FUNC(int) _PyUnicode_IsWhitespace(\r | |
1362 | const Py_UNICODE ch /* Unicode character */\r | |
1363 | );\r | |
1364 | \r | |
1365 | PyAPI_FUNC(int) _PyUnicode_IsLinebreak(\r | |
1366 | const Py_UNICODE ch /* Unicode character */\r | |
1367 | );\r | |
1368 | \r | |
1369 | PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase(\r | |
1370 | Py_UNICODE ch /* Unicode character */\r | |
1371 | );\r | |
1372 | \r | |
1373 | PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase(\r | |
1374 | Py_UNICODE ch /* Unicode character */\r | |
1375 | );\r | |
1376 | \r | |
1377 | PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase(\r | |
1378 | Py_UNICODE ch /* Unicode character */\r | |
1379 | );\r | |
1380 | \r | |
1381 | PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(\r | |
1382 | Py_UNICODE ch /* Unicode character */\r | |
1383 | );\r | |
1384 | \r | |
1385 | PyAPI_FUNC(int) _PyUnicode_ToDigit(\r | |
1386 | Py_UNICODE ch /* Unicode character */\r | |
1387 | );\r | |
1388 | \r | |
1389 | PyAPI_FUNC(double) _PyUnicode_ToNumeric(\r | |
1390 | Py_UNICODE ch /* Unicode character */\r | |
1391 | );\r | |
1392 | \r | |
1393 | PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(\r | |
1394 | Py_UNICODE ch /* Unicode character */\r | |
1395 | );\r | |
1396 | \r | |
1397 | PyAPI_FUNC(int) _PyUnicode_IsDigit(\r | |
1398 | Py_UNICODE ch /* Unicode character */\r | |
1399 | );\r | |
1400 | \r | |
1401 | PyAPI_FUNC(int) _PyUnicode_IsNumeric(\r | |
1402 | Py_UNICODE ch /* Unicode character */\r | |
1403 | );\r | |
1404 | \r | |
1405 | PyAPI_FUNC(int) _PyUnicode_IsAlpha(\r | |
1406 | Py_UNICODE ch /* Unicode character */\r | |
1407 | );\r | |
1408 | \r | |
1409 | #ifdef __cplusplus\r | |
1410 | }\r | |
1411 | #endif\r | |
1412 | #endif /* Py_USING_UNICODE */\r | |
1413 | #endif /* !Py_UNICODEOBJECT_H */\r |