]>
git.proxmox.com Git - mirror_edk2.git/blob - AppPkg/Applications/Python/Python-2.7.10/Objects/stringlib/split.h
1 /* stringlib: split implementation */
3 #ifndef STRINGLIB_SPLIT_H
4 #define STRINGLIB_SPLIT_H
6 #ifndef STRINGLIB_FASTSEARCH_H
7 #error must include "stringlib/fastsearch.h" before including this module
10 /* Overallocate the initial list to reduce the number of reallocs for small
11 split sizes. E.g., "A A A A A A A A A A".split() (10 elements) has three
12 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
13 text (roughly 11 words per line) and field-delimited data (usually 1-10
14 fields). For large strings the split algorithms are bandwidth limited,
15 so increasing the preallocation likely will not improve things. */
/* Upper bound on the number of list slots reserved up front by
   PREALLOC_SIZE. */
#define MAX_PREALLOC 12

/* Number of list slots to preallocate for a split limited to `maxsplit`
   cuts.  maxsplit cuts yield at most maxsplit+1 pieces (5 splits give 6
   elements); the result is capped at MAX_PREALLOC.

   The argument and the whole expansion are parenthesized so that
   expression arguments (e.g. `a | b`) and surrounding operators bind as
   intended.  The argument may still be evaluated twice, so it must not
   have side effects. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
23 #define SPLIT_APPEND(data, left, right) \
24 sub = STRINGLIB_NEW((data) + (left), \
28 if (PyList_Append(list, sub)) { \
35 #define SPLIT_ADD(data, left, right) { \
36 sub = STRINGLIB_NEW((data) + (left), \
40 if (count < MAX_PREALLOC) { \
41 PyList_SET_ITEM(list, count, sub); \
43 if (PyList_Append(list, sub)) { \
53 /* Always force the list to the expected size. */
54 #define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
56 Py_LOCAL_INLINE(PyObject
*)
57 stringlib_split_whitespace(PyObject
* str_obj
,
58 const STRINGLIB_CHAR
* str
, Py_ssize_t str_len
,
61 Py_ssize_t i
, j
, count
=0;
62 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
69 while (maxcount
-- > 0) {
70 while (i
< str_len
&& STRINGLIB_ISSPACE(str
[i
]))
72 if (i
== str_len
) break;
74 while (i
< str_len
&& !STRINGLIB_ISSPACE(str
[i
]))
76 #ifndef STRINGLIB_MUTABLE
77 if (j
== 0 && i
== str_len
&& STRINGLIB_CHECK_EXACT(str_obj
)) {
78 /* No whitespace in str_obj, so just use it as list[0] */
80 PyList_SET_ITEM(list
, 0, (PyObject
*)str_obj
);
89 /* Only occurs when maxcount was reached */
90 /* Skip any remaining whitespace and copy to end of string */
91 while (i
< str_len
&& STRINGLIB_ISSPACE(str
[i
]))
94 SPLIT_ADD(str
, i
, str_len
);
96 FIX_PREALLOC_SIZE(list
);
104 Py_LOCAL_INLINE(PyObject
*)
105 stringlib_split_char(PyObject
* str_obj
,
106 const STRINGLIB_CHAR
* str
, Py_ssize_t str_len
,
107 const STRINGLIB_CHAR ch
,
110 Py_ssize_t i
, j
, count
=0;
111 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
118 while ((j
< str_len
) && (maxcount
-- > 0)) {
119 for(; j
< str_len
; j
++) {
120 /* I found that using memchr makes no difference */
122 SPLIT_ADD(str
, i
, j
);
128 #ifndef STRINGLIB_MUTABLE
129 if (count
== 0 && STRINGLIB_CHECK_EXACT(str_obj
)) {
130 /* ch not in str_obj, so just use str_obj as list[0] */
132 PyList_SET_ITEM(list
, 0, (PyObject
*)str_obj
);
137 SPLIT_ADD(str
, i
, str_len
);
139 FIX_PREALLOC_SIZE(list
);
147 Py_LOCAL_INLINE(PyObject
*)
148 stringlib_split(PyObject
* str_obj
,
149 const STRINGLIB_CHAR
* str
, Py_ssize_t str_len
,
150 const STRINGLIB_CHAR
* sep
, Py_ssize_t sep_len
,
153 Py_ssize_t i
, j
, pos
, count
=0;
154 PyObject
*list
, *sub
;
157 PyErr_SetString(PyExc_ValueError
, "empty separator");
160 else if (sep_len
== 1)
161 return stringlib_split_char(str_obj
, str
, str_len
, sep
[0], maxcount
);
163 list
= PyList_New(PREALLOC_SIZE(maxcount
));
168 while (maxcount
-- > 0) {
169 pos
= fastsearch(str
+i
, str_len
-i
, sep
, sep_len
, -1, FAST_SEARCH
);
173 SPLIT_ADD(str
, i
, j
);
176 #ifndef STRINGLIB_MUTABLE
177 if (count
== 0 && STRINGLIB_CHECK_EXACT(str_obj
)) {
178 /* No match in str_obj, so just use it as list[0] */
180 PyList_SET_ITEM(list
, 0, (PyObject
*)str_obj
);
185 SPLIT_ADD(str
, i
, str_len
);
187 FIX_PREALLOC_SIZE(list
);
195 Py_LOCAL_INLINE(PyObject
*)
196 stringlib_rsplit_whitespace(PyObject
* str_obj
,
197 const STRINGLIB_CHAR
* str
, Py_ssize_t str_len
,
200 Py_ssize_t i
, j
, count
=0;
201 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
208 while (maxcount
-- > 0) {
209 while (i
>= 0 && STRINGLIB_ISSPACE(str
[i
]))
213 while (i
>= 0 && !STRINGLIB_ISSPACE(str
[i
]))
215 #ifndef STRINGLIB_MUTABLE
216 if (j
== str_len
- 1 && i
< 0 && STRINGLIB_CHECK_EXACT(str_obj
)) {
217 /* No whitespace in str_obj, so just use it as list[0] */
219 PyList_SET_ITEM(list
, 0, (PyObject
*)str_obj
);
224 SPLIT_ADD(str
, i
+ 1, j
+ 1);
228 /* Only occurs when maxcount was reached */
229 /* Skip any remaining whitespace and copy to beginning of string */
230 while (i
>= 0 && STRINGLIB_ISSPACE(str
[i
]))
233 SPLIT_ADD(str
, 0, i
+ 1);
235 FIX_PREALLOC_SIZE(list
);
236 if (PyList_Reverse(list
) < 0)
245 Py_LOCAL_INLINE(PyObject
*)
246 stringlib_rsplit_char(PyObject
* str_obj
,
247 const STRINGLIB_CHAR
* str
, Py_ssize_t str_len
,
248 const STRINGLIB_CHAR ch
,
251 Py_ssize_t i
, j
, count
=0;
252 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
259 while ((i
>= 0) && (maxcount
-- > 0)) {
262 SPLIT_ADD(str
, i
+ 1, j
+ 1);
268 #ifndef STRINGLIB_MUTABLE
269 if (count
== 0 && STRINGLIB_CHECK_EXACT(str_obj
)) {
270 /* ch not in str_obj, so just use str_obj as list[0] */
272 PyList_SET_ITEM(list
, 0, (PyObject
*)str_obj
);
277 SPLIT_ADD(str
, 0, j
+ 1);
279 FIX_PREALLOC_SIZE(list
);
280 if (PyList_Reverse(list
) < 0)
289 Py_LOCAL_INLINE(PyObject
*)
290 stringlib_rsplit(PyObject
* str_obj
,
291 const STRINGLIB_CHAR
* str
, Py_ssize_t str_len
,
292 const STRINGLIB_CHAR
* sep
, Py_ssize_t sep_len
,
295 Py_ssize_t j
, pos
, count
=0;
296 PyObject
*list
, *sub
;
299 PyErr_SetString(PyExc_ValueError
, "empty separator");
302 else if (sep_len
== 1)
303 return stringlib_rsplit_char(str_obj
, str
, str_len
, sep
[0], maxcount
);
305 list
= PyList_New(PREALLOC_SIZE(maxcount
));
310 while (maxcount
-- > 0) {
311 pos
= fastsearch(str
, j
, sep
, sep_len
, -1, FAST_RSEARCH
);
314 SPLIT_ADD(str
, pos
+ sep_len
, j
);
317 #ifndef STRINGLIB_MUTABLE
318 if (count
== 0 && STRINGLIB_CHECK_EXACT(str_obj
)) {
319 /* No match in str_obj, so just use it as list[0] */
321 PyList_SET_ITEM(list
, 0, (PyObject
*)str_obj
);
326 SPLIT_ADD(str
, 0, j
);
328 FIX_PREALLOC_SIZE(list
);
329 if (PyList_Reverse(list
) < 0)
338 Py_LOCAL_INLINE(PyObject
*)
339 stringlib_splitlines(PyObject
* str_obj
,
340 const STRINGLIB_CHAR
* str
, Py_ssize_t str_len
,
343 /* This does not use the preallocated list because splitlines is
344 usually run with hundreds of newlines. The overhead of
345 switching between PyList_SET_ITEM and append causes about a
346 2-3% slowdown for that common case. A smarter implementation
347 could move the if check out, so the SET_ITEMs are done first
348 and the appends only done when the prealloc buffer is full.
349 That's too much work for little gain.*/
351 register Py_ssize_t i
;
352 register Py_ssize_t j
;
353 PyObject
*list
= PyList_New(0);
359 for (i
= j
= 0; i
< str_len
; ) {
362 /* Find a line and append it */
363 while (i
< str_len
&& !STRINGLIB_ISLINEBREAK(str
[i
]))
366 /* Skip the line break reading CRLF as one line break */
369 if (str
[i
] == '\r' && i
+ 1 < str_len
&& str
[i
+1] == '\n')
376 #ifndef STRINGLIB_MUTABLE
377 if (j
== 0 && eol
== str_len
&& STRINGLIB_CHECK_EXACT(str_obj
)) {
378 /* No linebreak in str_obj, so just use it as list[0] */
379 if (PyList_Append(list
, str_obj
))
384 SPLIT_APPEND(str
, j
, eol
);