]> git.proxmox.com Git - mirror_edk2.git/blame - AppPkg/Applications/Python/Python-2.7.10/Objects/stringlib/split.h
AppPkg/Applications/Python/Python-2.7.10: Initial Checkin part 3/5.
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.10 / Objects / stringlib / split.h
CommitLineData
53b2ba57
DM
1/* stringlib: split implementation */\r
2\r
3#ifndef STRINGLIB_SPLIT_H\r
4#define STRINGLIB_SPLIT_H\r
5\r
6#ifndef STRINGLIB_FASTSEARCH_H\r
7#error must include "stringlib/fastsearch.h" before including this module\r
8#endif\r
9\r
10/* Overallocate the initial list to reduce the number of reallocs for small\r
11 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three\r
12 resizes, to sizes 4, 8, then 16. Most observed string splits are for human\r
13 text (roughly 11 words per line) and field delimited data (usually 1-10\r
14 fields). For large strings the split algorithms are bandwidth limited\r
15 so increasing the preallocation likely will not improve things.*/\r
16\r
17#define MAX_PREALLOC 12\r
18\r
19/* 5 splits gives 6 elements */\r
20#define PREALLOC_SIZE(maxsplit) \\r
21 (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)\r
22\r
23#define SPLIT_APPEND(data, left, right) \\r
24 sub = STRINGLIB_NEW((data) + (left), \\r
25 (right) - (left)); \\r
26 if (sub == NULL) \\r
27 goto onError; \\r
28 if (PyList_Append(list, sub)) { \\r
29 Py_DECREF(sub); \\r
30 goto onError; \\r
31 } \\r
32 else \\r
33 Py_DECREF(sub);\r
34\r
35#define SPLIT_ADD(data, left, right) { \\r
36 sub = STRINGLIB_NEW((data) + (left), \\r
37 (right) - (left)); \\r
38 if (sub == NULL) \\r
39 goto onError; \\r
40 if (count < MAX_PREALLOC) { \\r
41 PyList_SET_ITEM(list, count, sub); \\r
42 } else { \\r
43 if (PyList_Append(list, sub)) { \\r
44 Py_DECREF(sub); \\r
45 goto onError; \\r
46 } \\r
47 else \\r
48 Py_DECREF(sub); \\r
49 } \\r
50 count++; }\r
51\r
52\r
53/* Always force the list to the expected size. */\r
54#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count\r
55\r
56Py_LOCAL_INLINE(PyObject *)\r
57stringlib_split_whitespace(PyObject* str_obj,\r
58 const STRINGLIB_CHAR* str, Py_ssize_t str_len,\r
59 Py_ssize_t maxcount)\r
60{\r
61 Py_ssize_t i, j, count=0;\r
62 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));\r
63 PyObject *sub;\r
64\r
65 if (list == NULL)\r
66 return NULL;\r
67\r
68 i = j = 0;\r
69 while (maxcount-- > 0) {\r
70 while (i < str_len && STRINGLIB_ISSPACE(str[i]))\r
71 i++;\r
72 if (i == str_len) break;\r
73 j = i; i++;\r
74 while (i < str_len && !STRINGLIB_ISSPACE(str[i]))\r
75 i++;\r
76#ifndef STRINGLIB_MUTABLE\r
77 if (j == 0 && i == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {\r
78 /* No whitespace in str_obj, so just use it as list[0] */\r
79 Py_INCREF(str_obj);\r
80 PyList_SET_ITEM(list, 0, (PyObject *)str_obj);\r
81 count++;\r
82 break;\r
83 }\r
84#endif\r
85 SPLIT_ADD(str, j, i);\r
86 }\r
87\r
88 if (i < str_len) {\r
89 /* Only occurs when maxcount was reached */\r
90 /* Skip any remaining whitespace and copy to end of string */\r
91 while (i < str_len && STRINGLIB_ISSPACE(str[i]))\r
92 i++;\r
93 if (i != str_len)\r
94 SPLIT_ADD(str, i, str_len);\r
95 }\r
96 FIX_PREALLOC_SIZE(list);\r
97 return list;\r
98\r
99 onError:\r
100 Py_DECREF(list);\r
101 return NULL;\r
102}\r
103\r
104Py_LOCAL_INLINE(PyObject *)\r
105stringlib_split_char(PyObject* str_obj,\r
106 const STRINGLIB_CHAR* str, Py_ssize_t str_len,\r
107 const STRINGLIB_CHAR ch,\r
108 Py_ssize_t maxcount)\r
109{\r
110 Py_ssize_t i, j, count=0;\r
111 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));\r
112 PyObject *sub;\r
113\r
114 if (list == NULL)\r
115 return NULL;\r
116\r
117 i = j = 0;\r
118 while ((j < str_len) && (maxcount-- > 0)) {\r
119 for(; j < str_len; j++) {\r
120 /* I found that using memchr makes no difference */\r
121 if (str[j] == ch) {\r
122 SPLIT_ADD(str, i, j);\r
123 i = j = j + 1;\r
124 break;\r
125 }\r
126 }\r
127 }\r
128#ifndef STRINGLIB_MUTABLE\r
129 if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {\r
130 /* ch not in str_obj, so just use str_obj as list[0] */\r
131 Py_INCREF(str_obj);\r
132 PyList_SET_ITEM(list, 0, (PyObject *)str_obj);\r
133 count++;\r
134 } else\r
135#endif\r
136 if (i <= str_len) {\r
137 SPLIT_ADD(str, i, str_len);\r
138 }\r
139 FIX_PREALLOC_SIZE(list);\r
140 return list;\r
141\r
142 onError:\r
143 Py_DECREF(list);\r
144 return NULL;\r
145}\r
146\r
147Py_LOCAL_INLINE(PyObject *)\r
148stringlib_split(PyObject* str_obj,\r
149 const STRINGLIB_CHAR* str, Py_ssize_t str_len,\r
150 const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,\r
151 Py_ssize_t maxcount)\r
152{\r
153 Py_ssize_t i, j, pos, count=0;\r
154 PyObject *list, *sub;\r
155\r
156 if (sep_len == 0) {\r
157 PyErr_SetString(PyExc_ValueError, "empty separator");\r
158 return NULL;\r
159 }\r
160 else if (sep_len == 1)\r
161 return stringlib_split_char(str_obj, str, str_len, sep[0], maxcount);\r
162\r
163 list = PyList_New(PREALLOC_SIZE(maxcount));\r
164 if (list == NULL)\r
165 return NULL;\r
166\r
167 i = j = 0;\r
168 while (maxcount-- > 0) {\r
169 pos = fastsearch(str+i, str_len-i, sep, sep_len, -1, FAST_SEARCH);\r
170 if (pos < 0)\r
171 break;\r
172 j = i + pos;\r
173 SPLIT_ADD(str, i, j);\r
174 i = j + sep_len;\r
175 }\r
176#ifndef STRINGLIB_MUTABLE\r
177 if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {\r
178 /* No match in str_obj, so just use it as list[0] */\r
179 Py_INCREF(str_obj);\r
180 PyList_SET_ITEM(list, 0, (PyObject *)str_obj);\r
181 count++;\r
182 } else\r
183#endif\r
184 {\r
185 SPLIT_ADD(str, i, str_len);\r
186 }\r
187 FIX_PREALLOC_SIZE(list);\r
188 return list;\r
189\r
190 onError:\r
191 Py_DECREF(list);\r
192 return NULL;\r
193}\r
194\r
195Py_LOCAL_INLINE(PyObject *)\r
196stringlib_rsplit_whitespace(PyObject* str_obj,\r
197 const STRINGLIB_CHAR* str, Py_ssize_t str_len,\r
198 Py_ssize_t maxcount)\r
199{\r
200 Py_ssize_t i, j, count=0;\r
201 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));\r
202 PyObject *sub;\r
203\r
204 if (list == NULL)\r
205 return NULL;\r
206\r
207 i = j = str_len - 1;\r
208 while (maxcount-- > 0) {\r
209 while (i >= 0 && STRINGLIB_ISSPACE(str[i]))\r
210 i--;\r
211 if (i < 0) break;\r
212 j = i; i--;\r
213 while (i >= 0 && !STRINGLIB_ISSPACE(str[i]))\r
214 i--;\r
215#ifndef STRINGLIB_MUTABLE\r
216 if (j == str_len - 1 && i < 0 && STRINGLIB_CHECK_EXACT(str_obj)) {\r
217 /* No whitespace in str_obj, so just use it as list[0] */\r
218 Py_INCREF(str_obj);\r
219 PyList_SET_ITEM(list, 0, (PyObject *)str_obj);\r
220 count++;\r
221 break;\r
222 }\r
223#endif\r
224 SPLIT_ADD(str, i + 1, j + 1);\r
225 }\r
226\r
227 if (i >= 0) {\r
228 /* Only occurs when maxcount was reached */\r
229 /* Skip any remaining whitespace and copy to beginning of string */\r
230 while (i >= 0 && STRINGLIB_ISSPACE(str[i]))\r
231 i--;\r
232 if (i >= 0)\r
233 SPLIT_ADD(str, 0, i + 1);\r
234 }\r
235 FIX_PREALLOC_SIZE(list);\r
236 if (PyList_Reverse(list) < 0)\r
237 goto onError;\r
238 return list;\r
239\r
240 onError:\r
241 Py_DECREF(list);\r
242 return NULL;\r
243}\r
244\r
245Py_LOCAL_INLINE(PyObject *)\r
246stringlib_rsplit_char(PyObject* str_obj,\r
247 const STRINGLIB_CHAR* str, Py_ssize_t str_len,\r
248 const STRINGLIB_CHAR ch,\r
249 Py_ssize_t maxcount)\r
250{\r
251 Py_ssize_t i, j, count=0;\r
252 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));\r
253 PyObject *sub;\r
254\r
255 if (list == NULL)\r
256 return NULL;\r
257\r
258 i = j = str_len - 1;\r
259 while ((i >= 0) && (maxcount-- > 0)) {\r
260 for(; i >= 0; i--) {\r
261 if (str[i] == ch) {\r
262 SPLIT_ADD(str, i + 1, j + 1);\r
263 j = i = i - 1;\r
264 break;\r
265 }\r
266 }\r
267 }\r
268#ifndef STRINGLIB_MUTABLE\r
269 if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {\r
270 /* ch not in str_obj, so just use str_obj as list[0] */\r
271 Py_INCREF(str_obj);\r
272 PyList_SET_ITEM(list, 0, (PyObject *)str_obj);\r
273 count++;\r
274 } else\r
275#endif\r
276 if (j >= -1) {\r
277 SPLIT_ADD(str, 0, j + 1);\r
278 }\r
279 FIX_PREALLOC_SIZE(list);\r
280 if (PyList_Reverse(list) < 0)\r
281 goto onError;\r
282 return list;\r
283\r
284 onError:\r
285 Py_DECREF(list);\r
286 return NULL;\r
287}\r
288\r
289Py_LOCAL_INLINE(PyObject *)\r
290stringlib_rsplit(PyObject* str_obj,\r
291 const STRINGLIB_CHAR* str, Py_ssize_t str_len,\r
292 const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,\r
293 Py_ssize_t maxcount)\r
294{\r
295 Py_ssize_t j, pos, count=0;\r
296 PyObject *list, *sub;\r
297\r
298 if (sep_len == 0) {\r
299 PyErr_SetString(PyExc_ValueError, "empty separator");\r
300 return NULL;\r
301 }\r
302 else if (sep_len == 1)\r
303 return stringlib_rsplit_char(str_obj, str, str_len, sep[0], maxcount);\r
304\r
305 list = PyList_New(PREALLOC_SIZE(maxcount));\r
306 if (list == NULL)\r
307 return NULL;\r
308\r
309 j = str_len;\r
310 while (maxcount-- > 0) {\r
311 pos = fastsearch(str, j, sep, sep_len, -1, FAST_RSEARCH);\r
312 if (pos < 0)\r
313 break;\r
314 SPLIT_ADD(str, pos + sep_len, j);\r
315 j = pos;\r
316 }\r
317#ifndef STRINGLIB_MUTABLE\r
318 if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {\r
319 /* No match in str_obj, so just use it as list[0] */\r
320 Py_INCREF(str_obj);\r
321 PyList_SET_ITEM(list, 0, (PyObject *)str_obj);\r
322 count++;\r
323 } else\r
324#endif\r
325 {\r
326 SPLIT_ADD(str, 0, j);\r
327 }\r
328 FIX_PREALLOC_SIZE(list);\r
329 if (PyList_Reverse(list) < 0)\r
330 goto onError;\r
331 return list;\r
332\r
333 onError:\r
334 Py_DECREF(list);\r
335 return NULL;\r
336}\r
337\r
338Py_LOCAL_INLINE(PyObject *)\r
339stringlib_splitlines(PyObject* str_obj,\r
340 const STRINGLIB_CHAR* str, Py_ssize_t str_len,\r
341 int keepends)\r
342{\r
343 /* This does not use the preallocated list because splitlines is\r
344 usually run with hundreds of newlines. The overhead of\r
345 switching between PyList_SET_ITEM and append causes about a\r
346 2-3% slowdown for that common case. A smarter implementation\r
347 could move the if check out, so the SET_ITEMs are done first\r
348 and the appends only done when the prealloc buffer is full.\r
349 That's too much work for little gain.*/\r
350\r
351 register Py_ssize_t i;\r
352 register Py_ssize_t j;\r
353 PyObject *list = PyList_New(0);\r
354 PyObject *sub;\r
355\r
356 if (list == NULL)\r
357 return NULL;\r
358\r
359 for (i = j = 0; i < str_len; ) {\r
360 Py_ssize_t eol;\r
361\r
362 /* Find a line and append it */\r
363 while (i < str_len && !STRINGLIB_ISLINEBREAK(str[i]))\r
364 i++;\r
365\r
366 /* Skip the line break reading CRLF as one line break */\r
367 eol = i;\r
368 if (i < str_len) {\r
369 if (str[i] == '\r' && i + 1 < str_len && str[i+1] == '\n')\r
370 i += 2;\r
371 else\r
372 i++;\r
373 if (keepends)\r
374 eol = i;\r
375 }\r
376#ifndef STRINGLIB_MUTABLE\r
377 if (j == 0 && eol == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {\r
378 /* No linebreak in str_obj, so just use it as list[0] */\r
379 if (PyList_Append(list, str_obj))\r
380 goto onError;\r
381 break;\r
382 }\r
383#endif\r
384 SPLIT_APPEND(str, j, eol);\r
385 j = i;\r
386 }\r
387 return list;\r
388\r
389 onError:\r
390 Py_DECREF(list);\r
391 return NULL;\r
392}\r
393\r
394#endif\r