]> git.proxmox.com Git - mirror_edk2.git/blame - AppPkg/Applications/Python/PyMod-2.7.1/Modules/_sre.c
Basic Core Python interpreter.
[mirror_edk2.git] / AppPkg / Applications / Python / PyMod-2.7.1 / Modules / _sre.c
CommitLineData
006fecd5 1/*\r
2 * Secret Labs' Regular Expression Engine\r
3 *\r
4 * regular expression matching engine\r
5 *\r
6 * partial history:\r
7 * 1999-10-24 fl created (based on existing template matcher code)\r
8 * 2000-03-06 fl first alpha, sort of\r
9 * 2000-08-01 fl fixes for 1.6b1\r
10 * 2000-08-07 fl use PyOS_CheckStack() if available\r
11 * 2000-09-20 fl added expand method\r
12 * 2001-03-20 fl lots of fixes for 2.1b2\r
13 * 2001-04-15 fl export copyright as Python attribute, not global\r
14 * 2001-04-28 fl added __copy__ methods (work in progress)\r
15 * 2001-05-14 fl fixes for 1.5.2 compatibility\r
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)\r
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)\r
18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1\r
19 * 2001-10-21 fl added sub/subn primitive\r
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)\r
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)\r
22 * 2002-11-09 fl fixed empty sub/subn return type\r
23 * 2003-04-18 mvl fully support 4-byte codes\r
24 * 2003-10-17 gn implemented non recursive scheme\r
25 *\r
26 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.\r
27 *\r
28 * This version of the SRE library can be redistributed under CNRI's\r
29 * Python 1.6 license. For any other use, please contact Secret Labs\r
30 * AB (info@pythonware.com).\r
31 *\r
32 * Portions of this engine have been developed in cooperation with\r
33 * CNRI. Hewlett-Packard provided funding for 1.6 integration and\r
34 * other compatibility work.\r
35 */\r
36\r
37/* Get rid of these macros to prevent collisions between EFI and Python in this file. */\r
38#undef RETURN_ERROR\r
39#undef RETURN_SUCCESS\r
40\r
41#ifndef SRE_RECURSIVE\r
42\r
43static char copyright[] =\r
44 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";\r
45\r
46#define PY_SSIZE_T_CLEAN\r
47\r
48#include "Python.h"\r
49#include "structmember.h" /* offsetof */\r
50\r
51#include "sre.h"\r
52\r
53#include <ctype.h>\r
54\r
55/* name of this module, minus the leading underscore */\r
56#if !defined(SRE_MODULE)\r
57#define SRE_MODULE "sre"\r
58#endif\r
59\r
60#define SRE_PY_MODULE "re"\r
61\r
62/* defining this one enables tracing */\r
63#undef VERBOSE\r
64\r
65#if PY_VERSION_HEX >= 0x01060000\r
66#if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)\r
67/* defining this enables unicode support (default under 1.6a1 and later) */\r
68#define HAVE_UNICODE\r
69#endif\r
70#endif\r
71\r
72/* -------------------------------------------------------------------- */\r
73/* optional features */\r
74\r
75/* enables fast searching */\r
76#define USE_FAST_SEARCH\r
77\r
78/* enables aggressive inlining (always on for Visual C) */\r
79#undef USE_INLINE\r
80\r
81/* enables copy/deepcopy handling (work in progress) */\r
82#undef USE_BUILTIN_COPY\r
83\r
84#if PY_VERSION_HEX < 0x01060000\r
85#define PyObject_DEL(op) PyMem_DEL((op))\r
86#endif\r
87\r
88/* -------------------------------------------------------------------- */\r
89\r
90#if defined(_MSC_VER)\r
91#pragma optimize("gt", on) /* doesn't seem to make much difference... */\r
92#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */\r
93/* fastest possible local call under MSVC */\r
94#define LOCAL(type) static __inline type __fastcall\r
95#elif defined(USE_INLINE)\r
96#define LOCAL(type) static inline type\r
97#else\r
98#define LOCAL(type) static type\r
99#endif\r
100\r
101/* error codes */\r
102#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */\r
103#define SRE_ERROR_STATE -2 /* illegal state */\r
104#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */\r
105#define SRE_ERROR_MEMORY -9 /* out of memory */\r
106#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */\r
107\r
108#if defined(VERBOSE)\r
109#define TRACE(v) printf v\r
110#else\r
111#define TRACE(v)\r
112#endif\r
113\r
114/* -------------------------------------------------------------------- */\r
115/* search engine state */\r
116\r
117/* default character predicates (run sre_chars.py to regenerate tables) */\r
118\r
119#define SRE_DIGIT_MASK 1\r
120#define SRE_SPACE_MASK 2\r
121#define SRE_LINEBREAK_MASK 4\r
122#define SRE_ALNUM_MASK 8\r
123#define SRE_WORD_MASK 16\r
124\r
125/* FIXME: this assumes ASCII. create tables in init_sre() instead */\r
126\r
127static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,\r
1282, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,\r
1290, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,\r
13025, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\r
13124, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,\r
1320, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\r
13324, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };\r
134\r
135static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,\r
13610, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,\r
13727, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,\r
13844, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,\r
13961, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,\r
140108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,\r
141122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,\r
142106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,\r
143120, 121, 122, 123, 124, 125, 126, 127 };\r
144\r
145#define SRE_IS_DIGIT(ch)\\r
146 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)\r
147#define SRE_IS_SPACE(ch)\\r
148 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)\r
149#define SRE_IS_LINEBREAK(ch)\\r
150 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)\r
151#define SRE_IS_ALNUM(ch)\\r
152 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)\r
153#define SRE_IS_WORD(ch)\\r
154 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)\r
155\r
156static unsigned int sre_lower(unsigned int ch)\r
157{\r
158 return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);\r
159}\r
160\r
161/* locale-specific character predicates */\r
162/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids\r
163 * warnings when c's type supports only numbers < N+1 */\r
164#define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)\r
165#define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)\r
166#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')\r
167#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)\r
168#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')\r
169\r
170static unsigned int sre_lower_locale(unsigned int ch)\r
171{\r
172 return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);\r
173}\r
174\r
175/* unicode-specific character predicates */\r
176\r
177#if defined(HAVE_UNICODE)\r
178\r
179#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL((Py_UNICODE)(ch))\r
180#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))\r
181#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))\r
182#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))\r
183#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')\r
184\r
185static unsigned int sre_lower_unicode(unsigned int ch)\r
186{\r
187 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));\r
188}\r
189\r
190#endif\r
191\r
192LOCAL(int)\r
193sre_category(SRE_CODE category, unsigned int ch)\r
194{\r
195 switch (category) {\r
196\r
197 case SRE_CATEGORY_DIGIT:\r
198 return SRE_IS_DIGIT(ch);\r
199 case SRE_CATEGORY_NOT_DIGIT:\r
200 return !SRE_IS_DIGIT(ch);\r
201 case SRE_CATEGORY_SPACE:\r
202 return SRE_IS_SPACE(ch);\r
203 case SRE_CATEGORY_NOT_SPACE:\r
204 return !SRE_IS_SPACE(ch);\r
205 case SRE_CATEGORY_WORD:\r
206 return SRE_IS_WORD(ch);\r
207 case SRE_CATEGORY_NOT_WORD:\r
208 return !SRE_IS_WORD(ch);\r
209 case SRE_CATEGORY_LINEBREAK:\r
210 return SRE_IS_LINEBREAK(ch);\r
211 case SRE_CATEGORY_NOT_LINEBREAK:\r
212 return !SRE_IS_LINEBREAK(ch);\r
213\r
214 case SRE_CATEGORY_LOC_WORD:\r
215 return SRE_LOC_IS_WORD(ch);\r
216 case SRE_CATEGORY_LOC_NOT_WORD:\r
217 return !SRE_LOC_IS_WORD(ch);\r
218\r
219#if defined(HAVE_UNICODE)\r
220 case SRE_CATEGORY_UNI_DIGIT:\r
221 return SRE_UNI_IS_DIGIT(ch);\r
222 case SRE_CATEGORY_UNI_NOT_DIGIT:\r
223 return !SRE_UNI_IS_DIGIT(ch);\r
224 case SRE_CATEGORY_UNI_SPACE:\r
225 return SRE_UNI_IS_SPACE(ch);\r
226 case SRE_CATEGORY_UNI_NOT_SPACE:\r
227 return !SRE_UNI_IS_SPACE(ch);\r
228 case SRE_CATEGORY_UNI_WORD:\r
229 return SRE_UNI_IS_WORD(ch);\r
230 case SRE_CATEGORY_UNI_NOT_WORD:\r
231 return !SRE_UNI_IS_WORD(ch);\r
232 case SRE_CATEGORY_UNI_LINEBREAK:\r
233 return SRE_UNI_IS_LINEBREAK(ch);\r
234 case SRE_CATEGORY_UNI_NOT_LINEBREAK:\r
235 return !SRE_UNI_IS_LINEBREAK(ch);\r
236#else\r
237 case SRE_CATEGORY_UNI_DIGIT:\r
238 return SRE_IS_DIGIT(ch);\r
239 case SRE_CATEGORY_UNI_NOT_DIGIT:\r
240 return !SRE_IS_DIGIT(ch);\r
241 case SRE_CATEGORY_UNI_SPACE:\r
242 return SRE_IS_SPACE(ch);\r
243 case SRE_CATEGORY_UNI_NOT_SPACE:\r
244 return !SRE_IS_SPACE(ch);\r
245 case SRE_CATEGORY_UNI_WORD:\r
246 return SRE_LOC_IS_WORD(ch);\r
247 case SRE_CATEGORY_UNI_NOT_WORD:\r
248 return !SRE_LOC_IS_WORD(ch);\r
249 case SRE_CATEGORY_UNI_LINEBREAK:\r
250 return SRE_IS_LINEBREAK(ch);\r
251 case SRE_CATEGORY_UNI_NOT_LINEBREAK:\r
252 return !SRE_IS_LINEBREAK(ch);\r
253#endif\r
254 }\r
255 return 0;\r
256}\r
257\r
258/* helpers */\r
259\r
260static void\r
261data_stack_dealloc(SRE_STATE* state)\r
262{\r
263 if (state->data_stack) {\r
264 PyMem_FREE(state->data_stack);\r
265 state->data_stack = NULL;\r
266 }\r
267 state->data_stack_size = state->data_stack_base = 0;\r
268}\r
269\r
270static int\r
271data_stack_grow(SRE_STATE* state, Py_ssize_t size)\r
272{\r
273 Py_ssize_t minsize, cursize;\r
274 minsize = state->data_stack_base+size;\r
275 cursize = state->data_stack_size;\r
276 if (cursize < minsize) {\r
277 void* stack;\r
278 cursize = minsize+minsize/4+1024;\r
279 TRACE(("allocate/grow stack %d\n", cursize));\r
280 stack = PyMem_REALLOC(state->data_stack, cursize);\r
281 if (!stack) {\r
282 data_stack_dealloc(state);\r
283 return SRE_ERROR_MEMORY;\r
284 }\r
285 state->data_stack = (char *)stack;\r
286 state->data_stack_size = cursize;\r
287 }\r
288 return 0;\r
289}\r
290\r
291/* generate 8-bit version */\r
292\r
293#define SRE_CHAR unsigned char\r
294#define SRE_AT sre_at\r
295#define SRE_COUNT sre_count\r
296#define SRE_CHARSET sre_charset\r
297#define SRE_INFO sre_info\r
298#define SRE_MATCH sre_match\r
299#define SRE_MATCH_CONTEXT sre_match_context\r
300#define SRE_SEARCH sre_search\r
301#define SRE_LITERAL_TEMPLATE sre_literal_template\r
302\r
303#if defined(HAVE_UNICODE)\r
304\r
305#define SRE_RECURSIVE\r
306#include "_sre.c"\r
307#undef SRE_RECURSIVE\r
308\r
309#undef SRE_LITERAL_TEMPLATE\r
310#undef SRE_SEARCH\r
311#undef SRE_MATCH\r
312#undef SRE_MATCH_CONTEXT\r
313#undef SRE_INFO\r
314#undef SRE_CHARSET\r
315#undef SRE_COUNT\r
316#undef SRE_AT\r
317#undef SRE_CHAR\r
318\r
319/* generate 16-bit unicode version */\r
320\r
321#define SRE_CHAR Py_UNICODE\r
322#define SRE_AT sre_uat\r
323#define SRE_COUNT sre_ucount\r
324#define SRE_CHARSET sre_ucharset\r
325#define SRE_INFO sre_uinfo\r
326#define SRE_MATCH sre_umatch\r
327#define SRE_MATCH_CONTEXT sre_umatch_context\r
328#define SRE_SEARCH sre_usearch\r
329#define SRE_LITERAL_TEMPLATE sre_uliteral_template\r
330#endif\r
331\r
332#endif /* SRE_RECURSIVE */\r
333\r
334/* -------------------------------------------------------------------- */\r
335/* String matching engine */\r
336\r
337/* the following section is compiled twice, with different character\r
338 settings */\r
339\r
340LOCAL(int)\r
341SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)\r
342{\r
343 /* check if pointer is at given position */\r
344\r
345 Py_ssize_t thisp, thatp;\r
346\r
347 switch (at) {\r
348\r
349 case SRE_AT_BEGINNING:\r
350 case SRE_AT_BEGINNING_STRING:\r
351 return ((void*) ptr == state->beginning);\r
352\r
353 case SRE_AT_BEGINNING_LINE:\r
354 return ((void*) ptr == state->beginning ||\r
355 SRE_IS_LINEBREAK((int) ptr[-1]));\r
356\r
357 case SRE_AT_END:\r
358 return (((void*) (ptr+1) == state->end &&\r
359 SRE_IS_LINEBREAK((int) ptr[0])) ||\r
360 ((void*) ptr == state->end));\r
361\r
362 case SRE_AT_END_LINE:\r
363 return ((void*) ptr == state->end ||\r
364 SRE_IS_LINEBREAK((int) ptr[0]));\r
365\r
366 case SRE_AT_END_STRING:\r
367 return ((void*) ptr == state->end);\r
368\r
369 case SRE_AT_BOUNDARY:\r
370 if (state->beginning == state->end)\r
371 return 0;\r
372 thatp = ((void*) ptr > state->beginning) ?\r
373 SRE_IS_WORD((int) ptr[-1]) : 0;\r
374 thisp = ((void*) ptr < state->end) ?\r
375 SRE_IS_WORD((int) ptr[0]) : 0;\r
376 return thisp != thatp;\r
377\r
378 case SRE_AT_NON_BOUNDARY:\r
379 if (state->beginning == state->end)\r
380 return 0;\r
381 thatp = ((void*) ptr > state->beginning) ?\r
382 SRE_IS_WORD((int) ptr[-1]) : 0;\r
383 thisp = ((void*) ptr < state->end) ?\r
384 SRE_IS_WORD((int) ptr[0]) : 0;\r
385 return thisp == thatp;\r
386\r
387 case SRE_AT_LOC_BOUNDARY:\r
388 if (state->beginning == state->end)\r
389 return 0;\r
390 thatp = ((void*) ptr > state->beginning) ?\r
391 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;\r
392 thisp = ((void*) ptr < state->end) ?\r
393 SRE_LOC_IS_WORD((int) ptr[0]) : 0;\r
394 return thisp != thatp;\r
395\r
396 case SRE_AT_LOC_NON_BOUNDARY:\r
397 if (state->beginning == state->end)\r
398 return 0;\r
399 thatp = ((void*) ptr > state->beginning) ?\r
400 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;\r
401 thisp = ((void*) ptr < state->end) ?\r
402 SRE_LOC_IS_WORD((int) ptr[0]) : 0;\r
403 return thisp == thatp;\r
404\r
405#if defined(HAVE_UNICODE)\r
406 case SRE_AT_UNI_BOUNDARY:\r
407 if (state->beginning == state->end)\r
408 return 0;\r
409 thatp = ((void*) ptr > state->beginning) ?\r
410 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;\r
411 thisp = ((void*) ptr < state->end) ?\r
412 SRE_UNI_IS_WORD((int) ptr[0]) : 0;\r
413 return thisp != thatp;\r
414\r
415 case SRE_AT_UNI_NON_BOUNDARY:\r
416 if (state->beginning == state->end)\r
417 return 0;\r
418 thatp = ((void*) ptr > state->beginning) ?\r
419 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;\r
420 thisp = ((void*) ptr < state->end) ?\r
421 SRE_UNI_IS_WORD((int) ptr[0]) : 0;\r
422 return thisp == thatp;\r
423#endif\r
424\r
425 }\r
426\r
427 return 0;\r
428}\r
429\r
430LOCAL(int)\r
431SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)\r
432{\r
433 /* check if character is a member of the given set */\r
434\r
435 int ok = 1;\r
436\r
437 for (;;) {\r
438 switch (*set++) {\r
439\r
440 case SRE_OP_FAILURE:\r
441 return !ok;\r
442\r
443 case SRE_OP_LITERAL:\r
444 /* <LITERAL> <code> */\r
445 if (ch == set[0])\r
446 return ok;\r
447 set++;\r
448 break;\r
449\r
450 case SRE_OP_CATEGORY:\r
451 /* <CATEGORY> <code> */\r
452 if (sre_category(set[0], (int) ch))\r
453 return ok;\r
454 set += 1;\r
455 break;\r
456\r
457 case SRE_OP_CHARSET:\r
458 if (sizeof(SRE_CODE) == 2) {\r
459 /* <CHARSET> <bitmap> (16 bits per code word) */\r
460 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))\r
461 return ok;\r
462 set += 16;\r
463 }\r
464 else {\r
465 /* <CHARSET> <bitmap> (32 bits per code word) */\r
466 if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))\r
467 return ok;\r
468 set += 8;\r
469 }\r
470 break;\r
471\r
472 case SRE_OP_RANGE:\r
473 /* <RANGE> <lower> <upper> */\r
474 if (set[0] <= ch && ch <= set[1])\r
475 return ok;\r
476 set += 2;\r
477 break;\r
478\r
479 case SRE_OP_NEGATE:\r
480 ok = !ok;\r
481 break;\r
482\r
483 case SRE_OP_BIGCHARSET:\r
484 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */\r
485 {\r
486 Py_ssize_t count, block;\r
487 count = *(set++);\r
488\r
489 if (sizeof(SRE_CODE) == 2) {\r
490 block = ((unsigned char*)set)[ch >> 8];\r
491 set += 128;\r
492 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))\r
493 return ok;\r
494 set += count*16;\r
495 }\r
496 else {\r
497 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids\r
498 * warnings when c's type supports only numbers < N+1 */\r
499 if (!(ch & ~65535))\r
500 block = ((unsigned char*)set)[ch >> 8];\r
501 else\r
502 block = -1;\r
503 set += 64;\r
504 if (block >=0 &&\r
505 (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))\r
506 return ok;\r
507 set += count*8;\r
508 }\r
509 break;\r
510 }\r
511\r
512 default:\r
513 /* internal error -- there's not much we can do about it\r
514 here, so let's just pretend it didn't match... */\r
515 return 0;\r
516 }\r
517 }\r
518}\r
519\r
520LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);\r
521\r
522LOCAL(Py_ssize_t)\r
523SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)\r
524{\r
525 SRE_CODE chr;\r
526 SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;\r
527 SRE_CHAR* end = (SRE_CHAR *)state->end;\r
528 Py_ssize_t i;\r
529\r
530 /* adjust end */\r
531 if (maxcount < end - ptr && maxcount != 65535)\r
532 end = ptr + maxcount;\r
533\r
534 switch (pattern[0]) {\r
535\r
536 case SRE_OP_IN:\r
537 /* repeated set */\r
538 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));\r
539 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))\r
540 ptr++;\r
541 break;\r
542\r
543 case SRE_OP_ANY:\r
544 /* repeated dot wildcard. */\r
545 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));\r
546 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))\r
547 ptr++;\r
548 break;\r
549\r
550 case SRE_OP_ANY_ALL:\r
551 /* repeated dot wildcard. skip to the end of the target\r
552 string, and backtrack from there */\r
553 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));\r
554 ptr = end;\r
555 break;\r
556\r
557 case SRE_OP_LITERAL:\r
558 /* repeated literal */\r
559 chr = pattern[1];\r
560 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));\r
561 while (ptr < end && (SRE_CODE) *ptr == chr)\r
562 ptr++;\r
563 break;\r
564\r
565 case SRE_OP_LITERAL_IGNORE:\r
566 /* repeated literal */\r
567 chr = pattern[1];\r
568 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));\r
569 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)\r
570 ptr++;\r
571 break;\r
572\r
573 case SRE_OP_NOT_LITERAL:\r
574 /* repeated non-literal */\r
575 chr = pattern[1];\r
576 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));\r
577 while (ptr < end && (SRE_CODE) *ptr != chr)\r
578 ptr++;\r
579 break;\r
580\r
581 case SRE_OP_NOT_LITERAL_IGNORE:\r
582 /* repeated non-literal */\r
583 chr = pattern[1];\r
584 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));\r
585 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)\r
586 ptr++;\r
587 break;\r
588\r
589 default:\r
590 /* repeated single character pattern */\r
591 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));\r
592 while ((SRE_CHAR*) state->ptr < end) {\r
593 i = SRE_MATCH(state, pattern);\r
594 if (i < 0)\r
595 return i;\r
596 if (!i)\r
597 break;\r
598 }\r
599 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,\r
600 (SRE_CHAR*) state->ptr - ptr));\r
601 return (SRE_CHAR*) state->ptr - ptr;\r
602 }\r
603\r
604 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));\r
605 return ptr - (SRE_CHAR*) state->ptr;\r
606}\r
607\r
608#if 0 /* not used in this release */\r
609LOCAL(int)\r
610SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)\r
611{\r
612 /* check if an SRE_OP_INFO block matches at the current position.\r
613 returns the number of SRE_CODE objects to skip if successful, 0\r
614 if no match */\r
615\r
616 SRE_CHAR* end = state->end;\r
617 SRE_CHAR* ptr = state->ptr;\r
618 Py_ssize_t i;\r
619\r
620 /* check minimal length */\r
621 if (pattern[3] && (end - ptr) < pattern[3])\r
622 return 0;\r
623\r
624 /* check known prefix */\r
625 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {\r
626 /* <length> <skip> <prefix data> <overlap data> */\r
627 for (i = 0; i < pattern[5]; i++)\r
628 if ((SRE_CODE) ptr[i] != pattern[7 + i])\r
629 return 0;\r
630 return pattern[0] + 2 * pattern[6];\r
631 }\r
632 return pattern[0];\r
633}\r
634#endif\r
635\r
636/* The macros below should be used to protect recursive SRE_MATCH()\r
637 * calls that *failed* and do *not* return immediately (IOW, those\r
638 * that will backtrack). Explaining:\r
639 *\r
640 * - Recursive SRE_MATCH() returned true: that's usually a success\r
641 * (besides atypical cases like ASSERT_NOT), therefore there's no\r
642 * reason to restore lastmark;\r
643 *\r
644 * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()\r
645 * is returning to the caller: If the current SRE_MATCH() is the\r
646 * top function of the recursion, returning false will be a matching\r
647 * failure, and it doesn't matter where lastmark is pointing to.\r
648 * If it's *not* the top function, it will be a recursive SRE_MATCH()\r
649 * failure by itself, and the calling SRE_MATCH() will have to deal\r
650 * with the failure by the same rules explained here (it will restore\r
651 * lastmark by itself if necessary);\r
652 *\r
653 * - Recursive SRE_MATCH() returned false, and will continue the\r
654 * outside 'for' loop: must be protected when breaking, since the next\r
655 * OP could potentially depend on lastmark;\r
656 *\r
657 * - Recursive SRE_MATCH() returned false, and will be called again\r
658 * inside a local for/while loop: must be protected between each\r
659 * loop iteration, since the recursive SRE_MATCH() could do anything,\r
660 * and could potentially depend on lastmark.\r
661 *\r
662 * For more information, check the discussion at SF patch #712900.\r
663 */\r
664#define LASTMARK_SAVE() \\r
665 do { \\r
666 ctx->lastmark = state->lastmark; \\r
667 ctx->lastindex = state->lastindex; \\r
668 } while (0)\r
669#define LASTMARK_RESTORE() \\r
670 do { \\r
671 state->lastmark = ctx->lastmark; \\r
672 state->lastindex = ctx->lastindex; \\r
673 } while (0)\r
674\r
675#define RETURN_ERROR(i) do { return i; } while(0)\r
676#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)\r
677#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)\r
678\r
679#define RETURN_ON_ERROR(i) \\r
680 do { if (i < 0) RETURN_ERROR(i); } while (0)\r
681#define RETURN_ON_SUCCESS(i) \\r
682 do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)\r
683#define RETURN_ON_FAILURE(i) \\r
684 do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)\r
685\r
686#define SFY(x) #x\r
687\r
688#define DATA_STACK_ALLOC(state, type, ptr) \\r
689do { \\r
690 alloc_pos = state->data_stack_base; \\r
691 TRACE(("allocating %s in %d (%d)\n", \\r
692 SFY(type), alloc_pos, sizeof(type))); \\r
693 if (state->data_stack_size < alloc_pos+sizeof(type)) { \\r
694 int j = data_stack_grow(state, sizeof(type)); \\r
695 if (j < 0) return j; \\r
696 if (ctx_pos != -1) \\r
697 DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \\r
698 } \\r
699 ptr = (type*)(state->data_stack+alloc_pos); \\r
700 state->data_stack_base += sizeof(type); \\r
701} while (0)\r
702\r
703#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \\r
704do { \\r
705 TRACE(("looking up %s at %d\n", SFY(type), pos)); \\r
706 ptr = (type*)(state->data_stack+pos); \\r
707} while (0)\r
708\r
709#define DATA_STACK_PUSH(state, data, size) \\r
710do { \\r
711 TRACE(("copy data in %p to %d (%d)\n", \\r
712 data, state->data_stack_base, size)); \\r
713 if (state->data_stack_size < state->data_stack_base+size) { \\r
714 int j = data_stack_grow(state, size); \\r
715 if (j < 0) return j; \\r
716 if (ctx_pos != -1) \\r
717 DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \\r
718 } \\r
719 memcpy(state->data_stack+state->data_stack_base, data, size); \\r
720 state->data_stack_base += size; \\r
721} while (0)\r
722\r
723#define DATA_STACK_POP(state, data, size, discard) \\r
724do { \\r
725 TRACE(("copy data to %p from %d (%d)\n", \\r
726 data, state->data_stack_base-size, size)); \\r
727 memcpy(data, state->data_stack+state->data_stack_base-size, size); \\r
728 if (discard) \\r
729 state->data_stack_base -= size; \\r
730} while (0)\r
731\r
732#define DATA_STACK_POP_DISCARD(state, size) \\r
733do { \\r
734 TRACE(("discard data from %d (%d)\n", \\r
735 state->data_stack_base-size, size)); \\r
736 state->data_stack_base -= size; \\r
737} while(0)\r
738\r
739#define DATA_PUSH(x) \\r
740 DATA_STACK_PUSH(state, (x), sizeof(*(x)))\r
741#define DATA_POP(x) \\r
742 DATA_STACK_POP(state, (x), sizeof(*(x)), 1)\r
743#define DATA_POP_DISCARD(x) \\r
744 DATA_STACK_POP_DISCARD(state, sizeof(*(x)))\r
745#define DATA_ALLOC(t,p) \\r
746 DATA_STACK_ALLOC(state, t, p)\r
747#define DATA_LOOKUP_AT(t,p,pos) \\r
748 DATA_STACK_LOOKUP_AT(state,t,p,pos)\r
749\r
750#define MARK_PUSH(lastmark) \\r
751 do if (lastmark > 0) { \\r
752 i = lastmark; /* ctx->lastmark may change if reallocated */ \\r
753 DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \\r
754 } while (0)\r
755#define MARK_POP(lastmark) \\r
756 do if (lastmark > 0) { \\r
757 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \\r
758 } while (0)\r
759#define MARK_POP_KEEP(lastmark) \\r
760 do if (lastmark > 0) { \\r
761 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \\r
762 } while (0)\r
763#define MARK_POP_DISCARD(lastmark) \\r
764 do if (lastmark > 0) { \\r
765 DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \\r
766 } while (0)\r
767\r
768#define JUMP_NONE 0\r
769#define JUMP_MAX_UNTIL_1 1\r
770#define JUMP_MAX_UNTIL_2 2\r
771#define JUMP_MAX_UNTIL_3 3\r
772#define JUMP_MIN_UNTIL_1 4\r
773#define JUMP_MIN_UNTIL_2 5\r
774#define JUMP_MIN_UNTIL_3 6\r
775#define JUMP_REPEAT 7\r
776#define JUMP_REPEAT_ONE_1 8\r
777#define JUMP_REPEAT_ONE_2 9\r
778#define JUMP_MIN_REPEAT_ONE 10\r
779#define JUMP_BRANCH 11\r
780#define JUMP_ASSERT 12\r
781#define JUMP_ASSERT_NOT 13\r
782\r
783#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \\r
784 DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \\r
785 nextctx->last_ctx_pos = ctx_pos; \\r
786 nextctx->jump = jumpvalue; \\r
787 nextctx->pattern = nextpattern; \\r
788 ctx_pos = alloc_pos; \\r
789 ctx = nextctx; \\r
790 goto entrance; \\r
791 jumplabel: \\r
792 while (0) /* gcc doesn't like labels at end of scopes */ \\r
793\r
794typedef struct {\r
795 Py_ssize_t last_ctx_pos;\r
796 Py_ssize_t jump;\r
797 SRE_CHAR* ptr;\r
798 SRE_CODE* pattern;\r
799 Py_ssize_t count;\r
800 Py_ssize_t lastmark;\r
801 Py_ssize_t lastindex;\r
802 union {\r
803 SRE_CODE chr;\r
804 SRE_REPEAT* rep;\r
805 } u;\r
806} SRE_MATCH_CONTEXT;\r
807\r
808/* check if string matches the given pattern. returns <0 for\r
809 error, 0 for failure, and 1 for success */\r
810LOCAL(Py_ssize_t)\r
811SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)\r
812{\r
813 SRE_CHAR* end = (SRE_CHAR *)state->end;\r
814 Py_ssize_t alloc_pos, ctx_pos = -1;\r
815 Py_ssize_t i, ret = 0;\r
816 Py_ssize_t jump;\r
817 unsigned int sigcount=0;\r
818\r
819 SRE_MATCH_CONTEXT* ctx;\r
820 SRE_MATCH_CONTEXT* nextctx;\r
821\r
822 TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));\r
823\r
824 DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);\r
825 ctx->last_ctx_pos = -1;\r
826 ctx->jump = JUMP_NONE;\r
827 ctx->pattern = pattern;\r
828 ctx_pos = alloc_pos;\r
829\r
830entrance:\r
831\r
832 ctx->ptr = (SRE_CHAR *)state->ptr;\r
833\r
834 if (ctx->pattern[0] == SRE_OP_INFO) {\r
835 /* optimization info block */\r
836 /* <INFO> <1=skip> <2=flags> <3=min> ... */\r
837 if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {\r
838 TRACE(("reject (got %d chars, need %d)\n",\r
839 (end - ctx->ptr), ctx->pattern[3]));\r
840 RETURN_FAILURE;\r
841 }\r
842 ctx->pattern += ctx->pattern[1] + 1;\r
843 }\r
844\r
845 for (;;) {\r
846 ++sigcount;\r
847 if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())\r
848 RETURN_ERROR(SRE_ERROR_INTERRUPTED);\r
849\r
850 switch (*ctx->pattern++) {\r
851\r
852 case SRE_OP_MARK:\r
853 /* set mark */\r
854 /* <MARK> <gid> */\r
855 TRACE(("|%p|%p|MARK %d\n", ctx->pattern,\r
856 ctx->ptr, ctx->pattern[0]));\r
857 i = ctx->pattern[0];\r
858 if (i & 1)\r
859 state->lastindex = i/2 + 1;\r
860 if (i > state->lastmark) {\r
861 /* state->lastmark is the highest valid index in the\r
862 state->mark array. If it is increased by more than 1,\r
863 the intervening marks must be set to NULL to signal\r
864 that these marks have not been encountered. */\r
865 Py_ssize_t j = state->lastmark + 1;\r
866 while (j < i)\r
867 state->mark[j++] = NULL;\r
868 state->lastmark = i;\r
869 }\r
870 state->mark[i] = ctx->ptr;\r
871 ctx->pattern++;\r
872 break;\r
873\r
874 case SRE_OP_LITERAL:\r
875 /* match literal string */\r
876 /* <LITERAL> <code> */\r
877 TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,\r
878 ctx->ptr, *ctx->pattern));\r
879 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])\r
880 RETURN_FAILURE;\r
881 ctx->pattern++;\r
882 ctx->ptr++;\r
883 break;\r
884\r
885 case SRE_OP_NOT_LITERAL:\r
886 /* match anything that is not literal character */\r
887 /* <NOT_LITERAL> <code> */\r
888 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,\r
889 ctx->ptr, *ctx->pattern));\r
890 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])\r
891 RETURN_FAILURE;\r
892 ctx->pattern++;\r
893 ctx->ptr++;\r
894 break;\r
895\r
896 case SRE_OP_SUCCESS:\r
897 /* end of pattern */\r
898 TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));\r
899 state->ptr = ctx->ptr;\r
900 RETURN_SUCCESS;\r
901\r
902 case SRE_OP_AT:\r
903 /* match at given position */\r
904 /* <AT> <code> */\r
905 TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));\r
906 if (!SRE_AT(state, ctx->ptr, *ctx->pattern))\r
907 RETURN_FAILURE;\r
908 ctx->pattern++;\r
909 break;\r
910\r
911 case SRE_OP_CATEGORY:\r
912 /* match at given category */\r
913 /* <CATEGORY> <code> */\r
914 TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,\r
915 ctx->ptr, *ctx->pattern));\r
916 if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))\r
917 RETURN_FAILURE;\r
918 ctx->pattern++;\r
919 ctx->ptr++;\r
920 break;\r
921\r
922 case SRE_OP_ANY:\r
923 /* match anything (except a newline) */\r
924 /* <ANY> */\r
925 TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));\r
926 if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))\r
927 RETURN_FAILURE;\r
928 ctx->ptr++;\r
929 break;\r
930\r
931 case SRE_OP_ANY_ALL:\r
932 /* match anything */\r
933 /* <ANY_ALL> */\r
934 TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));\r
935 if (ctx->ptr >= end)\r
936 RETURN_FAILURE;\r
937 ctx->ptr++;\r
938 break;\r
939\r
940 case SRE_OP_IN:\r
941 /* match set member (or non_member) */\r
942 /* <IN> <skip> <set> */\r
943 TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));\r
944 if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))\r
945 RETURN_FAILURE;\r
946 ctx->pattern += ctx->pattern[0];\r
947 ctx->ptr++;\r
948 break;\r
949\r
950 case SRE_OP_LITERAL_IGNORE:\r
951 TRACE(("|%p|%p|LITERAL_IGNORE %d\n",\r
952 ctx->pattern, ctx->ptr, ctx->pattern[0]));\r
953 if (ctx->ptr >= end ||\r
954 state->lower(*ctx->ptr) != state->lower(*ctx->pattern))\r
955 RETURN_FAILURE;\r
956 ctx->pattern++;\r
957 ctx->ptr++;\r
958 break;\r
959\r
960 case SRE_OP_NOT_LITERAL_IGNORE:\r
961 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",\r
962 ctx->pattern, ctx->ptr, *ctx->pattern));\r
963 if (ctx->ptr >= end ||\r
964 state->lower(*ctx->ptr) == state->lower(*ctx->pattern))\r
965 RETURN_FAILURE;\r
966 ctx->pattern++;\r
967 ctx->ptr++;\r
968 break;\r
969\r
970 case SRE_OP_IN_IGNORE:\r
971 TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));\r
972 if (ctx->ptr >= end\r
973 || !SRE_CHARSET(ctx->pattern+1,\r
974 (SRE_CODE)state->lower(*ctx->ptr)))\r
975 RETURN_FAILURE;\r
976 ctx->pattern += ctx->pattern[0];\r
977 ctx->ptr++;\r
978 break;\r
979\r
980 case SRE_OP_JUMP:\r
981 case SRE_OP_INFO:\r
982 /* jump forward */\r
983 /* <JUMP> <offset> */\r
984 TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,\r
985 ctx->ptr, ctx->pattern[0]));\r
986 ctx->pattern += ctx->pattern[0];\r
987 break;\r
988\r
989 case SRE_OP_BRANCH:\r
990 /* alternation */\r
991 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */\r
992 TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));\r
993 LASTMARK_SAVE();\r
994 ctx->u.rep = state->repeat;\r
995 if (ctx->u.rep)\r
996 MARK_PUSH(ctx->lastmark);\r
997 for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {\r
998 if (ctx->pattern[1] == SRE_OP_LITERAL &&\r
999 (ctx->ptr >= end ||\r
1000 (SRE_CODE) *ctx->ptr != ctx->pattern[2]))\r
1001 continue;\r
1002 if (ctx->pattern[1] == SRE_OP_IN &&\r
1003 (ctx->ptr >= end ||\r
1004 !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))\r
1005 continue;\r
1006 state->ptr = ctx->ptr;\r
1007 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);\r
1008 if (ret) {\r
1009 if (ctx->u.rep)\r
1010 MARK_POP_DISCARD(ctx->lastmark);\r
1011 RETURN_ON_ERROR(ret);\r
1012 RETURN_SUCCESS;\r
1013 }\r
1014 if (ctx->u.rep)\r
1015 MARK_POP_KEEP(ctx->lastmark);\r
1016 LASTMARK_RESTORE();\r
1017 }\r
1018 if (ctx->u.rep)\r
1019 MARK_POP_DISCARD(ctx->lastmark);\r
1020 RETURN_FAILURE;\r
1021\r
1022 case SRE_OP_REPEAT_ONE:\r
1023 /* match repeated sequence (maximizing regexp) */\r
1024\r
1025 /* this operator only works if the repeated item is\r
1026 exactly one character wide, and we're not already\r
1027 collecting backtracking points. for other cases,\r
1028 use the MAX_REPEAT operator */\r
1029\r
1030 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */\r
1031\r
1032 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,\r
1033 ctx->pattern[1], ctx->pattern[2]));\r
1034\r
1035 if (ctx->ptr + ctx->pattern[1] > end)\r
1036 RETURN_FAILURE; /* cannot match */\r
1037\r
1038 state->ptr = ctx->ptr;\r
1039\r
1040 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);\r
1041 RETURN_ON_ERROR(ret);\r
1042 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);\r
1043 ctx->count = ret;\r
1044 ctx->ptr += ctx->count;\r
1045\r
1046 /* when we arrive here, count contains the number of\r
1047 matches, and ctx->ptr points to the tail of the target\r
1048 string. check if the rest of the pattern matches,\r
1049 and backtrack if not. */\r
1050\r
1051 if (ctx->count < (Py_ssize_t) ctx->pattern[1])\r
1052 RETURN_FAILURE;\r
1053\r
1054 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {\r
1055 /* tail is empty. we're finished */\r
1056 state->ptr = ctx->ptr;\r
1057 RETURN_SUCCESS;\r
1058 }\r
1059\r
1060 LASTMARK_SAVE();\r
1061\r
1062 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {\r
1063 /* tail starts with a literal. skip positions where\r
1064 the rest of the pattern cannot possibly match */\r
1065 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];\r
1066 for (;;) {\r
1067 while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&\r
1068 (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {\r
1069 ctx->ptr--;\r
1070 ctx->count--;\r
1071 }\r
1072 if (ctx->count < (Py_ssize_t) ctx->pattern[1])\r
1073 break;\r
1074 state->ptr = ctx->ptr;\r
1075 DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,\r
1076 ctx->pattern+ctx->pattern[0]);\r
1077 if (ret) {\r
1078 RETURN_ON_ERROR(ret);\r
1079 RETURN_SUCCESS;\r
1080 }\r
1081\r
1082 LASTMARK_RESTORE();\r
1083\r
1084 ctx->ptr--;\r
1085 ctx->count--;\r
1086 }\r
1087\r
1088 } else {\r
1089 /* general case */\r
1090 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {\r
1091 state->ptr = ctx->ptr;\r
1092 DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,\r
1093 ctx->pattern+ctx->pattern[0]);\r
1094 if (ret) {\r
1095 RETURN_ON_ERROR(ret);\r
1096 RETURN_SUCCESS;\r
1097 }\r
1098 ctx->ptr--;\r
1099 ctx->count--;\r
1100 LASTMARK_RESTORE();\r
1101 }\r
1102 }\r
1103 RETURN_FAILURE;\r
1104\r
1105 case SRE_OP_MIN_REPEAT_ONE:\r
1106 /* match repeated sequence (minimizing regexp) */\r
1107\r
1108 /* this operator only works if the repeated item is\r
1109 exactly one character wide, and we're not already\r
1110 collecting backtracking points. for other cases,\r
1111 use the MIN_REPEAT operator */\r
1112\r
1113 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */\r
1114\r
1115 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,\r
1116 ctx->pattern[1], ctx->pattern[2]));\r
1117\r
1118 if (ctx->ptr + ctx->pattern[1] > end)\r
1119 RETURN_FAILURE; /* cannot match */\r
1120\r
1121 state->ptr = ctx->ptr;\r
1122\r
1123 if (ctx->pattern[1] == 0)\r
1124 ctx->count = 0;\r
1125 else {\r
1126 /* count using pattern min as the maximum */\r
1127 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);\r
1128 RETURN_ON_ERROR(ret);\r
1129 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);\r
1130 if (ret < (Py_ssize_t) ctx->pattern[1])\r
1131 /* didn't match minimum number of times */\r
1132 RETURN_FAILURE;\r
1133 /* advance past minimum matches of repeat */\r
1134 ctx->count = ret;\r
1135 ctx->ptr += ctx->count;\r
1136 }\r
1137\r
1138 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {\r
1139 /* tail is empty. we're finished */\r
1140 state->ptr = ctx->ptr;\r
1141 RETURN_SUCCESS;\r
1142\r
1143 } else {\r
1144 /* general case */\r
1145 LASTMARK_SAVE();\r
1146 while ((Py_ssize_t)ctx->pattern[2] == 65535\r
1147 || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {\r
1148 state->ptr = ctx->ptr;\r
1149 DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,\r
1150 ctx->pattern+ctx->pattern[0]);\r
1151 if (ret) {\r
1152 RETURN_ON_ERROR(ret);\r
1153 RETURN_SUCCESS;\r
1154 }\r
1155 state->ptr = ctx->ptr;\r
1156 ret = SRE_COUNT(state, ctx->pattern+3, 1);\r
1157 RETURN_ON_ERROR(ret);\r
1158 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);\r
1159 if (ret == 0)\r
1160 break;\r
1161 assert(ret == 1);\r
1162 ctx->ptr++;\r
1163 ctx->count++;\r
1164 LASTMARK_RESTORE();\r
1165 }\r
1166 }\r
1167 RETURN_FAILURE;\r
1168\r
1169 case SRE_OP_REPEAT:\r
1170 /* create repeat context. all the hard work is done\r
1171 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */\r
1172 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */\r
1173 TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,\r
1174 ctx->pattern[1], ctx->pattern[2]));\r
1175\r
1176 /* install new repeat context */\r
1177 ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));\r
1178 if (!ctx->u.rep) {\r
1179 PyErr_NoMemory();\r
1180 RETURN_FAILURE;\r
1181 }\r
1182 ctx->u.rep->count = -1;\r
1183 ctx->u.rep->pattern = ctx->pattern;\r
1184 ctx->u.rep->prev = state->repeat;\r
1185 ctx->u.rep->last_ptr = NULL;\r
1186 state->repeat = ctx->u.rep;\r
1187\r
1188 state->ptr = ctx->ptr;\r
1189 DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);\r
1190 state->repeat = ctx->u.rep->prev;\r
1191 PyObject_FREE(ctx->u.rep);\r
1192\r
1193 if (ret) {\r
1194 RETURN_ON_ERROR(ret);\r
1195 RETURN_SUCCESS;\r
1196 }\r
1197 RETURN_FAILURE;\r
1198\r
1199 case SRE_OP_MAX_UNTIL:\r
1200 /* maximizing repeat */\r
1201 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */\r
1202\r
1203 /* FIXME: we probably need to deal with zero-width\r
1204 matches in here... */\r
1205\r
1206 ctx->u.rep = state->repeat;\r
1207 if (!ctx->u.rep)\r
1208 RETURN_ERROR(SRE_ERROR_STATE);\r
1209\r
1210 state->ptr = ctx->ptr;\r
1211\r
1212 ctx->count = ctx->u.rep->count+1;\r
1213\r
1214 TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern,\r
1215 ctx->ptr, ctx->count));\r
1216\r
1217 if (ctx->count < ctx->u.rep->pattern[1]) {\r
1218 /* not enough matches */\r
1219 ctx->u.rep->count = ctx->count;\r
1220 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,\r
1221 ctx->u.rep->pattern+3);\r
1222 if (ret) {\r
1223 RETURN_ON_ERROR(ret);\r
1224 RETURN_SUCCESS;\r
1225 }\r
1226 ctx->u.rep->count = ctx->count-1;\r
1227 state->ptr = ctx->ptr;\r
1228 RETURN_FAILURE;\r
1229 }\r
1230\r
1231 if ((ctx->count < ctx->u.rep->pattern[2] ||\r
1232 ctx->u.rep->pattern[2] == 65535) &&\r
1233 state->ptr != ctx->u.rep->last_ptr) {\r
1234 /* we may have enough matches, but if we can\r
1235 match another item, do so */\r
1236 ctx->u.rep->count = ctx->count;\r
1237 LASTMARK_SAVE();\r
1238 MARK_PUSH(ctx->lastmark);\r
1239 /* zero-width match protection */\r
1240 DATA_PUSH(&ctx->u.rep->last_ptr);\r
1241 ctx->u.rep->last_ptr = state->ptr;\r
1242 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,\r
1243 ctx->u.rep->pattern+3);\r
1244 DATA_POP(&ctx->u.rep->last_ptr);\r
1245 if (ret) {\r
1246 MARK_POP_DISCARD(ctx->lastmark);\r
1247 RETURN_ON_ERROR(ret);\r
1248 RETURN_SUCCESS;\r
1249 }\r
1250 MARK_POP(ctx->lastmark);\r
1251 LASTMARK_RESTORE();\r
1252 ctx->u.rep->count = ctx->count-1;\r
1253 state->ptr = ctx->ptr;\r
1254 }\r
1255\r
1256 /* cannot match more repeated items here. make sure the\r
1257 tail matches */\r
1258 state->repeat = ctx->u.rep->prev;\r
1259 DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);\r
1260 RETURN_ON_SUCCESS(ret);\r
1261 state->repeat = ctx->u.rep;\r
1262 state->ptr = ctx->ptr;\r
1263 RETURN_FAILURE;\r
1264\r
1265 case SRE_OP_MIN_UNTIL:\r
1266 /* minimizing repeat */\r
1267 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */\r
1268\r
1269 ctx->u.rep = state->repeat;\r
1270 if (!ctx->u.rep)\r
1271 RETURN_ERROR(SRE_ERROR_STATE);\r
1272\r
1273 state->ptr = ctx->ptr;\r
1274\r
1275 ctx->count = ctx->u.rep->count+1;\r
1276\r
1277 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern,\r
1278 ctx->ptr, ctx->count, ctx->u.rep->pattern));\r
1279\r
1280 if (ctx->count < ctx->u.rep->pattern[1]) {\r
1281 /* not enough matches */\r
1282 ctx->u.rep->count = ctx->count;\r
1283 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,\r
1284 ctx->u.rep->pattern+3);\r
1285 if (ret) {\r
1286 RETURN_ON_ERROR(ret);\r
1287 RETURN_SUCCESS;\r
1288 }\r
1289 ctx->u.rep->count = ctx->count-1;\r
1290 state->ptr = ctx->ptr;\r
1291 RETURN_FAILURE;\r
1292 }\r
1293\r
1294 LASTMARK_SAVE();\r
1295\r
1296 /* see if the tail matches */\r
1297 state->repeat = ctx->u.rep->prev;\r
1298 DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);\r
1299 if (ret) {\r
1300 RETURN_ON_ERROR(ret);\r
1301 RETURN_SUCCESS;\r
1302 }\r
1303\r
1304 state->repeat = ctx->u.rep;\r
1305 state->ptr = ctx->ptr;\r
1306\r
1307 LASTMARK_RESTORE();\r
1308\r
1309 if (ctx->count >= ctx->u.rep->pattern[2]\r
1310 && ctx->u.rep->pattern[2] != 65535)\r
1311 RETURN_FAILURE;\r
1312\r
1313 ctx->u.rep->count = ctx->count;\r
1314 DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,\r
1315 ctx->u.rep->pattern+3);\r
1316 if (ret) {\r
1317 RETURN_ON_ERROR(ret);\r
1318 RETURN_SUCCESS;\r
1319 }\r
1320 ctx->u.rep->count = ctx->count-1;\r
1321 state->ptr = ctx->ptr;\r
1322 RETURN_FAILURE;\r
1323\r
1324 case SRE_OP_GROUPREF:\r
1325 /* match backreference */\r
1326 TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,\r
1327 ctx->ptr, ctx->pattern[0]));\r
1328 i = ctx->pattern[0];\r
1329 {\r
1330 Py_ssize_t groupref = i+i;\r
1331 if (groupref >= state->lastmark) {\r
1332 RETURN_FAILURE;\r
1333 } else {\r
1334 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];\r
1335 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];\r
1336 if (!p || !e || e < p)\r
1337 RETURN_FAILURE;\r
1338 while (p < e) {\r
1339 if (ctx->ptr >= end || *ctx->ptr != *p)\r
1340 RETURN_FAILURE;\r
1341 p++; ctx->ptr++;\r
1342 }\r
1343 }\r
1344 }\r
1345 ctx->pattern++;\r
1346 break;\r
1347\r
1348 case SRE_OP_GROUPREF_IGNORE:\r
1349 /* match backreference */\r
1350 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,\r
1351 ctx->ptr, ctx->pattern[0]));\r
1352 i = ctx->pattern[0];\r
1353 {\r
1354 Py_ssize_t groupref = i+i;\r
1355 if (groupref >= state->lastmark) {\r
1356 RETURN_FAILURE;\r
1357 } else {\r
1358 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];\r
1359 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];\r
1360 if (!p || !e || e < p)\r
1361 RETURN_FAILURE;\r
1362 while (p < e) {\r
1363 if (ctx->ptr >= end ||\r
1364 state->lower(*ctx->ptr) != state->lower(*p))\r
1365 RETURN_FAILURE;\r
1366 p++; ctx->ptr++;\r
1367 }\r
1368 }\r
1369 }\r
1370 ctx->pattern++;\r
1371 break;\r
1372\r
1373 case SRE_OP_GROUPREF_EXISTS:\r
1374 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,\r
1375 ctx->ptr, ctx->pattern[0]));\r
1376 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */\r
1377 i = ctx->pattern[0];\r
1378 {\r
1379 Py_ssize_t groupref = i+i;\r
1380 if (groupref >= state->lastmark) {\r
1381 ctx->pattern += ctx->pattern[1];\r
1382 break;\r
1383 } else {\r
1384 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];\r
1385 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];\r
1386 if (!p || !e || e < p) {\r
1387 ctx->pattern += ctx->pattern[1];\r
1388 break;\r
1389 }\r
1390 }\r
1391 }\r
1392 ctx->pattern += 2;\r
1393 break;\r
1394\r
1395 case SRE_OP_ASSERT:\r
1396 /* assert subpattern */\r
1397 /* <ASSERT> <skip> <back> <pattern> */\r
1398 TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,\r
1399 ctx->ptr, ctx->pattern[1]));\r
1400 state->ptr = ctx->ptr - ctx->pattern[1];\r
1401 if (state->ptr < state->beginning)\r
1402 RETURN_FAILURE;\r
1403 DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);\r
1404 RETURN_ON_FAILURE(ret);\r
1405 ctx->pattern += ctx->pattern[0];\r
1406 break;\r
1407\r
1408 case SRE_OP_ASSERT_NOT:\r
1409 /* assert not subpattern */\r
1410 /* <ASSERT_NOT> <skip> <back> <pattern> */\r
1411 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,\r
1412 ctx->ptr, ctx->pattern[1]));\r
1413 state->ptr = ctx->ptr - ctx->pattern[1];\r
1414 if (state->ptr >= state->beginning) {\r
1415 DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);\r
1416 if (ret) {\r
1417 RETURN_ON_ERROR(ret);\r
1418 RETURN_FAILURE;\r
1419 }\r
1420 }\r
1421 ctx->pattern += ctx->pattern[0];\r
1422 break;\r
1423\r
1424 case SRE_OP_FAILURE:\r
1425 /* immediate failure */\r
1426 TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));\r
1427 RETURN_FAILURE;\r
1428\r
1429 default:\r
1430 TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,\r
1431 ctx->pattern[-1]));\r
1432 RETURN_ERROR(SRE_ERROR_ILLEGAL);\r
1433 }\r
1434 }\r
1435\r
1436exit:\r
1437 ctx_pos = ctx->last_ctx_pos;\r
1438 jump = ctx->jump;\r
1439 DATA_POP_DISCARD(ctx);\r
1440 if (ctx_pos == -1)\r
1441 return ret;\r
1442 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);\r
1443\r
1444 switch (jump) {\r
1445 case JUMP_MAX_UNTIL_2:\r
1446 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));\r
1447 goto jump_max_until_2;\r
1448 case JUMP_MAX_UNTIL_3:\r
1449 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));\r
1450 goto jump_max_until_3;\r
1451 case JUMP_MIN_UNTIL_2:\r
1452 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));\r
1453 goto jump_min_until_2;\r
1454 case JUMP_MIN_UNTIL_3:\r
1455 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));\r
1456 goto jump_min_until_3;\r
1457 case JUMP_BRANCH:\r
1458 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));\r
1459 goto jump_branch;\r
1460 case JUMP_MAX_UNTIL_1:\r
1461 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));\r
1462 goto jump_max_until_1;\r
1463 case JUMP_MIN_UNTIL_1:\r
1464 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));\r
1465 goto jump_min_until_1;\r
1466 case JUMP_REPEAT:\r
1467 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));\r
1468 goto jump_repeat;\r
1469 case JUMP_REPEAT_ONE_1:\r
1470 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));\r
1471 goto jump_repeat_one_1;\r
1472 case JUMP_REPEAT_ONE_2:\r
1473 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));\r
1474 goto jump_repeat_one_2;\r
1475 case JUMP_MIN_REPEAT_ONE:\r
1476 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));\r
1477 goto jump_min_repeat_one;\r
1478 case JUMP_ASSERT:\r
1479 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));\r
1480 goto jump_assert;\r
1481 case JUMP_ASSERT_NOT:\r
1482 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));\r
1483 goto jump_assert_not;\r
1484 case JUMP_NONE:\r
1485 TRACE(("|%p|%p|RETURN %d\n", ctx->pattern, ctx->ptr, ret));\r
1486 break;\r
1487 }\r
1488\r
1489 return ret; /* should never get here */\r
1490}\r
1491\r
1492LOCAL(Py_ssize_t)\r
1493SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)\r
1494{\r
1495 SRE_CHAR* ptr = (SRE_CHAR *)state->start;\r
1496 SRE_CHAR* end = (SRE_CHAR *)state->end;\r
1497 Py_ssize_t status = 0;\r
1498 Py_ssize_t prefix_len = 0;\r
1499 Py_ssize_t prefix_skip = 0;\r
1500 SRE_CODE* prefix = NULL;\r
1501 SRE_CODE* charset = NULL;\r
1502 SRE_CODE* overlap = NULL;\r
1503 int flags = 0;\r
1504\r
1505 if (pattern[0] == SRE_OP_INFO) {\r
1506 /* optimization info block */\r
1507 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */\r
1508\r
1509 flags = pattern[2];\r
1510\r
1511 if (pattern[3] > 1) {\r
1512 /* adjust end point (but make sure we leave at least one\r
1513 character in there, so literal search will work) */\r
1514 end -= pattern[3]-1;\r
1515 if (end <= ptr)\r
1516 end = ptr+1;\r
1517 }\r
1518\r
1519 if (flags & SRE_INFO_PREFIX) {\r
1520 /* pattern starts with a known prefix */\r
1521 /* <length> <skip> <prefix data> <overlap data> */\r
1522 prefix_len = pattern[5];\r
1523 prefix_skip = pattern[6];\r
1524 prefix = pattern + 7;\r
1525 overlap = prefix + prefix_len - 1;\r
1526 } else if (flags & SRE_INFO_CHARSET)\r
1527 /* pattern starts with a character from a known set */\r
1528 /* <charset> */\r
1529 charset = pattern + 5;\r
1530\r
1531 pattern += 1 + pattern[1];\r
1532 }\r
1533\r
1534 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));\r
1535 TRACE(("charset = %p\n", charset));\r
1536\r
1537#if defined(USE_FAST_SEARCH)\r
1538 if (prefix_len > 1) {\r
1539 /* pattern starts with a known prefix. use the overlap\r
1540 table to skip forward as fast as we possibly can */\r
1541 Py_ssize_t i = 0;\r
1542 end = (SRE_CHAR *)state->end;\r
1543 while (ptr < end) {\r
1544 for (;;) {\r
1545 if ((SRE_CODE) ptr[0] != prefix[i]) {\r
1546 if (!i)\r
1547 break;\r
1548 else\r
1549 i = overlap[i];\r
1550 } else {\r
1551 if (++i == prefix_len) {\r
1552 /* found a potential match */\r
1553 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));\r
1554 state->start = ptr + 1 - prefix_len;\r
1555 state->ptr = ptr + 1 - prefix_len + prefix_skip;\r
1556 if (flags & SRE_INFO_LITERAL)\r
1557 return 1; /* we got all of it */\r
1558 status = SRE_MATCH(state, pattern + 2*prefix_skip);\r
1559 if (status != 0)\r
1560 return status;\r
1561 /* close but no cigar -- try again */\r
1562 i = overlap[i];\r
1563 }\r
1564 break;\r
1565 }\r
1566 }\r
1567 ptr++;\r
1568 }\r
1569 return 0;\r
1570 }\r
1571#endif\r
1572\r
1573 if (pattern[0] == SRE_OP_LITERAL) {\r
1574 /* pattern starts with a literal character. this is used\r
1575 for short prefixes, and if fast search is disabled */\r
1576 SRE_CODE chr = pattern[1];\r
1577 end = (SRE_CHAR *)state->end;\r
1578 for (;;) {\r
1579 while (ptr < end && (SRE_CODE) ptr[0] != chr)\r
1580 ptr++;\r
1581 if (ptr >= end)\r
1582 return 0;\r
1583 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));\r
1584 state->start = ptr;\r
1585 state->ptr = ++ptr;\r
1586 if (flags & SRE_INFO_LITERAL)\r
1587 return 1; /* we got all of it */\r
1588 status = SRE_MATCH(state, pattern + 2);\r
1589 if (status != 0)\r
1590 break;\r
1591 }\r
1592 } else if (charset) {\r
1593 /* pattern starts with a character from a known set */\r
1594 end = (SRE_CHAR *)state->end;\r
1595 for (;;) {\r
1596 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))\r
1597 ptr++;\r
1598 if (ptr >= end)\r
1599 return 0;\r
1600 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));\r
1601 state->start = ptr;\r
1602 state->ptr = ptr;\r
1603 status = SRE_MATCH(state, pattern);\r
1604 if (status != 0)\r
1605 break;\r
1606 ptr++;\r
1607 }\r
1608 } else\r
1609 /* general case */\r
1610 while (ptr <= end) {\r
1611 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));\r
1612 state->start = state->ptr = ptr++;\r
1613 status = SRE_MATCH(state, pattern);\r
1614 if (status != 0)\r
1615 break;\r
1616 }\r
1617\r
1618 return status;\r
1619}\r
1620\r
1621LOCAL(int)\r
1622SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)\r
1623{\r
1624 /* check if given string is a literal template (i.e. no escapes) */\r
1625 while (len-- > 0)\r
1626 if (*ptr++ == '\\')\r
1627 return 0;\r
1628 return 1;\r
1629}\r
1630\r
1631#if !defined(SRE_RECURSIVE)\r
1632\r
1633/* -------------------------------------------------------------------- */\r
1634/* factories and destructors */\r
1635\r
1636/* see sre.h for object declarations */\r
1637static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);\r
1638static PyObject*pattern_scanner(PatternObject*, PyObject*);\r
1639\r
1640static PyObject *\r
1641sre_codesize(PyObject* self, PyObject *unused)\r
1642{\r
1643 return Py_BuildValue("l", sizeof(SRE_CODE));\r
1644}\r
1645\r
1646static PyObject *\r
1647sre_getlower(PyObject* self, PyObject* args)\r
1648{\r
1649 int character, flags;\r
1650 if (!PyArg_ParseTuple(args, "ii", &character, &flags))\r
1651 return NULL;\r
1652 if (flags & SRE_FLAG_LOCALE)\r
1653 return Py_BuildValue("i", sre_lower_locale(character));\r
1654 if (flags & SRE_FLAG_UNICODE)\r
1655#if defined(HAVE_UNICODE)\r
1656 return Py_BuildValue("i", sre_lower_unicode(character));\r
1657#else\r
1658 return Py_BuildValue("i", sre_lower_locale(character));\r
1659#endif\r
1660 return Py_BuildValue("i", sre_lower(character));\r
1661}\r
1662\r
1663LOCAL(void)\r
1664state_reset(SRE_STATE* state)\r
1665{\r
1666 /* FIXME: dynamic! */\r
1667 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/\r
1668\r
1669 state->lastmark = -1;\r
1670 state->lastindex = -1;\r
1671\r
1672 state->repeat = NULL;\r
1673\r
1674 data_stack_dealloc(state);\r
1675}\r
1676\r
1677static void*\r
1678getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)\r
1679{\r
1680 /* given a python object, return a data pointer, a length (in\r
1681 characters), and a character size. return NULL if the object\r
1682 is not a string (or not compatible) */\r
1683\r
1684 PyBufferProcs *buffer;\r
1685 Py_ssize_t size, bytes;\r
1686 int charsize;\r
1687 void* ptr;\r
1688\r
1689#if defined(HAVE_UNICODE)\r
1690 if (PyUnicode_Check(string)) {\r
1691 /* unicode strings doesn't always support the buffer interface */\r
1692 ptr = (void*) PyUnicode_AS_DATA(string);\r
1693 /* bytes = PyUnicode_GET_DATA_SIZE(string); */\r
1694 size = PyUnicode_GET_SIZE(string);\r
1695 charsize = sizeof(Py_UNICODE);\r
1696\r
1697 } else {\r
1698#endif\r
1699\r
1700 /* get pointer to string buffer */\r
1701 buffer = Py_TYPE(string)->tp_as_buffer;\r
1702 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||\r
1703 buffer->bf_getsegcount(string, NULL) != 1) {\r
1704 PyErr_SetString(PyExc_TypeError, "expected string or buffer");\r
1705 return NULL;\r
1706 }\r
1707\r
1708 /* determine buffer size */\r
1709 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);\r
1710 if (bytes < 0) {\r
1711 PyErr_SetString(PyExc_TypeError, "buffer has negative size");\r
1712 return NULL;\r
1713 }\r
1714\r
1715 /* determine character size */\r
1716#if PY_VERSION_HEX >= 0x01060000\r
1717 size = PyObject_Size(string);\r
1718#else\r
1719 size = PyObject_Length(string);\r
1720#endif\r
1721\r
1722 if (PyString_Check(string) || bytes == size)\r
1723 charsize = 1;\r
1724#if defined(HAVE_UNICODE)\r
1725 else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))\r
1726 charsize = sizeof(Py_UNICODE);\r
1727#endif\r
1728 else {\r
1729 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");\r
1730 return NULL;\r
1731 }\r
1732\r
1733#if defined(HAVE_UNICODE)\r
1734 }\r
1735#endif\r
1736\r
1737 *p_length = size;\r
1738 *p_charsize = charsize;\r
1739\r
1740 return ptr;\r
1741}\r
1742\r
1743LOCAL(PyObject*)\r
1744state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,\r
1745 Py_ssize_t start, Py_ssize_t end)\r
1746{\r
1747 /* prepare state object */\r
1748\r
1749 Py_ssize_t length;\r
1750 int charsize;\r
1751 void* ptr;\r
1752\r
1753 memset(state, 0, sizeof(SRE_STATE));\r
1754\r
1755 state->lastmark = -1;\r
1756 state->lastindex = -1;\r
1757\r
1758 ptr = getstring(string, &length, &charsize);\r
1759 if (!ptr)\r
1760 return NULL;\r
1761\r
1762 /* adjust boundaries */\r
1763 if (start < 0)\r
1764 start = 0;\r
1765 else if (start > length)\r
1766 start = length;\r
1767\r
1768 if (end < 0)\r
1769 end = 0;\r
1770 else if (end > length)\r
1771 end = length;\r
1772\r
1773 state->charsize = charsize;\r
1774\r
1775 state->beginning = ptr;\r
1776\r
1777 state->start = (void*) ((char*) ptr + start * state->charsize);\r
1778 state->end = (void*) ((char*) ptr + end * state->charsize);\r
1779\r
1780 Py_INCREF(string);\r
1781 state->string = string;\r
1782 state->pos = start;\r
1783 state->endpos = end;\r
1784\r
1785 if (pattern->flags & SRE_FLAG_LOCALE)\r
1786 state->lower = sre_lower_locale;\r
1787 else if (pattern->flags & SRE_FLAG_UNICODE)\r
1788#if defined(HAVE_UNICODE)\r
1789 state->lower = sre_lower_unicode;\r
1790#else\r
1791 state->lower = sre_lower_locale;\r
1792#endif\r
1793 else\r
1794 state->lower = sre_lower;\r
1795\r
1796 return string;\r
1797}\r
1798\r
1799LOCAL(void)\r
1800state_fini(SRE_STATE* state)\r
1801{\r
1802 Py_XDECREF(state->string);\r
1803 data_stack_dealloc(state);\r
1804}\r
1805\r
1806/* calculate offset from start of string */\r
1807#define STATE_OFFSET(state, member)\\r
1808 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)\r
1809\r
1810LOCAL(PyObject*)\r
1811state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)\r
1812{\r
1813 Py_ssize_t i, j;\r
1814\r
1815 index = (index - 1) * 2;\r
1816\r
1817 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {\r
1818 if (empty)\r
1819 /* want empty string */\r
1820 i = j = 0;\r
1821 else {\r
1822 Py_INCREF(Py_None);\r
1823 return Py_None;\r
1824 }\r
1825 } else {\r
1826 i = STATE_OFFSET(state, state->mark[index]);\r
1827 j = STATE_OFFSET(state, state->mark[index+1]);\r
1828 }\r
1829\r
1830 return PySequence_GetSlice(string, i, j);\r
1831}\r
1832\r
1833static void\r
1834pattern_error(int status)\r
1835{\r
1836 switch (status) {\r
1837 case SRE_ERROR_RECURSION_LIMIT:\r
1838 PyErr_SetString(\r
1839 PyExc_RuntimeError,\r
1840 "maximum recursion limit exceeded"\r
1841 );\r
1842 break;\r
1843 case SRE_ERROR_MEMORY:\r
1844 PyErr_NoMemory();\r
1845 break;\r
1846 case SRE_ERROR_INTERRUPTED:\r
1847 /* An exception has already been raised, so let it fly */\r
1848 break;\r
1849 default:\r
1850 /* other error codes indicate compiler/engine bugs */\r
1851 PyErr_SetString(\r
1852 PyExc_RuntimeError,\r
1853 "internal error in regular expression engine"\r
1854 );\r
1855 }\r
1856}\r
1857\r
1858static void\r
1859pattern_dealloc(PatternObject* self)\r
1860{\r
1861 if (self->weakreflist != NULL)\r
1862 PyObject_ClearWeakRefs((PyObject *) self);\r
1863 Py_XDECREF(self->pattern);\r
1864 Py_XDECREF(self->groupindex);\r
1865 Py_XDECREF(self->indexgroup);\r
1866 PyObject_DEL(self);\r
1867}\r
1868\r
1869static PyObject*\r
1870pattern_match(PatternObject* self, PyObject* args, PyObject* kw)\r
1871{\r
1872 SRE_STATE state;\r
1873 int status;\r
1874\r
1875 PyObject* string;\r
1876 Py_ssize_t start = 0;\r
1877 Py_ssize_t end = PY_SSIZE_T_MAX;\r
1878 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };\r
1879 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:match", kwlist,\r
1880 &string, &start, &end))\r
1881 return NULL;\r
1882\r
1883 string = state_init(&state, self, string, start, end);\r
1884 if (!string)\r
1885 return NULL;\r
1886\r
1887 state.ptr = state.start;\r
1888\r
1889 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));\r
1890\r
1891 if (state.charsize == 1) {\r
1892 status = sre_match(&state, PatternObject_GetCode(self));\r
1893 } else {\r
1894#if defined(HAVE_UNICODE)\r
1895 status = sre_umatch(&state, PatternObject_GetCode(self));\r
1896#endif\r
1897 }\r
1898\r
1899 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));\r
1900 if (PyErr_Occurred())\r
1901 return NULL;\r
1902\r
1903 state_fini(&state);\r
1904\r
1905 return pattern_new_match(self, &state, status);\r
1906}\r
1907\r
1908static PyObject*\r
1909pattern_search(PatternObject* self, PyObject* args, PyObject* kw)\r
1910{\r
1911 SRE_STATE state;\r
1912 int status;\r
1913\r
1914 PyObject* string;\r
1915 Py_ssize_t start = 0;\r
1916 Py_ssize_t end = PY_SSIZE_T_MAX;\r
1917 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };\r
1918 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist,\r
1919 &string, &start, &end))\r
1920 return NULL;\r
1921\r
1922 string = state_init(&state, self, string, start, end);\r
1923 if (!string)\r
1924 return NULL;\r
1925\r
1926 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));\r
1927\r
1928 if (state.charsize == 1) {\r
1929 status = sre_search(&state, PatternObject_GetCode(self));\r
1930 } else {\r
1931#if defined(HAVE_UNICODE)\r
1932 status = sre_usearch(&state, PatternObject_GetCode(self));\r
1933#endif\r
1934 }\r
1935\r
1936 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));\r
1937\r
1938 state_fini(&state);\r
1939\r
1940 if (PyErr_Occurred())\r
1941 return NULL;\r
1942\r
1943 return pattern_new_match(self, &state, status);\r
1944}\r
1945\r
1946static PyObject*\r
1947call(char* module, char* function, PyObject* args)\r
1948{\r
1949 PyObject* name;\r
1950 PyObject* mod;\r
1951 PyObject* func;\r
1952 PyObject* result;\r
1953\r
1954 if (!args)\r
1955 return NULL;\r
1956 name = PyString_FromString(module);\r
1957 if (!name)\r
1958 return NULL;\r
1959 mod = PyImport_Import(name);\r
1960 Py_DECREF(name);\r
1961 if (!mod)\r
1962 return NULL;\r
1963 func = PyObject_GetAttrString(mod, function);\r
1964 Py_DECREF(mod);\r
1965 if (!func)\r
1966 return NULL;\r
1967 result = PyObject_CallObject(func, args);\r
1968 Py_DECREF(func);\r
1969 Py_DECREF(args);\r
1970 return result;\r
1971}\r
1972\r
1973#ifdef USE_BUILTIN_COPY\r
1974static int\r
1975deepcopy(PyObject** object, PyObject* memo)\r
1976{\r
1977 PyObject* copy;\r
1978\r
1979 copy = call(\r
1980 "copy", "deepcopy",\r
1981 PyTuple_Pack(2, *object, memo)\r
1982 );\r
1983 if (!copy)\r
1984 return 0;\r
1985\r
1986 Py_DECREF(*object);\r
1987 *object = copy;\r
1988\r
1989 return 1; /* success */\r
1990}\r
1991#endif\r
1992\r
1993static PyObject*\r
1994join_list(PyObject* list, PyObject* string)\r
1995{\r
1996 /* join list elements */\r
1997\r
1998 PyObject* joiner;\r
1999#if PY_VERSION_HEX >= 0x01060000\r
2000 PyObject* function;\r
2001 PyObject* args;\r
2002#endif\r
2003 PyObject* result;\r
2004\r
2005 joiner = PySequence_GetSlice(string, 0, 0);\r
2006 if (!joiner)\r
2007 return NULL;\r
2008\r
2009 if (PyList_GET_SIZE(list) == 0) {\r
2010 Py_DECREF(list);\r
2011 return joiner;\r
2012 }\r
2013\r
2014#if PY_VERSION_HEX >= 0x01060000\r
2015 function = PyObject_GetAttrString(joiner, "join");\r
2016 if (!function) {\r
2017 Py_DECREF(joiner);\r
2018 return NULL;\r
2019 }\r
2020 args = PyTuple_New(1);\r
2021 if (!args) {\r
2022 Py_DECREF(function);\r
2023 Py_DECREF(joiner);\r
2024 return NULL;\r
2025 }\r
2026 PyTuple_SET_ITEM(args, 0, list);\r
2027 result = PyObject_CallObject(function, args);\r
2028 Py_DECREF(args); /* also removes list */\r
2029 Py_DECREF(function);\r
2030#else\r
2031 result = call(\r
2032 "string", "join",\r
2033 PyTuple_Pack(2, list, joiner)\r
2034 );\r
2035#endif\r
2036 Py_DECREF(joiner);\r
2037\r
2038 return result;\r
2039}\r
2040\r
2041static PyObject*\r
2042pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)\r
2043{\r
2044 SRE_STATE state;\r
2045 PyObject* list;\r
2046 int status;\r
2047 Py_ssize_t i, b, e;\r
2048\r
2049 PyObject* string;\r
2050 Py_ssize_t start = 0;\r
2051 Py_ssize_t end = PY_SSIZE_T_MAX;\r
2052 static char* kwlist[] = { "source", "pos", "endpos", NULL };\r
2053 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,\r
2054 &string, &start, &end))\r
2055 return NULL;\r
2056\r
2057 string = state_init(&state, self, string, start, end);\r
2058 if (!string)\r
2059 return NULL;\r
2060\r
2061 list = PyList_New(0);\r
2062 if (!list) {\r
2063 state_fini(&state);\r
2064 return NULL;\r
2065 }\r
2066\r
2067 while (state.start <= state.end) {\r
2068\r
2069 PyObject* item;\r
2070\r
2071 state_reset(&state);\r
2072\r
2073 state.ptr = state.start;\r
2074\r
2075 if (state.charsize == 1) {\r
2076 status = sre_search(&state, PatternObject_GetCode(self));\r
2077 } else {\r
2078#if defined(HAVE_UNICODE)\r
2079 status = sre_usearch(&state, PatternObject_GetCode(self));\r
2080#endif\r
2081 }\r
2082\r
2083 if (PyErr_Occurred())\r
2084 goto error;\r
2085\r
2086 if (status <= 0) {\r
2087 if (status == 0)\r
2088 break;\r
2089 pattern_error(status);\r
2090 goto error;\r
2091 }\r
2092\r
2093 /* don't bother to build a match object */\r
2094 switch (self->groups) {\r
2095 case 0:\r
2096 b = STATE_OFFSET(&state, state.start);\r
2097 e = STATE_OFFSET(&state, state.ptr);\r
2098 item = PySequence_GetSlice(string, b, e);\r
2099 if (!item)\r
2100 goto error;\r
2101 break;\r
2102 case 1:\r
2103 item = state_getslice(&state, 1, string, 1);\r
2104 if (!item)\r
2105 goto error;\r
2106 break;\r
2107 default:\r
2108 item = PyTuple_New(self->groups);\r
2109 if (!item)\r
2110 goto error;\r
2111 for (i = 0; i < self->groups; i++) {\r
2112 PyObject* o = state_getslice(&state, i+1, string, 1);\r
2113 if (!o) {\r
2114 Py_DECREF(item);\r
2115 goto error;\r
2116 }\r
2117 PyTuple_SET_ITEM(item, i, o);\r
2118 }\r
2119 break;\r
2120 }\r
2121\r
2122 status = PyList_Append(list, item);\r
2123 Py_DECREF(item);\r
2124 if (status < 0)\r
2125 goto error;\r
2126\r
2127 if (state.ptr == state.start)\r
2128 state.start = (void*) ((char*) state.ptr + state.charsize);\r
2129 else\r
2130 state.start = state.ptr;\r
2131\r
2132 }\r
2133\r
2134 state_fini(&state);\r
2135 return list;\r
2136\r
2137error:\r
2138 Py_DECREF(list);\r
2139 state_fini(&state);\r
2140 return NULL;\r
2141\r
2142}\r
2143\r
2144#if PY_VERSION_HEX >= 0x02020000\r
2145static PyObject*\r
2146pattern_finditer(PatternObject* pattern, PyObject* args)\r
2147{\r
2148 PyObject* scanner;\r
2149 PyObject* search;\r
2150 PyObject* iterator;\r
2151\r
2152 scanner = pattern_scanner(pattern, args);\r
2153 if (!scanner)\r
2154 return NULL;\r
2155\r
2156 search = PyObject_GetAttrString(scanner, "search");\r
2157 Py_DECREF(scanner);\r
2158 if (!search)\r
2159 return NULL;\r
2160\r
2161 iterator = PyCallIter_New(search, Py_None);\r
2162 Py_DECREF(search);\r
2163\r
2164 return iterator;\r
2165}\r
2166#endif\r
2167\r
2168static PyObject*\r
2169pattern_split(PatternObject* self, PyObject* args, PyObject* kw)\r
2170{\r
2171 SRE_STATE state;\r
2172 PyObject* list;\r
2173 PyObject* item;\r
2174 int status;\r
2175 Py_ssize_t n;\r
2176 Py_ssize_t i;\r
2177 void* last;\r
2178\r
2179 PyObject* string;\r
2180 Py_ssize_t maxsplit = 0;\r
2181 static char* kwlist[] = { "source", "maxsplit", NULL };\r
2182 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist,\r
2183 &string, &maxsplit))\r
2184 return NULL;\r
2185\r
2186 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);\r
2187 if (!string)\r
2188 return NULL;\r
2189\r
2190 list = PyList_New(0);\r
2191 if (!list) {\r
2192 state_fini(&state);\r
2193 return NULL;\r
2194 }\r
2195\r
2196 n = 0;\r
2197 last = state.start;\r
2198\r
2199 while (!maxsplit || n < maxsplit) {\r
2200\r
2201 state_reset(&state);\r
2202\r
2203 state.ptr = state.start;\r
2204\r
2205 if (state.charsize == 1) {\r
2206 status = sre_search(&state, PatternObject_GetCode(self));\r
2207 } else {\r
2208#if defined(HAVE_UNICODE)\r
2209 status = sre_usearch(&state, PatternObject_GetCode(self));\r
2210#endif\r
2211 }\r
2212\r
2213 if (PyErr_Occurred())\r
2214 goto error;\r
2215\r
2216 if (status <= 0) {\r
2217 if (status == 0)\r
2218 break;\r
2219 pattern_error(status);\r
2220 goto error;\r
2221 }\r
2222\r
2223 if (state.start == state.ptr) {\r
2224 if (last == state.end)\r
2225 break;\r
2226 /* skip one character */\r
2227 state.start = (void*) ((char*) state.ptr + state.charsize);\r
2228 continue;\r
2229 }\r
2230\r
2231 /* get segment before this match */\r
2232 item = PySequence_GetSlice(\r
2233 string, STATE_OFFSET(&state, last),\r
2234 STATE_OFFSET(&state, state.start)\r
2235 );\r
2236 if (!item)\r
2237 goto error;\r
2238 status = PyList_Append(list, item);\r
2239 Py_DECREF(item);\r
2240 if (status < 0)\r
2241 goto error;\r
2242\r
2243 /* add groups (if any) */\r
2244 for (i = 0; i < self->groups; i++) {\r
2245 item = state_getslice(&state, i+1, string, 0);\r
2246 if (!item)\r
2247 goto error;\r
2248 status = PyList_Append(list, item);\r
2249 Py_DECREF(item);\r
2250 if (status < 0)\r
2251 goto error;\r
2252 }\r
2253\r
2254 n = n + 1;\r
2255\r
2256 last = state.start = state.ptr;\r
2257\r
2258 }\r
2259\r
2260 /* get segment following last match (even if empty) */\r
2261 item = PySequence_GetSlice(\r
2262 string, STATE_OFFSET(&state, last), state.endpos\r
2263 );\r
2264 if (!item)\r
2265 goto error;\r
2266 status = PyList_Append(list, item);\r
2267 Py_DECREF(item);\r
2268 if (status < 0)\r
2269 goto error;\r
2270\r
2271 state_fini(&state);\r
2272 return list;\r
2273\r
2274error:\r
2275 Py_DECREF(list);\r
2276 state_fini(&state);\r
2277 return NULL;\r
2278\r
2279}\r
2280\r
2281static PyObject*\r
2282pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,\r
2283 Py_ssize_t count, Py_ssize_t subn)\r
2284{\r
2285 SRE_STATE state;\r
2286 PyObject* list;\r
2287 PyObject* item;\r
2288 PyObject* filter;\r
2289 PyObject* args;\r
2290 PyObject* match;\r
2291 void* ptr;\r
2292 int status;\r
2293 Py_ssize_t n;\r
2294 Py_ssize_t i, b, e;\r
2295 int bint;\r
2296 int filter_is_callable;\r
2297\r
2298 if (PyCallable_Check(ptemplate)) {\r
2299 /* sub/subn takes either a function or a template */\r
2300 filter = ptemplate;\r
2301 Py_INCREF(filter);\r
2302 filter_is_callable = 1;\r
2303 } else {\r
2304 /* if not callable, check if it's a literal string */\r
2305 int literal;\r
2306 ptr = getstring(ptemplate, &n, &bint);\r
2307 b = bint;\r
2308 if (ptr) {\r
2309 if (b == 1) {\r
2310 literal = sre_literal_template((unsigned char *)ptr, n);\r
2311 } else {\r
2312#if defined(HAVE_UNICODE)\r
2313 literal = sre_uliteral_template((Py_UNICODE *)ptr, n);\r
2314#endif\r
2315 }\r
2316 } else {\r
2317 PyErr_Clear();\r
2318 literal = 0;\r
2319 }\r
2320 if (literal) {\r
2321 filter = ptemplate;\r
2322 Py_INCREF(filter);\r
2323 filter_is_callable = 0;\r
2324 } else {\r
2325 /* not a literal; hand it over to the template compiler */\r
2326 filter = call(\r
2327 SRE_PY_MODULE, "_subx",\r
2328 PyTuple_Pack(2, self, ptemplate)\r
2329 );\r
2330 if (!filter)\r
2331 return NULL;\r
2332 filter_is_callable = PyCallable_Check(filter);\r
2333 }\r
2334 }\r
2335\r
2336 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);\r
2337 if (!string) {\r
2338 Py_DECREF(filter);\r
2339 return NULL;\r
2340 }\r
2341\r
2342 list = PyList_New(0);\r
2343 if (!list) {\r
2344 Py_DECREF(filter);\r
2345 state_fini(&state);\r
2346 return NULL;\r
2347 }\r
2348\r
2349 n = i = 0;\r
2350\r
2351 while (!count || n < count) {\r
2352\r
2353 state_reset(&state);\r
2354\r
2355 state.ptr = state.start;\r
2356\r
2357 if (state.charsize == 1) {\r
2358 status = sre_search(&state, PatternObject_GetCode(self));\r
2359 } else {\r
2360#if defined(HAVE_UNICODE)\r
2361 status = sre_usearch(&state, PatternObject_GetCode(self));\r
2362#endif\r
2363 }\r
2364\r
2365 if (PyErr_Occurred())\r
2366 goto error;\r
2367\r
2368 if (status <= 0) {\r
2369 if (status == 0)\r
2370 break;\r
2371 pattern_error(status);\r
2372 goto error;\r
2373 }\r
2374\r
2375 b = STATE_OFFSET(&state, state.start);\r
2376 e = STATE_OFFSET(&state, state.ptr);\r
2377\r
2378 if (i < b) {\r
2379 /* get segment before this match */\r
2380 item = PySequence_GetSlice(string, i, b);\r
2381 if (!item)\r
2382 goto error;\r
2383 status = PyList_Append(list, item);\r
2384 Py_DECREF(item);\r
2385 if (status < 0)\r
2386 goto error;\r
2387\r
2388 } else if (i == b && i == e && n > 0)\r
2389 /* ignore empty match on latest position */\r
2390 goto next;\r
2391\r
2392 if (filter_is_callable) {\r
2393 /* pass match object through filter */\r
2394 match = pattern_new_match(self, &state, 1);\r
2395 if (!match)\r
2396 goto error;\r
2397 args = PyTuple_Pack(1, match);\r
2398 if (!args) {\r
2399 Py_DECREF(match);\r
2400 goto error;\r
2401 }\r
2402 item = PyObject_CallObject(filter, args);\r
2403 Py_DECREF(args);\r
2404 Py_DECREF(match);\r
2405 if (!item)\r
2406 goto error;\r
2407 } else {\r
2408 /* filter is literal string */\r
2409 item = filter;\r
2410 Py_INCREF(item);\r
2411 }\r
2412\r
2413 /* add to list */\r
2414 if (item != Py_None) {\r
2415 status = PyList_Append(list, item);\r
2416 Py_DECREF(item);\r
2417 if (status < 0)\r
2418 goto error;\r
2419 }\r
2420\r
2421 i = e;\r
2422 n = n + 1;\r
2423\r
2424next:\r
2425 /* move on */\r
2426 if (state.ptr == state.start)\r
2427 state.start = (void*) ((char*) state.ptr + state.charsize);\r
2428 else\r
2429 state.start = state.ptr;\r
2430\r
2431 }\r
2432\r
2433 /* get segment following last match */\r
2434 if (i < state.endpos) {\r
2435 item = PySequence_GetSlice(string, i, state.endpos);\r
2436 if (!item)\r
2437 goto error;\r
2438 status = PyList_Append(list, item);\r
2439 Py_DECREF(item);\r
2440 if (status < 0)\r
2441 goto error;\r
2442 }\r
2443\r
2444 state_fini(&state);\r
2445\r
2446 Py_DECREF(filter);\r
2447\r
2448 /* convert list to single string (also removes list) */\r
2449 item = join_list(list, string);\r
2450\r
2451 if (!item)\r
2452 return NULL;\r
2453\r
2454 if (subn)\r
2455 return Py_BuildValue("Ni", item, n);\r
2456\r
2457 return item;\r
2458\r
2459error:\r
2460 Py_DECREF(list);\r
2461 state_fini(&state);\r
2462 Py_DECREF(filter);\r
2463 return NULL;\r
2464\r
2465}\r
2466\r
2467static PyObject*\r
2468pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)\r
2469{\r
2470 PyObject* ptemplate;\r
2471 PyObject* string;\r
2472 Py_ssize_t count = 0;\r
2473 static char* kwlist[] = { "repl", "string", "count", NULL };\r
2474 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,\r
2475 &ptemplate, &string, &count))\r
2476 return NULL;\r
2477\r
2478 return pattern_subx(self, ptemplate, string, count, 0);\r
2479}\r
2480\r
2481static PyObject*\r
2482pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)\r
2483{\r
2484 PyObject* ptemplate;\r
2485 PyObject* string;\r
2486 Py_ssize_t count = 0;\r
2487 static char* kwlist[] = { "repl", "string", "count", NULL };\r
2488 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,\r
2489 &ptemplate, &string, &count))\r
2490 return NULL;\r
2491\r
2492 return pattern_subx(self, ptemplate, string, count, 1);\r
2493}\r
2494\r
2495static PyObject*\r
2496pattern_copy(PatternObject* self, PyObject *unused)\r
2497{\r
2498#ifdef USE_BUILTIN_COPY\r
2499 PatternObject* copy;\r
2500 int offset;\r
2501\r
2502 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);\r
2503 if (!copy)\r
2504 return NULL;\r
2505\r
2506 offset = offsetof(PatternObject, groups);\r
2507\r
2508 Py_XINCREF(self->groupindex);\r
2509 Py_XINCREF(self->indexgroup);\r
2510 Py_XINCREF(self->pattern);\r
2511\r
2512 memcpy((char*) copy + offset, (char*) self + offset,\r
2513 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);\r
2514 copy->weakreflist = NULL;\r
2515\r
2516 return (PyObject*) copy;\r
2517#else\r
2518 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");\r
2519 return NULL;\r
2520#endif\r
2521}\r
2522\r
2523static PyObject*\r
2524pattern_deepcopy(PatternObject* self, PyObject* memo)\r
2525{\r
2526#ifdef USE_BUILTIN_COPY\r
2527 PatternObject* copy;\r
2528\r
2529 copy = (PatternObject*) pattern_copy(self);\r
2530 if (!copy)\r
2531 return NULL;\r
2532\r
2533 if (!deepcopy(&copy->groupindex, memo) ||\r
2534 !deepcopy(&copy->indexgroup, memo) ||\r
2535 !deepcopy(&copy->pattern, memo)) {\r
2536 Py_DECREF(copy);\r
2537 return NULL;\r
2538 }\r
2539\r
2540#else\r
2541 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");\r
2542 return NULL;\r
2543#endif\r
2544}\r
2545\r
2546PyDoc_STRVAR(pattern_match_doc,\r
2547"match(string[, pos[, endpos]]) --> match object or None.\n\\r
2548 Matches zero or more characters at the beginning of the string");\r
2549\r
2550PyDoc_STRVAR(pattern_search_doc,\r
2551"search(string[, pos[, endpos]]) --> match object or None.\n\\r
2552 Scan through string looking for a match, and return a corresponding\n\\r
2553 MatchObject instance. Return None if no position in the string matches.");\r
2554\r
2555PyDoc_STRVAR(pattern_split_doc,\r
2556"split(string[, maxsplit = 0]) --> list.\n\\r
2557 Split string by the occurrences of pattern.");\r
2558\r
2559PyDoc_STRVAR(pattern_findall_doc,\r
2560"findall(string[, pos[, endpos]]) --> list.\n\\r
2561 Return a list of all non-overlapping matches of pattern in string.");\r
2562\r
2563PyDoc_STRVAR(pattern_finditer_doc,\r
2564"finditer(string[, pos[, endpos]]) --> iterator.\n\\r
2565 Return an iterator over all non-overlapping matches for the \n\\r
2566 RE pattern in string. For each match, the iterator returns a\n\\r
2567 match object.");\r
2568\r
2569PyDoc_STRVAR(pattern_sub_doc,\r
2570"sub(repl, string[, count = 0]) --> newstring\n\\r
2571 Return the string obtained by replacing the leftmost non-overlapping\n\\r
2572 occurrences of pattern in string by the replacement repl.");\r
2573\r
2574PyDoc_STRVAR(pattern_subn_doc,\r
2575"subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\\r
2576 Return the tuple (new_string, number_of_subs_made) found by replacing\n\\r
2577 the leftmost non-overlapping occurrences of pattern with the\n\\r
2578 replacement repl.");\r
2579\r
2580PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");\r
2581\r
2582static PyMethodDef pattern_methods[] = {\r
2583 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,\r
2584 pattern_match_doc},\r
2585 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,\r
2586 pattern_search_doc},\r
2587 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,\r
2588 pattern_sub_doc},\r
2589 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,\r
2590 pattern_subn_doc},\r
2591 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,\r
2592 pattern_split_doc},\r
2593 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,\r
2594 pattern_findall_doc},\r
2595#if PY_VERSION_HEX >= 0x02020000\r
2596 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,\r
2597 pattern_finditer_doc},\r
2598#endif\r
2599 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},\r
2600 {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},\r
2601 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},\r
2602 {NULL, NULL}\r
2603};\r
2604\r
2605static PyObject*\r
2606pattern_getattr(PatternObject* self, char* name)\r
2607{\r
2608 PyObject* res;\r
2609\r
2610 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);\r
2611\r
2612 if (res)\r
2613 return res;\r
2614\r
2615 PyErr_Clear();\r
2616\r
2617 /* attributes */\r
2618 if (!strcmp(name, "pattern")) {\r
2619 Py_INCREF(self->pattern);\r
2620 return self->pattern;\r
2621 }\r
2622\r
2623 if (!strcmp(name, "flags"))\r
2624 return Py_BuildValue("i", self->flags);\r
2625\r
2626 if (!strcmp(name, "groups"))\r
2627 return Py_BuildValue("i", self->groups);\r
2628\r
2629 if (!strcmp(name, "groupindex") && self->groupindex) {\r
2630 Py_INCREF(self->groupindex);\r
2631 return self->groupindex;\r
2632 }\r
2633\r
2634 PyErr_SetString(PyExc_AttributeError, name);\r
2635 return NULL;\r
2636}\r
2637\r
2638statichere PyTypeObject Pattern_Type = {\r
2639 PyObject_HEAD_INIT(NULL)\r
2640 0, "_" SRE_MODULE ".SRE_Pattern",\r
2641 sizeof(PatternObject), sizeof(SRE_CODE),\r
2642 (destructor)pattern_dealloc, /*tp_dealloc*/\r
2643 0, /*tp_print*/\r
2644 (getattrfunc)pattern_getattr, /*tp_getattr*/\r
2645 0, /* tp_setattr */\r
2646 0, /* tp_compare */\r
2647 0, /* tp_repr */\r
2648 0, /* tp_as_number */\r
2649 0, /* tp_as_sequence */\r
2650 0, /* tp_as_mapping */\r
2651 0, /* tp_hash */\r
2652 0, /* tp_call */\r
2653 0, /* tp_str */\r
2654 0, /* tp_getattro */\r
2655 0, /* tp_setattro */\r
2656 0, /* tp_as_buffer */\r
2657 Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */\r
2658 pattern_doc, /* tp_doc */\r
2659 0, /* tp_traverse */\r
2660 0, /* tp_clear */\r
2661 0, /* tp_richcompare */\r
2662 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */\r
2663};\r
2664\r
2665static int _validate(PatternObject *self); /* Forward */\r
2666\r
2667static PyObject *\r
2668_compile(PyObject* self_, PyObject* args)\r
2669{\r
2670 /* "compile" pattern descriptor to pattern object */\r
2671\r
2672 PatternObject* self;\r
2673 Py_ssize_t i, n;\r
2674\r
2675 PyObject* pattern;\r
2676 int flags = 0;\r
2677 PyObject* code;\r
2678 Py_ssize_t groups = 0;\r
2679 PyObject* groupindex = NULL;\r
2680 PyObject* indexgroup = NULL;\r
2681 if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,\r
2682 &PyList_Type, &code, &groups,\r
2683 &groupindex, &indexgroup))\r
2684 return NULL;\r
2685\r
2686 n = PyList_GET_SIZE(code);\r
2687 /* coverity[ampersand_in_size] */\r
2688 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);\r
2689 if (!self)\r
2690 return NULL;\r
2691 self->weakreflist = NULL;\r
2692 self->pattern = NULL;\r
2693 self->groupindex = NULL;\r
2694 self->indexgroup = NULL;\r
2695\r
2696 self->codesize = n;\r
2697\r
2698 for (i = 0; i < n; i++) {\r
2699 PyObject *o = PyList_GET_ITEM(code, i);\r
2700 unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o)\r
2701 : PyLong_AsUnsignedLong(o);\r
2702 self->code[i] = (SRE_CODE) value;\r
2703 if ((unsigned long) self->code[i] != value) {\r
2704 PyErr_SetString(PyExc_OverflowError,\r
2705 "regular expression code size limit exceeded");\r
2706 break;\r
2707 }\r
2708 }\r
2709\r
2710 if (PyErr_Occurred()) {\r
2711 Py_DECREF(self);\r
2712 return NULL;\r
2713 }\r
2714\r
2715 Py_INCREF(pattern);\r
2716 self->pattern = pattern;\r
2717\r
2718 self->flags = flags;\r
2719\r
2720 self->groups = groups;\r
2721\r
2722 Py_XINCREF(groupindex);\r
2723 self->groupindex = groupindex;\r
2724\r
2725 Py_XINCREF(indexgroup);\r
2726 self->indexgroup = indexgroup;\r
2727\r
2728 self->weakreflist = NULL;\r
2729\r
2730 if (!_validate(self)) {\r
2731 Py_DECREF(self);\r
2732 return NULL;\r
2733 }\r
2734\r
2735 return (PyObject*) self;\r
2736}\r
2737\r
2738/* -------------------------------------------------------------------- */\r
2739/* Code validation */\r
2740\r
2741/* To learn more about this code, have a look at the _compile() function in\r
2742 Lib/sre_compile.py. The validation functions below checks the code array\r
2743 for conformance with the code patterns generated there.\r
2744\r
2745 The nice thing about the generated code is that it is position-independent:\r
2746 all jumps are relative jumps forward. Also, jumps don't cross each other:\r
2747 the target of a later jump is always earlier than the target of an earlier\r
2748 jump. IOW, this is okay:\r
2749\r
2750 J---------J-------T--------T\r
2751 \ \_____/ /\r
2752 \______________________/\r
2753\r
2754 but this is not:\r
2755\r
2756 J---------J-------T--------T\r
2757 \_________\_____/ /\r
2758 \____________/\r
2759\r
2760 It also helps that SRE_CODE is always an unsigned type, either 2 bytes or 4\r
2761 bytes wide (the latter if Python is compiled for "wide" unicode support).\r
2762*/\r
2763\r
2764/* Defining this one enables tracing of the validator */\r
2765#undef VVERBOSE\r
2766\r
2767/* Trace macro for the validator */\r
2768#if defined(VVERBOSE)\r
2769#define VTRACE(v) printf v\r
2770#else\r
2771#define VTRACE(v)\r
2772#endif\r
2773\r
2774/* Report failure */\r
2775#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)\r
2776\r
2777/* Extract opcode, argument, or skip count from code array */\r
2778#define GET_OP \\r
2779 do { \\r
2780 VTRACE(("%p: ", code)); \\r
2781 if (code >= end) FAIL; \\r
2782 op = *code++; \\r
2783 VTRACE(("%lu (op)\n", (unsigned long)op)); \\r
2784 } while (0)\r
2785#define GET_ARG \\r
2786 do { \\r
2787 VTRACE(("%p= ", code)); \\r
2788 if (code >= end) FAIL; \\r
2789 arg = *code++; \\r
2790 VTRACE(("%lu (arg)\n", (unsigned long)arg)); \\r
2791 } while (0)\r
2792#define GET_SKIP_ADJ(adj) \\r
2793 do { \\r
2794 VTRACE(("%p= ", code)); \\r
2795 if (code >= end) FAIL; \\r
2796 skip = *code; \\r
2797 VTRACE(("%lu (skip to %p)\n", \\r
2798 (unsigned long)skip, code+skip)); \\r
2799 if (code+skip-adj < code || code+skip-adj > end)\\r
2800 FAIL; \\r
2801 code++; \\r
2802 } while (0)\r
2803#define GET_SKIP GET_SKIP_ADJ(0)\r
2804\r
2805static int\r
2806_validate_charset(SRE_CODE *code, SRE_CODE *end)\r
2807{\r
2808 /* Some variables are manipulated by the macros above */\r
2809 SRE_CODE op;\r
2810 SRE_CODE arg;\r
2811 SRE_CODE offset;\r
2812 int i;\r
2813\r
2814 while (code < end) {\r
2815 GET_OP;\r
2816 switch (op) {\r
2817\r
2818 case SRE_OP_NEGATE:\r
2819 break;\r
2820\r
2821 case SRE_OP_LITERAL:\r
2822 GET_ARG;\r
2823 break;\r
2824\r
2825 case SRE_OP_RANGE:\r
2826 GET_ARG;\r
2827 GET_ARG;\r
2828 break;\r
2829\r
2830 case SRE_OP_CHARSET:\r
2831 offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */\r
2832 if (code+offset < code || code+offset > end)\r
2833 FAIL;\r
2834 code += offset;\r
2835 break;\r
2836\r
2837 case SRE_OP_BIGCHARSET:\r
2838 GET_ARG; /* Number of blocks */\r
2839 offset = 256/sizeof(SRE_CODE); /* 256-byte table */\r
2840 if (code+offset < code || code+offset > end)\r
2841 FAIL;\r
2842 /* Make sure that each byte points to a valid block */\r
2843 for (i = 0; i < 256; i++) {\r
2844 if (((unsigned char *)code)[i] >= arg)\r
2845 FAIL;\r
2846 }\r
2847 code += offset;\r
2848 offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */\r
2849 if (code+offset < code || code+offset > end)\r
2850 FAIL;\r
2851 code += offset;\r
2852 break;\r
2853\r
2854 case SRE_OP_CATEGORY:\r
2855 GET_ARG;\r
2856 switch (arg) {\r
2857 case SRE_CATEGORY_DIGIT:\r
2858 case SRE_CATEGORY_NOT_DIGIT:\r
2859 case SRE_CATEGORY_SPACE:\r
2860 case SRE_CATEGORY_NOT_SPACE:\r
2861 case SRE_CATEGORY_WORD:\r
2862 case SRE_CATEGORY_NOT_WORD:\r
2863 case SRE_CATEGORY_LINEBREAK:\r
2864 case SRE_CATEGORY_NOT_LINEBREAK:\r
2865 case SRE_CATEGORY_LOC_WORD:\r
2866 case SRE_CATEGORY_LOC_NOT_WORD:\r
2867 case SRE_CATEGORY_UNI_DIGIT:\r
2868 case SRE_CATEGORY_UNI_NOT_DIGIT:\r
2869 case SRE_CATEGORY_UNI_SPACE:\r
2870 case SRE_CATEGORY_UNI_NOT_SPACE:\r
2871 case SRE_CATEGORY_UNI_WORD:\r
2872 case SRE_CATEGORY_UNI_NOT_WORD:\r
2873 case SRE_CATEGORY_UNI_LINEBREAK:\r
2874 case SRE_CATEGORY_UNI_NOT_LINEBREAK:\r
2875 break;\r
2876 default:\r
2877 FAIL;\r
2878 }\r
2879 break;\r
2880\r
2881 default:\r
2882 FAIL;\r
2883\r
2884 }\r
2885 }\r
2886\r
2887 return 1;\r
2888}\r
2889\r
2890static int\r
2891_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)\r
2892{\r
2893 /* Some variables are manipulated by the macros above */\r
2894 SRE_CODE op;\r
2895 SRE_CODE arg;\r
2896 SRE_CODE skip;\r
2897\r
2898 VTRACE(("code=%p, end=%p\n", code, end));\r
2899\r
2900 if (code > end)\r
2901 FAIL;\r
2902\r
2903 while (code < end) {\r
2904 GET_OP;\r
2905 switch (op) {\r
2906\r
2907 case SRE_OP_MARK:\r
2908 /* We don't check whether marks are properly nested; the\r
2909 sre_match() code is robust even if they don't, and the worst\r
2910 you can get is nonsensical match results. */\r
2911 GET_ARG;\r
2912 if (arg > 2*groups+1) {\r
2913 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));\r
2914 FAIL;\r
2915 }\r
2916 break;\r
2917\r
2918 case SRE_OP_LITERAL:\r
2919 case SRE_OP_NOT_LITERAL:\r
2920 case SRE_OP_LITERAL_IGNORE:\r
2921 case SRE_OP_NOT_LITERAL_IGNORE:\r
2922 GET_ARG;\r
2923 /* The arg is just a character, nothing to check */\r
2924 break;\r
2925\r
2926 case SRE_OP_SUCCESS:\r
2927 case SRE_OP_FAILURE:\r
2928 /* Nothing to check; these normally end the matching process */\r
2929 break;\r
2930\r
2931 case SRE_OP_AT:\r
2932 GET_ARG;\r
2933 switch (arg) {\r
2934 case SRE_AT_BEGINNING:\r
2935 case SRE_AT_BEGINNING_STRING:\r
2936 case SRE_AT_BEGINNING_LINE:\r
2937 case SRE_AT_END:\r
2938 case SRE_AT_END_LINE:\r
2939 case SRE_AT_END_STRING:\r
2940 case SRE_AT_BOUNDARY:\r
2941 case SRE_AT_NON_BOUNDARY:\r
2942 case SRE_AT_LOC_BOUNDARY:\r
2943 case SRE_AT_LOC_NON_BOUNDARY:\r
2944 case SRE_AT_UNI_BOUNDARY:\r
2945 case SRE_AT_UNI_NON_BOUNDARY:\r
2946 break;\r
2947 default:\r
2948 FAIL;\r
2949 }\r
2950 break;\r
2951\r
2952 case SRE_OP_ANY:\r
2953 case SRE_OP_ANY_ALL:\r
2954 /* These have no operands */\r
2955 break;\r
2956\r
2957 case SRE_OP_IN:\r
2958 case SRE_OP_IN_IGNORE:\r
2959 GET_SKIP;\r
2960 /* Stop 1 before the end; we check the FAILURE below */\r
2961 if (!_validate_charset(code, code+skip-2))\r
2962 FAIL;\r
2963 if (code[skip-2] != SRE_OP_FAILURE)\r
2964 FAIL;\r
2965 code += skip-1;\r
2966 break;\r
2967\r
2968 case SRE_OP_INFO:\r
2969 {\r
2970 /* A minimal info field is\r
2971 <INFO> <1=skip> <2=flags> <3=min> <4=max>;\r
2972 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,\r
2973 more follows. */\r
2974 SRE_CODE flags, i;\r
2975 SRE_CODE *newcode;\r
2976 GET_SKIP;\r
2977 newcode = code+skip-1;\r
2978 GET_ARG; flags = arg;\r
2979 GET_ARG; /* min */\r
2980 GET_ARG; /* max */\r
2981 /* Check that only valid flags are present */\r
2982 if ((flags & ~(SRE_INFO_PREFIX |\r
2983 SRE_INFO_LITERAL |\r
2984 SRE_INFO_CHARSET)) != 0)\r
2985 FAIL;\r
2986 /* PREFIX and CHARSET are mutually exclusive */\r
2987 if ((flags & SRE_INFO_PREFIX) &&\r
2988 (flags & SRE_INFO_CHARSET))\r
2989 FAIL;\r
2990 /* LITERAL implies PREFIX */\r
2991 if ((flags & SRE_INFO_LITERAL) &&\r
2992 !(flags & SRE_INFO_PREFIX))\r
2993 FAIL;\r
2994 /* Validate the prefix */\r
2995 if (flags & SRE_INFO_PREFIX) {\r
2996 SRE_CODE prefix_len;\r
2997 GET_ARG; prefix_len = arg;\r
2998 GET_ARG; /* prefix skip */\r
2999 /* Here comes the prefix string */\r
3000 if (code+prefix_len < code || code+prefix_len > newcode)\r
3001 FAIL;\r
3002 code += prefix_len;\r
3003 /* And here comes the overlap table */\r
3004 if (code+prefix_len < code || code+prefix_len > newcode)\r
3005 FAIL;\r
3006 /* Each overlap value should be < prefix_len */\r
3007 for (i = 0; i < prefix_len; i++) {\r
3008 if (code[i] >= prefix_len)\r
3009 FAIL;\r
3010 }\r
3011 code += prefix_len;\r
3012 }\r
3013 /* Validate the charset */\r
3014 if (flags & SRE_INFO_CHARSET) {\r
3015 if (!_validate_charset(code, newcode-1))\r
3016 FAIL;\r
3017 if (newcode[-1] != SRE_OP_FAILURE)\r
3018 FAIL;\r
3019 code = newcode;\r
3020 }\r
3021 else if (code != newcode) {\r
3022 VTRACE(("code=%p, newcode=%p\n", code, newcode));\r
3023 FAIL;\r
3024 }\r
3025 }\r
3026 break;\r
3027\r
3028 case SRE_OP_BRANCH:\r
3029 {\r
3030 SRE_CODE *target = NULL;\r
3031 for (;;) {\r
3032 GET_SKIP;\r
3033 if (skip == 0)\r
3034 break;\r
3035 /* Stop 2 before the end; we check the JUMP below */\r
3036 if (!_validate_inner(code, code+skip-3, groups))\r
3037 FAIL;\r
3038 code += skip-3;\r
3039 /* Check that it ends with a JUMP, and that each JUMP\r
3040 has the same target */\r
3041 GET_OP;\r
3042 if (op != SRE_OP_JUMP)\r
3043 FAIL;\r
3044 GET_SKIP;\r
3045 if (target == NULL)\r
3046 target = code+skip-1;\r
3047 else if (code+skip-1 != target)\r
3048 FAIL;\r
3049 }\r
3050 }\r
3051 break;\r
3052\r
3053 case SRE_OP_REPEAT_ONE:\r
3054 case SRE_OP_MIN_REPEAT_ONE:\r
3055 {\r
3056 SRE_CODE min, max;\r
3057 GET_SKIP;\r
3058 GET_ARG; min = arg;\r
3059 GET_ARG; max = arg;\r
3060 if (min > max)\r
3061 FAIL;\r
3062#ifdef Py_UNICODE_WIDE\r
3063 if (max > 65535)\r
3064 FAIL;\r
3065#endif\r
3066 if (!_validate_inner(code, code+skip-4, groups))\r
3067 FAIL;\r
3068 code += skip-4;\r
3069 GET_OP;\r
3070 if (op != SRE_OP_SUCCESS)\r
3071 FAIL;\r
3072 }\r
3073 break;\r
3074\r
3075 case SRE_OP_REPEAT:\r
3076 {\r
3077 SRE_CODE min, max;\r
3078 GET_SKIP;\r
3079 GET_ARG; min = arg;\r
3080 GET_ARG; max = arg;\r
3081 if (min > max)\r
3082 FAIL;\r
3083#ifdef Py_UNICODE_WIDE\r
3084 if (max > 65535)\r
3085 FAIL;\r
3086#endif\r
3087 if (!_validate_inner(code, code+skip-3, groups))\r
3088 FAIL;\r
3089 code += skip-3;\r
3090 GET_OP;\r
3091 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)\r
3092 FAIL;\r
3093 }\r
3094 break;\r
3095\r
3096 case SRE_OP_GROUPREF:\r
3097 case SRE_OP_GROUPREF_IGNORE:\r
3098 GET_ARG;\r
3099 if (arg >= groups)\r
3100 FAIL;\r
3101 break;\r
3102\r
3103 case SRE_OP_GROUPREF_EXISTS:\r
3104 /* The regex syntax for this is: '(?(group)then|else)', where\r
3105 'group' is either an integer group number or a group name,\r
3106 'then' and 'else' are sub-regexes, and 'else' is optional. */\r
3107 GET_ARG;\r
3108 if (arg >= groups)\r
3109 FAIL;\r
3110 GET_SKIP_ADJ(1);\r
3111 code--; /* The skip is relative to the first arg! */\r
3112 /* There are two possibilities here: if there is both a 'then'\r
3113 part and an 'else' part, the generated code looks like:\r
3114\r
3115 GROUPREF_EXISTS\r
3116 <group>\r
3117 <skipyes>\r
3118 ...then part...\r
3119 JUMP\r
3120 <skipno>\r
3121 (<skipyes> jumps here)\r
3122 ...else part...\r
3123 (<skipno> jumps here)\r
3124\r
3125 If there is only a 'then' part, it looks like:\r
3126\r
3127 GROUPREF_EXISTS\r
3128 <group>\r
3129 <skip>\r
3130 ...then part...\r
3131 (<skip> jumps here)\r
3132\r
3133 There is no direct way to decide which it is, and we don't want\r
3134 to allow arbitrary jumps anywhere in the code; so we just look\r
3135 for a JUMP opcode preceding our skip target.\r
3136 */\r
3137 if (skip >= 3 && code+skip-3 >= code &&\r
3138 code[skip-3] == SRE_OP_JUMP)\r
3139 {\r
3140 VTRACE(("both then and else parts present\n"));\r
3141 if (!_validate_inner(code+1, code+skip-3, groups))\r
3142 FAIL;\r
3143 code += skip-2; /* Position after JUMP, at <skipno> */\r
3144 GET_SKIP;\r
3145 if (!_validate_inner(code, code+skip-1, groups))\r
3146 FAIL;\r
3147 code += skip-1;\r
3148 }\r
3149 else {\r
3150 VTRACE(("only a then part present\n"));\r
3151 if (!_validate_inner(code+1, code+skip-1, groups))\r
3152 FAIL;\r
3153 code += skip-1;\r
3154 }\r
3155 break;\r
3156\r
3157 case SRE_OP_ASSERT:\r
3158 case SRE_OP_ASSERT_NOT:\r
3159 GET_SKIP;\r
3160 GET_ARG; /* 0 for lookahead, width for lookbehind */\r
3161 code--; /* Back up over arg to simplify math below */\r
3162 if (arg & 0x80000000)\r
3163 FAIL; /* Width too large */\r
3164 /* Stop 1 before the end; we check the SUCCESS below */\r
3165 if (!_validate_inner(code+1, code+skip-2, groups))\r
3166 FAIL;\r
3167 code += skip-2;\r
3168 GET_OP;\r
3169 if (op != SRE_OP_SUCCESS)\r
3170 FAIL;\r
3171 break;\r
3172\r
3173 default:\r
3174 FAIL;\r
3175\r
3176 }\r
3177 }\r
3178\r
3179 VTRACE(("okay\n"));\r
3180 return 1;\r
3181}\r
3182\r
3183static int\r
3184_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)\r
3185{\r
3186 if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)\r
3187 FAIL;\r
3188 if (groups == 0) /* fix for simplejson */\r
3189 groups = 100; /* 100 groups should always be safe */\r
3190 return _validate_inner(code, end-1, groups);\r
3191}\r
3192\r
3193static int\r
3194_validate(PatternObject *self)\r
3195{\r
3196 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))\r
3197 {\r
3198 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");\r
3199 return 0;\r
3200 }\r
3201 else\r
3202 VTRACE(("Success!\n"));\r
3203 return 1;\r
3204}\r
3205\r
3206/* -------------------------------------------------------------------- */\r
3207/* match methods */\r
3208\r
3209static void\r
3210match_dealloc(MatchObject* self)\r
3211{\r
3212 Py_XDECREF(self->regs);\r
3213 Py_XDECREF(self->string);\r
3214 Py_DECREF(self->pattern);\r
3215 PyObject_DEL(self);\r
3216}\r
3217\r
3218static PyObject*\r
3219match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)\r
3220{\r
3221 if (index < 0 || index >= self->groups) {\r
3222 /* raise IndexError if we were given a bad group number */\r
3223 PyErr_SetString(\r
3224 PyExc_IndexError,\r
3225 "no such group"\r
3226 );\r
3227 return NULL;\r
3228 }\r
3229\r
3230 index *= 2;\r
3231\r
3232 if (self->string == Py_None || self->mark[index] < 0) {\r
3233 /* return default value if the string or group is undefined */\r
3234 Py_INCREF(def);\r
3235 return def;\r
3236 }\r
3237\r
3238 return PySequence_GetSlice(\r
3239 self->string, self->mark[index], self->mark[index+1]\r
3240 );\r
3241}\r
3242\r
3243static Py_ssize_t\r
3244match_getindex(MatchObject* self, PyObject* index)\r
3245{\r
3246 Py_ssize_t i;\r
3247\r
3248 if (PyInt_Check(index))\r
3249 return PyInt_AsSsize_t(index);\r
3250\r
3251 i = -1;\r
3252\r
3253 if (self->pattern->groupindex) {\r
3254 index = PyObject_GetItem(self->pattern->groupindex, index);\r
3255 if (index) {\r
3256 if (PyInt_Check(index) || PyLong_Check(index))\r
3257 i = PyInt_AsSsize_t(index);\r
3258 Py_DECREF(index);\r
3259 } else\r
3260 PyErr_Clear();\r
3261 }\r
3262\r
3263 return i;\r
3264}\r
3265\r
3266static PyObject*\r
3267match_getslice(MatchObject* self, PyObject* index, PyObject* def)\r
3268{\r
3269 return match_getslice_by_index(self, match_getindex(self, index), def);\r
3270}\r
3271\r
3272static PyObject*\r
3273match_expand(MatchObject* self, PyObject* ptemplate)\r
3274{\r
3275 /* delegate to Python code */\r
3276 return call(\r
3277 SRE_PY_MODULE, "_expand",\r
3278 PyTuple_Pack(3, self->pattern, self, ptemplate)\r
3279 );\r
3280}\r
3281\r
3282static PyObject*\r
3283match_group(MatchObject* self, PyObject* args)\r
3284{\r
3285 PyObject* result;\r
3286 Py_ssize_t i, size;\r
3287\r
3288 size = PyTuple_GET_SIZE(args);\r
3289\r
3290 switch (size) {\r
3291 case 0:\r
3292 result = match_getslice(self, Py_False, Py_None);\r
3293 break;\r
3294 case 1:\r
3295 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);\r
3296 break;\r
3297 default:\r
3298 /* fetch multiple items */\r
3299 result = PyTuple_New(size);\r
3300 if (!result)\r
3301 return NULL;\r
3302 for (i = 0; i < size; i++) {\r
3303 PyObject* item = match_getslice(\r
3304 self, PyTuple_GET_ITEM(args, i), Py_None\r
3305 );\r
3306 if (!item) {\r
3307 Py_DECREF(result);\r
3308 return NULL;\r
3309 }\r
3310 PyTuple_SET_ITEM(result, i, item);\r
3311 }\r
3312 break;\r
3313 }\r
3314 return result;\r
3315}\r
3316\r
3317static PyObject*\r
3318match_groups(MatchObject* self, PyObject* args, PyObject* kw)\r
3319{\r
3320 PyObject* result;\r
3321 Py_ssize_t index;\r
3322\r
3323 PyObject* def = Py_None;\r
3324 static char* kwlist[] = { "default", NULL };\r
3325 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))\r
3326 return NULL;\r
3327\r
3328 result = PyTuple_New(self->groups-1);\r
3329 if (!result)\r
3330 return NULL;\r
3331\r
3332 for (index = 1; index < self->groups; index++) {\r
3333 PyObject* item;\r
3334 item = match_getslice_by_index(self, index, def);\r
3335 if (!item) {\r
3336 Py_DECREF(result);\r
3337 return NULL;\r
3338 }\r
3339 PyTuple_SET_ITEM(result, index-1, item);\r
3340 }\r
3341\r
3342 return result;\r
3343}\r
3344\r
3345static PyObject*\r
3346match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)\r
3347{\r
3348 PyObject* result;\r
3349 PyObject* keys;\r
3350 Py_ssize_t index;\r
3351\r
3352 PyObject* def = Py_None;\r
3353 static char* kwlist[] = { "default", NULL };\r
3354 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))\r
3355 return NULL;\r
3356\r
3357 result = PyDict_New();\r
3358 if (!result || !self->pattern->groupindex)\r
3359 return result;\r
3360\r
3361 keys = PyMapping_Keys(self->pattern->groupindex);\r
3362 if (!keys)\r
3363 goto failed;\r
3364\r
3365 for (index = 0; index < PyList_GET_SIZE(keys); index++) {\r
3366 int status;\r
3367 PyObject* key;\r
3368 PyObject* value;\r
3369 key = PyList_GET_ITEM(keys, index);\r
3370 if (!key)\r
3371 goto failed;\r
3372 value = match_getslice(self, key, def);\r
3373 if (!value) {\r
3374 Py_DECREF(key);\r
3375 goto failed;\r
3376 }\r
3377 status = PyDict_SetItem(result, key, value);\r
3378 Py_DECREF(value);\r
3379 if (status < 0)\r
3380 goto failed;\r
3381 }\r
3382\r
3383 Py_DECREF(keys);\r
3384\r
3385 return result;\r
3386\r
3387failed:\r
3388 Py_XDECREF(keys);\r
3389 Py_DECREF(result);\r
3390 return NULL;\r
3391}\r
3392\r
3393static PyObject*\r
3394match_start(MatchObject* self, PyObject* args)\r
3395{\r
3396 Py_ssize_t index;\r
3397\r
3398 PyObject* index_ = Py_False; /* zero */\r
3399 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))\r
3400 return NULL;\r
3401\r
3402 index = match_getindex(self, index_);\r
3403\r
3404 if (index < 0 || index >= self->groups) {\r
3405 PyErr_SetString(\r
3406 PyExc_IndexError,\r
3407 "no such group"\r
3408 );\r
3409 return NULL;\r
3410 }\r
3411\r
3412 /* mark is -1 if group is undefined */\r
3413 return Py_BuildValue("i", self->mark[index*2]);\r
3414}\r
3415\r
3416static PyObject*\r
3417match_end(MatchObject* self, PyObject* args)\r
3418{\r
3419 Py_ssize_t index;\r
3420\r
3421 PyObject* index_ = Py_False; /* zero */\r
3422 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))\r
3423 return NULL;\r
3424\r
3425 index = match_getindex(self, index_);\r
3426\r
3427 if (index < 0 || index >= self->groups) {\r
3428 PyErr_SetString(\r
3429 PyExc_IndexError,\r
3430 "no such group"\r
3431 );\r
3432 return NULL;\r
3433 }\r
3434\r
3435 /* mark is -1 if group is undefined */\r
3436 return Py_BuildValue("i", self->mark[index*2+1]);\r
3437}\r
3438\r
3439LOCAL(PyObject*)\r
3440_pair(Py_ssize_t i1, Py_ssize_t i2)\r
3441{\r
3442 PyObject* pair;\r
3443 PyObject* item;\r
3444\r
3445 pair = PyTuple_New(2);\r
3446 if (!pair)\r
3447 return NULL;\r
3448\r
3449 item = PyInt_FromSsize_t(i1);\r
3450 if (!item)\r
3451 goto error;\r
3452 PyTuple_SET_ITEM(pair, 0, item);\r
3453\r
3454 item = PyInt_FromSsize_t(i2);\r
3455 if (!item)\r
3456 goto error;\r
3457 PyTuple_SET_ITEM(pair, 1, item);\r
3458\r
3459 return pair;\r
3460\r
3461 error:\r
3462 Py_DECREF(pair);\r
3463 return NULL;\r
3464}\r
3465\r
3466static PyObject*\r
3467match_span(MatchObject* self, PyObject* args)\r
3468{\r
3469 Py_ssize_t index;\r
3470\r
3471 PyObject* index_ = Py_False; /* zero */\r
3472 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))\r
3473 return NULL;\r
3474\r
3475 index = match_getindex(self, index_);\r
3476\r
3477 if (index < 0 || index >= self->groups) {\r
3478 PyErr_SetString(\r
3479 PyExc_IndexError,\r
3480 "no such group"\r
3481 );\r
3482 return NULL;\r
3483 }\r
3484\r
3485 /* marks are -1 if group is undefined */\r
3486 return _pair(self->mark[index*2], self->mark[index*2+1]);\r
3487}\r
3488\r
3489static PyObject*\r
3490match_regs(MatchObject* self)\r
3491{\r
3492 PyObject* regs;\r
3493 PyObject* item;\r
3494 Py_ssize_t index;\r
3495\r
3496 regs = PyTuple_New(self->groups);\r
3497 if (!regs)\r
3498 return NULL;\r
3499\r
3500 for (index = 0; index < self->groups; index++) {\r
3501 item = _pair(self->mark[index*2], self->mark[index*2+1]);\r
3502 if (!item) {\r
3503 Py_DECREF(regs);\r
3504 return NULL;\r
3505 }\r
3506 PyTuple_SET_ITEM(regs, index, item);\r
3507 }\r
3508\r
3509 Py_INCREF(regs);\r
3510 self->regs = regs;\r
3511\r
3512 return regs;\r
3513}\r
3514\r
3515static PyObject*\r
3516match_copy(MatchObject* self, PyObject *unused)\r
3517{\r
3518#ifdef USE_BUILTIN_COPY\r
3519 MatchObject* copy;\r
3520 Py_ssize_t slots, offset;\r
3521\r
3522 slots = 2 * (self->pattern->groups+1);\r
3523\r
3524 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);\r
3525 if (!copy)\r
3526 return NULL;\r
3527\r
3528 /* this value a constant, but any compiler should be able to\r
3529 figure that out all by itself */\r
3530 offset = offsetof(MatchObject, string);\r
3531\r
3532 Py_XINCREF(self->pattern);\r
3533 Py_XINCREF(self->string);\r
3534 Py_XINCREF(self->regs);\r
3535\r
3536 memcpy((char*) copy + offset, (char*) self + offset,\r
3537 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);\r
3538\r
3539 return (PyObject*) copy;\r
3540#else\r
3541 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");\r
3542 return NULL;\r
3543#endif\r
3544}\r
3545\r
3546static PyObject*\r
3547match_deepcopy(MatchObject* self, PyObject* memo)\r
3548{\r
3549#ifdef USE_BUILTIN_COPY\r
3550 MatchObject* copy;\r
3551\r
3552 copy = (MatchObject*) match_copy(self);\r
3553 if (!copy)\r
3554 return NULL;\r
3555\r
3556 if (!deepcopy((PyObject**) &copy->pattern, memo) ||\r
3557 !deepcopy(&copy->string, memo) ||\r
3558 !deepcopy(&copy->regs, memo)) {\r
3559 Py_DECREF(copy);\r
3560 return NULL;\r
3561 }\r
3562\r
3563#else\r
3564 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");\r
3565 return NULL;\r
3566#endif\r
3567}\r
3568\r
3569static PyMethodDef match_methods[] = {\r
3570 {"group", (PyCFunction) match_group, METH_VARARGS},\r
3571 {"start", (PyCFunction) match_start, METH_VARARGS},\r
3572 {"end", (PyCFunction) match_end, METH_VARARGS},\r
3573 {"span", (PyCFunction) match_span, METH_VARARGS},\r
3574 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},\r
3575 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},\r
3576 {"expand", (PyCFunction) match_expand, METH_O},\r
3577 {"__copy__", (PyCFunction) match_copy, METH_NOARGS},\r
3578 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},\r
3579 {NULL, NULL}\r
3580};\r
3581\r
3582static PyObject*\r
3583match_getattr(MatchObject* self, char* name)\r
3584{\r
3585 PyObject* res;\r
3586\r
3587 res = Py_FindMethod(match_methods, (PyObject*) self, name);\r
3588 if (res)\r
3589 return res;\r
3590\r
3591 PyErr_Clear();\r
3592\r
3593 if (!strcmp(name, "lastindex")) {\r
3594 if (self->lastindex >= 0)\r
3595 return Py_BuildValue("i", self->lastindex);\r
3596 Py_INCREF(Py_None);\r
3597 return Py_None;\r
3598 }\r
3599\r
3600 if (!strcmp(name, "lastgroup")) {\r
3601 if (self->pattern->indexgroup && self->lastindex >= 0) {\r
3602 PyObject* result = PySequence_GetItem(\r
3603 self->pattern->indexgroup, self->lastindex\r
3604 );\r
3605 if (result)\r
3606 return result;\r
3607 PyErr_Clear();\r
3608 }\r
3609 Py_INCREF(Py_None);\r
3610 return Py_None;\r
3611 }\r
3612\r
3613 if (!strcmp(name, "string")) {\r
3614 if (self->string) {\r
3615 Py_INCREF(self->string);\r
3616 return self->string;\r
3617 } else {\r
3618 Py_INCREF(Py_None);\r
3619 return Py_None;\r
3620 }\r
3621 }\r
3622\r
3623 if (!strcmp(name, "regs")) {\r
3624 if (self->regs) {\r
3625 Py_INCREF(self->regs);\r
3626 return self->regs;\r
3627 } else\r
3628 return match_regs(self);\r
3629 }\r
3630\r
3631 if (!strcmp(name, "re")) {\r
3632 Py_INCREF(self->pattern);\r
3633 return (PyObject*) self->pattern;\r
3634 }\r
3635\r
3636 if (!strcmp(name, "pos"))\r
3637 return Py_BuildValue("i", self->pos);\r
3638\r
3639 if (!strcmp(name, "endpos"))\r
3640 return Py_BuildValue("i", self->endpos);\r
3641\r
3642 PyErr_SetString(PyExc_AttributeError, name);\r
3643 return NULL;\r
3644}\r
3645\r
3646/* FIXME: implement setattr("string", None) as a special case (to\r
3647 detach the associated string, if any */\r
3648\r
3649statichere PyTypeObject Match_Type = {\r
3650 PyObject_HEAD_INIT(NULL)\r
3651 0, "_" SRE_MODULE ".SRE_Match",\r
3652 sizeof(MatchObject), sizeof(Py_ssize_t),\r
3653 (destructor)match_dealloc, /*tp_dealloc*/\r
3654 0, /*tp_print*/\r
3655 (getattrfunc)match_getattr /*tp_getattr*/\r
3656};\r
3657\r
3658static PyObject*\r
3659pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)\r
3660{\r
3661 /* create match object (from state object) */\r
3662\r
3663 MatchObject* match;\r
3664 Py_ssize_t i, j;\r
3665 char* base;\r
3666 int n;\r
3667\r
3668 if (status > 0) {\r
3669\r
3670 /* create match object (with room for extra group marks) */\r
3671 /* coverity[ampersand_in_size] */\r
3672 match = PyObject_NEW_VAR(MatchObject, &Match_Type,\r
3673 2*(pattern->groups+1));\r
3674 if (!match)\r
3675 return NULL;\r
3676\r
3677 Py_INCREF(pattern);\r
3678 match->pattern = pattern;\r
3679\r
3680 Py_INCREF(state->string);\r
3681 match->string = state->string;\r
3682\r
3683 match->regs = NULL;\r
3684 match->groups = pattern->groups+1;\r
3685\r
3686 /* fill in group slices */\r
3687\r
3688 base = (char*) state->beginning;\r
3689 n = state->charsize;\r
3690\r
3691 match->mark[0] = ((char*) state->start - base) / n;\r
3692 match->mark[1] = ((char*) state->ptr - base) / n;\r
3693\r
3694 for (i = j = 0; i < pattern->groups; i++, j+=2)\r
3695 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {\r
3696 match->mark[j+2] = ((char*) state->mark[j] - base) / n;\r
3697 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;\r
3698 } else\r
3699 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */\r
3700\r
3701 match->pos = state->pos;\r
3702 match->endpos = state->endpos;\r
3703\r
3704 match->lastindex = state->lastindex;\r
3705\r
3706 return (PyObject*) match;\r
3707\r
3708 } else if (status == 0) {\r
3709\r
3710 /* no match */\r
3711 Py_INCREF(Py_None);\r
3712 return Py_None;\r
3713\r
3714 }\r
3715\r
3716 /* internal error */\r
3717 pattern_error(status);\r
3718 return NULL;\r
3719}\r
3720\r
3721\r
3722/* -------------------------------------------------------------------- */\r
3723/* scanner methods (experimental) */\r
3724\r
3725static void\r
3726scanner_dealloc(ScannerObject* self)\r
3727{\r
3728 state_fini(&self->state);\r
3729 Py_XDECREF(self->pattern);\r
3730 PyObject_DEL(self);\r
3731}\r
3732\r
3733static PyObject*\r
3734scanner_match(ScannerObject* self, PyObject *unused)\r
3735{\r
3736 SRE_STATE* state = &self->state;\r
3737 PyObject* match;\r
3738 int status;\r
3739\r
3740 state_reset(state);\r
3741\r
3742 state->ptr = state->start;\r
3743\r
3744 if (state->charsize == 1) {\r
3745 status = sre_match(state, PatternObject_GetCode(self->pattern));\r
3746 } else {\r
3747#if defined(HAVE_UNICODE)\r
3748 status = sre_umatch(state, PatternObject_GetCode(self->pattern));\r
3749#endif\r
3750 }\r
3751 if (PyErr_Occurred())\r
3752 return NULL;\r
3753\r
3754 match = pattern_new_match((PatternObject*) self->pattern,\r
3755 state, status);\r
3756\r
3757 if (status == 0 || state->ptr == state->start)\r
3758 state->start = (void*) ((char*) state->ptr + state->charsize);\r
3759 else\r
3760 state->start = state->ptr;\r
3761\r
3762 return match;\r
3763}\r
3764\r
3765\r
3766static PyObject*\r
3767scanner_search(ScannerObject* self, PyObject *unused)\r
3768{\r
3769 SRE_STATE* state = &self->state;\r
3770 PyObject* match;\r
3771 int status;\r
3772\r
3773 state_reset(state);\r
3774\r
3775 state->ptr = state->start;\r
3776\r
3777 if (state->charsize == 1) {\r
3778 status = sre_search(state, PatternObject_GetCode(self->pattern));\r
3779 } else {\r
3780#if defined(HAVE_UNICODE)\r
3781 status = sre_usearch(state, PatternObject_GetCode(self->pattern));\r
3782#endif\r
3783 }\r
3784 if (PyErr_Occurred())\r
3785 return NULL;\r
3786\r
3787 match = pattern_new_match((PatternObject*) self->pattern,\r
3788 state, status);\r
3789\r
3790 if (status == 0 || state->ptr == state->start)\r
3791 state->start = (void*) ((char*) state->ptr + state->charsize);\r
3792 else\r
3793 state->start = state->ptr;\r
3794\r
3795 return match;\r
3796}\r
3797\r
3798static PyMethodDef scanner_methods[] = {\r
3799 {"match", (PyCFunction) scanner_match, METH_NOARGS},\r
3800 {"search", (PyCFunction) scanner_search, METH_NOARGS},\r
3801 {NULL, NULL}\r
3802};\r
3803\r
3804static PyObject*\r
3805scanner_getattr(ScannerObject* self, char* name)\r
3806{\r
3807 PyObject* res;\r
3808\r
3809 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);\r
3810 if (res)\r
3811 return res;\r
3812\r
3813 PyErr_Clear();\r
3814\r
3815 /* attributes */\r
3816 if (!strcmp(name, "pattern")) {\r
3817 Py_INCREF(self->pattern);\r
3818 return self->pattern;\r
3819 }\r
3820\r
3821 PyErr_SetString(PyExc_AttributeError, name);\r
3822 return NULL;\r
3823}\r
3824\r
3825statichere PyTypeObject Scanner_Type = {\r
3826 PyObject_HEAD_INIT(NULL)\r
3827 0, "_" SRE_MODULE ".SRE_Scanner",\r
3828 sizeof(ScannerObject), 0,\r
3829 (destructor)scanner_dealloc, /*tp_dealloc*/\r
3830 0, /*tp_print*/\r
3831 (getattrfunc)scanner_getattr, /*tp_getattr*/\r
3832};\r
3833\r
3834static PyObject*\r
3835pattern_scanner(PatternObject* pattern, PyObject* args)\r
3836{\r
3837 /* create search state object */\r
3838\r
3839 ScannerObject* self;\r
3840\r
3841 PyObject* string;\r
3842 Py_ssize_t start = 0;\r
3843 Py_ssize_t end = PY_SSIZE_T_MAX;\r
3844 if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end))\r
3845 return NULL;\r
3846\r
3847 /* create scanner object */\r
3848 self = PyObject_NEW(ScannerObject, &Scanner_Type);\r
3849 if (!self)\r
3850 return NULL;\r
3851 self->pattern = NULL;\r
3852\r
3853 string = state_init(&self->state, pattern, string, start, end);\r
3854 if (!string) {\r
3855 Py_DECREF(self);\r
3856 return NULL;\r
3857 }\r
3858\r
3859 Py_INCREF(pattern);\r
3860 self->pattern = (PyObject*) pattern;\r
3861\r
3862 return (PyObject*) self;\r
3863}\r
3864\r
3865static PyMethodDef _functions[] = {\r
3866 {"compile", _compile, METH_VARARGS},\r
3867 {"getcodesize", sre_codesize, METH_NOARGS},\r
3868 {"getlower", sre_getlower, METH_VARARGS},\r
3869 {NULL, NULL}\r
3870};\r
3871\r
3872#if PY_VERSION_HEX < 0x02030000\r
3873DL_EXPORT(void) init_sre(void)\r
3874#else\r
3875PyMODINIT_FUNC init_sre(void)\r
3876#endif\r
3877{\r
3878 PyObject* m;\r
3879 PyObject* d;\r
3880 PyObject* x;\r
3881\r
3882 /* Patch object types */\r
3883 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||\r
3884 PyType_Ready(&Scanner_Type))\r
3885 return;\r
3886\r
3887 m = Py_InitModule("_" SRE_MODULE, _functions);\r
3888 if (m == NULL)\r
3889 return;\r
3890 d = PyModule_GetDict(m);\r
3891\r
3892 x = PyInt_FromLong(SRE_MAGIC);\r
3893 if (x) {\r
3894 PyDict_SetItemString(d, "MAGIC", x);\r
3895 Py_DECREF(x);\r
3896 }\r
3897\r
3898 x = PyInt_FromLong(sizeof(SRE_CODE));\r
3899 if (x) {\r
3900 PyDict_SetItemString(d, "CODESIZE", x);\r
3901 Py_DECREF(x);\r
3902 }\r
3903\r
3904 x = PyString_FromString(copyright);\r
3905 if (x) {\r
3906 PyDict_SetItemString(d, "copyright", x);\r
3907 Py_DECREF(x);\r
3908 }\r
3909}\r
3910\r
3911#endif /* !defined(SRE_RECURSIVE) */\r
3912\r
3913/* vim:ts=4:sw=4:et\r
3914*/\r