]> git.proxmox.com Git - mirror_edk2.git/blame - AppPkg/Applications/Python/PyMod-2.7.2/Modules/_sre.c
StdLib: Improve robustness of stat() and make basename() a public function.
[mirror_edk2.git] / AppPkg / Applications / Python / PyMod-2.7.2 / Modules / _sre.c
CommitLineData
6c0ebd5f 1/*\r
2 * Secret Labs' Regular Expression Engine\r
3 *\r
4 * regular expression matching engine\r
41b152c5 5\r
6 Copyright (c) 2011, Intel Corporation. All rights reserved.<BR>\r
7 This program and the accompanying materials are licensed and made available under\r
8 the terms and conditions of the BSD License that accompanies this distribution.\r
9 The full text of the license may be found at\r
10 http://opensource.org/licenses/bsd-license.\r
11\r
12 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
13 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
6c0ebd5f 14 *\r
15 * partial history:\r
16 * 1999-10-24 fl created (based on existing template matcher code)\r
17 * 2000-03-06 fl first alpha, sort of\r
18 * 2000-08-01 fl fixes for 1.6b1\r
19 * 2000-08-07 fl use PyOS_CheckStack() if available\r
20 * 2000-09-20 fl added expand method\r
21 * 2001-03-20 fl lots of fixes for 2.1b2\r
22 * 2001-04-15 fl export copyright as Python attribute, not global\r
23 * 2001-04-28 fl added __copy__ methods (work in progress)\r
24 * 2001-05-14 fl fixes for 1.5.2 compatibility\r
25 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)\r
26 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)\r
27 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1\r
28 * 2001-10-21 fl added sub/subn primitive\r
29 * 2001-10-24 fl added finditer primitive (for 2.2 only)\r
30 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)\r
31 * 2002-11-09 fl fixed empty sub/subn return type\r
32 * 2003-04-18 mvl fully support 4-byte codes\r
33 * 2003-10-17 gn implemented non recursive scheme\r
34 *\r
35 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.\r
36 *\r
37 * This version of the SRE library can be redistributed under CNRI's\r
38 * Python 1.6 license. For any other use, please contact Secret Labs\r
39 * AB (info@pythonware.com).\r
40 *\r
41 * Portions of this engine have been developed in cooperation with\r
42 * CNRI. Hewlett-Packard provided funding for 1.6 integration and\r
43 * other compatibility work.\r
44 */\r
45\r
46/* Get rid of these macros to prevent collisions between EFI and Python in this file. */\r
47#undef RETURN_ERROR\r
48#undef RETURN_SUCCESS\r
49\r
50#ifndef SRE_RECURSIVE\r
51\r
52static char copyright[] =\r
53 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";\r
54\r
55#define PY_SSIZE_T_CLEAN\r
56\r
57#include "Python.h"\r
58#include "structmember.h" /* offsetof */\r
59\r
60#include "sre.h"\r
61\r
62#include <ctype.h>\r
63\r
64/* name of this module, minus the leading underscore */\r
65#if !defined(SRE_MODULE)\r
66#define SRE_MODULE "sre"\r
67#endif\r
68\r
69#define SRE_PY_MODULE "re"\r
70\r
71/* defining this one enables tracing */\r
72#undef VERBOSE\r
73\r
74#if PY_VERSION_HEX >= 0x01060000\r
75#if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)\r
76/* defining this enables unicode support (default under 1.6a1 and later) */\r
77#define HAVE_UNICODE\r
78#endif\r
79#endif\r
80\r
81/* -------------------------------------------------------------------- */\r
82/* optional features */\r
83\r
84/* enables fast searching */\r
85#define USE_FAST_SEARCH\r
86\r
87/* enables aggressive inlining (always on for Visual C) */\r
88#undef USE_INLINE\r
89\r
90/* enables copy/deepcopy handling (work in progress) */\r
91#undef USE_BUILTIN_COPY\r
92\r
93#if PY_VERSION_HEX < 0x01060000\r
94#define PyObject_DEL(op) PyMem_DEL((op))\r
95#endif\r
96\r
97/* -------------------------------------------------------------------- */\r
98\r
99#if defined(_MSC_VER)\r
100#pragma optimize("gt", on) /* doesn't seem to make much difference... */\r
101#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */\r
102/* fastest possible local call under MSVC */\r
103#define LOCAL(type) static __inline type __fastcall\r
104#elif defined(USE_INLINE)\r
105#define LOCAL(type) static inline type\r
106#else\r
107#define LOCAL(type) static type\r
108#endif\r
109\r
110/* error codes */\r
111#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */\r
112#define SRE_ERROR_STATE -2 /* illegal state */\r
113#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */\r
114#define SRE_ERROR_MEMORY -9 /* out of memory */\r
115#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */\r
116\r
117#if defined(VERBOSE)\r
118#define TRACE(v) printf v\r
119#else\r
120#define TRACE(v)\r
121#endif\r
122\r
123/* -------------------------------------------------------------------- */\r
124/* search engine state */\r
125\r
126/* default character predicates (run sre_chars.py to regenerate tables) */\r
127\r
128#define SRE_DIGIT_MASK 1\r
129#define SRE_SPACE_MASK 2\r
130#define SRE_LINEBREAK_MASK 4\r
131#define SRE_ALNUM_MASK 8\r
132#define SRE_WORD_MASK 16\r
133\r
134/* FIXME: this assumes ASCII. create tables in init_sre() instead */\r
135\r
136static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,\r
1372, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,\r
1380, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,\r
13925, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\r
14024, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,\r
1410, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\r
14224, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };\r
143\r
144static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,\r
14510, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,\r
14627, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,\r
14744, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,\r
14861, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,\r
149108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,\r
150122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,\r
151106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,\r
152120, 121, 122, 123, 124, 125, 126, 127 };\r
153\r
154#define SRE_IS_DIGIT(ch)\\r
155 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)\r
156#define SRE_IS_SPACE(ch)\\r
157 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)\r
158#define SRE_IS_LINEBREAK(ch)\\r
159 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)\r
160#define SRE_IS_ALNUM(ch)\\r
161 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)\r
162#define SRE_IS_WORD(ch)\\r
163 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)\r
164\r
165static unsigned int sre_lower(unsigned int ch)\r
166{\r
167 return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);\r
168}\r
169\r
170/* locale-specific character predicates */\r
171/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids\r
172 * warnings when c's type supports only numbers < N+1 */\r
173#define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)\r
174#define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)\r
175#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')\r
176#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)\r
177#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')\r
178\r
179static unsigned int sre_lower_locale(unsigned int ch)\r
180{\r
181 return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);\r
182}\r
183\r
184/* unicode-specific character predicates */\r
185\r
186#if defined(HAVE_UNICODE)\r
187\r
188#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL((Py_UNICODE)(ch))\r
189#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))\r
190#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))\r
191#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))\r
192#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')\r
193\r
194static unsigned int sre_lower_unicode(unsigned int ch)\r
195{\r
196 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));\r
197}\r
198\r
199#endif\r
200\r
201LOCAL(int)\r
202sre_category(SRE_CODE category, unsigned int ch)\r
203{\r
204 switch (category) {\r
205\r
206 case SRE_CATEGORY_DIGIT:\r
207 return SRE_IS_DIGIT(ch);\r
208 case SRE_CATEGORY_NOT_DIGIT:\r
209 return !SRE_IS_DIGIT(ch);\r
210 case SRE_CATEGORY_SPACE:\r
211 return SRE_IS_SPACE(ch);\r
212 case SRE_CATEGORY_NOT_SPACE:\r
213 return !SRE_IS_SPACE(ch);\r
214 case SRE_CATEGORY_WORD:\r
215 return SRE_IS_WORD(ch);\r
216 case SRE_CATEGORY_NOT_WORD:\r
217 return !SRE_IS_WORD(ch);\r
218 case SRE_CATEGORY_LINEBREAK:\r
219 return SRE_IS_LINEBREAK(ch);\r
220 case SRE_CATEGORY_NOT_LINEBREAK:\r
221 return !SRE_IS_LINEBREAK(ch);\r
222\r
223 case SRE_CATEGORY_LOC_WORD:\r
224 return SRE_LOC_IS_WORD(ch);\r
225 case SRE_CATEGORY_LOC_NOT_WORD:\r
226 return !SRE_LOC_IS_WORD(ch);\r
227\r
228#if defined(HAVE_UNICODE)\r
229 case SRE_CATEGORY_UNI_DIGIT:\r
230 return SRE_UNI_IS_DIGIT(ch);\r
231 case SRE_CATEGORY_UNI_NOT_DIGIT:\r
232 return !SRE_UNI_IS_DIGIT(ch);\r
233 case SRE_CATEGORY_UNI_SPACE:\r
234 return SRE_UNI_IS_SPACE(ch);\r
235 case SRE_CATEGORY_UNI_NOT_SPACE:\r
236 return !SRE_UNI_IS_SPACE(ch);\r
237 case SRE_CATEGORY_UNI_WORD:\r
238 return SRE_UNI_IS_WORD(ch);\r
239 case SRE_CATEGORY_UNI_NOT_WORD:\r
240 return !SRE_UNI_IS_WORD(ch);\r
241 case SRE_CATEGORY_UNI_LINEBREAK:\r
242 return SRE_UNI_IS_LINEBREAK(ch);\r
243 case SRE_CATEGORY_UNI_NOT_LINEBREAK:\r
244 return !SRE_UNI_IS_LINEBREAK(ch);\r
245#else\r
246 case SRE_CATEGORY_UNI_DIGIT:\r
247 return SRE_IS_DIGIT(ch);\r
248 case SRE_CATEGORY_UNI_NOT_DIGIT:\r
249 return !SRE_IS_DIGIT(ch);\r
250 case SRE_CATEGORY_UNI_SPACE:\r
251 return SRE_IS_SPACE(ch);\r
252 case SRE_CATEGORY_UNI_NOT_SPACE:\r
253 return !SRE_IS_SPACE(ch);\r
254 case SRE_CATEGORY_UNI_WORD:\r
255 return SRE_LOC_IS_WORD(ch);\r
256 case SRE_CATEGORY_UNI_NOT_WORD:\r
257 return !SRE_LOC_IS_WORD(ch);\r
258 case SRE_CATEGORY_UNI_LINEBREAK:\r
259 return SRE_IS_LINEBREAK(ch);\r
260 case SRE_CATEGORY_UNI_NOT_LINEBREAK:\r
261 return !SRE_IS_LINEBREAK(ch);\r
262#endif\r
263 }\r
264 return 0;\r
265}\r
266\r
267/* helpers */\r
268\r
269static void\r
270data_stack_dealloc(SRE_STATE* state)\r
271{\r
272 if (state->data_stack) {\r
273 PyMem_FREE(state->data_stack);\r
274 state->data_stack = NULL;\r
275 }\r
276 state->data_stack_size = state->data_stack_base = 0;\r
277}\r
278\r
279static int\r
280data_stack_grow(SRE_STATE* state, Py_ssize_t size)\r
281{\r
282 Py_ssize_t minsize, cursize;\r
283 minsize = state->data_stack_base+size;\r
284 cursize = state->data_stack_size;\r
285 if (cursize < minsize) {\r
286 void* stack;\r
287 cursize = minsize+minsize/4+1024;\r
288 TRACE(("allocate/grow stack %d\n", cursize));\r
289 stack = PyMem_REALLOC(state->data_stack, cursize);\r
290 if (!stack) {\r
291 data_stack_dealloc(state);\r
292 return SRE_ERROR_MEMORY;\r
293 }\r
294 state->data_stack = (char *)stack;\r
295 state->data_stack_size = cursize;\r
296 }\r
297 return 0;\r
298}\r
299\r
300/* generate 8-bit version */\r
301\r
302#define SRE_CHAR unsigned char\r
303#define SRE_AT sre_at\r
304#define SRE_COUNT sre_count\r
305#define SRE_CHARSET sre_charset\r
306#define SRE_INFO sre_info\r
307#define SRE_MATCH sre_match\r
308#define SRE_MATCH_CONTEXT sre_match_context\r
309#define SRE_SEARCH sre_search\r
310#define SRE_LITERAL_TEMPLATE sre_literal_template\r
311\r
312#if defined(HAVE_UNICODE)\r
313\r
314#define SRE_RECURSIVE\r
315#include "_sre.c"\r
316#undef SRE_RECURSIVE\r
317\r
318#undef SRE_LITERAL_TEMPLATE\r
319#undef SRE_SEARCH\r
320#undef SRE_MATCH\r
321#undef SRE_MATCH_CONTEXT\r
322#undef SRE_INFO\r
323#undef SRE_CHARSET\r
324#undef SRE_COUNT\r
325#undef SRE_AT\r
326#undef SRE_CHAR\r
327\r
328/* generate 16-bit unicode version */\r
329\r
330#define SRE_CHAR Py_UNICODE\r
331#define SRE_AT sre_uat\r
332#define SRE_COUNT sre_ucount\r
333#define SRE_CHARSET sre_ucharset\r
334#define SRE_INFO sre_uinfo\r
335#define SRE_MATCH sre_umatch\r
336#define SRE_MATCH_CONTEXT sre_umatch_context\r
337#define SRE_SEARCH sre_usearch\r
338#define SRE_LITERAL_TEMPLATE sre_uliteral_template\r
339#endif\r
340\r
341#endif /* SRE_RECURSIVE */\r
342\r
343/* -------------------------------------------------------------------- */\r
344/* String matching engine */\r
345\r
346/* the following section is compiled twice, with different character\r
347 settings */\r
348\r
349LOCAL(int)\r
350SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)\r
351{\r
352 /* check if pointer is at given position */\r
353\r
354 Py_ssize_t thisp, thatp;\r
355\r
356 switch (at) {\r
357\r
358 case SRE_AT_BEGINNING:\r
359 case SRE_AT_BEGINNING_STRING:\r
360 return ((void*) ptr == state->beginning);\r
361\r
362 case SRE_AT_BEGINNING_LINE:\r
363 return ((void*) ptr == state->beginning ||\r
364 SRE_IS_LINEBREAK((int) ptr[-1]));\r
365\r
366 case SRE_AT_END:\r
367 return (((void*) (ptr+1) == state->end &&\r
368 SRE_IS_LINEBREAK((int) ptr[0])) ||\r
369 ((void*) ptr == state->end));\r
370\r
371 case SRE_AT_END_LINE:\r
372 return ((void*) ptr == state->end ||\r
373 SRE_IS_LINEBREAK((int) ptr[0]));\r
374\r
375 case SRE_AT_END_STRING:\r
376 return ((void*) ptr == state->end);\r
377\r
378 case SRE_AT_BOUNDARY:\r
379 if (state->beginning == state->end)\r
380 return 0;\r
381 thatp = ((void*) ptr > state->beginning) ?\r
382 SRE_IS_WORD((int) ptr[-1]) : 0;\r
383 thisp = ((void*) ptr < state->end) ?\r
384 SRE_IS_WORD((int) ptr[0]) : 0;\r
385 return thisp != thatp;\r
386\r
387 case SRE_AT_NON_BOUNDARY:\r
388 if (state->beginning == state->end)\r
389 return 0;\r
390 thatp = ((void*) ptr > state->beginning) ?\r
391 SRE_IS_WORD((int) ptr[-1]) : 0;\r
392 thisp = ((void*) ptr < state->end) ?\r
393 SRE_IS_WORD((int) ptr[0]) : 0;\r
394 return thisp == thatp;\r
395\r
396 case SRE_AT_LOC_BOUNDARY:\r
397 if (state->beginning == state->end)\r
398 return 0;\r
399 thatp = ((void*) ptr > state->beginning) ?\r
400 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;\r
401 thisp = ((void*) ptr < state->end) ?\r
402 SRE_LOC_IS_WORD((int) ptr[0]) : 0;\r
403 return thisp != thatp;\r
404\r
405 case SRE_AT_LOC_NON_BOUNDARY:\r
406 if (state->beginning == state->end)\r
407 return 0;\r
408 thatp = ((void*) ptr > state->beginning) ?\r
409 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;\r
410 thisp = ((void*) ptr < state->end) ?\r
411 SRE_LOC_IS_WORD((int) ptr[0]) : 0;\r
412 return thisp == thatp;\r
413\r
414#if defined(HAVE_UNICODE)\r
415 case SRE_AT_UNI_BOUNDARY:\r
416 if (state->beginning == state->end)\r
417 return 0;\r
418 thatp = ((void*) ptr > state->beginning) ?\r
419 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;\r
420 thisp = ((void*) ptr < state->end) ?\r
421 SRE_UNI_IS_WORD((int) ptr[0]) : 0;\r
422 return thisp != thatp;\r
423\r
424 case SRE_AT_UNI_NON_BOUNDARY:\r
425 if (state->beginning == state->end)\r
426 return 0;\r
427 thatp = ((void*) ptr > state->beginning) ?\r
428 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;\r
429 thisp = ((void*) ptr < state->end) ?\r
430 SRE_UNI_IS_WORD((int) ptr[0]) : 0;\r
431 return thisp == thatp;\r
432#endif\r
433\r
434 }\r
435\r
436 return 0;\r
437}\r
438\r
439LOCAL(int)\r
440SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)\r
441{\r
442 /* check if character is a member of the given set */\r
443\r
444 int ok = 1;\r
445\r
446 for (;;) {\r
447 switch (*set++) {\r
448\r
449 case SRE_OP_FAILURE:\r
450 return !ok;\r
451\r
452 case SRE_OP_LITERAL:\r
453 /* <LITERAL> <code> */\r
454 if (ch == set[0])\r
455 return ok;\r
456 set++;\r
457 break;\r
458\r
459 case SRE_OP_CATEGORY:\r
460 /* <CATEGORY> <code> */\r
461 if (sre_category(set[0], (int) ch))\r
462 return ok;\r
463 set += 1;\r
464 break;\r
465\r
466 case SRE_OP_CHARSET:\r
467 if (sizeof(SRE_CODE) == 2) {\r
468 /* <CHARSET> <bitmap> (16 bits per code word) */\r
469 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))\r
470 return ok;\r
471 set += 16;\r
472 }\r
473 else {\r
474 /* <CHARSET> <bitmap> (32 bits per code word) */\r
475 if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))\r
476 return ok;\r
477 set += 8;\r
478 }\r
479 break;\r
480\r
481 case SRE_OP_RANGE:\r
482 /* <RANGE> <lower> <upper> */\r
483 if (set[0] <= ch && ch <= set[1])\r
484 return ok;\r
485 set += 2;\r
486 break;\r
487\r
488 case SRE_OP_NEGATE:\r
489 ok = !ok;\r
490 break;\r
491\r
492 case SRE_OP_BIGCHARSET:\r
493 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */\r
494 {\r
495 Py_ssize_t count, block;\r
496 count = *(set++);\r
497\r
498 if (sizeof(SRE_CODE) == 2) {\r
499 block = ((unsigned char*)set)[ch >> 8];\r
500 set += 128;\r
501 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))\r
502 return ok;\r
503 set += count*16;\r
504 }\r
505 else {\r
506 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids\r
507 * warnings when c's type supports only numbers < N+1 */\r
508 if (!(ch & ~65535))\r
509 block = ((unsigned char*)set)[ch >> 8];\r
510 else\r
511 block = -1;\r
512 set += 64;\r
513 if (block >=0 &&\r
514 (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))\r
515 return ok;\r
516 set += count*8;\r
517 }\r
518 break;\r
519 }\r
520\r
521 default:\r
522 /* internal error -- there's not much we can do about it\r
523 here, so let's just pretend it didn't match... */\r
524 return 0;\r
525 }\r
526 }\r
527}\r
528\r
529LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);\r
530\r
531LOCAL(Py_ssize_t)\r
532SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)\r
533{\r
534 SRE_CODE chr;\r
535 SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;\r
536 SRE_CHAR* end = (SRE_CHAR *)state->end;\r
537 Py_ssize_t i;\r
538\r
539 /* adjust end */\r
540 if (maxcount < end - ptr && maxcount != 65535)\r
541 end = ptr + maxcount;\r
542\r
543 switch (pattern[0]) {\r
544\r
545 case SRE_OP_IN:\r
546 /* repeated set */\r
547 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));\r
548 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))\r
549 ptr++;\r
550 break;\r
551\r
552 case SRE_OP_ANY:\r
553 /* repeated dot wildcard. */\r
554 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));\r
555 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))\r
556 ptr++;\r
557 break;\r
558\r
559 case SRE_OP_ANY_ALL:\r
560 /* repeated dot wildcard. skip to the end of the target\r
561 string, and backtrack from there */\r
562 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));\r
563 ptr = end;\r
564 break;\r
565\r
566 case SRE_OP_LITERAL:\r
567 /* repeated literal */\r
568 chr = pattern[1];\r
569 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));\r
570 while (ptr < end && (SRE_CODE) *ptr == chr)\r
571 ptr++;\r
572 break;\r
573\r
574 case SRE_OP_LITERAL_IGNORE:\r
575 /* repeated literal */\r
576 chr = pattern[1];\r
577 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));\r
578 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)\r
579 ptr++;\r
580 break;\r
581\r
582 case SRE_OP_NOT_LITERAL:\r
583 /* repeated non-literal */\r
584 chr = pattern[1];\r
585 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));\r
586 while (ptr < end && (SRE_CODE) *ptr != chr)\r
587 ptr++;\r
588 break;\r
589\r
590 case SRE_OP_NOT_LITERAL_IGNORE:\r
591 /* repeated non-literal */\r
592 chr = pattern[1];\r
593 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));\r
594 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)\r
595 ptr++;\r
596 break;\r
597\r
598 default:\r
599 /* repeated single character pattern */\r
600 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));\r
601 while ((SRE_CHAR*) state->ptr < end) {\r
602 i = SRE_MATCH(state, pattern);\r
603 if (i < 0)\r
604 return i;\r
605 if (!i)\r
606 break;\r
607 }\r
608 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,\r
609 (SRE_CHAR*) state->ptr - ptr));\r
610 return (SRE_CHAR*) state->ptr - ptr;\r
611 }\r
612\r
613 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));\r
614 return ptr - (SRE_CHAR*) state->ptr;\r
615}\r
616\r
617#if 0 /* not used in this release */\r
618LOCAL(int)\r
619SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)\r
620{\r
621 /* check if an SRE_OP_INFO block matches at the current position.\r
622 returns the number of SRE_CODE objects to skip if successful, 0\r
623 if no match */\r
624\r
625 SRE_CHAR* end = state->end;\r
626 SRE_CHAR* ptr = state->ptr;\r
627 Py_ssize_t i;\r
628\r
629 /* check minimal length */\r
630 if (pattern[3] && (end - ptr) < pattern[3])\r
631 return 0;\r
632\r
633 /* check known prefix */\r
634 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {\r
635 /* <length> <skip> <prefix data> <overlap data> */\r
636 for (i = 0; i < pattern[5]; i++)\r
637 if ((SRE_CODE) ptr[i] != pattern[7 + i])\r
638 return 0;\r
639 return pattern[0] + 2 * pattern[6];\r
640 }\r
641 return pattern[0];\r
642}\r
643#endif\r
644\r
645/* The macros below should be used to protect recursive SRE_MATCH()\r
646 * calls that *failed* and do *not* return immediately (IOW, those\r
647 * that will backtrack). Explaining:\r
648 *\r
649 * - Recursive SRE_MATCH() returned true: that's usually a success\r
650 * (besides atypical cases like ASSERT_NOT), therefore there's no\r
651 * reason to restore lastmark;\r
652 *\r
653 * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()\r
654 * is returning to the caller: If the current SRE_MATCH() is the\r
655 * top function of the recursion, returning false will be a matching\r
656 * failure, and it doesn't matter where lastmark is pointing to.\r
657 * If it's *not* the top function, it will be a recursive SRE_MATCH()\r
658 * failure by itself, and the calling SRE_MATCH() will have to deal\r
659 * with the failure by the same rules explained here (it will restore\r
660 * lastmark by itself if necessary);\r
661 *\r
662 * - Recursive SRE_MATCH() returned false, and will continue the\r
663 * outside 'for' loop: must be protected when breaking, since the next\r
664 * OP could potentially depend on lastmark;\r
665 *\r
666 * - Recursive SRE_MATCH() returned false, and will be called again\r
667 * inside a local for/while loop: must be protected between each\r
668 * loop iteration, since the recursive SRE_MATCH() could do anything,\r
669 * and could potentially depend on lastmark.\r
670 *\r
671 * For more information, check the discussion at SF patch #712900.\r
672 */\r
673#define LASTMARK_SAVE() \\r
674 do { \\r
675 ctx->lastmark = state->lastmark; \\r
676 ctx->lastindex = state->lastindex; \\r
677 } while (0)\r
678#define LASTMARK_RESTORE() \\r
679 do { \\r
680 state->lastmark = ctx->lastmark; \\r
681 state->lastindex = ctx->lastindex; \\r
682 } while (0)\r
683\r
684#define RETURN_ERROR(i) do { return i; } while(0)\r
685#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)\r
686#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)\r
687\r
688#define RETURN_ON_ERROR(i) \\r
689 do { if (i < 0) RETURN_ERROR(i); } while (0)\r
690#define RETURN_ON_SUCCESS(i) \\r
691 do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)\r
692#define RETURN_ON_FAILURE(i) \\r
693 do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)\r
694\r
695#define SFY(x) #x\r
696\r
697#define DATA_STACK_ALLOC(state, type, ptr) \\r
698do { \\r
699 alloc_pos = state->data_stack_base; \\r
700 TRACE(("allocating %s in %d (%d)\n", \\r
701 SFY(type), alloc_pos, sizeof(type))); \\r
702 if (state->data_stack_size < alloc_pos+sizeof(type)) { \\r
703 int j = data_stack_grow(state, sizeof(type)); \\r
704 if (j < 0) return j; \\r
705 if (ctx_pos != -1) \\r
706 DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \\r
707 } \\r
708 ptr = (type*)(state->data_stack+alloc_pos); \\r
709 state->data_stack_base += sizeof(type); \\r
710} while (0)\r
711\r
712#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \\r
713do { \\r
714 TRACE(("looking up %s at %d\n", SFY(type), pos)); \\r
715 ptr = (type*)(state->data_stack+pos); \\r
716} while (0)\r
717\r
718#define DATA_STACK_PUSH(state, data, size) \\r
719do { \\r
720 TRACE(("copy data in %p to %d (%d)\n", \\r
721 data, state->data_stack_base, size)); \\r
722 if (state->data_stack_size < state->data_stack_base+size) { \\r
723 int j = data_stack_grow(state, size); \\r
724 if (j < 0) return j; \\r
725 if (ctx_pos != -1) \\r
726 DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \\r
727 } \\r
728 memcpy(state->data_stack+state->data_stack_base, data, size); \\r
729 state->data_stack_base += size; \\r
730} while (0)\r
731\r
732#define DATA_STACK_POP(state, data, size, discard) \\r
733do { \\r
734 TRACE(("copy data to %p from %d (%d)\n", \\r
735 data, state->data_stack_base-size, size)); \\r
736 memcpy(data, state->data_stack+state->data_stack_base-size, size); \\r
737 if (discard) \\r
738 state->data_stack_base -= size; \\r
739} while (0)\r
740\r
741#define DATA_STACK_POP_DISCARD(state, size) \\r
742do { \\r
743 TRACE(("discard data from %d (%d)\n", \\r
744 state->data_stack_base-size, size)); \\r
745 state->data_stack_base -= size; \\r
746} while(0)\r
747\r
748#define DATA_PUSH(x) \\r
749 DATA_STACK_PUSH(state, (x), sizeof(*(x)))\r
750#define DATA_POP(x) \\r
751 DATA_STACK_POP(state, (x), sizeof(*(x)), 1)\r
752#define DATA_POP_DISCARD(x) \\r
753 DATA_STACK_POP_DISCARD(state, sizeof(*(x)))\r
754#define DATA_ALLOC(t,p) \\r
755 DATA_STACK_ALLOC(state, t, p)\r
756#define DATA_LOOKUP_AT(t,p,pos) \\r
757 DATA_STACK_LOOKUP_AT(state,t,p,pos)\r
758\r
759#define MARK_PUSH(lastmark) \\r
760 do if (lastmark > 0) { \\r
761 i = lastmark; /* ctx->lastmark may change if reallocated */ \\r
762 DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \\r
763 } while (0)\r
764#define MARK_POP(lastmark) \\r
765 do if (lastmark > 0) { \\r
766 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \\r
767 } while (0)\r
768#define MARK_POP_KEEP(lastmark) \\r
769 do if (lastmark > 0) { \\r
770 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \\r
771 } while (0)\r
772#define MARK_POP_DISCARD(lastmark) \\r
773 do if (lastmark > 0) { \\r
774 DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \\r
775 } while (0)\r
776\r
777#define JUMP_NONE 0\r
778#define JUMP_MAX_UNTIL_1 1\r
779#define JUMP_MAX_UNTIL_2 2\r
780#define JUMP_MAX_UNTIL_3 3\r
781#define JUMP_MIN_UNTIL_1 4\r
782#define JUMP_MIN_UNTIL_2 5\r
783#define JUMP_MIN_UNTIL_3 6\r
784#define JUMP_REPEAT 7\r
785#define JUMP_REPEAT_ONE_1 8\r
786#define JUMP_REPEAT_ONE_2 9\r
787#define JUMP_MIN_REPEAT_ONE 10\r
788#define JUMP_BRANCH 11\r
789#define JUMP_ASSERT 12\r
790#define JUMP_ASSERT_NOT 13\r
791\r
792#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \\r
793 DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \\r
794 nextctx->last_ctx_pos = ctx_pos; \\r
795 nextctx->jump = jumpvalue; \\r
796 nextctx->pattern = nextpattern; \\r
797 ctx_pos = alloc_pos; \\r
798 ctx = nextctx; \\r
799 goto entrance; \\r
800 jumplabel: \\r
801 while (0) /* gcc doesn't like labels at end of scopes */ \\r
802\r
803typedef struct {\r
804 Py_ssize_t last_ctx_pos;\r
805 Py_ssize_t jump;\r
806 SRE_CHAR* ptr;\r
807 SRE_CODE* pattern;\r
808 Py_ssize_t count;\r
809 Py_ssize_t lastmark;\r
810 Py_ssize_t lastindex;\r
811 union {\r
812 SRE_CODE chr;\r
813 SRE_REPEAT* rep;\r
814 } u;\r
815} SRE_MATCH_CONTEXT;\r
816\r
817/* check if string matches the given pattern. returns <0 for\r
818 error, 0 for failure, and 1 for success */\r
819LOCAL(Py_ssize_t)\r
820SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)\r
821{\r
822 SRE_CHAR* end = (SRE_CHAR *)state->end;\r
823 Py_ssize_t alloc_pos, ctx_pos = -1;\r
824 Py_ssize_t i, ret = 0;\r
825 Py_ssize_t jump;\r
826 unsigned int sigcount=0;\r
827\r
828 SRE_MATCH_CONTEXT* ctx;\r
829 SRE_MATCH_CONTEXT* nextctx;\r
830\r
831 TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));\r
832\r
833 DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);\r
834 ctx->last_ctx_pos = -1;\r
835 ctx->jump = JUMP_NONE;\r
836 ctx->pattern = pattern;\r
837 ctx_pos = alloc_pos;\r
838\r
839entrance:\r
840\r
841 ctx->ptr = (SRE_CHAR *)state->ptr;\r
842\r
843 if (ctx->pattern[0] == SRE_OP_INFO) {\r
844 /* optimization info block */\r
845 /* <INFO> <1=skip> <2=flags> <3=min> ... */\r
846 if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {\r
847 TRACE(("reject (got %d chars, need %d)\n",\r
848 (end - ctx->ptr), ctx->pattern[3]));\r
849 RETURN_FAILURE;\r
850 }\r
851 ctx->pattern += ctx->pattern[1] + 1;\r
852 }\r
853\r
854 for (;;) {\r
855 ++sigcount;\r
856 if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())\r
857 RETURN_ERROR(SRE_ERROR_INTERRUPTED);\r
858\r
859 switch (*ctx->pattern++) {\r
860\r
861 case SRE_OP_MARK:\r
862 /* set mark */\r
863 /* <MARK> <gid> */\r
864 TRACE(("|%p|%p|MARK %d\n", ctx->pattern,\r
865 ctx->ptr, ctx->pattern[0]));\r
866 i = ctx->pattern[0];\r
867 if (i & 1)\r
868 state->lastindex = i/2 + 1;\r
869 if (i > state->lastmark) {\r
870 /* state->lastmark is the highest valid index in the\r
871 state->mark array. If it is increased by more than 1,\r
872 the intervening marks must be set to NULL to signal\r
873 that these marks have not been encountered. */\r
874 Py_ssize_t j = state->lastmark + 1;\r
875 while (j < i)\r
876 state->mark[j++] = NULL;\r
877 state->lastmark = i;\r
878 }\r
879 state->mark[i] = ctx->ptr;\r
880 ctx->pattern++;\r
881 break;\r
882\r
883 case SRE_OP_LITERAL:\r
884 /* match literal string */\r
885 /* <LITERAL> <code> */\r
886 TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,\r
887 ctx->ptr, *ctx->pattern));\r
888 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])\r
889 RETURN_FAILURE;\r
890 ctx->pattern++;\r
891 ctx->ptr++;\r
892 break;\r
893\r
894 case SRE_OP_NOT_LITERAL:\r
895 /* match anything that is not literal character */\r
896 /* <NOT_LITERAL> <code> */\r
897 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,\r
898 ctx->ptr, *ctx->pattern));\r
899 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])\r
900 RETURN_FAILURE;\r
901 ctx->pattern++;\r
902 ctx->ptr++;\r
903 break;\r
904\r
905 case SRE_OP_SUCCESS:\r
906 /* end of pattern */\r
907 TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));\r
908 state->ptr = ctx->ptr;\r
909 RETURN_SUCCESS;\r
910\r
911 case SRE_OP_AT:\r
912 /* match at given position */\r
913 /* <AT> <code> */\r
914 TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));\r
915 if (!SRE_AT(state, ctx->ptr, *ctx->pattern))\r
916 RETURN_FAILURE;\r
917 ctx->pattern++;\r
918 break;\r
919\r
920 case SRE_OP_CATEGORY:\r
921 /* match at given category */\r
922 /* <CATEGORY> <code> */\r
923 TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,\r
924 ctx->ptr, *ctx->pattern));\r
925 if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))\r
926 RETURN_FAILURE;\r
927 ctx->pattern++;\r
928 ctx->ptr++;\r
929 break;\r
930\r
931 case SRE_OP_ANY:\r
932 /* match anything (except a newline) */\r
933 /* <ANY> */\r
934 TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));\r
935 if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))\r
936 RETURN_FAILURE;\r
937 ctx->ptr++;\r
938 break;\r
939\r
940 case SRE_OP_ANY_ALL:\r
941 /* match anything */\r
942 /* <ANY_ALL> */\r
943 TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));\r
944 if (ctx->ptr >= end)\r
945 RETURN_FAILURE;\r
946 ctx->ptr++;\r
947 break;\r
948\r
949 case SRE_OP_IN:\r
950 /* match set member (or non_member) */\r
951 /* <IN> <skip> <set> */\r
952 TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));\r
953 if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))\r
954 RETURN_FAILURE;\r
955 ctx->pattern += ctx->pattern[0];\r
956 ctx->ptr++;\r
957 break;\r
958\r
959 case SRE_OP_LITERAL_IGNORE:\r
960 TRACE(("|%p|%p|LITERAL_IGNORE %d\n",\r
961 ctx->pattern, ctx->ptr, ctx->pattern[0]));\r
962 if (ctx->ptr >= end ||\r
963 state->lower(*ctx->ptr) != state->lower(*ctx->pattern))\r
964 RETURN_FAILURE;\r
965 ctx->pattern++;\r
966 ctx->ptr++;\r
967 break;\r
968\r
969 case SRE_OP_NOT_LITERAL_IGNORE:\r
970 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",\r
971 ctx->pattern, ctx->ptr, *ctx->pattern));\r
972 if (ctx->ptr >= end ||\r
973 state->lower(*ctx->ptr) == state->lower(*ctx->pattern))\r
974 RETURN_FAILURE;\r
975 ctx->pattern++;\r
976 ctx->ptr++;\r
977 break;\r
978\r
979 case SRE_OP_IN_IGNORE:\r
980 TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));\r
981 if (ctx->ptr >= end\r
982 || !SRE_CHARSET(ctx->pattern+1,\r
983 (SRE_CODE)state->lower(*ctx->ptr)))\r
984 RETURN_FAILURE;\r
985 ctx->pattern += ctx->pattern[0];\r
986 ctx->ptr++;\r
987 break;\r
988\r
989 case SRE_OP_JUMP:\r
990 case SRE_OP_INFO:\r
991 /* jump forward */\r
992 /* <JUMP> <offset> */\r
993 TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,\r
994 ctx->ptr, ctx->pattern[0]));\r
995 ctx->pattern += ctx->pattern[0];\r
996 break;\r
997\r
998 case SRE_OP_BRANCH:\r
999 /* alternation */\r
1000 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */\r
1001 TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));\r
1002 LASTMARK_SAVE();\r
1003 ctx->u.rep = state->repeat;\r
1004 if (ctx->u.rep)\r
1005 MARK_PUSH(ctx->lastmark);\r
1006 for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {\r
1007 if (ctx->pattern[1] == SRE_OP_LITERAL &&\r
1008 (ctx->ptr >= end ||\r
1009 (SRE_CODE) *ctx->ptr != ctx->pattern[2]))\r
1010 continue;\r
1011 if (ctx->pattern[1] == SRE_OP_IN &&\r
1012 (ctx->ptr >= end ||\r
1013 !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))\r
1014 continue;\r
1015 state->ptr = ctx->ptr;\r
1016 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);\r
1017 if (ret) {\r
1018 if (ctx->u.rep)\r
1019 MARK_POP_DISCARD(ctx->lastmark);\r
1020 RETURN_ON_ERROR(ret);\r
1021 RETURN_SUCCESS;\r
1022 }\r
1023 if (ctx->u.rep)\r
1024 MARK_POP_KEEP(ctx->lastmark);\r
1025 LASTMARK_RESTORE();\r
1026 }\r
1027 if (ctx->u.rep)\r
1028 MARK_POP_DISCARD(ctx->lastmark);\r
1029 RETURN_FAILURE;\r
1030\r
1031 case SRE_OP_REPEAT_ONE:\r
1032 /* match repeated sequence (maximizing regexp) */\r
1033\r
1034 /* this operator only works if the repeated item is\r
1035 exactly one character wide, and we're not already\r
1036 collecting backtracking points. for other cases,\r
1037 use the MAX_REPEAT operator */\r
1038\r
1039 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */\r
1040\r
1041 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,\r
1042 ctx->pattern[1], ctx->pattern[2]));\r
1043\r
1044 if (ctx->ptr + ctx->pattern[1] > end)\r
1045 RETURN_FAILURE; /* cannot match */\r
1046\r
1047 state->ptr = ctx->ptr;\r
1048\r
1049 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);\r
1050 RETURN_ON_ERROR(ret);\r
1051 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);\r
1052 ctx->count = ret;\r
1053 ctx->ptr += ctx->count;\r
1054\r
1055 /* when we arrive here, count contains the number of\r
1056 matches, and ctx->ptr points to the tail of the target\r
1057 string. check if the rest of the pattern matches,\r
1058 and backtrack if not. */\r
1059\r
1060 if (ctx->count < (Py_ssize_t) ctx->pattern[1])\r
1061 RETURN_FAILURE;\r
1062\r
1063 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {\r
1064 /* tail is empty. we're finished */\r
1065 state->ptr = ctx->ptr;\r
1066 RETURN_SUCCESS;\r
1067 }\r
1068\r
1069 LASTMARK_SAVE();\r
1070\r
1071 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {\r
1072 /* tail starts with a literal. skip positions where\r
1073 the rest of the pattern cannot possibly match */\r
1074 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];\r
1075 for (;;) {\r
1076 while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&\r
1077 (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {\r
1078 ctx->ptr--;\r
1079 ctx->count--;\r
1080 }\r
1081 if (ctx->count < (Py_ssize_t) ctx->pattern[1])\r
1082 break;\r
1083 state->ptr = ctx->ptr;\r
1084 DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,\r
1085 ctx->pattern+ctx->pattern[0]);\r
1086 if (ret) {\r
1087 RETURN_ON_ERROR(ret);\r
1088 RETURN_SUCCESS;\r
1089 }\r
1090\r
1091 LASTMARK_RESTORE();\r
1092\r
1093 ctx->ptr--;\r
1094 ctx->count--;\r
1095 }\r
1096\r
1097 } else {\r
1098 /* general case */\r
1099 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {\r
1100 state->ptr = ctx->ptr;\r
1101 DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,\r
1102 ctx->pattern+ctx->pattern[0]);\r
1103 if (ret) {\r
1104 RETURN_ON_ERROR(ret);\r
1105 RETURN_SUCCESS;\r
1106 }\r
1107 ctx->ptr--;\r
1108 ctx->count--;\r
1109 LASTMARK_RESTORE();\r
1110 }\r
1111 }\r
1112 RETURN_FAILURE;\r
1113\r
1114 case SRE_OP_MIN_REPEAT_ONE:\r
1115 /* match repeated sequence (minimizing regexp) */\r
1116\r
1117 /* this operator only works if the repeated item is\r
1118 exactly one character wide, and we're not already\r
1119 collecting backtracking points. for other cases,\r
1120 use the MIN_REPEAT operator */\r
1121\r
1122 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */\r
1123\r
1124 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,\r
1125 ctx->pattern[1], ctx->pattern[2]));\r
1126\r
1127 if (ctx->ptr + ctx->pattern[1] > end)\r
1128 RETURN_FAILURE; /* cannot match */\r
1129\r
1130 state->ptr = ctx->ptr;\r
1131\r
1132 if (ctx->pattern[1] == 0)\r
1133 ctx->count = 0;\r
1134 else {\r
1135 /* count using pattern min as the maximum */\r
1136 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);\r
1137 RETURN_ON_ERROR(ret);\r
1138 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);\r
1139 if (ret < (Py_ssize_t) ctx->pattern[1])\r
1140 /* didn't match minimum number of times */\r
1141 RETURN_FAILURE;\r
1142 /* advance past minimum matches of repeat */\r
1143 ctx->count = ret;\r
1144 ctx->ptr += ctx->count;\r
1145 }\r
1146\r
1147 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {\r
1148 /* tail is empty. we're finished */\r
1149 state->ptr = ctx->ptr;\r
1150 RETURN_SUCCESS;\r
1151\r
1152 } else {\r
1153 /* general case */\r
1154 LASTMARK_SAVE();\r
1155 while ((Py_ssize_t)ctx->pattern[2] == 65535\r
1156 || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {\r
1157 state->ptr = ctx->ptr;\r
1158 DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,\r
1159 ctx->pattern+ctx->pattern[0]);\r
1160 if (ret) {\r
1161 RETURN_ON_ERROR(ret);\r
1162 RETURN_SUCCESS;\r
1163 }\r
1164 state->ptr = ctx->ptr;\r
1165 ret = SRE_COUNT(state, ctx->pattern+3, 1);\r
1166 RETURN_ON_ERROR(ret);\r
1167 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);\r
1168 if (ret == 0)\r
1169 break;\r
1170 assert(ret == 1);\r
1171 ctx->ptr++;\r
1172 ctx->count++;\r
1173 LASTMARK_RESTORE();\r
1174 }\r
1175 }\r
1176 RETURN_FAILURE;\r
1177\r
1178 case SRE_OP_REPEAT:\r
1179 /* create repeat context. all the hard work is done\r
1180 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */\r
1181 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */\r
1182 TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,\r
1183 ctx->pattern[1], ctx->pattern[2]));\r
1184\r
1185 /* install new repeat context */\r
1186 ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));\r
1187 if (!ctx->u.rep) {\r
1188 PyErr_NoMemory();\r
1189 RETURN_FAILURE;\r
1190 }\r
1191 ctx->u.rep->count = -1;\r
1192 ctx->u.rep->pattern = ctx->pattern;\r
1193 ctx->u.rep->prev = state->repeat;\r
1194 ctx->u.rep->last_ptr = NULL;\r
1195 state->repeat = ctx->u.rep;\r
1196\r
1197 state->ptr = ctx->ptr;\r
1198 DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);\r
1199 state->repeat = ctx->u.rep->prev;\r
1200 PyObject_FREE(ctx->u.rep);\r
1201\r
1202 if (ret) {\r
1203 RETURN_ON_ERROR(ret);\r
1204 RETURN_SUCCESS;\r
1205 }\r
1206 RETURN_FAILURE;\r
1207\r
1208 case SRE_OP_MAX_UNTIL:\r
1209 /* maximizing repeat */\r
1210 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */\r
1211\r
1212 /* FIXME: we probably need to deal with zero-width\r
1213 matches in here... */\r
1214\r
1215 ctx->u.rep = state->repeat;\r
1216 if (!ctx->u.rep)\r
1217 RETURN_ERROR(SRE_ERROR_STATE);\r
1218\r
1219 state->ptr = ctx->ptr;\r
1220\r
1221 ctx->count = ctx->u.rep->count+1;\r
1222\r
1223 TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern,\r
1224 ctx->ptr, ctx->count));\r
1225\r
1226 if (ctx->count < ctx->u.rep->pattern[1]) {\r
1227 /* not enough matches */\r
1228 ctx->u.rep->count = ctx->count;\r
1229 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,\r
1230 ctx->u.rep->pattern+3);\r
1231 if (ret) {\r
1232 RETURN_ON_ERROR(ret);\r
1233 RETURN_SUCCESS;\r
1234 }\r
1235 ctx->u.rep->count = ctx->count-1;\r
1236 state->ptr = ctx->ptr;\r
1237 RETURN_FAILURE;\r
1238 }\r
1239\r
1240 if ((ctx->count < ctx->u.rep->pattern[2] ||\r
1241 ctx->u.rep->pattern[2] == 65535) &&\r
1242 state->ptr != ctx->u.rep->last_ptr) {\r
1243 /* we may have enough matches, but if we can\r
1244 match another item, do so */\r
1245 ctx->u.rep->count = ctx->count;\r
1246 LASTMARK_SAVE();\r
1247 MARK_PUSH(ctx->lastmark);\r
1248 /* zero-width match protection */\r
1249 DATA_PUSH(&ctx->u.rep->last_ptr);\r
1250 ctx->u.rep->last_ptr = state->ptr;\r
1251 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,\r
1252 ctx->u.rep->pattern+3);\r
1253 DATA_POP(&ctx->u.rep->last_ptr);\r
1254 if (ret) {\r
1255 MARK_POP_DISCARD(ctx->lastmark);\r
1256 RETURN_ON_ERROR(ret);\r
1257 RETURN_SUCCESS;\r
1258 }\r
1259 MARK_POP(ctx->lastmark);\r
1260 LASTMARK_RESTORE();\r
1261 ctx->u.rep->count = ctx->count-1;\r
1262 state->ptr = ctx->ptr;\r
1263 }\r
1264\r
1265 /* cannot match more repeated items here. make sure the\r
1266 tail matches */\r
1267 state->repeat = ctx->u.rep->prev;\r
1268 DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);\r
1269 RETURN_ON_SUCCESS(ret);\r
1270 state->repeat = ctx->u.rep;\r
1271 state->ptr = ctx->ptr;\r
1272 RETURN_FAILURE;\r
1273\r
1274 case SRE_OP_MIN_UNTIL:\r
1275 /* minimizing repeat */\r
1276 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */\r
1277\r
1278 ctx->u.rep = state->repeat;\r
1279 if (!ctx->u.rep)\r
1280 RETURN_ERROR(SRE_ERROR_STATE);\r
1281\r
1282 state->ptr = ctx->ptr;\r
1283\r
1284 ctx->count = ctx->u.rep->count+1;\r
1285\r
1286 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern,\r
1287 ctx->ptr, ctx->count, ctx->u.rep->pattern));\r
1288\r
1289 if (ctx->count < ctx->u.rep->pattern[1]) {\r
1290 /* not enough matches */\r
1291 ctx->u.rep->count = ctx->count;\r
1292 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,\r
1293 ctx->u.rep->pattern+3);\r
1294 if (ret) {\r
1295 RETURN_ON_ERROR(ret);\r
1296 RETURN_SUCCESS;\r
1297 }\r
1298 ctx->u.rep->count = ctx->count-1;\r
1299 state->ptr = ctx->ptr;\r
1300 RETURN_FAILURE;\r
1301 }\r
1302\r
1303 LASTMARK_SAVE();\r
1304\r
1305 /* see if the tail matches */\r
1306 state->repeat = ctx->u.rep->prev;\r
1307 DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);\r
1308 if (ret) {\r
1309 RETURN_ON_ERROR(ret);\r
1310 RETURN_SUCCESS;\r
1311 }\r
1312\r
1313 state->repeat = ctx->u.rep;\r
1314 state->ptr = ctx->ptr;\r
1315\r
1316 LASTMARK_RESTORE();\r
1317\r
1318 if (ctx->count >= ctx->u.rep->pattern[2]\r
1319 && ctx->u.rep->pattern[2] != 65535)\r
1320 RETURN_FAILURE;\r
1321\r
1322 ctx->u.rep->count = ctx->count;\r
1323 DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,\r
1324 ctx->u.rep->pattern+3);\r
1325 if (ret) {\r
1326 RETURN_ON_ERROR(ret);\r
1327 RETURN_SUCCESS;\r
1328 }\r
1329 ctx->u.rep->count = ctx->count-1;\r
1330 state->ptr = ctx->ptr;\r
1331 RETURN_FAILURE;\r
1332\r
1333 case SRE_OP_GROUPREF:\r
1334 /* match backreference */\r
1335 TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,\r
1336 ctx->ptr, ctx->pattern[0]));\r
1337 i = ctx->pattern[0];\r
1338 {\r
1339 Py_ssize_t groupref = i+i;\r
1340 if (groupref >= state->lastmark) {\r
1341 RETURN_FAILURE;\r
1342 } else {\r
1343 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];\r
1344 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];\r
1345 if (!p || !e || e < p)\r
1346 RETURN_FAILURE;\r
1347 while (p < e) {\r
1348 if (ctx->ptr >= end || *ctx->ptr != *p)\r
1349 RETURN_FAILURE;\r
1350 p++; ctx->ptr++;\r
1351 }\r
1352 }\r
1353 }\r
1354 ctx->pattern++;\r
1355 break;\r
1356\r
1357 case SRE_OP_GROUPREF_IGNORE:\r
1358 /* match backreference */\r
1359 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,\r
1360 ctx->ptr, ctx->pattern[0]));\r
1361 i = ctx->pattern[0];\r
1362 {\r
1363 Py_ssize_t groupref = i+i;\r
1364 if (groupref >= state->lastmark) {\r
1365 RETURN_FAILURE;\r
1366 } else {\r
1367 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];\r
1368 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];\r
1369 if (!p || !e || e < p)\r
1370 RETURN_FAILURE;\r
1371 while (p < e) {\r
1372 if (ctx->ptr >= end ||\r
1373 state->lower(*ctx->ptr) != state->lower(*p))\r
1374 RETURN_FAILURE;\r
1375 p++; ctx->ptr++;\r
1376 }\r
1377 }\r
1378 }\r
1379 ctx->pattern++;\r
1380 break;\r
1381\r
1382 case SRE_OP_GROUPREF_EXISTS:\r
1383 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,\r
1384 ctx->ptr, ctx->pattern[0]));\r
1385 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */\r
1386 i = ctx->pattern[0];\r
1387 {\r
1388 Py_ssize_t groupref = i+i;\r
1389 if (groupref >= state->lastmark) {\r
1390 ctx->pattern += ctx->pattern[1];\r
1391 break;\r
1392 } else {\r
1393 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];\r
1394 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];\r
1395 if (!p || !e || e < p) {\r
1396 ctx->pattern += ctx->pattern[1];\r
1397 break;\r
1398 }\r
1399 }\r
1400 }\r
1401 ctx->pattern += 2;\r
1402 break;\r
1403\r
1404 case SRE_OP_ASSERT:\r
1405 /* assert subpattern */\r
1406 /* <ASSERT> <skip> <back> <pattern> */\r
1407 TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,\r
1408 ctx->ptr, ctx->pattern[1]));\r
1409 state->ptr = ctx->ptr - ctx->pattern[1];\r
1410 if (state->ptr < state->beginning)\r
1411 RETURN_FAILURE;\r
1412 DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);\r
1413 RETURN_ON_FAILURE(ret);\r
1414 ctx->pattern += ctx->pattern[0];\r
1415 break;\r
1416\r
1417 case SRE_OP_ASSERT_NOT:\r
1418 /* assert not subpattern */\r
1419 /* <ASSERT_NOT> <skip> <back> <pattern> */\r
1420 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,\r
1421 ctx->ptr, ctx->pattern[1]));\r
1422 state->ptr = ctx->ptr - ctx->pattern[1];\r
1423 if (state->ptr >= state->beginning) {\r
1424 DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);\r
1425 if (ret) {\r
1426 RETURN_ON_ERROR(ret);\r
1427 RETURN_FAILURE;\r
1428 }\r
1429 }\r
1430 ctx->pattern += ctx->pattern[0];\r
1431 break;\r
1432\r
1433 case SRE_OP_FAILURE:\r
1434 /* immediate failure */\r
1435 TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));\r
1436 RETURN_FAILURE;\r
1437\r
1438 default:\r
1439 TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,\r
1440 ctx->pattern[-1]));\r
1441 RETURN_ERROR(SRE_ERROR_ILLEGAL);\r
1442 }\r
1443 }\r
1444\r
1445exit:\r
1446 ctx_pos = ctx->last_ctx_pos;\r
1447 jump = ctx->jump;\r
1448 DATA_POP_DISCARD(ctx);\r
1449 if (ctx_pos == -1)\r
1450 return ret;\r
1451 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);\r
1452\r
1453 switch (jump) {\r
1454 case JUMP_MAX_UNTIL_2:\r
1455 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));\r
1456 goto jump_max_until_2;\r
1457 case JUMP_MAX_UNTIL_3:\r
1458 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));\r
1459 goto jump_max_until_3;\r
1460 case JUMP_MIN_UNTIL_2:\r
1461 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));\r
1462 goto jump_min_until_2;\r
1463 case JUMP_MIN_UNTIL_3:\r
1464 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));\r
1465 goto jump_min_until_3;\r
1466 case JUMP_BRANCH:\r
1467 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));\r
1468 goto jump_branch;\r
1469 case JUMP_MAX_UNTIL_1:\r
1470 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));\r
1471 goto jump_max_until_1;\r
1472 case JUMP_MIN_UNTIL_1:\r
1473 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));\r
1474 goto jump_min_until_1;\r
1475 case JUMP_REPEAT:\r
1476 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));\r
1477 goto jump_repeat;\r
1478 case JUMP_REPEAT_ONE_1:\r
1479 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));\r
1480 goto jump_repeat_one_1;\r
1481 case JUMP_REPEAT_ONE_2:\r
1482 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));\r
1483 goto jump_repeat_one_2;\r
1484 case JUMP_MIN_REPEAT_ONE:\r
1485 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));\r
1486 goto jump_min_repeat_one;\r
1487 case JUMP_ASSERT:\r
1488 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));\r
1489 goto jump_assert;\r
1490 case JUMP_ASSERT_NOT:\r
1491 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));\r
1492 goto jump_assert_not;\r
1493 case JUMP_NONE:\r
1494 TRACE(("|%p|%p|RETURN %d\n", ctx->pattern, ctx->ptr, ret));\r
1495 break;\r
1496 }\r
1497\r
1498 return ret; /* should never get here */\r
1499}\r
1500\r
1501LOCAL(Py_ssize_t)\r
1502SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)\r
1503{\r
1504 SRE_CHAR* ptr = (SRE_CHAR *)state->start;\r
1505 SRE_CHAR* end = (SRE_CHAR *)state->end;\r
1506 Py_ssize_t status = 0;\r
1507 Py_ssize_t prefix_len = 0;\r
1508 Py_ssize_t prefix_skip = 0;\r
1509 SRE_CODE* prefix = NULL;\r
1510 SRE_CODE* charset = NULL;\r
1511 SRE_CODE* overlap = NULL;\r
1512 int flags = 0;\r
1513\r
1514 if (pattern[0] == SRE_OP_INFO) {\r
1515 /* optimization info block */\r
1516 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */\r
1517\r
1518 flags = pattern[2];\r
1519\r
1520 if (pattern[3] > 1) {\r
1521 /* adjust end point (but make sure we leave at least one\r
1522 character in there, so literal search will work) */\r
1523 end -= pattern[3]-1;\r
1524 if (end <= ptr)\r
1525 end = ptr+1;\r
1526 }\r
1527\r
1528 if (flags & SRE_INFO_PREFIX) {\r
1529 /* pattern starts with a known prefix */\r
1530 /* <length> <skip> <prefix data> <overlap data> */\r
1531 prefix_len = pattern[5];\r
1532 prefix_skip = pattern[6];\r
1533 prefix = pattern + 7;\r
1534 overlap = prefix + prefix_len - 1;\r
1535 } else if (flags & SRE_INFO_CHARSET)\r
1536 /* pattern starts with a character from a known set */\r
1537 /* <charset> */\r
1538 charset = pattern + 5;\r
1539\r
1540 pattern += 1 + pattern[1];\r
1541 }\r
1542\r
1543 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));\r
1544 TRACE(("charset = %p\n", charset));\r
1545\r
1546#if defined(USE_FAST_SEARCH)\r
1547 if (prefix_len > 1) {\r
1548 /* pattern starts with a known prefix. use the overlap\r
1549 table to skip forward as fast as we possibly can */\r
1550 Py_ssize_t i = 0;\r
1551 end = (SRE_CHAR *)state->end;\r
1552 while (ptr < end) {\r
1553 for (;;) {\r
1554 if ((SRE_CODE) ptr[0] != prefix[i]) {\r
1555 if (!i)\r
1556 break;\r
1557 else\r
1558 i = overlap[i];\r
1559 } else {\r
1560 if (++i == prefix_len) {\r
1561 /* found a potential match */\r
1562 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));\r
1563 state->start = ptr + 1 - prefix_len;\r
1564 state->ptr = ptr + 1 - prefix_len + prefix_skip;\r
1565 if (flags & SRE_INFO_LITERAL)\r
1566 return 1; /* we got all of it */\r
1567 status = SRE_MATCH(state, pattern + 2*prefix_skip);\r
1568 if (status != 0)\r
1569 return status;\r
1570 /* close but no cigar -- try again */\r
1571 i = overlap[i];\r
1572 }\r
1573 break;\r
1574 }\r
1575 }\r
1576 ptr++;\r
1577 }\r
1578 return 0;\r
1579 }\r
1580#endif\r
1581\r
1582 if (pattern[0] == SRE_OP_LITERAL) {\r
1583 /* pattern starts with a literal character. this is used\r
1584 for short prefixes, and if fast search is disabled */\r
1585 SRE_CODE chr = pattern[1];\r
1586 end = (SRE_CHAR *)state->end;\r
1587 for (;;) {\r
1588 while (ptr < end && (SRE_CODE) ptr[0] != chr)\r
1589 ptr++;\r
1590 if (ptr >= end)\r
1591 return 0;\r
1592 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));\r
1593 state->start = ptr;\r
1594 state->ptr = ++ptr;\r
1595 if (flags & SRE_INFO_LITERAL)\r
1596 return 1; /* we got all of it */\r
1597 status = SRE_MATCH(state, pattern + 2);\r
1598 if (status != 0)\r
1599 break;\r
1600 }\r
1601 } else if (charset) {\r
1602 /* pattern starts with a character from a known set */\r
1603 end = (SRE_CHAR *)state->end;\r
1604 for (;;) {\r
1605 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))\r
1606 ptr++;\r
1607 if (ptr >= end)\r
1608 return 0;\r
1609 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));\r
1610 state->start = ptr;\r
1611 state->ptr = ptr;\r
1612 status = SRE_MATCH(state, pattern);\r
1613 if (status != 0)\r
1614 break;\r
1615 ptr++;\r
1616 }\r
1617 } else\r
1618 /* general case */\r
1619 while (ptr <= end) {\r
1620 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));\r
1621 state->start = state->ptr = ptr++;\r
1622 status = SRE_MATCH(state, pattern);\r
1623 if (status != 0)\r
1624 break;\r
1625 }\r
1626\r
1627 return status;\r
1628}\r
1629\r
1630LOCAL(int)\r
1631SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)\r
1632{\r
1633 /* check if given string is a literal template (i.e. no escapes) */\r
1634 while (len-- > 0)\r
1635 if (*ptr++ == '\\')\r
1636 return 0;\r
1637 return 1;\r
1638}\r
1639\r
1640#if !defined(SRE_RECURSIVE)\r
1641\r
1642/* -------------------------------------------------------------------- */\r
1643/* factories and destructors */\r
1644\r
1645/* see sre.h for object declarations */\r
1646static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);\r
1647static PyObject*pattern_scanner(PatternObject*, PyObject*);\r
1648\r
1649static PyObject *\r
1650sre_codesize(PyObject* self, PyObject *unused)\r
1651{\r
1652 return Py_BuildValue("l", sizeof(SRE_CODE));\r
1653}\r
1654\r
1655static PyObject *\r
1656sre_getlower(PyObject* self, PyObject* args)\r
1657{\r
1658 int character, flags;\r
1659 if (!PyArg_ParseTuple(args, "ii", &character, &flags))\r
1660 return NULL;\r
1661 if (flags & SRE_FLAG_LOCALE)\r
1662 return Py_BuildValue("i", sre_lower_locale(character));\r
1663 if (flags & SRE_FLAG_UNICODE)\r
1664#if defined(HAVE_UNICODE)\r
1665 return Py_BuildValue("i", sre_lower_unicode(character));\r
1666#else\r
1667 return Py_BuildValue("i", sre_lower_locale(character));\r
1668#endif\r
1669 return Py_BuildValue("i", sre_lower(character));\r
1670}\r
1671\r
1672LOCAL(void)\r
1673state_reset(SRE_STATE* state)\r
1674{\r
1675 /* FIXME: dynamic! */\r
1676 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/\r
1677\r
1678 state->lastmark = -1;\r
1679 state->lastindex = -1;\r
1680\r
1681 state->repeat = NULL;\r
1682\r
1683 data_stack_dealloc(state);\r
1684}\r
1685\r
1686static void*\r
1687getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)\r
1688{\r
1689 /* given a python object, return a data pointer, a length (in\r
1690 characters), and a character size. return NULL if the object\r
1691 is not a string (or not compatible) */\r
1692\r
1693 PyBufferProcs *buffer;\r
1694 Py_ssize_t size, bytes;\r
1695 int charsize;\r
1696 void* ptr;\r
1697\r
1698#if defined(HAVE_UNICODE)\r
1699 if (PyUnicode_Check(string)) {\r
1700 /* unicode strings doesn't always support the buffer interface */\r
1701 ptr = (void*) PyUnicode_AS_DATA(string);\r
1702 /* bytes = PyUnicode_GET_DATA_SIZE(string); */\r
1703 size = PyUnicode_GET_SIZE(string);\r
1704 charsize = sizeof(Py_UNICODE);\r
1705\r
1706 } else {\r
1707#endif\r
1708\r
1709 /* get pointer to string buffer */\r
1710 buffer = Py_TYPE(string)->tp_as_buffer;\r
1711 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||\r
1712 buffer->bf_getsegcount(string, NULL) != 1) {\r
1713 PyErr_SetString(PyExc_TypeError, "expected string or buffer");\r
1714 return NULL;\r
1715 }\r
1716\r
1717 /* determine buffer size */\r
1718 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);\r
1719 if (bytes < 0) {\r
1720 PyErr_SetString(PyExc_TypeError, "buffer has negative size");\r
1721 return NULL;\r
1722 }\r
1723\r
1724 /* determine character size */\r
1725#if PY_VERSION_HEX >= 0x01060000\r
1726 size = PyObject_Size(string);\r
1727#else\r
1728 size = PyObject_Length(string);\r
1729#endif\r
1730\r
1731 if (PyString_Check(string) || bytes == size)\r
1732 charsize = 1;\r
1733#if defined(HAVE_UNICODE)\r
1734 else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))\r
1735 charsize = sizeof(Py_UNICODE);\r
1736#endif\r
1737 else {\r
1738 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");\r
1739 return NULL;\r
1740 }\r
1741\r
1742#if defined(HAVE_UNICODE)\r
1743 }\r
1744#endif\r
1745\r
1746 *p_length = size;\r
1747 *p_charsize = charsize;\r
1748\r
1749 return ptr;\r
1750}\r
1751\r
1752LOCAL(PyObject*)\r
1753state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,\r
1754 Py_ssize_t start, Py_ssize_t end)\r
1755{\r
1756 /* prepare state object */\r
1757\r
1758 Py_ssize_t length;\r
1759 int charsize;\r
1760 void* ptr;\r
1761\r
1762 memset(state, 0, sizeof(SRE_STATE));\r
1763\r
1764 state->lastmark = -1;\r
1765 state->lastindex = -1;\r
1766\r
1767 ptr = getstring(string, &length, &charsize);\r
1768 if (!ptr)\r
1769 return NULL;\r
1770\r
1771 /* adjust boundaries */\r
1772 if (start < 0)\r
1773 start = 0;\r
1774 else if (start > length)\r
1775 start = length;\r
1776\r
1777 if (end < 0)\r
1778 end = 0;\r
1779 else if (end > length)\r
1780 end = length;\r
1781\r
1782 state->charsize = charsize;\r
1783\r
1784 state->beginning = ptr;\r
1785\r
1786 state->start = (void*) ((char*) ptr + start * state->charsize);\r
1787 state->end = (void*) ((char*) ptr + end * state->charsize);\r
1788\r
1789 Py_INCREF(string);\r
1790 state->string = string;\r
1791 state->pos = start;\r
1792 state->endpos = end;\r
1793\r
1794 if (pattern->flags & SRE_FLAG_LOCALE)\r
1795 state->lower = sre_lower_locale;\r
1796 else if (pattern->flags & SRE_FLAG_UNICODE)\r
1797#if defined(HAVE_UNICODE)\r
1798 state->lower = sre_lower_unicode;\r
1799#else\r
1800 state->lower = sre_lower_locale;\r
1801#endif\r
1802 else\r
1803 state->lower = sre_lower;\r
1804\r
1805 return string;\r
1806}\r
1807\r
1808LOCAL(void)\r
1809state_fini(SRE_STATE* state)\r
1810{\r
1811 Py_XDECREF(state->string);\r
1812 data_stack_dealloc(state);\r
1813}\r
1814\r
1815/* calculate offset from start of string */\r
1816#define STATE_OFFSET(state, member)\\r
1817 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)\r
1818\r
1819LOCAL(PyObject*)\r
1820state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)\r
1821{\r
1822 Py_ssize_t i, j;\r
1823\r
1824 index = (index - 1) * 2;\r
1825\r
1826 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {\r
1827 if (empty)\r
1828 /* want empty string */\r
1829 i = j = 0;\r
1830 else {\r
1831 Py_INCREF(Py_None);\r
1832 return Py_None;\r
1833 }\r
1834 } else {\r
1835 i = STATE_OFFSET(state, state->mark[index]);\r
1836 j = STATE_OFFSET(state, state->mark[index+1]);\r
1837 }\r
1838\r
1839 return PySequence_GetSlice(string, i, j);\r
1840}\r
1841\r
1842static void\r
1843pattern_error(int status)\r
1844{\r
1845 switch (status) {\r
1846 case SRE_ERROR_RECURSION_LIMIT:\r
1847 PyErr_SetString(\r
1848 PyExc_RuntimeError,\r
1849 "maximum recursion limit exceeded"\r
1850 );\r
1851 break;\r
1852 case SRE_ERROR_MEMORY:\r
1853 PyErr_NoMemory();\r
1854 break;\r
1855 case SRE_ERROR_INTERRUPTED:\r
1856 /* An exception has already been raised, so let it fly */\r
1857 break;\r
1858 default:\r
1859 /* other error codes indicate compiler/engine bugs */\r
1860 PyErr_SetString(\r
1861 PyExc_RuntimeError,\r
1862 "internal error in regular expression engine"\r
1863 );\r
1864 }\r
1865}\r
1866\r
1867static void\r
1868pattern_dealloc(PatternObject* self)\r
1869{\r
1870 if (self->weakreflist != NULL)\r
1871 PyObject_ClearWeakRefs((PyObject *) self);\r
1872 Py_XDECREF(self->pattern);\r
1873 Py_XDECREF(self->groupindex);\r
1874 Py_XDECREF(self->indexgroup);\r
1875 PyObject_DEL(self);\r
1876}\r
1877\r
1878static PyObject*\r
1879pattern_match(PatternObject* self, PyObject* args, PyObject* kw)\r
1880{\r
1881 SRE_STATE state;\r
1882 int status;\r
1883\r
1884 PyObject* string;\r
1885 Py_ssize_t start = 0;\r
1886 Py_ssize_t end = PY_SSIZE_T_MAX;\r
1887 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };\r
1888 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:match", kwlist,\r
1889 &string, &start, &end))\r
1890 return NULL;\r
1891\r
1892 string = state_init(&state, self, string, start, end);\r
1893 if (!string)\r
1894 return NULL;\r
1895\r
1896 state.ptr = state.start;\r
1897\r
1898 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));\r
1899\r
1900 if (state.charsize == 1) {\r
1901 status = sre_match(&state, PatternObject_GetCode(self));\r
1902 } else {\r
1903#if defined(HAVE_UNICODE)\r
1904 status = sre_umatch(&state, PatternObject_GetCode(self));\r
1905#endif\r
1906 }\r
1907\r
1908 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));\r
1909 if (PyErr_Occurred())\r
1910 return NULL;\r
1911\r
1912 state_fini(&state);\r
1913\r
1914 return pattern_new_match(self, &state, status);\r
1915}\r
1916\r
1917static PyObject*\r
1918pattern_search(PatternObject* self, PyObject* args, PyObject* kw)\r
1919{\r
1920 SRE_STATE state;\r
1921 int status;\r
1922\r
1923 PyObject* string;\r
1924 Py_ssize_t start = 0;\r
1925 Py_ssize_t end = PY_SSIZE_T_MAX;\r
1926 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };\r
1927 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist,\r
1928 &string, &start, &end))\r
1929 return NULL;\r
1930\r
1931 string = state_init(&state, self, string, start, end);\r
1932 if (!string)\r
1933 return NULL;\r
1934\r
1935 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));\r
1936\r
1937 if (state.charsize == 1) {\r
1938 status = sre_search(&state, PatternObject_GetCode(self));\r
1939 } else {\r
1940#if defined(HAVE_UNICODE)\r
1941 status = sre_usearch(&state, PatternObject_GetCode(self));\r
1942#endif\r
1943 }\r
1944\r
1945 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));\r
1946\r
1947 state_fini(&state);\r
1948\r
1949 if (PyErr_Occurred())\r
1950 return NULL;\r
1951\r
1952 return pattern_new_match(self, &state, status);\r
1953}\r
1954\r
1955static PyObject*\r
1956call(char* module, char* function, PyObject* args)\r
1957{\r
1958 PyObject* name;\r
1959 PyObject* mod;\r
1960 PyObject* func;\r
1961 PyObject* result;\r
1962\r
1963 if (!args)\r
1964 return NULL;\r
1965 name = PyString_FromString(module);\r
1966 if (!name)\r
1967 return NULL;\r
1968 mod = PyImport_Import(name);\r
1969 Py_DECREF(name);\r
1970 if (!mod)\r
1971 return NULL;\r
1972 func = PyObject_GetAttrString(mod, function);\r
1973 Py_DECREF(mod);\r
1974 if (!func)\r
1975 return NULL;\r
1976 result = PyObject_CallObject(func, args);\r
1977 Py_DECREF(func);\r
1978 Py_DECREF(args);\r
1979 return result;\r
1980}\r
1981\r
1982#ifdef USE_BUILTIN_COPY\r
1983static int\r
1984deepcopy(PyObject** object, PyObject* memo)\r
1985{\r
1986 PyObject* copy;\r
1987\r
1988 copy = call(\r
1989 "copy", "deepcopy",\r
1990 PyTuple_Pack(2, *object, memo)\r
1991 );\r
1992 if (!copy)\r
1993 return 0;\r
1994\r
1995 Py_DECREF(*object);\r
1996 *object = copy;\r
1997\r
1998 return 1; /* success */\r
1999}\r
2000#endif\r
2001\r
2002static PyObject*\r
2003join_list(PyObject* list, PyObject* string)\r
2004{\r
2005 /* join list elements */\r
2006\r
2007 PyObject* joiner;\r
2008#if PY_VERSION_HEX >= 0x01060000\r
2009 PyObject* function;\r
2010 PyObject* args;\r
2011#endif\r
2012 PyObject* result;\r
2013\r
2014 joiner = PySequence_GetSlice(string, 0, 0);\r
2015 if (!joiner)\r
2016 return NULL;\r
2017\r
2018 if (PyList_GET_SIZE(list) == 0) {\r
2019 Py_DECREF(list);\r
2020 return joiner;\r
2021 }\r
2022\r
2023#if PY_VERSION_HEX >= 0x01060000\r
2024 function = PyObject_GetAttrString(joiner, "join");\r
2025 if (!function) {\r
2026 Py_DECREF(joiner);\r
2027 return NULL;\r
2028 }\r
2029 args = PyTuple_New(1);\r
2030 if (!args) {\r
2031 Py_DECREF(function);\r
2032 Py_DECREF(joiner);\r
2033 return NULL;\r
2034 }\r
2035 PyTuple_SET_ITEM(args, 0, list);\r
2036 result = PyObject_CallObject(function, args);\r
2037 Py_DECREF(args); /* also removes list */\r
2038 Py_DECREF(function);\r
2039#else\r
2040 result = call(\r
2041 "string", "join",\r
2042 PyTuple_Pack(2, list, joiner)\r
2043 );\r
2044#endif\r
2045 Py_DECREF(joiner);\r
2046\r
2047 return result;\r
2048}\r
2049\r
2050static PyObject*\r
2051pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)\r
2052{\r
2053 SRE_STATE state;\r
2054 PyObject* list;\r
2055 int status;\r
2056 Py_ssize_t i, b, e;\r
2057\r
2058 PyObject* string;\r
2059 Py_ssize_t start = 0;\r
2060 Py_ssize_t end = PY_SSIZE_T_MAX;\r
2061 static char* kwlist[] = { "source", "pos", "endpos", NULL };\r
2062 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,\r
2063 &string, &start, &end))\r
2064 return NULL;\r
2065\r
2066 string = state_init(&state, self, string, start, end);\r
2067 if (!string)\r
2068 return NULL;\r
2069\r
2070 list = PyList_New(0);\r
2071 if (!list) {\r
2072 state_fini(&state);\r
2073 return NULL;\r
2074 }\r
2075\r
2076 while (state.start <= state.end) {\r
2077\r
2078 PyObject* item;\r
2079\r
2080 state_reset(&state);\r
2081\r
2082 state.ptr = state.start;\r
2083\r
2084 if (state.charsize == 1) {\r
2085 status = sre_search(&state, PatternObject_GetCode(self));\r
2086 } else {\r
2087#if defined(HAVE_UNICODE)\r
2088 status = sre_usearch(&state, PatternObject_GetCode(self));\r
2089#endif\r
2090 }\r
2091\r
2092 if (PyErr_Occurred())\r
2093 goto error;\r
2094\r
2095 if (status <= 0) {\r
2096 if (status == 0)\r
2097 break;\r
2098 pattern_error(status);\r
2099 goto error;\r
2100 }\r
2101\r
2102 /* don't bother to build a match object */\r
2103 switch (self->groups) {\r
2104 case 0:\r
2105 b = STATE_OFFSET(&state, state.start);\r
2106 e = STATE_OFFSET(&state, state.ptr);\r
2107 item = PySequence_GetSlice(string, b, e);\r
2108 if (!item)\r
2109 goto error;\r
2110 break;\r
2111 case 1:\r
2112 item = state_getslice(&state, 1, string, 1);\r
2113 if (!item)\r
2114 goto error;\r
2115 break;\r
2116 default:\r
2117 item = PyTuple_New(self->groups);\r
2118 if (!item)\r
2119 goto error;\r
2120 for (i = 0; i < self->groups; i++) {\r
2121 PyObject* o = state_getslice(&state, i+1, string, 1);\r
2122 if (!o) {\r
2123 Py_DECREF(item);\r
2124 goto error;\r
2125 }\r
2126 PyTuple_SET_ITEM(item, i, o);\r
2127 }\r
2128 break;\r
2129 }\r
2130\r
2131 status = PyList_Append(list, item);\r
2132 Py_DECREF(item);\r
2133 if (status < 0)\r
2134 goto error;\r
2135\r
2136 if (state.ptr == state.start)\r
2137 state.start = (void*) ((char*) state.ptr + state.charsize);\r
2138 else\r
2139 state.start = state.ptr;\r
2140\r
2141 }\r
2142\r
2143 state_fini(&state);\r
2144 return list;\r
2145\r
2146error:\r
2147 Py_DECREF(list);\r
2148 state_fini(&state);\r
2149 return NULL;\r
2150\r
2151}\r
2152\r
2153#if PY_VERSION_HEX >= 0x02020000\r
2154static PyObject*\r
2155pattern_finditer(PatternObject* pattern, PyObject* args)\r
2156{\r
2157 PyObject* scanner;\r
2158 PyObject* search;\r
2159 PyObject* iterator;\r
2160\r
2161 scanner = pattern_scanner(pattern, args);\r
2162 if (!scanner)\r
2163 return NULL;\r
2164\r
2165 search = PyObject_GetAttrString(scanner, "search");\r
2166 Py_DECREF(scanner);\r
2167 if (!search)\r
2168 return NULL;\r
2169\r
2170 iterator = PyCallIter_New(search, Py_None);\r
2171 Py_DECREF(search);\r
2172\r
2173 return iterator;\r
2174}\r
2175#endif\r
2176\r
2177static PyObject*\r
2178pattern_split(PatternObject* self, PyObject* args, PyObject* kw)\r
2179{\r
2180 SRE_STATE state;\r
2181 PyObject* list;\r
2182 PyObject* item;\r
2183 int status;\r
2184 Py_ssize_t n;\r
2185 Py_ssize_t i;\r
2186 void* last;\r
2187\r
2188 PyObject* string;\r
2189 Py_ssize_t maxsplit = 0;\r
2190 static char* kwlist[] = { "source", "maxsplit", NULL };\r
2191 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist,\r
2192 &string, &maxsplit))\r
2193 return NULL;\r
2194\r
2195 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);\r
2196 if (!string)\r
2197 return NULL;\r
2198\r
2199 list = PyList_New(0);\r
2200 if (!list) {\r
2201 state_fini(&state);\r
2202 return NULL;\r
2203 }\r
2204\r
2205 n = 0;\r
2206 last = state.start;\r
2207\r
2208 while (!maxsplit || n < maxsplit) {\r
2209\r
2210 state_reset(&state);\r
2211\r
2212 state.ptr = state.start;\r
2213\r
2214 if (state.charsize == 1) {\r
2215 status = sre_search(&state, PatternObject_GetCode(self));\r
2216 } else {\r
2217#if defined(HAVE_UNICODE)\r
2218 status = sre_usearch(&state, PatternObject_GetCode(self));\r
2219#endif\r
2220 }\r
2221\r
2222 if (PyErr_Occurred())\r
2223 goto error;\r
2224\r
2225 if (status <= 0) {\r
2226 if (status == 0)\r
2227 break;\r
2228 pattern_error(status);\r
2229 goto error;\r
2230 }\r
2231\r
2232 if (state.start == state.ptr) {\r
2233 if (last == state.end)\r
2234 break;\r
2235 /* skip one character */\r
2236 state.start = (void*) ((char*) state.ptr + state.charsize);\r
2237 continue;\r
2238 }\r
2239\r
2240 /* get segment before this match */\r
2241 item = PySequence_GetSlice(\r
2242 string, STATE_OFFSET(&state, last),\r
2243 STATE_OFFSET(&state, state.start)\r
2244 );\r
2245 if (!item)\r
2246 goto error;\r
2247 status = PyList_Append(list, item);\r
2248 Py_DECREF(item);\r
2249 if (status < 0)\r
2250 goto error;\r
2251\r
2252 /* add groups (if any) */\r
2253 for (i = 0; i < self->groups; i++) {\r
2254 item = state_getslice(&state, i+1, string, 0);\r
2255 if (!item)\r
2256 goto error;\r
2257 status = PyList_Append(list, item);\r
2258 Py_DECREF(item);\r
2259 if (status < 0)\r
2260 goto error;\r
2261 }\r
2262\r
2263 n = n + 1;\r
2264\r
2265 last = state.start = state.ptr;\r
2266\r
2267 }\r
2268\r
2269 /* get segment following last match (even if empty) */\r
2270 item = PySequence_GetSlice(\r
2271 string, STATE_OFFSET(&state, last), state.endpos\r
2272 );\r
2273 if (!item)\r
2274 goto error;\r
2275 status = PyList_Append(list, item);\r
2276 Py_DECREF(item);\r
2277 if (status < 0)\r
2278 goto error;\r
2279\r
2280 state_fini(&state);\r
2281 return list;\r
2282\r
2283error:\r
2284 Py_DECREF(list);\r
2285 state_fini(&state);\r
2286 return NULL;\r
2287\r
2288}\r
2289\r
2290static PyObject*\r
2291pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,\r
2292 Py_ssize_t count, Py_ssize_t subn)\r
2293{\r
2294 SRE_STATE state;\r
2295 PyObject* list;\r
2296 PyObject* item;\r
2297 PyObject* filter;\r
2298 PyObject* args;\r
2299 PyObject* match;\r
2300 void* ptr;\r
2301 int status;\r
2302 Py_ssize_t n;\r
2303 Py_ssize_t i, b, e;\r
2304 int bint;\r
2305 int filter_is_callable;\r
2306\r
2307 if (PyCallable_Check(ptemplate)) {\r
2308 /* sub/subn takes either a function or a template */\r
2309 filter = ptemplate;\r
2310 Py_INCREF(filter);\r
2311 filter_is_callable = 1;\r
2312 } else {\r
2313 /* if not callable, check if it's a literal string */\r
2314 int literal;\r
2315 ptr = getstring(ptemplate, &n, &bint);\r
2316 b = bint;\r
2317 if (ptr) {\r
2318 if (b == 1) {\r
2319 literal = sre_literal_template((unsigned char *)ptr, n);\r
2320 } else {\r
2321#if defined(HAVE_UNICODE)\r
2322 literal = sre_uliteral_template((Py_UNICODE *)ptr, n);\r
2323#endif\r
2324 }\r
2325 } else {\r
2326 PyErr_Clear();\r
2327 literal = 0;\r
2328 }\r
2329 if (literal) {\r
2330 filter = ptemplate;\r
2331 Py_INCREF(filter);\r
2332 filter_is_callable = 0;\r
2333 } else {\r
2334 /* not a literal; hand it over to the template compiler */\r
2335 filter = call(\r
2336 SRE_PY_MODULE, "_subx",\r
2337 PyTuple_Pack(2, self, ptemplate)\r
2338 );\r
2339 if (!filter)\r
2340 return NULL;\r
2341 filter_is_callable = PyCallable_Check(filter);\r
2342 }\r
2343 }\r
2344\r
2345 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);\r
2346 if (!string) {\r
2347 Py_DECREF(filter);\r
2348 return NULL;\r
2349 }\r
2350\r
2351 list = PyList_New(0);\r
2352 if (!list) {\r
2353 Py_DECREF(filter);\r
2354 state_fini(&state);\r
2355 return NULL;\r
2356 }\r
2357\r
2358 n = i = 0;\r
2359\r
2360 while (!count || n < count) {\r
2361\r
2362 state_reset(&state);\r
2363\r
2364 state.ptr = state.start;\r
2365\r
2366 if (state.charsize == 1) {\r
2367 status = sre_search(&state, PatternObject_GetCode(self));\r
2368 } else {\r
2369#if defined(HAVE_UNICODE)\r
2370 status = sre_usearch(&state, PatternObject_GetCode(self));\r
2371#endif\r
2372 }\r
2373\r
2374 if (PyErr_Occurred())\r
2375 goto error;\r
2376\r
2377 if (status <= 0) {\r
2378 if (status == 0)\r
2379 break;\r
2380 pattern_error(status);\r
2381 goto error;\r
2382 }\r
2383\r
2384 b = STATE_OFFSET(&state, state.start);\r
2385 e = STATE_OFFSET(&state, state.ptr);\r
2386\r
2387 if (i < b) {\r
2388 /* get segment before this match */\r
2389 item = PySequence_GetSlice(string, i, b);\r
2390 if (!item)\r
2391 goto error;\r
2392 status = PyList_Append(list, item);\r
2393 Py_DECREF(item);\r
2394 if (status < 0)\r
2395 goto error;\r
2396\r
2397 } else if (i == b && i == e && n > 0)\r
2398 /* ignore empty match on latest position */\r
2399 goto next;\r
2400\r
2401 if (filter_is_callable) {\r
2402 /* pass match object through filter */\r
2403 match = pattern_new_match(self, &state, 1);\r
2404 if (!match)\r
2405 goto error;\r
2406 args = PyTuple_Pack(1, match);\r
2407 if (!args) {\r
2408 Py_DECREF(match);\r
2409 goto error;\r
2410 }\r
2411 item = PyObject_CallObject(filter, args);\r
2412 Py_DECREF(args);\r
2413 Py_DECREF(match);\r
2414 if (!item)\r
2415 goto error;\r
2416 } else {\r
2417 /* filter is literal string */\r
2418 item = filter;\r
2419 Py_INCREF(item);\r
2420 }\r
2421\r
2422 /* add to list */\r
2423 if (item != Py_None) {\r
2424 status = PyList_Append(list, item);\r
2425 Py_DECREF(item);\r
2426 if (status < 0)\r
2427 goto error;\r
2428 }\r
2429\r
2430 i = e;\r
2431 n = n + 1;\r
2432\r
2433next:\r
2434 /* move on */\r
2435 if (state.ptr == state.start)\r
2436 state.start = (void*) ((char*) state.ptr + state.charsize);\r
2437 else\r
2438 state.start = state.ptr;\r
2439\r
2440 }\r
2441\r
2442 /* get segment following last match */\r
2443 if (i < state.endpos) {\r
2444 item = PySequence_GetSlice(string, i, state.endpos);\r
2445 if (!item)\r
2446 goto error;\r
2447 status = PyList_Append(list, item);\r
2448 Py_DECREF(item);\r
2449 if (status < 0)\r
2450 goto error;\r
2451 }\r
2452\r
2453 state_fini(&state);\r
2454\r
2455 Py_DECREF(filter);\r
2456\r
2457 /* convert list to single string (also removes list) */\r
2458 item = join_list(list, string);\r
2459\r
2460 if (!item)\r
2461 return NULL;\r
2462\r
2463 if (subn)\r
2464 return Py_BuildValue("Ni", item, n);\r
2465\r
2466 return item;\r
2467\r
2468error:\r
2469 Py_DECREF(list);\r
2470 state_fini(&state);\r
2471 Py_DECREF(filter);\r
2472 return NULL;\r
2473\r
2474}\r
2475\r
2476static PyObject*\r
2477pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)\r
2478{\r
2479 PyObject* ptemplate;\r
2480 PyObject* string;\r
2481 Py_ssize_t count = 0;\r
2482 static char* kwlist[] = { "repl", "string", "count", NULL };\r
2483 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,\r
2484 &ptemplate, &string, &count))\r
2485 return NULL;\r
2486\r
2487 return pattern_subx(self, ptemplate, string, count, 0);\r
2488}\r
2489\r
2490static PyObject*\r
2491pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)\r
2492{\r
2493 PyObject* ptemplate;\r
2494 PyObject* string;\r
2495 Py_ssize_t count = 0;\r
2496 static char* kwlist[] = { "repl", "string", "count", NULL };\r
2497 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,\r
2498 &ptemplate, &string, &count))\r
2499 return NULL;\r
2500\r
2501 return pattern_subx(self, ptemplate, string, count, 1);\r
2502}\r
2503\r
2504static PyObject*\r
2505pattern_copy(PatternObject* self, PyObject *unused)\r
2506{\r
2507#ifdef USE_BUILTIN_COPY\r
2508 PatternObject* copy;\r
2509 int offset;\r
2510\r
2511 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);\r
2512 if (!copy)\r
2513 return NULL;\r
2514\r
2515 offset = offsetof(PatternObject, groups);\r
2516\r
2517 Py_XINCREF(self->groupindex);\r
2518 Py_XINCREF(self->indexgroup);\r
2519 Py_XINCREF(self->pattern);\r
2520\r
2521 memcpy((char*) copy + offset, (char*) self + offset,\r
2522 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);\r
2523 copy->weakreflist = NULL;\r
2524\r
2525 return (PyObject*) copy;\r
2526#else\r
2527 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");\r
2528 return NULL;\r
2529#endif\r
2530}\r
2531\r
2532static PyObject*\r
2533pattern_deepcopy(PatternObject* self, PyObject* memo)\r
2534{\r
2535#ifdef USE_BUILTIN_COPY\r
2536 PatternObject* copy;\r
2537\r
2538 copy = (PatternObject*) pattern_copy(self);\r
2539 if (!copy)\r
2540 return NULL;\r
2541\r
2542 if (!deepcopy(&copy->groupindex, memo) ||\r
2543 !deepcopy(&copy->indexgroup, memo) ||\r
2544 !deepcopy(&copy->pattern, memo)) {\r
2545 Py_DECREF(copy);\r
2546 return NULL;\r
2547 }\r
2548\r
2549#else\r
2550 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");\r
2551 return NULL;\r
2552#endif\r
2553}\r
2554\r
2555PyDoc_STRVAR(pattern_match_doc,\r
2556"match(string[, pos[, endpos]]) --> match object or None.\n\\r
2557 Matches zero or more characters at the beginning of the string");\r
2558\r
2559PyDoc_STRVAR(pattern_search_doc,\r
2560"search(string[, pos[, endpos]]) --> match object or None.\n\\r
2561 Scan through string looking for a match, and return a corresponding\n\\r
2562 MatchObject instance. Return None if no position in the string matches.");\r
2563\r
2564PyDoc_STRVAR(pattern_split_doc,\r
2565"split(string[, maxsplit = 0]) --> list.\n\\r
2566 Split string by the occurrences of pattern.");\r
2567\r
2568PyDoc_STRVAR(pattern_findall_doc,\r
2569"findall(string[, pos[, endpos]]) --> list.\n\\r
2570 Return a list of all non-overlapping matches of pattern in string.");\r
2571\r
2572PyDoc_STRVAR(pattern_finditer_doc,\r
2573"finditer(string[, pos[, endpos]]) --> iterator.\n\\r
2574 Return an iterator over all non-overlapping matches for the \n\\r
2575 RE pattern in string. For each match, the iterator returns a\n\\r
2576 match object.");\r
2577\r
2578PyDoc_STRVAR(pattern_sub_doc,\r
2579"sub(repl, string[, count = 0]) --> newstring\n\\r
2580 Return the string obtained by replacing the leftmost non-overlapping\n\\r
2581 occurrences of pattern in string by the replacement repl.");\r
2582\r
2583PyDoc_STRVAR(pattern_subn_doc,\r
2584"subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\\r
2585 Return the tuple (new_string, number_of_subs_made) found by replacing\n\\r
2586 the leftmost non-overlapping occurrences of pattern with the\n\\r
2587 replacement repl.");\r
2588\r
2589PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");\r
2590\r
2591static PyMethodDef pattern_methods[] = {\r
2592 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,\r
2593 pattern_match_doc},\r
2594 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,\r
2595 pattern_search_doc},\r
2596 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,\r
2597 pattern_sub_doc},\r
2598 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,\r
2599 pattern_subn_doc},\r
2600 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,\r
2601 pattern_split_doc},\r
2602 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,\r
2603 pattern_findall_doc},\r
2604#if PY_VERSION_HEX >= 0x02020000\r
2605 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,\r
2606 pattern_finditer_doc},\r
2607#endif\r
2608 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},\r
2609 {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},\r
2610 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},\r
2611 {NULL, NULL}\r
2612};\r
2613\r
2614#define PAT_OFF(x) offsetof(PatternObject, x)\r
2615static PyMemberDef pattern_members[] = {\r
2616 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},\r
2617 {"flags", T_INT, PAT_OFF(flags), READONLY},\r
2618 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},\r
2619 {"groupindex", T_OBJECT, PAT_OFF(groupindex), READONLY},\r
2620 {NULL} /* Sentinel */\r
2621};\r
2622\r
2623statichere PyTypeObject Pattern_Type = {\r
2624 PyObject_HEAD_INIT(NULL)\r
2625 0, "_" SRE_MODULE ".SRE_Pattern",\r
2626 sizeof(PatternObject), sizeof(SRE_CODE),\r
2627 (destructor)pattern_dealloc, /*tp_dealloc*/\r
2628 0, /* tp_print */\r
2629 0, /* tp_getattrn */\r
2630 0, /* tp_setattr */\r
2631 0, /* tp_compare */\r
2632 0, /* tp_repr */\r
2633 0, /* tp_as_number */\r
2634 0, /* tp_as_sequence */\r
2635 0, /* tp_as_mapping */\r
2636 0, /* tp_hash */\r
2637 0, /* tp_call */\r
2638 0, /* tp_str */\r
2639 0, /* tp_getattro */\r
2640 0, /* tp_setattro */\r
2641 0, /* tp_as_buffer */\r
2642 Py_TPFLAGS_DEFAULT, /* tp_flags */\r
2643 pattern_doc, /* tp_doc */\r
2644 0, /* tp_traverse */\r
2645 0, /* tp_clear */\r
2646 0, /* tp_richcompare */\r
2647 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */\r
2648 0, /* tp_iter */\r
2649 0, /* tp_iternext */\r
2650 pattern_methods, /* tp_methods */\r
2651 pattern_members, /* tp_members */\r
2652};\r
2653\r
2654static int _validate(PatternObject *self); /* Forward */\r
2655\r
2656static PyObject *\r
2657_compile(PyObject* self_, PyObject* args)\r
2658{\r
2659 /* "compile" pattern descriptor to pattern object */\r
2660\r
2661 PatternObject* self;\r
2662 Py_ssize_t i, n;\r
2663\r
2664 PyObject* pattern;\r
2665 int flags = 0;\r
2666 PyObject* code;\r
2667 Py_ssize_t groups = 0;\r
2668 PyObject* groupindex = NULL;\r
2669 PyObject* indexgroup = NULL;\r
2670 if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,\r
2671 &PyList_Type, &code, &groups,\r
2672 &groupindex, &indexgroup))\r
2673 return NULL;\r
2674\r
2675 n = PyList_GET_SIZE(code);\r
2676 /* coverity[ampersand_in_size] */\r
2677 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);\r
2678 if (!self)\r
2679 return NULL;\r
2680 self->weakreflist = NULL;\r
2681 self->pattern = NULL;\r
2682 self->groupindex = NULL;\r
2683 self->indexgroup = NULL;\r
2684\r
2685 self->codesize = n;\r
2686\r
2687 for (i = 0; i < n; i++) {\r
2688 PyObject *o = PyList_GET_ITEM(code, i);\r
2689 unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o)\r
2690 : PyLong_AsUnsignedLong(o);\r
2691 self->code[i] = (SRE_CODE) value;\r
2692 if ((unsigned long) self->code[i] != value) {\r
2693 PyErr_SetString(PyExc_OverflowError,\r
2694 "regular expression code size limit exceeded");\r
2695 break;\r
2696 }\r
2697 }\r
2698\r
2699 if (PyErr_Occurred()) {\r
2700 Py_DECREF(self);\r
2701 return NULL;\r
2702 }\r
2703\r
2704 Py_INCREF(pattern);\r
2705 self->pattern = pattern;\r
2706\r
2707 self->flags = flags;\r
2708\r
2709 self->groups = groups;\r
2710\r
2711 Py_XINCREF(groupindex);\r
2712 self->groupindex = groupindex;\r
2713\r
2714 Py_XINCREF(indexgroup);\r
2715 self->indexgroup = indexgroup;\r
2716\r
2717 self->weakreflist = NULL;\r
2718\r
2719 if (!_validate(self)) {\r
2720 Py_DECREF(self);\r
2721 return NULL;\r
2722 }\r
2723\r
2724 return (PyObject*) self;\r
2725}\r
2726\r
2727/* -------------------------------------------------------------------- */\r
2728/* Code validation */\r
2729\r
2730/* To learn more about this code, have a look at the _compile() function in\r
2731 Lib/sre_compile.py. The validation functions below checks the code array\r
2732 for conformance with the code patterns generated there.\r
2733\r
2734 The nice thing about the generated code is that it is position-independent:\r
2735 all jumps are relative jumps forward. Also, jumps don't cross each other:\r
2736 the target of a later jump is always earlier than the target of an earlier\r
2737 jump. IOW, this is okay:\r
2738\r
2739 J---------J-------T--------T\r
2740 \ \_____/ /\r
2741 \______________________/\r
2742\r
2743 but this is not:\r
2744\r
2745 J---------J-------T--------T\r
2746 \_________\_____/ /\r
2747 \____________/\r
2748\r
2749 It also helps that SRE_CODE is always an unsigned type, either 2 bytes or 4\r
2750 bytes wide (the latter if Python is compiled for "wide" unicode support).\r
2751*/\r
2752\r
2753/* Defining this one enables tracing of the validator */\r
2754#undef VVERBOSE\r
2755\r
2756/* Trace macro for the validator */\r
2757#if defined(VVERBOSE)\r
2758#define VTRACE(v) printf v\r
2759#else\r
2760#define VTRACE(v)\r
2761#endif\r
2762\r
2763/* Report failure */\r
2764#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)\r
2765\r
2766/* Extract opcode, argument, or skip count from code array */\r
2767#define GET_OP \\r
2768 do { \\r
2769 VTRACE(("%p: ", code)); \\r
2770 if (code >= end) FAIL; \\r
2771 op = *code++; \\r
2772 VTRACE(("%lu (op)\n", (unsigned long)op)); \\r
2773 } while (0)\r
2774#define GET_ARG \\r
2775 do { \\r
2776 VTRACE(("%p= ", code)); \\r
2777 if (code >= end) FAIL; \\r
2778 arg = *code++; \\r
2779 VTRACE(("%lu (arg)\n", (unsigned long)arg)); \\r
2780 } while (0)\r
2781#define GET_SKIP_ADJ(adj) \\r
2782 do { \\r
2783 VTRACE(("%p= ", code)); \\r
2784 if (code >= end) FAIL; \\r
2785 skip = *code; \\r
2786 VTRACE(("%lu (skip to %p)\n", \\r
2787 (unsigned long)skip, code+skip)); \\r
2788 if (code+skip-adj < code || code+skip-adj > end)\\r
2789 FAIL; \\r
2790 code++; \\r
2791 } while (0)\r
2792#define GET_SKIP GET_SKIP_ADJ(0)\r
2793\r
2794static int\r
2795_validate_charset(SRE_CODE *code, SRE_CODE *end)\r
2796{\r
2797 /* Some variables are manipulated by the macros above */\r
2798 SRE_CODE op;\r
2799 SRE_CODE arg;\r
2800 SRE_CODE offset;\r
2801 int i;\r
2802\r
2803 while (code < end) {\r
2804 GET_OP;\r
2805 switch (op) {\r
2806\r
2807 case SRE_OP_NEGATE:\r
2808 break;\r
2809\r
2810 case SRE_OP_LITERAL:\r
2811 GET_ARG;\r
2812 break;\r
2813\r
2814 case SRE_OP_RANGE:\r
2815 GET_ARG;\r
2816 GET_ARG;\r
2817 break;\r
2818\r
2819 case SRE_OP_CHARSET:\r
2820 offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */\r
2821 if (code+offset < code || code+offset > end)\r
2822 FAIL;\r
2823 code += offset;\r
2824 break;\r
2825\r
2826 case SRE_OP_BIGCHARSET:\r
2827 GET_ARG; /* Number of blocks */\r
2828 offset = 256/sizeof(SRE_CODE); /* 256-byte table */\r
2829 if (code+offset < code || code+offset > end)\r
2830 FAIL;\r
2831 /* Make sure that each byte points to a valid block */\r
2832 for (i = 0; i < 256; i++) {\r
2833 if (((unsigned char *)code)[i] >= arg)\r
2834 FAIL;\r
2835 }\r
2836 code += offset;\r
2837 offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */\r
2838 if (code+offset < code || code+offset > end)\r
2839 FAIL;\r
2840 code += offset;\r
2841 break;\r
2842\r
2843 case SRE_OP_CATEGORY:\r
2844 GET_ARG;\r
2845 switch (arg) {\r
2846 case SRE_CATEGORY_DIGIT:\r
2847 case SRE_CATEGORY_NOT_DIGIT:\r
2848 case SRE_CATEGORY_SPACE:\r
2849 case SRE_CATEGORY_NOT_SPACE:\r
2850 case SRE_CATEGORY_WORD:\r
2851 case SRE_CATEGORY_NOT_WORD:\r
2852 case SRE_CATEGORY_LINEBREAK:\r
2853 case SRE_CATEGORY_NOT_LINEBREAK:\r
2854 case SRE_CATEGORY_LOC_WORD:\r
2855 case SRE_CATEGORY_LOC_NOT_WORD:\r
2856 case SRE_CATEGORY_UNI_DIGIT:\r
2857 case SRE_CATEGORY_UNI_NOT_DIGIT:\r
2858 case SRE_CATEGORY_UNI_SPACE:\r
2859 case SRE_CATEGORY_UNI_NOT_SPACE:\r
2860 case SRE_CATEGORY_UNI_WORD:\r
2861 case SRE_CATEGORY_UNI_NOT_WORD:\r
2862 case SRE_CATEGORY_UNI_LINEBREAK:\r
2863 case SRE_CATEGORY_UNI_NOT_LINEBREAK:\r
2864 break;\r
2865 default:\r
2866 FAIL;\r
2867 }\r
2868 break;\r
2869\r
2870 default:\r
2871 FAIL;\r
2872\r
2873 }\r
2874 }\r
2875\r
2876 return 1;\r
2877}\r
2878\r
2879static int\r
2880_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)\r
2881{\r
2882 /* Some variables are manipulated by the macros above */\r
2883 SRE_CODE op;\r
2884 SRE_CODE arg;\r
2885 SRE_CODE skip;\r
2886\r
2887 VTRACE(("code=%p, end=%p\n", code, end));\r
2888\r
2889 if (code > end)\r
2890 FAIL;\r
2891\r
2892 while (code < end) {\r
2893 GET_OP;\r
2894 switch (op) {\r
2895\r
2896 case SRE_OP_MARK:\r
2897 /* We don't check whether marks are properly nested; the\r
2898 sre_match() code is robust even if they don't, and the worst\r
2899 you can get is nonsensical match results. */\r
2900 GET_ARG;\r
2901 if (arg > 2*groups+1) {\r
2902 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));\r
2903 FAIL;\r
2904 }\r
2905 break;\r
2906\r
2907 case SRE_OP_LITERAL:\r
2908 case SRE_OP_NOT_LITERAL:\r
2909 case SRE_OP_LITERAL_IGNORE:\r
2910 case SRE_OP_NOT_LITERAL_IGNORE:\r
2911 GET_ARG;\r
2912 /* The arg is just a character, nothing to check */\r
2913 break;\r
2914\r
2915 case SRE_OP_SUCCESS:\r
2916 case SRE_OP_FAILURE:\r
2917 /* Nothing to check; these normally end the matching process */\r
2918 break;\r
2919\r
2920 case SRE_OP_AT:\r
2921 GET_ARG;\r
2922 switch (arg) {\r
2923 case SRE_AT_BEGINNING:\r
2924 case SRE_AT_BEGINNING_STRING:\r
2925 case SRE_AT_BEGINNING_LINE:\r
2926 case SRE_AT_END:\r
2927 case SRE_AT_END_LINE:\r
2928 case SRE_AT_END_STRING:\r
2929 case SRE_AT_BOUNDARY:\r
2930 case SRE_AT_NON_BOUNDARY:\r
2931 case SRE_AT_LOC_BOUNDARY:\r
2932 case SRE_AT_LOC_NON_BOUNDARY:\r
2933 case SRE_AT_UNI_BOUNDARY:\r
2934 case SRE_AT_UNI_NON_BOUNDARY:\r
2935 break;\r
2936 default:\r
2937 FAIL;\r
2938 }\r
2939 break;\r
2940\r
2941 case SRE_OP_ANY:\r
2942 case SRE_OP_ANY_ALL:\r
2943 /* These have no operands */\r
2944 break;\r
2945\r
2946 case SRE_OP_IN:\r
2947 case SRE_OP_IN_IGNORE:\r
2948 GET_SKIP;\r
2949 /* Stop 1 before the end; we check the FAILURE below */\r
2950 if (!_validate_charset(code, code+skip-2))\r
2951 FAIL;\r
2952 if (code[skip-2] != SRE_OP_FAILURE)\r
2953 FAIL;\r
2954 code += skip-1;\r
2955 break;\r
2956\r
2957 case SRE_OP_INFO:\r
2958 {\r
2959 /* A minimal info field is\r
2960 <INFO> <1=skip> <2=flags> <3=min> <4=max>;\r
2961 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,\r
2962 more follows. */\r
2963 SRE_CODE flags, i;\r
2964 SRE_CODE *newcode;\r
2965 GET_SKIP;\r
2966 newcode = code+skip-1;\r
2967 GET_ARG; flags = arg;\r
2968 GET_ARG; /* min */\r
2969 GET_ARG; /* max */\r
2970 /* Check that only valid flags are present */\r
2971 if ((flags & ~(SRE_INFO_PREFIX |\r
2972 SRE_INFO_LITERAL |\r
2973 SRE_INFO_CHARSET)) != 0)\r
2974 FAIL;\r
2975 /* PREFIX and CHARSET are mutually exclusive */\r
2976 if ((flags & SRE_INFO_PREFIX) &&\r
2977 (flags & SRE_INFO_CHARSET))\r
2978 FAIL;\r
2979 /* LITERAL implies PREFIX */\r
2980 if ((flags & SRE_INFO_LITERAL) &&\r
2981 !(flags & SRE_INFO_PREFIX))\r
2982 FAIL;\r
2983 /* Validate the prefix */\r
2984 if (flags & SRE_INFO_PREFIX) {\r
2985 SRE_CODE prefix_len;\r
2986 GET_ARG; prefix_len = arg;\r
2987 GET_ARG; /* prefix skip */\r
2988 /* Here comes the prefix string */\r
2989 if (code+prefix_len < code || code+prefix_len > newcode)\r
2990 FAIL;\r
2991 code += prefix_len;\r
2992 /* And here comes the overlap table */\r
2993 if (code+prefix_len < code || code+prefix_len > newcode)\r
2994 FAIL;\r
2995 /* Each overlap value should be < prefix_len */\r
2996 for (i = 0; i < prefix_len; i++) {\r
2997 if (code[i] >= prefix_len)\r
2998 FAIL;\r
2999 }\r
3000 code += prefix_len;\r
3001 }\r
3002 /* Validate the charset */\r
3003 if (flags & SRE_INFO_CHARSET) {\r
3004 if (!_validate_charset(code, newcode-1))\r
3005 FAIL;\r
3006 if (newcode[-1] != SRE_OP_FAILURE)\r
3007 FAIL;\r
3008 code = newcode;\r
3009 }\r
3010 else if (code != newcode) {\r
3011 VTRACE(("code=%p, newcode=%p\n", code, newcode));\r
3012 FAIL;\r
3013 }\r
3014 }\r
3015 break;\r
3016\r
3017 case SRE_OP_BRANCH:\r
3018 {\r
3019 SRE_CODE *target = NULL;\r
3020 for (;;) {\r
3021 GET_SKIP;\r
3022 if (skip == 0)\r
3023 break;\r
3024 /* Stop 2 before the end; we check the JUMP below */\r
3025 if (!_validate_inner(code, code+skip-3, groups))\r
3026 FAIL;\r
3027 code += skip-3;\r
3028 /* Check that it ends with a JUMP, and that each JUMP\r
3029 has the same target */\r
3030 GET_OP;\r
3031 if (op != SRE_OP_JUMP)\r
3032 FAIL;\r
3033 GET_SKIP;\r
3034 if (target == NULL)\r
3035 target = code+skip-1;\r
3036 else if (code+skip-1 != target)\r
3037 FAIL;\r
3038 }\r
3039 }\r
3040 break;\r
3041\r
3042 case SRE_OP_REPEAT_ONE:\r
3043 case SRE_OP_MIN_REPEAT_ONE:\r
3044 {\r
3045 SRE_CODE min, max;\r
3046 GET_SKIP;\r
3047 GET_ARG; min = arg;\r
3048 GET_ARG; max = arg;\r
3049 if (min > max)\r
3050 FAIL;\r
3051#ifdef Py_UNICODE_WIDE\r
3052 if (max > 65535)\r
3053 FAIL;\r
3054#endif\r
3055 if (!_validate_inner(code, code+skip-4, groups))\r
3056 FAIL;\r
3057 code += skip-4;\r
3058 GET_OP;\r
3059 if (op != SRE_OP_SUCCESS)\r
3060 FAIL;\r
3061 }\r
3062 break;\r
3063\r
3064 case SRE_OP_REPEAT:\r
3065 {\r
3066 SRE_CODE min, max;\r
3067 GET_SKIP;\r
3068 GET_ARG; min = arg;\r
3069 GET_ARG; max = arg;\r
3070 if (min > max)\r
3071 FAIL;\r
3072#ifdef Py_UNICODE_WIDE\r
3073 if (max > 65535)\r
3074 FAIL;\r
3075#endif\r
3076 if (!_validate_inner(code, code+skip-3, groups))\r
3077 FAIL;\r
3078 code += skip-3;\r
3079 GET_OP;\r
3080 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)\r
3081 FAIL;\r
3082 }\r
3083 break;\r
3084\r
3085 case SRE_OP_GROUPREF:\r
3086 case SRE_OP_GROUPREF_IGNORE:\r
3087 GET_ARG;\r
3088 if (arg >= groups)\r
3089 FAIL;\r
3090 break;\r
3091\r
3092 case SRE_OP_GROUPREF_EXISTS:\r
3093 /* The regex syntax for this is: '(?(group)then|else)', where\r
3094 'group' is either an integer group number or a group name,\r
3095 'then' and 'else' are sub-regexes, and 'else' is optional. */\r
3096 GET_ARG;\r
3097 if (arg >= groups)\r
3098 FAIL;\r
3099 GET_SKIP_ADJ(1);\r
3100 code--; /* The skip is relative to the first arg! */\r
3101 /* There are two possibilities here: if there is both a 'then'\r
3102 part and an 'else' part, the generated code looks like:\r
3103\r
3104 GROUPREF_EXISTS\r
3105 <group>\r
3106 <skipyes>\r
3107 ...then part...\r
3108 JUMP\r
3109 <skipno>\r
3110 (<skipyes> jumps here)\r
3111 ...else part...\r
3112 (<skipno> jumps here)\r
3113\r
3114 If there is only a 'then' part, it looks like:\r
3115\r
3116 GROUPREF_EXISTS\r
3117 <group>\r
3118 <skip>\r
3119 ...then part...\r
3120 (<skip> jumps here)\r
3121\r
3122 There is no direct way to decide which it is, and we don't want\r
3123 to allow arbitrary jumps anywhere in the code; so we just look\r
3124 for a JUMP opcode preceding our skip target.\r
3125 */\r
3126 if (skip >= 3 && code+skip-3 >= code &&\r
3127 code[skip-3] == SRE_OP_JUMP)\r
3128 {\r
3129 VTRACE(("both then and else parts present\n"));\r
3130 if (!_validate_inner(code+1, code+skip-3, groups))\r
3131 FAIL;\r
3132 code += skip-2; /* Position after JUMP, at <skipno> */\r
3133 GET_SKIP;\r
3134 if (!_validate_inner(code, code+skip-1, groups))\r
3135 FAIL;\r
3136 code += skip-1;\r
3137 }\r
3138 else {\r
3139 VTRACE(("only a then part present\n"));\r
3140 if (!_validate_inner(code+1, code+skip-1, groups))\r
3141 FAIL;\r
3142 code += skip-1;\r
3143 }\r
3144 break;\r
3145\r
3146 case SRE_OP_ASSERT:\r
3147 case SRE_OP_ASSERT_NOT:\r
3148 GET_SKIP;\r
3149 GET_ARG; /* 0 for lookahead, width for lookbehind */\r
3150 code--; /* Back up over arg to simplify math below */\r
3151 if (arg & 0x80000000)\r
3152 FAIL; /* Width too large */\r
3153 /* Stop 1 before the end; we check the SUCCESS below */\r
3154 if (!_validate_inner(code+1, code+skip-2, groups))\r
3155 FAIL;\r
3156 code += skip-2;\r
3157 GET_OP;\r
3158 if (op != SRE_OP_SUCCESS)\r
3159 FAIL;\r
3160 break;\r
3161\r
3162 default:\r
3163 FAIL;\r
3164\r
3165 }\r
3166 }\r
3167\r
3168 VTRACE(("okay\n"));\r
3169 return 1;\r
3170}\r
3171\r
3172static int\r
3173_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)\r
3174{\r
3175 if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)\r
3176 FAIL;\r
3177 if (groups == 0) /* fix for simplejson */\r
3178 groups = 100; /* 100 groups should always be safe */\r
3179 return _validate_inner(code, end-1, groups);\r
3180}\r
3181\r
3182static int\r
3183_validate(PatternObject *self)\r
3184{\r
3185 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))\r
3186 {\r
3187 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");\r
3188 return 0;\r
3189 }\r
3190 else\r
3191 VTRACE(("Success!\n"));\r
3192 return 1;\r
3193}\r
3194\r
3195/* -------------------------------------------------------------------- */\r
3196/* match methods */\r
3197\r
3198static void\r
3199match_dealloc(MatchObject* self)\r
3200{\r
3201 Py_XDECREF(self->regs);\r
3202 Py_XDECREF(self->string);\r
3203 Py_DECREF(self->pattern);\r
3204 PyObject_DEL(self);\r
3205}\r
3206\r
3207static PyObject*\r
3208match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)\r
3209{\r
3210 if (index < 0 || index >= self->groups) {\r
3211 /* raise IndexError if we were given a bad group number */\r
3212 PyErr_SetString(\r
3213 PyExc_IndexError,\r
3214 "no such group"\r
3215 );\r
3216 return NULL;\r
3217 }\r
3218\r
3219 index *= 2;\r
3220\r
3221 if (self->string == Py_None || self->mark[index] < 0) {\r
3222 /* return default value if the string or group is undefined */\r
3223 Py_INCREF(def);\r
3224 return def;\r
3225 }\r
3226\r
3227 return PySequence_GetSlice(\r
3228 self->string, self->mark[index], self->mark[index+1]\r
3229 );\r
3230}\r
3231\r
3232static Py_ssize_t\r
3233match_getindex(MatchObject* self, PyObject* index)\r
3234{\r
3235 Py_ssize_t i;\r
3236\r
3237 if (PyInt_Check(index))\r
3238 return PyInt_AsSsize_t(index);\r
3239\r
3240 i = -1;\r
3241\r
3242 if (self->pattern->groupindex) {\r
3243 index = PyObject_GetItem(self->pattern->groupindex, index);\r
3244 if (index) {\r
3245 if (PyInt_Check(index) || PyLong_Check(index))\r
3246 i = PyInt_AsSsize_t(index);\r
3247 Py_DECREF(index);\r
3248 } else\r
3249 PyErr_Clear();\r
3250 }\r
3251\r
3252 return i;\r
3253}\r
3254\r
3255static PyObject*\r
3256match_getslice(MatchObject* self, PyObject* index, PyObject* def)\r
3257{\r
3258 return match_getslice_by_index(self, match_getindex(self, index), def);\r
3259}\r
3260\r
3261static PyObject*\r
3262match_expand(MatchObject* self, PyObject* ptemplate)\r
3263{\r
3264 /* delegate to Python code */\r
3265 return call(\r
3266 SRE_PY_MODULE, "_expand",\r
3267 PyTuple_Pack(3, self->pattern, self, ptemplate)\r
3268 );\r
3269}\r
3270\r
3271static PyObject*\r
3272match_group(MatchObject* self, PyObject* args)\r
3273{\r
3274 PyObject* result;\r
3275 Py_ssize_t i, size;\r
3276\r
3277 size = PyTuple_GET_SIZE(args);\r
3278\r
3279 switch (size) {\r
3280 case 0:\r
3281 result = match_getslice(self, Py_False, Py_None);\r
3282 break;\r
3283 case 1:\r
3284 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);\r
3285 break;\r
3286 default:\r
3287 /* fetch multiple items */\r
3288 result = PyTuple_New(size);\r
3289 if (!result)\r
3290 return NULL;\r
3291 for (i = 0; i < size; i++) {\r
3292 PyObject* item = match_getslice(\r
3293 self, PyTuple_GET_ITEM(args, i), Py_None\r
3294 );\r
3295 if (!item) {\r
3296 Py_DECREF(result);\r
3297 return NULL;\r
3298 }\r
3299 PyTuple_SET_ITEM(result, i, item);\r
3300 }\r
3301 break;\r
3302 }\r
3303 return result;\r
3304}\r
3305\r
3306static PyObject*\r
3307match_groups(MatchObject* self, PyObject* args, PyObject* kw)\r
3308{\r
3309 PyObject* result;\r
3310 Py_ssize_t index;\r
3311\r
3312 PyObject* def = Py_None;\r
3313 static char* kwlist[] = { "default", NULL };\r
3314 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))\r
3315 return NULL;\r
3316\r
3317 result = PyTuple_New(self->groups-1);\r
3318 if (!result)\r
3319 return NULL;\r
3320\r
3321 for (index = 1; index < self->groups; index++) {\r
3322 PyObject* item;\r
3323 item = match_getslice_by_index(self, index, def);\r
3324 if (!item) {\r
3325 Py_DECREF(result);\r
3326 return NULL;\r
3327 }\r
3328 PyTuple_SET_ITEM(result, index-1, item);\r
3329 }\r
3330\r
3331 return result;\r
3332}\r
3333\r
3334static PyObject*\r
3335match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)\r
3336{\r
3337 PyObject* result;\r
3338 PyObject* keys;\r
3339 Py_ssize_t index;\r
3340\r
3341 PyObject* def = Py_None;\r
3342 static char* kwlist[] = { "default", NULL };\r
3343 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))\r
3344 return NULL;\r
3345\r
3346 result = PyDict_New();\r
3347 if (!result || !self->pattern->groupindex)\r
3348 return result;\r
3349\r
3350 keys = PyMapping_Keys(self->pattern->groupindex);\r
3351 if (!keys)\r
3352 goto failed;\r
3353\r
3354 for (index = 0; index < PyList_GET_SIZE(keys); index++) {\r
3355 int status;\r
3356 PyObject* key;\r
3357 PyObject* value;\r
3358 key = PyList_GET_ITEM(keys, index);\r
3359 if (!key)\r
3360 goto failed;\r
3361 value = match_getslice(self, key, def);\r
3362 if (!value) {\r
3363 Py_DECREF(key);\r
3364 goto failed;\r
3365 }\r
3366 status = PyDict_SetItem(result, key, value);\r
3367 Py_DECREF(value);\r
3368 if (status < 0)\r
3369 goto failed;\r
3370 }\r
3371\r
3372 Py_DECREF(keys);\r
3373\r
3374 return result;\r
3375\r
3376failed:\r
3377 Py_XDECREF(keys);\r
3378 Py_DECREF(result);\r
3379 return NULL;\r
3380}\r
3381\r
3382static PyObject*\r
3383match_start(MatchObject* self, PyObject* args)\r
3384{\r
3385 Py_ssize_t index;\r
3386\r
3387 PyObject* index_ = Py_False; /* zero */\r
3388 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))\r
3389 return NULL;\r
3390\r
3391 index = match_getindex(self, index_);\r
3392\r
3393 if (index < 0 || index >= self->groups) {\r
3394 PyErr_SetString(\r
3395 PyExc_IndexError,\r
3396 "no such group"\r
3397 );\r
3398 return NULL;\r
3399 }\r
3400\r
3401 /* mark is -1 if group is undefined */\r
3402 return Py_BuildValue("i", self->mark[index*2]);\r
3403}\r
3404\r
3405static PyObject*\r
3406match_end(MatchObject* self, PyObject* args)\r
3407{\r
3408 Py_ssize_t index;\r
3409\r
3410 PyObject* index_ = Py_False; /* zero */\r
3411 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))\r
3412 return NULL;\r
3413\r
3414 index = match_getindex(self, index_);\r
3415\r
3416 if (index < 0 || index >= self->groups) {\r
3417 PyErr_SetString(\r
3418 PyExc_IndexError,\r
3419 "no such group"\r
3420 );\r
3421 return NULL;\r
3422 }\r
3423\r
3424 /* mark is -1 if group is undefined */\r
3425 return Py_BuildValue("i", self->mark[index*2+1]);\r
3426}\r
3427\r
3428LOCAL(PyObject*)\r
3429_pair(Py_ssize_t i1, Py_ssize_t i2)\r
3430{\r
3431 PyObject* pair;\r
3432 PyObject* item;\r
3433\r
3434 pair = PyTuple_New(2);\r
3435 if (!pair)\r
3436 return NULL;\r
3437\r
3438 item = PyInt_FromSsize_t(i1);\r
3439 if (!item)\r
3440 goto error;\r
3441 PyTuple_SET_ITEM(pair, 0, item);\r
3442\r
3443 item = PyInt_FromSsize_t(i2);\r
3444 if (!item)\r
3445 goto error;\r
3446 PyTuple_SET_ITEM(pair, 1, item);\r
3447\r
3448 return pair;\r
3449\r
3450 error:\r
3451 Py_DECREF(pair);\r
3452 return NULL;\r
3453}\r
3454\r
3455static PyObject*\r
3456match_span(MatchObject* self, PyObject* args)\r
3457{\r
3458 Py_ssize_t index;\r
3459\r
3460 PyObject* index_ = Py_False; /* zero */\r
3461 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))\r
3462 return NULL;\r
3463\r
3464 index = match_getindex(self, index_);\r
3465\r
3466 if (index < 0 || index >= self->groups) {\r
3467 PyErr_SetString(\r
3468 PyExc_IndexError,\r
3469 "no such group"\r
3470 );\r
3471 return NULL;\r
3472 }\r
3473\r
3474 /* marks are -1 if group is undefined */\r
3475 return _pair(self->mark[index*2], self->mark[index*2+1]);\r
3476}\r
3477\r
3478static PyObject*\r
3479match_regs(MatchObject* self)\r
3480{\r
3481 PyObject* regs;\r
3482 PyObject* item;\r
3483 Py_ssize_t index;\r
3484\r
3485 regs = PyTuple_New(self->groups);\r
3486 if (!regs)\r
3487 return NULL;\r
3488\r
3489 for (index = 0; index < self->groups; index++) {\r
3490 item = _pair(self->mark[index*2], self->mark[index*2+1]);\r
3491 if (!item) {\r
3492 Py_DECREF(regs);\r
3493 return NULL;\r
3494 }\r
3495 PyTuple_SET_ITEM(regs, index, item);\r
3496 }\r
3497\r
3498 Py_INCREF(regs);\r
3499 self->regs = regs;\r
3500\r
3501 return regs;\r
3502}\r
3503\r
3504static PyObject*\r
3505match_copy(MatchObject* self, PyObject *unused)\r
3506{\r
3507#ifdef USE_BUILTIN_COPY\r
3508 MatchObject* copy;\r
3509 Py_ssize_t slots, offset;\r
3510\r
3511 slots = 2 * (self->pattern->groups+1);\r
3512\r
3513 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);\r
3514 if (!copy)\r
3515 return NULL;\r
3516\r
3517 /* this value a constant, but any compiler should be able to\r
3518 figure that out all by itself */\r
3519 offset = offsetof(MatchObject, string);\r
3520\r
3521 Py_XINCREF(self->pattern);\r
3522 Py_XINCREF(self->string);\r
3523 Py_XINCREF(self->regs);\r
3524\r
3525 memcpy((char*) copy + offset, (char*) self + offset,\r
3526 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);\r
3527\r
3528 return (PyObject*) copy;\r
3529#else\r
3530 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");\r
3531 return NULL;\r
3532#endif\r
3533}\r
3534\r
3535static PyObject*\r
3536match_deepcopy(MatchObject* self, PyObject* memo)\r
3537{\r
3538#ifdef USE_BUILTIN_COPY\r
3539 MatchObject* copy;\r
3540\r
3541 copy = (MatchObject*) match_copy(self);\r
3542 if (!copy)\r
3543 return NULL;\r
3544\r
3545 if (!deepcopy((PyObject**) &copy->pattern, memo) ||\r
3546 !deepcopy(&copy->string, memo) ||\r
3547 !deepcopy(&copy->regs, memo)) {\r
3548 Py_DECREF(copy);\r
3549 return NULL;\r
3550 }\r
3551\r
3552#else\r
3553 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");\r
3554 return NULL;\r
3555#endif\r
3556}\r
3557\r
3558static struct PyMethodDef match_methods[] = {\r
3559 {"group", (PyCFunction) match_group, METH_VARARGS},\r
3560 {"start", (PyCFunction) match_start, METH_VARARGS},\r
3561 {"end", (PyCFunction) match_end, METH_VARARGS},\r
3562 {"span", (PyCFunction) match_span, METH_VARARGS},\r
3563 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},\r
3564 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},\r
3565 {"expand", (PyCFunction) match_expand, METH_O},\r
3566 {"__copy__", (PyCFunction) match_copy, METH_NOARGS},\r
3567 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},\r
3568 {NULL, NULL}\r
3569};\r
3570\r
3571static PyObject *\r
3572match_lastindex_get(MatchObject *self)\r
3573{\r
3574 if (self->lastindex >= 0)\r
3575 return Py_BuildValue("i", self->lastindex);\r
3576 Py_INCREF(Py_None);\r
3577 return Py_None;\r
3578}\r
3579\r
3580static PyObject *\r
3581match_lastgroup_get(MatchObject *self)\r
3582{\r
3583 if (self->pattern->indexgroup && self->lastindex >= 0) {\r
3584 PyObject* result = PySequence_GetItem(\r
3585 self->pattern->indexgroup, self->lastindex\r
3586 );\r
3587 if (result)\r
3588 return result;\r
3589 PyErr_Clear();\r
3590 }\r
3591 Py_INCREF(Py_None);\r
3592 return Py_None;\r
3593}\r
3594\r
3595static PyObject *\r
3596match_regs_get(MatchObject *self)\r
3597{\r
3598 if (self->regs) {\r
3599 Py_INCREF(self->regs);\r
3600 return self->regs;\r
3601 } else\r
3602 return match_regs(self);\r
3603}\r
3604\r
3605static PyGetSetDef match_getset[] = {\r
3606 {"lastindex", (getter)match_lastindex_get, (setter)NULL},\r
3607 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},\r
3608 {"regs", (getter)match_regs_get, (setter)NULL},\r
3609 {NULL}\r
3610};\r
3611\r
3612#define MATCH_OFF(x) offsetof(MatchObject, x)\r
3613static PyMemberDef match_members[] = {\r
3614 {"string", T_OBJECT, MATCH_OFF(string), READONLY},\r
3615 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},\r
3616 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},\r
3617 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},\r
3618 {NULL}\r
3619};\r
3620\r
3621\r
3622/* FIXME: implement setattr("string", None) as a special case (to\r
3623 detach the associated string, if any */\r
3624\r
3625static PyTypeObject Match_Type = {\r
3626 PyVarObject_HEAD_INIT(NULL, 0)\r
3627 "_" SRE_MODULE ".SRE_Match",\r
3628 sizeof(MatchObject), sizeof(Py_ssize_t),\r
3629 (destructor)match_dealloc, /* tp_dealloc */\r
3630 0, /* tp_print */\r
3631 0, /* tp_getattr */\r
3632 0, /* tp_setattr */\r
3633 0, /* tp_compare */\r
3634 0, /* tp_repr */\r
3635 0, /* tp_as_number */\r
3636 0, /* tp_as_sequence */\r
3637 0, /* tp_as_mapping */\r
3638 0, /* tp_hash */\r
3639 0, /* tp_call */\r
3640 0, /* tp_str */\r
3641 0, /* tp_getattro */\r
3642 0, /* tp_setattro */\r
3643 0, /* tp_as_buffer */\r
3644 Py_TPFLAGS_DEFAULT,\r
3645 0, /* tp_doc */\r
3646 0, /* tp_traverse */\r
3647 0, /* tp_clear */\r
3648 0, /* tp_richcompare */\r
3649 0, /* tp_weaklistoffset */\r
3650 0, /* tp_iter */\r
3651 0, /* tp_iternext */\r
3652 match_methods, /* tp_methods */\r
3653 match_members, /* tp_members */\r
3654 match_getset, /* tp_getset */\r
3655};\r
3656\r
3657static PyObject*\r
3658pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)\r
3659{\r
3660 /* create match object (from state object) */\r
3661\r
3662 MatchObject* match;\r
3663 Py_ssize_t i, j;\r
3664 char* base;\r
3665 int n;\r
3666\r
3667 if (status > 0) {\r
3668\r
3669 /* create match object (with room for extra group marks) */\r
3670 /* coverity[ampersand_in_size] */\r
3671 match = PyObject_NEW_VAR(MatchObject, &Match_Type,\r
3672 2*(pattern->groups+1));\r
3673 if (!match)\r
3674 return NULL;\r
3675\r
3676 Py_INCREF(pattern);\r
3677 match->pattern = pattern;\r
3678\r
3679 Py_INCREF(state->string);\r
3680 match->string = state->string;\r
3681\r
3682 match->regs = NULL;\r
3683 match->groups = pattern->groups+1;\r
3684\r
3685 /* fill in group slices */\r
3686\r
3687 base = (char*) state->beginning;\r
3688 n = state->charsize;\r
3689\r
3690 match->mark[0] = ((char*) state->start - base) / n;\r
3691 match->mark[1] = ((char*) state->ptr - base) / n;\r
3692\r
3693 for (i = j = 0; i < pattern->groups; i++, j+=2)\r
3694 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {\r
3695 match->mark[j+2] = ((char*) state->mark[j] - base) / n;\r
3696 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;\r
3697 } else\r
3698 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */\r
3699\r
3700 match->pos = state->pos;\r
3701 match->endpos = state->endpos;\r
3702\r
3703 match->lastindex = state->lastindex;\r
3704\r
3705 return (PyObject*) match;\r
3706\r
3707 } else if (status == 0) {\r
3708\r
3709 /* no match */\r
3710 Py_INCREF(Py_None);\r
3711 return Py_None;\r
3712\r
3713 }\r
3714\r
3715 /* internal error */\r
3716 pattern_error(status);\r
3717 return NULL;\r
3718}\r
3719\r
3720\r
3721/* -------------------------------------------------------------------- */\r
3722/* scanner methods (experimental) */\r
3723\r
3724static void\r
3725scanner_dealloc(ScannerObject* self)\r
3726{\r
3727 state_fini(&self->state);\r
3728 Py_XDECREF(self->pattern);\r
3729 PyObject_DEL(self);\r
3730}\r
3731\r
3732static PyObject*\r
3733scanner_match(ScannerObject* self, PyObject *unused)\r
3734{\r
3735 SRE_STATE* state = &self->state;\r
3736 PyObject* match;\r
3737 int status;\r
3738\r
3739 state_reset(state);\r
3740\r
3741 state->ptr = state->start;\r
3742\r
3743 if (state->charsize == 1) {\r
3744 status = sre_match(state, PatternObject_GetCode(self->pattern));\r
3745 } else {\r
3746#if defined(HAVE_UNICODE)\r
3747 status = sre_umatch(state, PatternObject_GetCode(self->pattern));\r
3748#endif\r
3749 }\r
3750 if (PyErr_Occurred())\r
3751 return NULL;\r
3752\r
3753 match = pattern_new_match((PatternObject*) self->pattern,\r
3754 state, status);\r
3755\r
3756 if (status == 0 || state->ptr == state->start)\r
3757 state->start = (void*) ((char*) state->ptr + state->charsize);\r
3758 else\r
3759 state->start = state->ptr;\r
3760\r
3761 return match;\r
3762}\r
3763\r
3764\r
3765static PyObject*\r
3766scanner_search(ScannerObject* self, PyObject *unused)\r
3767{\r
3768 SRE_STATE* state = &self->state;\r
3769 PyObject* match;\r
3770 int status;\r
3771\r
3772 state_reset(state);\r
3773\r
3774 state->ptr = state->start;\r
3775\r
3776 if (state->charsize == 1) {\r
3777 status = sre_search(state, PatternObject_GetCode(self->pattern));\r
3778 } else {\r
3779#if defined(HAVE_UNICODE)\r
3780 status = sre_usearch(state, PatternObject_GetCode(self->pattern));\r
3781#endif\r
3782 }\r
3783 if (PyErr_Occurred())\r
3784 return NULL;\r
3785\r
3786 match = pattern_new_match((PatternObject*) self->pattern,\r
3787 state, status);\r
3788\r
3789 if (status == 0 || state->ptr == state->start)\r
3790 state->start = (void*) ((char*) state->ptr + state->charsize);\r
3791 else\r
3792 state->start = state->ptr;\r
3793\r
3794 return match;\r
3795}\r
3796\r
3797static PyMethodDef scanner_methods[] = {\r
3798 {"match", (PyCFunction) scanner_match, METH_NOARGS},\r
3799 {"search", (PyCFunction) scanner_search, METH_NOARGS},\r
3800 {NULL, NULL}\r
3801};\r
3802\r
3803#define SCAN_OFF(x) offsetof(ScannerObject, x)\r
3804static PyMemberDef scanner_members[] = {\r
3805 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},\r
3806 {NULL} /* Sentinel */\r
3807};\r
3808\r
3809statichere PyTypeObject Scanner_Type = {\r
3810 PyObject_HEAD_INIT(NULL)\r
3811 0, "_" SRE_MODULE ".SRE_Scanner",\r
3812 sizeof(ScannerObject), 0,\r
3813 (destructor)scanner_dealloc, /*tp_dealloc*/\r
3814 0, /* tp_print */\r
3815 0, /* tp_getattr */\r
3816 0, /* tp_setattr */\r
3817 0, /* tp_reserved */\r
3818 0, /* tp_repr */\r
3819 0, /* tp_as_number */\r
3820 0, /* tp_as_sequence */\r
3821 0, /* tp_as_mapping */\r
3822 0, /* tp_hash */\r
3823 0, /* tp_call */\r
3824 0, /* tp_str */\r
3825 0, /* tp_getattro */\r
3826 0, /* tp_setattro */\r
3827 0, /* tp_as_buffer */\r
3828 Py_TPFLAGS_DEFAULT, /* tp_flags */\r
3829 0, /* tp_doc */\r
3830 0, /* tp_traverse */\r
3831 0, /* tp_clear */\r
3832 0, /* tp_richcompare */\r
3833 0, /* tp_weaklistoffset */\r
3834 0, /* tp_iter */\r
3835 0, /* tp_iternext */\r
3836 scanner_methods, /* tp_methods */\r
3837 scanner_members, /* tp_members */\r
3838 0, /* tp_getset */\r
3839};\r
3840\r
3841static PyObject*\r
3842pattern_scanner(PatternObject* pattern, PyObject* args)\r
3843{\r
3844 /* create search state object */\r
3845\r
3846 ScannerObject* self;\r
3847\r
3848 PyObject* string;\r
3849 Py_ssize_t start = 0;\r
3850 Py_ssize_t end = PY_SSIZE_T_MAX;\r
3851 if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end))\r
3852 return NULL;\r
3853\r
3854 /* create scanner object */\r
3855 self = PyObject_NEW(ScannerObject, &Scanner_Type);\r
3856 if (!self)\r
3857 return NULL;\r
3858 self->pattern = NULL;\r
3859\r
3860 string = state_init(&self->state, pattern, string, start, end);\r
3861 if (!string) {\r
3862 Py_DECREF(self);\r
3863 return NULL;\r
3864 }\r
3865\r
3866 Py_INCREF(pattern);\r
3867 self->pattern = (PyObject*) pattern;\r
3868\r
3869 return (PyObject*) self;\r
3870}\r
3871\r
3872static PyMethodDef _functions[] = {\r
3873 {"compile", _compile, METH_VARARGS},\r
3874 {"getcodesize", sre_codesize, METH_NOARGS},\r
3875 {"getlower", sre_getlower, METH_VARARGS},\r
3876 {NULL, NULL}\r
3877};\r
3878\r
3879#if PY_VERSION_HEX < 0x02030000\r
3880DL_EXPORT(void) init_sre(void)\r
3881#else\r
3882PyMODINIT_FUNC init_sre(void)\r
3883#endif\r
3884{\r
3885 PyObject* m;\r
3886 PyObject* d;\r
3887 PyObject* x;\r
3888\r
3889 /* Patch object types */\r
3890 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||\r
3891 PyType_Ready(&Scanner_Type))\r
3892 return;\r
3893\r
3894 m = Py_InitModule("_" SRE_MODULE, _functions);\r
3895 if (m == NULL)\r
3896 return;\r
3897 d = PyModule_GetDict(m);\r
3898\r
3899 x = PyInt_FromLong(SRE_MAGIC);\r
3900 if (x) {\r
3901 PyDict_SetItemString(d, "MAGIC", x);\r
3902 Py_DECREF(x);\r
3903 }\r
3904\r
3905 x = PyInt_FromLong(sizeof(SRE_CODE));\r
3906 if (x) {\r
3907 PyDict_SetItemString(d, "CODESIZE", x);\r
3908 Py_DECREF(x);\r
3909 }\r
3910\r
3911 x = PyString_FromString(copyright);\r
3912 if (x) {\r
3913 PyDict_SetItemString(d, "copyright", x);\r
3914 Py_DECREF(x);\r
3915 }\r
3916}\r
3917\r
3918#endif /* !defined(SRE_RECURSIVE) */\r
3919\r
3920/* vim:ts=4:sw=4:et\r
3921*/\r