]> git.proxmox.com Git - mirror_edk2.git/blob - AppPkg/Applications/Python/Python-2.7.10/PyMod-2.7.10/Modules/_sre.c
AppPkg/.../Python-2.7.10: AppPkg.dsc, pyconfig.h, PyMod-2.7.10
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.10 / PyMod-2.7.10 / Modules / _sre.c
1 /*
2 * Secret Labs' Regular Expression Engine
3 *
4 * regular expression matching engine
5
6 Copyright (c) 2015, Daryl McDaniel. All rights reserved.<BR>
7 Copyright (c) 2011, Intel Corporation. All rights reserved.<BR>
8 This program and the accompanying materials are licensed and made available under
9 the terms and conditions of the BSD License that accompanies this distribution.
10 The full text of the license may be found at
11 http://opensource.org/licenses/bsd-license.
12
13 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
14 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
15 *
16 * partial history:
17 * 1999-10-24 fl created (based on existing template matcher code)
18 * 2000-03-06 fl first alpha, sort of
19 * 2000-08-01 fl fixes for 1.6b1
20 * 2000-08-07 fl use PyOS_CheckStack() if available
21 * 2000-09-20 fl added expand method
22 * 2001-03-20 fl lots of fixes for 2.1b2
23 * 2001-04-15 fl export copyright as Python attribute, not global
24 * 2001-04-28 fl added __copy__ methods (work in progress)
25 * 2001-05-14 fl fixes for 1.5.2 compatibility
26 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
27 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
28 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
29 * 2001-10-21 fl added sub/subn primitive
30 * 2001-10-24 fl added finditer primitive (for 2.2 only)
31 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
32 * 2002-11-09 fl fixed empty sub/subn return type
33 * 2003-04-18 mvl fully support 4-byte codes
34 * 2003-10-17 gn implemented non recursive scheme
35 *
36 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
37 *
38 * This version of the SRE library can be redistributed under CNRI's
39 * Python 1.6 license. For any other use, please contact Secret Labs
40 * AB (info@pythonware.com).
41 *
42 * Portions of this engine have been developed in cooperation with
43 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
44 * other compatibility work.
45 */
46
47 /* Get rid of these macros to prevent collisions between EFI and Python in this file. */
48 #undef RETURN_ERROR
49 #undef RETURN_SUCCESS
50
51 #ifndef SRE_RECURSIVE
52
/* Engine version and copyright banner string. */
static char copyright[] =
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
55
56 #define PY_SSIZE_T_CLEAN
57
58 #include "Python.h"
59 #include "structmember.h" /* offsetof */
60
61 #include "sre.h"
62
63 #include <ctype.h>
64
/* name of this module, minus the leading underscore
   (may be overridden by the build before this point) */
#if !defined(SRE_MODULE)
#define SRE_MODULE "sre"
#endif

/* NOTE(review): presumably the name of the Python-level front-end module;
   its use is outside this part of the file */
#define SRE_PY_MODULE "re"

/* defining this one enables tracing (see the TRACE macro) */
#undef VERBOSE

#if PY_VERSION_HEX >= 0x01060000
#if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
/* defining this enables unicode support (default under 1.6a1 and later) */
#define HAVE_UNICODE
#endif
#endif

/* -------------------------------------------------------------------- */
/* optional features */

/* enables fast searching */
#define USE_FAST_SEARCH

/* enables aggressive inlining (always on for Visual C) */
#undef USE_INLINE

/* enables copy/deepcopy handling (work in progress) */
#undef USE_BUILTIN_COPY

/* compatibility shim for interpreters older than 1.6 */
#if PY_VERSION_HEX < 0x01060000
#define PyObject_DEL(op) PyMem_DEL((op))
#endif
97
98 /* -------------------------------------------------------------------- */
99
/* LOCAL(type) declares a file-local helper returning `type`, using the
   cheapest calling convention / inlining hint the compiler offers. */
#if defined(_MSC_VER)
#pragma optimize("gt", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif

/* error codes (all negative; 0 and 1 are reserved for "no match"/"match") */
#define SRE_ERROR_ILLEGAL (-1) /* illegal opcode */
#define SRE_ERROR_STATE (-2) /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT (-3) /* runaway recursion */
#define SRE_ERROR_MEMORY (-9) /* out of memory */
#define SRE_ERROR_INTERRUPTED (-10) /* signal handler raised exception */

/* TRACE((fmt, ...)) takes a parenthesised printf argument list; it prints
   only when VERBOSE is defined and expands to nothing otherwise. */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
123
124 /* -------------------------------------------------------------------- */
125 /* search engine state */
126
127 /* default character predicates (run sre_chars.py to regenerate tables) */
128
/* Bit flags stored per character in sre_char_info[]. */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* FIXME: this assumes ASCII.  create tables in init_sre() instead */

/* Category flags for code points 0..127, 16 entries per row.
   25 = DIGIT|ALNUM|WORD, 24 = ALNUM|WORD, 16 = WORD ('_'),
   6 = SPACE|LINEBREAK ('\n'), 2 = SPACE.
   The table is only read, hence const. */
static const char sre_char_info[128] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };

/* Lower-case mapping for code points 0..127: identity everywhere except
   'A'..'Z' (65..90), which map to 'a'..'z' (97..122). */
static const char sre_char_lower[128] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92, 93, 94, 95,
    96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127 };

/* ASCII-only predicates.  Each yields the (nonzero) mask bit when the
   character has the property and 0 otherwise; anything >= 128 is 0.
   `ch` is evaluated twice and must not be negative. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)

/* Return the ASCII lower-case form of ch; values >= 128 are returned
   unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
}
170
/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 * warnings when c's type supports only numbers < N+1 */
/* Only values 0..255 are handed to the <ctype.h> classifiers; anything
   larger is reported as not having the property. */
#define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')

/* Return the lower-case form of ch according to tolower(); values >= 256
   are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
}
184
/* unicode-specific character predicates */

#if defined(HAVE_UNICODE)

/* Thin wrappers over the interpreter's Py_UNICODE_* classifiers; the
   argument is cast to Py_UNICODE before classification. */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')

/* Return the lower-case form of ch as given by Py_UNICODE_TOLOWER(). */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
}

#endif
201
202 LOCAL(int)
203 sre_category(SRE_CODE category, unsigned int ch)
204 {
205 switch (category) {
206
207 case SRE_CATEGORY_DIGIT:
208 return SRE_IS_DIGIT(ch);
209 case SRE_CATEGORY_NOT_DIGIT:
210 return !SRE_IS_DIGIT(ch);
211 case SRE_CATEGORY_SPACE:
212 return SRE_IS_SPACE(ch);
213 case SRE_CATEGORY_NOT_SPACE:
214 return !SRE_IS_SPACE(ch);
215 case SRE_CATEGORY_WORD:
216 return SRE_IS_WORD(ch);
217 case SRE_CATEGORY_NOT_WORD:
218 return !SRE_IS_WORD(ch);
219 case SRE_CATEGORY_LINEBREAK:
220 return SRE_IS_LINEBREAK(ch);
221 case SRE_CATEGORY_NOT_LINEBREAK:
222 return !SRE_IS_LINEBREAK(ch);
223
224 case SRE_CATEGORY_LOC_WORD:
225 return SRE_LOC_IS_WORD(ch);
226 case SRE_CATEGORY_LOC_NOT_WORD:
227 return !SRE_LOC_IS_WORD(ch);
228
229 #if defined(HAVE_UNICODE)
230 case SRE_CATEGORY_UNI_DIGIT:
231 return SRE_UNI_IS_DIGIT(ch);
232 case SRE_CATEGORY_UNI_NOT_DIGIT:
233 return !SRE_UNI_IS_DIGIT(ch);
234 case SRE_CATEGORY_UNI_SPACE:
235 return SRE_UNI_IS_SPACE(ch);
236 case SRE_CATEGORY_UNI_NOT_SPACE:
237 return !SRE_UNI_IS_SPACE(ch);
238 case SRE_CATEGORY_UNI_WORD:
239 return SRE_UNI_IS_WORD(ch);
240 case SRE_CATEGORY_UNI_NOT_WORD:
241 return !SRE_UNI_IS_WORD(ch);
242 case SRE_CATEGORY_UNI_LINEBREAK:
243 return SRE_UNI_IS_LINEBREAK(ch);
244 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
245 return !SRE_UNI_IS_LINEBREAK(ch);
246 #else
247 case SRE_CATEGORY_UNI_DIGIT:
248 return SRE_IS_DIGIT(ch);
249 case SRE_CATEGORY_UNI_NOT_DIGIT:
250 return !SRE_IS_DIGIT(ch);
251 case SRE_CATEGORY_UNI_SPACE:
252 return SRE_IS_SPACE(ch);
253 case SRE_CATEGORY_UNI_NOT_SPACE:
254 return !SRE_IS_SPACE(ch);
255 case SRE_CATEGORY_UNI_WORD:
256 return SRE_LOC_IS_WORD(ch);
257 case SRE_CATEGORY_UNI_NOT_WORD:
258 return !SRE_LOC_IS_WORD(ch);
259 case SRE_CATEGORY_UNI_LINEBREAK:
260 return SRE_IS_LINEBREAK(ch);
261 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
262 return !SRE_IS_LINEBREAK(ch);
263 #endif
264 }
265 return 0;
266 }
267
268 /* helpers */
269
270 static void
271 data_stack_dealloc(SRE_STATE* state)
272 {
273 if (state->data_stack) {
274 PyMem_FREE(state->data_stack);
275 state->data_stack = NULL;
276 }
277 state->data_stack_size = state->data_stack_base = 0;
278 }
279
280 static int
281 data_stack_grow(SRE_STATE* state, Py_ssize_t size)
282 {
283 Py_ssize_t minsize, cursize;
284 minsize = state->data_stack_base+size;
285 cursize = state->data_stack_size;
286 if (cursize < minsize) {
287 void* stack;
288 cursize = minsize+minsize/4+1024;
289 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
290 stack = PyMem_REALLOC(state->data_stack, cursize);
291 if (!stack) {
292 data_stack_dealloc(state);
293 return SRE_ERROR_MEMORY;
294 }
295 state->data_stack = (char *)stack;
296 state->data_stack_size = cursize;
297 }
298 return 0;
299 }
300
/* The matching engine further down this file is written against the
   SRE_* names defined here and is compiled once per character type.
   When unicode support is enabled, the file includes itself with
   SRE_RECURSIVE defined: that inner pass skips everything guarded by
   "#ifndef SRE_RECURSIVE" and emits the engine with the 8-bit names.
   The names are then rebound to the unicode variants, so the remainder
   of the outer pass emits the engine a second time. */

/* generate 8-bit version */

#define SRE_CHAR unsigned char
#define SRE_AT sre_at
#define SRE_COUNT sre_count
#define SRE_CHARSET sre_charset
#define SRE_INFO sre_info
#define SRE_MATCH sre_match
#define SRE_MATCH_CONTEXT sre_match_context
#define SRE_SEARCH sre_search
#define SRE_LITERAL_TEMPLATE sre_literal_template

#if defined(HAVE_UNICODE)

#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE

#undef SRE_LITERAL_TEMPLATE
#undef SRE_SEARCH
#undef SRE_MATCH
#undef SRE_MATCH_CONTEXT
#undef SRE_INFO
#undef SRE_CHARSET
#undef SRE_COUNT
#undef SRE_AT
#undef SRE_CHAR

/* generate 16-bit unicode version */

#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
#define SRE_COUNT sre_ucount
#define SRE_CHARSET sre_ucharset
#define SRE_INFO sre_uinfo
#define SRE_MATCH sre_umatch
#define SRE_MATCH_CONTEXT sre_umatch_context
#define SRE_SEARCH sre_usearch
#define SRE_LITERAL_TEMPLATE sre_uliteral_template
#endif
341
342 #endif /* SRE_RECURSIVE */
343
344 /* -------------------------------------------------------------------- */
345 /* String matching engine */
346
347 /* the following section is compiled twice, with different character
348 settings */
349
350 LOCAL(int)
351 SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
352 {
353 /* check if pointer is at given position */
354
355 Py_ssize_t thisp, thatp;
356
357 switch (at) {
358
359 case SRE_AT_BEGINNING:
360 case SRE_AT_BEGINNING_STRING:
361 return ((void*) ptr == state->beginning);
362
363 case SRE_AT_BEGINNING_LINE:
364 return ((void*) ptr == state->beginning ||
365 SRE_IS_LINEBREAK((int) ptr[-1]));
366
367 case SRE_AT_END:
368 return (((void*) (ptr+1) == state->end &&
369 SRE_IS_LINEBREAK((int) ptr[0])) ||
370 ((void*) ptr == state->end));
371
372 case SRE_AT_END_LINE:
373 return ((void*) ptr == state->end ||
374 SRE_IS_LINEBREAK((int) ptr[0]));
375
376 case SRE_AT_END_STRING:
377 return ((void*) ptr == state->end);
378
379 case SRE_AT_BOUNDARY:
380 if (state->beginning == state->end)
381 return 0;
382 thatp = ((void*) ptr > state->beginning) ?
383 SRE_IS_WORD((int) ptr[-1]) : 0;
384 thisp = ((void*) ptr < state->end) ?
385 SRE_IS_WORD((int) ptr[0]) : 0;
386 return thisp != thatp;
387
388 case SRE_AT_NON_BOUNDARY:
389 if (state->beginning == state->end)
390 return 0;
391 thatp = ((void*) ptr > state->beginning) ?
392 SRE_IS_WORD((int) ptr[-1]) : 0;
393 thisp = ((void*) ptr < state->end) ?
394 SRE_IS_WORD((int) ptr[0]) : 0;
395 return thisp == thatp;
396
397 case SRE_AT_LOC_BOUNDARY:
398 if (state->beginning == state->end)
399 return 0;
400 thatp = ((void*) ptr > state->beginning) ?
401 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
402 thisp = ((void*) ptr < state->end) ?
403 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
404 return thisp != thatp;
405
406 case SRE_AT_LOC_NON_BOUNDARY:
407 if (state->beginning == state->end)
408 return 0;
409 thatp = ((void*) ptr > state->beginning) ?
410 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
411 thisp = ((void*) ptr < state->end) ?
412 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
413 return thisp == thatp;
414
415 #if defined(HAVE_UNICODE)
416 case SRE_AT_UNI_BOUNDARY:
417 if (state->beginning == state->end)
418 return 0;
419 thatp = ((void*) ptr > state->beginning) ?
420 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
421 thisp = ((void*) ptr < state->end) ?
422 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
423 return thisp != thatp;
424
425 case SRE_AT_UNI_NON_BOUNDARY:
426 if (state->beginning == state->end)
427 return 0;
428 thatp = ((void*) ptr > state->beginning) ?
429 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
430 thisp = ((void*) ptr < state->end) ?
431 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
432 return thisp == thatp;
433 #endif
434
435 }
436
437 return 0;
438 }
439
440 LOCAL(int)
441 SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
442 {
443 /* check if character is a member of the given set */
444
445 int ok = 1;
446
447 for (;;) {
448 switch (*set++) {
449
450 case SRE_OP_FAILURE:
451 return !ok;
452
453 case SRE_OP_LITERAL:
454 /* <LITERAL> <code> */
455 if (ch == set[0])
456 return ok;
457 set++;
458 break;
459
460 case SRE_OP_CATEGORY:
461 /* <CATEGORY> <code> */
462 if (sre_category(set[0], (int) ch))
463 return ok;
464 set += 1;
465 break;
466
467 case SRE_OP_CHARSET:
468 if (sizeof(SRE_CODE) == 2) {
469 /* <CHARSET> <bitmap> (16 bits per code word) */
470 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
471 return ok;
472 set += 16;
473 }
474 else {
475 /* <CHARSET> <bitmap> (32 bits per code word) */
476 if (ch < 256 && (set[ch >> 5] & (1u << (ch & 31))))
477 return ok;
478 set += 8;
479 }
480 break;
481
482 case SRE_OP_RANGE:
483 /* <RANGE> <lower> <upper> */
484 if (set[0] <= ch && ch <= set[1])
485 return ok;
486 set += 2;
487 break;
488
489 case SRE_OP_NEGATE:
490 ok = !ok;
491 break;
492
493 case SRE_OP_BIGCHARSET:
494 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
495 {
496 Py_ssize_t count, block;
497 count = *(set++);
498
499 if (sizeof(SRE_CODE) == 2) {
500 block = ((unsigned char*)set)[ch >> 8];
501 set += 128;
502 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
503 return ok;
504 set += count*16;
505 }
506 else {
507 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
508 * warnings when c's type supports only numbers < N+1 */
509 if (!(ch & ~65535))
510 block = ((unsigned char*)set)[ch >> 8];
511 else
512 block = -1;
513 set += 64;
514 if (block >=0 &&
515 (set[block*8 + ((ch & 255)>>5)] & (1u << (ch & 31))))
516 return ok;
517 set += count*8;
518 }
519 break;
520 }
521
522 default:
523 /* internal error -- there's not much we can do about it
524 here, so let's just pretend it didn't match... */
525 return 0;
526 }
527 }
528 }
529
530 LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
531
532 LOCAL(Py_ssize_t)
533 SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
534 {
535 SRE_CODE chr;
536 SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
537 SRE_CHAR* end = (SRE_CHAR *)state->end;
538 Py_ssize_t i;
539
540 /* adjust end */
541 if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
542 end = ptr + maxcount;
543
544 switch (pattern[0]) {
545
546 case SRE_OP_IN:
547 /* repeated set */
548 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
549 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
550 ptr++;
551 break;
552
553 case SRE_OP_ANY:
554 /* repeated dot wildcard. */
555 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
556 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
557 ptr++;
558 break;
559
560 case SRE_OP_ANY_ALL:
561 /* repeated dot wildcard. skip to the end of the target
562 string, and backtrack from there */
563 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
564 ptr = end;
565 break;
566
567 case SRE_OP_LITERAL:
568 /* repeated literal */
569 chr = pattern[1];
570 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
571 while (ptr < end && (SRE_CODE) *ptr == chr)
572 ptr++;
573 break;
574
575 case SRE_OP_LITERAL_IGNORE:
576 /* repeated literal */
577 chr = pattern[1];
578 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
579 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
580 ptr++;
581 break;
582
583 case SRE_OP_NOT_LITERAL:
584 /* repeated non-literal */
585 chr = pattern[1];
586 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
587 while (ptr < end && (SRE_CODE) *ptr != chr)
588 ptr++;
589 break;
590
591 case SRE_OP_NOT_LITERAL_IGNORE:
592 /* repeated non-literal */
593 chr = pattern[1];
594 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
595 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
596 ptr++;
597 break;
598
599 default:
600 /* repeated single character pattern */
601 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
602 while ((SRE_CHAR*) state->ptr < end) {
603 i = SRE_MATCH(state, pattern);
604 if (i < 0)
605 return i;
606 if (!i)
607 break;
608 }
609 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
610 (SRE_CHAR*) state->ptr - ptr));
611 return (SRE_CHAR*) state->ptr - ptr;
612 }
613
614 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
615 ptr - (SRE_CHAR*) state->ptr));
616 return ptr - (SRE_CHAR*) state->ptr;
617 }
618
#if 0 /* not used in this release */
/* Disabled code, kept for reference: the whole function is compiled out. */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    /* check if an SRE_OP_INFO block matches at the current position.
       returns the number of SRE_CODE objects to skip if successful, 0
       if no match */

    SRE_CHAR* end = state->end;
    SRE_CHAR* ptr = state->ptr;
    Py_ssize_t i;

    /* check minimal length */
    if (pattern[3] && (end - ptr) < pattern[3])
        return 0;

    /* check known prefix */
    if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
        /* <length> <skip> <prefix data> <overlap data> */
        for (i = 0; i < pattern[5]; i++)
            if ((SRE_CODE) ptr[i] != pattern[7 + i])
                return 0;
        return pattern[0] + 2 * pattern[6];
    }
    return pattern[0];
}
#endif
646
/* The macros below should be used to protect recursive SRE_MATCH()
 * calls that *failed* and do *not* return immediately (IOW, those
 * that will backtrack). Explaining:
 *
 * - Recursive SRE_MATCH() returned true: that's usually a success
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
 *   reason to restore lastmark;
 *
 * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
 *   is returning to the caller: If the current SRE_MATCH() is the
 *   top function of the recursion, returning false will be a matching
 *   failure, and it doesn't matter where lastmark is pointing to.
 *   If it's *not* the top function, it will be a recursive SRE_MATCH()
 *   failure by itself, and the calling SRE_MATCH() will have to deal
 *   with the failure by the same rules explained here (it will restore
 *   lastmark by itself if necessary);
 *
 * - Recursive SRE_MATCH() returned false, and will continue the
 *   outside 'for' loop: must be protected when breaking, since the next
 *   OP could potentially depend on lastmark;
 *
 * - Recursive SRE_MATCH() returned false, and will be called again
 *   inside a local for/while loop: must be protected between each
 *   loop iteration, since the recursive SRE_MATCH() could do anything,
 *   and could potentially depend on lastmark.
 *
 * For more information, check the discussion at SF patch #712900.
 */
/* Both macros expect `state` and `ctx` to be in scope at the point of
   use: SAVE copies lastmark/lastindex from the state into the current
   context, RESTORE copies them back. */
#define LASTMARK_SAVE()     \
    do { \
        ctx->lastmark = state->lastmark; \
        ctx->lastindex = state->lastindex; \
    } while (0)
#define LASTMARK_RESTORE()  \
    do { \
        state->lastmark = ctx->lastmark; \
        state->lastindex = ctx->lastindex; \
    } while (0)
685
/* Exit helpers for the matcher.  RETURN_ERROR leaves the function at
   once with the given code; RETURN_FAILURE / RETURN_SUCCESS store 0 / 1
   in the local `ret` and jump to the local `exit` label, so they can
   only be used where both exist.  (The EFI definitions of RETURN_ERROR
   and RETURN_SUCCESS are #undef'd at the top of this file.) */
#define RETURN_ERROR(i) do { return (i); } while(0)
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)

/* Conditional forms keyed on a result value i: negative is an error,
   positive is success, zero is failure.  `i` is evaluated more than
   once, so pass a plain variable. */
#define RETURN_ON_ERROR(i) \
    do { if ((i) < 0) RETURN_ERROR(i); } while (0)
#define RETURN_ON_SUCCESS(i) \
    do { RETURN_ON_ERROR(i); if ((i) > 0) RETURN_SUCCESS; } while (0)
#define RETURN_ON_FAILURE(i) \
    do { RETURN_ON_ERROR(i); if ((i) == 0) RETURN_FAILURE; } while (0)
696
/* stringify a macro argument (used for trace output only) */
#define SFY(x) #x

/* Data-stack primitives.  They operate on state->data_stack, a byte
   buffer whose first free offset is state->data_stack_base, and they
   rely on locals of the enclosing function: `alloc_pos`, `ctx_pos` and
   `ctx`.  When the buffer has to grow it may move, so `ctx` is looked
   up again from `ctx_pos` afterwards.  A failed grow makes the
   enclosing function return the negative error code. */

/* Reserve sizeof(type) bytes on top of the stack and point `ptr` at
   them; the offset of the reservation is left in `alloc_pos`. */
#define DATA_STACK_ALLOC(state, type, ptr) \
do { \
    alloc_pos = state->data_stack_base; \
    TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \
           "(%" PY_FORMAT_SIZE_T "d)\n", \
           SFY(type), alloc_pos, sizeof(type))); \
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
        int j = data_stack_grow(state, sizeof(type)); \
        if (j < 0) return j; \
        if (ctx_pos != -1) \
            DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
    } \
    ptr = (type*)(state->data_stack+alloc_pos); \
    state->data_stack_base += sizeof(type); \
} while (0)

/* Point `ptr` at the object of `type` stored at byte offset `pos`. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
do { \
    TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", SFY(type), pos)); \
    ptr = (type*)(state->data_stack+(pos)); \
} while (0)

/* Copy `size` bytes from `data` onto the top of the stack. */
#define DATA_STACK_PUSH(state, data, size) \
do { \
    TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \
           "(%" PY_FORMAT_SIZE_T "d)\n", \
           data, state->data_stack_base, size)); \
    if ((size) > state->data_stack_size - state->data_stack_base) { \
        int j = data_stack_grow(state, size); \
        if (j < 0) return j; \
        if (ctx_pos != -1) \
            DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
    } \
    memcpy(state->data_stack+state->data_stack_base, (data), (size)); \
    state->data_stack_base += (size); \
} while (0)

/* Copy the top `size` bytes of the stack into `data`; the bytes are
   removed from the stack only when `discard` is nonzero. */
#define DATA_STACK_POP(state, data, size, discard) \
do { \
    TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \
           "(%" PY_FORMAT_SIZE_T "d)\n", \
           data, state->data_stack_base-size, size)); \
    memcpy((data), state->data_stack+state->data_stack_base-(size), (size)); \
    if (discard) \
        state->data_stack_base -= (size); \
} while (0)

/* Drop the top `size` bytes of the stack without copying them. */
#define DATA_STACK_POP_DISCARD(state, size) \
do { \
    TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \
           "(%" PY_FORMAT_SIZE_T "d)\n", \
           state->data_stack_base-size, size)); \
    state->data_stack_base -= (size); \
} while(0)

/* Shorthands that use the local `state` and size the transfer from the
   pointed-to object. */
#define DATA_PUSH(x) \
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
#define DATA_POP(x) \
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
#define DATA_POP_DISCARD(x) \
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
#define DATA_ALLOC(t,p) \
    DATA_STACK_ALLOC(state, t, p)
#define DATA_LOOKUP_AT(t,p,pos) \
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
764
/* Save / restore the group marks state->mark[0..lastmark] on the data
   stack.  All four are no-ops unless lastmark > 0.  They rely on the
   locals `state` and (for MARK_PUSH) `i` of the enclosing function. */

/* push marks 0..lastmark */
#define MARK_PUSH(lastmark) \
    do if ((lastmark) > 0) { \
        i = (lastmark); /* ctx->lastmark may change if reallocated */ \
        DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
    } while (0)
/* restore marks and remove them from the stack */
#define MARK_POP(lastmark) \
    do if ((lastmark) > 0) { \
        DATA_STACK_POP(state, state->mark, ((lastmark)+1)*sizeof(void*), 1); \
    } while (0)
/* restore marks but leave the saved copy on the stack */
#define MARK_POP_KEEP(lastmark) \
    do if ((lastmark) > 0) { \
        DATA_STACK_POP(state, state->mark, ((lastmark)+1)*sizeof(void*), 0); \
    } while (0)
/* remove the saved copy without restoring it */
#define MARK_POP_DISCARD(lastmark) \
    do if ((lastmark) > 0) { \
        DATA_STACK_POP_DISCARD(state, ((lastmark)+1)*sizeof(void*)); \
    } while (0)
782
/* Identifiers stored in SRE_MATCH_CONTEXT.jump; each names the DO_JUMP
   site that created the context.  JUMP_NONE marks the outermost one. */
#define JUMP_NONE            0
#define JUMP_MAX_UNTIL_1     1
#define JUMP_MAX_UNTIL_2     2
#define JUMP_MAX_UNTIL_3     3
#define JUMP_MIN_UNTIL_1     4
#define JUMP_MIN_UNTIL_2     5
#define JUMP_MIN_UNTIL_3     6
#define JUMP_REPEAT          7
#define JUMP_REPEAT_ONE_1    8
#define JUMP_REPEAT_ONE_2    9
#define JUMP_MIN_REPEAT_ONE  10
#define JUMP_BRANCH          11
#define JUMP_ASSERT          12
#define JUMP_ASSERT_NOT      13

/* Start matching `nextpattern` in a fresh context without a C-level
   recursive call: allocate a new SRE_MATCH_CONTEXT on the data stack,
   link it to the current one through last_ctx_pos, tag it with
   `jumpvalue`, make it current and jump to the `entrance` label.
   `jumplabel` is placed immediately after that goto.
   NOTE(review): presumably the matcher's exit path jumps back to
   `jumplabel` based on the stored `jump` value -- that code is outside
   this part of the file.
   The expansion is several statements, so it must not be used as the
   unbraced body of an if/else. */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
    DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
    nextctx->last_ctx_pos = ctx_pos; \
    nextctx->jump = jumpvalue; \
    nextctx->pattern = nextpattern; \
    ctx_pos = alloc_pos; \
    ctx = nextctx; \
    goto entrance; \
    jumplabel: \
    while (0) /* gcc doesn't like labels at end of scopes */
808
809 typedef struct {
810 Py_ssize_t last_ctx_pos;
811 Py_ssize_t jump;
812 SRE_CHAR* ptr;
813 SRE_CODE* pattern;
814 Py_ssize_t count;
815 Py_ssize_t lastmark;
816 Py_ssize_t lastindex;
817 union {
818 SRE_CODE chr;
819 SRE_REPEAT* rep;
820 } u;
821 } SRE_MATCH_CONTEXT;
822
823 /* check if string matches the given pattern. returns <0 for
824 error, 0 for failure, and 1 for success */
825 LOCAL(Py_ssize_t)
826 SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
827 {
828 SRE_CHAR* end = (SRE_CHAR *)state->end;
829 Py_ssize_t alloc_pos, ctx_pos = -1;
830 Py_ssize_t i, ret = 0;
831 Py_ssize_t jump;
832 unsigned int sigcount=0;
833
834 SRE_MATCH_CONTEXT* ctx;
835 SRE_MATCH_CONTEXT* nextctx;
836
837 TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
838
839 DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
840 ctx->last_ctx_pos = -1;
841 ctx->jump = JUMP_NONE;
842 ctx->pattern = pattern;
843 ctx_pos = alloc_pos;
844
845 entrance:
846
847 ctx->ptr = (SRE_CHAR *)state->ptr;
848
849 if (ctx->pattern[0] == SRE_OP_INFO) {
850 /* optimization info block */
851 /* <INFO> <1=skip> <2=flags> <3=min> ... */
852 if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
853 TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, "
854 "need %" PY_FORMAT_SIZE_T "d)\n",
855 (end - ctx->ptr), (Py_ssize_t) ctx->pattern[3]));
856 RETURN_FAILURE;
857 }
858 ctx->pattern += ctx->pattern[1] + 1;
859 }
860
861 for (;;) {
862 ++sigcount;
863 if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
864 RETURN_ERROR(SRE_ERROR_INTERRUPTED);
865
866 switch (*ctx->pattern++) {
867
868 case SRE_OP_MARK:
869 /* set mark */
870 /* <MARK> <gid> */
871 TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
872 ctx->ptr, ctx->pattern[0]));
873 i = ctx->pattern[0];
874 if (i & 1)
875 state->lastindex = i/2 + 1;
876 if (i > state->lastmark) {
877 /* state->lastmark is the highest valid index in the
878 state->mark array. If it is increased by more than 1,
879 the intervening marks must be set to NULL to signal
880 that these marks have not been encountered. */
881 Py_ssize_t j = state->lastmark + 1;
882 while (j < i)
883 state->mark[j++] = NULL;
884 state->lastmark = i;
885 }
886 state->mark[i] = ctx->ptr;
887 ctx->pattern++;
888 break;
889
890 case SRE_OP_LITERAL:
891 /* match literal string */
892 /* <LITERAL> <code> */
893 TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
894 ctx->ptr, *ctx->pattern));
895 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
896 RETURN_FAILURE;
897 ctx->pattern++;
898 ctx->ptr++;
899 break;
900
901 case SRE_OP_NOT_LITERAL:
902 /* match anything that is not literal character */
903 /* <NOT_LITERAL> <code> */
904 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
905 ctx->ptr, *ctx->pattern));
906 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
907 RETURN_FAILURE;
908 ctx->pattern++;
909 ctx->ptr++;
910 break;
911
912 case SRE_OP_SUCCESS:
913 /* end of pattern */
914 TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
915 state->ptr = ctx->ptr;
916 RETURN_SUCCESS;
917
918 case SRE_OP_AT:
919 /* match at given position */
920 /* <AT> <code> */
921 TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
922 if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
923 RETURN_FAILURE;
924 ctx->pattern++;
925 break;
926
927 case SRE_OP_CATEGORY:
928 /* match at given category */
929 /* <CATEGORY> <code> */
930 TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
931 ctx->ptr, *ctx->pattern));
932 if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
933 RETURN_FAILURE;
934 ctx->pattern++;
935 ctx->ptr++;
936 break;
937
938 case SRE_OP_ANY:
939 /* match anything (except a newline) */
940 /* <ANY> */
941 TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
942 if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
943 RETURN_FAILURE;
944 ctx->ptr++;
945 break;
946
947 case SRE_OP_ANY_ALL:
948 /* match anything */
949 /* <ANY_ALL> */
950 TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
951 if (ctx->ptr >= end)
952 RETURN_FAILURE;
953 ctx->ptr++;
954 break;
955
956 case SRE_OP_IN:
957 /* match set member (or non_member) */
958 /* <IN> <skip> <set> */
959 TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
960 if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
961 RETURN_FAILURE;
962 ctx->pattern += ctx->pattern[0];
963 ctx->ptr++;
964 break;
965
966 case SRE_OP_LITERAL_IGNORE:
967 TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
968 ctx->pattern, ctx->ptr, ctx->pattern[0]));
969 if (ctx->ptr >= end ||
970 state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
971 RETURN_FAILURE;
972 ctx->pattern++;
973 ctx->ptr++;
974 break;
975
976 case SRE_OP_NOT_LITERAL_IGNORE:
977 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
978 ctx->pattern, ctx->ptr, *ctx->pattern));
979 if (ctx->ptr >= end ||
980 state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
981 RETURN_FAILURE;
982 ctx->pattern++;
983 ctx->ptr++;
984 break;
985
986 case SRE_OP_IN_IGNORE:
987 TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
988 if (ctx->ptr >= end
989 || !SRE_CHARSET(ctx->pattern+1,
990 (SRE_CODE)state->lower(*ctx->ptr)))
991 RETURN_FAILURE;
992 ctx->pattern += ctx->pattern[0];
993 ctx->ptr++;
994 break;
995
996 case SRE_OP_JUMP:
997 case SRE_OP_INFO:
998 /* jump forward */
999 /* <JUMP> <offset> */
1000 TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
1001 ctx->ptr, ctx->pattern[0]));
1002 ctx->pattern += ctx->pattern[0];
1003 break;
1004
1005 case SRE_OP_BRANCH:
1006 /* alternation */
1007 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
1008 TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
1009 LASTMARK_SAVE();
1010 ctx->u.rep = state->repeat;
1011 if (ctx->u.rep)
1012 MARK_PUSH(ctx->lastmark);
1013 for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
1014 if (ctx->pattern[1] == SRE_OP_LITERAL &&
1015 (ctx->ptr >= end ||
1016 (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
1017 continue;
1018 if (ctx->pattern[1] == SRE_OP_IN &&
1019 (ctx->ptr >= end ||
1020 !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
1021 continue;
1022 state->ptr = ctx->ptr;
1023 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
1024 if (ret) {
1025 if (ctx->u.rep)
1026 MARK_POP_DISCARD(ctx->lastmark);
1027 RETURN_ON_ERROR(ret);
1028 RETURN_SUCCESS;
1029 }
1030 if (ctx->u.rep)
1031 MARK_POP_KEEP(ctx->lastmark);
1032 LASTMARK_RESTORE();
1033 }
1034 if (ctx->u.rep)
1035 MARK_POP_DISCARD(ctx->lastmark);
1036 RETURN_FAILURE;
1037
1038 case SRE_OP_REPEAT_ONE:
1039 /* match repeated sequence (maximizing regexp) */
1040
1041 /* this operator only works if the repeated item is
1042 exactly one character wide, and we're not already
1043 collecting backtracking points. for other cases,
1044 use the MAX_REPEAT operator */
1045
1046 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1047
1048 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1049 ctx->pattern[1], ctx->pattern[2]));
1050
1051 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
1052 RETURN_FAILURE; /* cannot match */
1053
1054 state->ptr = ctx->ptr;
1055
1056 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
1057 RETURN_ON_ERROR(ret);
1058 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1059 ctx->count = ret;
1060 ctx->ptr += ctx->count;
1061
1062 /* when we arrive here, count contains the number of
1063 matches, and ctx->ptr points to the tail of the target
1064 string. check if the rest of the pattern matches,
1065 and backtrack if not. */
1066
1067 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
1068 RETURN_FAILURE;
1069
1070 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
1071 /* tail is empty. we're finished */
1072 state->ptr = ctx->ptr;
1073 RETURN_SUCCESS;
1074 }
1075
1076 LASTMARK_SAVE();
1077
1078 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
1079 /* tail starts with a literal. skip positions where
1080 the rest of the pattern cannot possibly match */
1081 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
1082 for (;;) {
1083 while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
1084 (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
1085 ctx->ptr--;
1086 ctx->count--;
1087 }
1088 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
1089 break;
1090 state->ptr = ctx->ptr;
1091 DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
1092 ctx->pattern+ctx->pattern[0]);
1093 if (ret) {
1094 RETURN_ON_ERROR(ret);
1095 RETURN_SUCCESS;
1096 }
1097
1098 LASTMARK_RESTORE();
1099
1100 ctx->ptr--;
1101 ctx->count--;
1102 }
1103
1104 } else {
1105 /* general case */
1106 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
1107 state->ptr = ctx->ptr;
1108 DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
1109 ctx->pattern+ctx->pattern[0]);
1110 if (ret) {
1111 RETURN_ON_ERROR(ret);
1112 RETURN_SUCCESS;
1113 }
1114 ctx->ptr--;
1115 ctx->count--;
1116 LASTMARK_RESTORE();
1117 }
1118 }
1119 RETURN_FAILURE;
1120
1121 case SRE_OP_MIN_REPEAT_ONE:
1122 /* match repeated sequence (minimizing regexp) */
1123
1124 /* this operator only works if the repeated item is
1125 exactly one character wide, and we're not already
1126 collecting backtracking points. for other cases,
1127 use the MIN_REPEAT operator */
1128
1129 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1130
1131 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1132 ctx->pattern[1], ctx->pattern[2]));
1133
1134 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
1135 RETURN_FAILURE; /* cannot match */
1136
1137 state->ptr = ctx->ptr;
1138
1139 if (ctx->pattern[1] == 0)
1140 ctx->count = 0;
1141 else {
1142 /* count using pattern min as the maximum */
1143 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
1144 RETURN_ON_ERROR(ret);
1145 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1146 if (ret < (Py_ssize_t) ctx->pattern[1])
1147 /* didn't match minimum number of times */
1148 RETURN_FAILURE;
1149 /* advance past minimum matches of repeat */
1150 ctx->count = ret;
1151 ctx->ptr += ctx->count;
1152 }
1153
1154 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
1155 /* tail is empty. we're finished */
1156 state->ptr = ctx->ptr;
1157 RETURN_SUCCESS;
1158
1159 } else {
1160 /* general case */
1161 LASTMARK_SAVE();
1162 while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
1163 || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
1164 state->ptr = ctx->ptr;
1165 DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1166 ctx->pattern+ctx->pattern[0]);
1167 if (ret) {
1168 RETURN_ON_ERROR(ret);
1169 RETURN_SUCCESS;
1170 }
1171 state->ptr = ctx->ptr;
1172 ret = SRE_COUNT(state, ctx->pattern+3, 1);
1173 RETURN_ON_ERROR(ret);
1174 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1175 if (ret == 0)
1176 break;
1177 assert(ret == 1);
1178 ctx->ptr++;
1179 ctx->count++;
1180 LASTMARK_RESTORE();
1181 }
1182 }
1183 RETURN_FAILURE;
1184
1185 case SRE_OP_REPEAT:
1186 /* create repeat context. all the hard work is done
1187 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1188 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
1189 TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
1190 ctx->pattern[1], ctx->pattern[2]));
1191
1192 /* install new repeat context */
1193 ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
1194 if (!ctx->u.rep) {
1195 PyErr_NoMemory();
1196 RETURN_FAILURE;
1197 }
1198 ctx->u.rep->count = -1;
1199 ctx->u.rep->pattern = ctx->pattern;
1200 ctx->u.rep->prev = state->repeat;
1201 ctx->u.rep->last_ptr = NULL;
1202 state->repeat = ctx->u.rep;
1203
1204 state->ptr = ctx->ptr;
1205 DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
1206 state->repeat = ctx->u.rep->prev;
1207 PyObject_FREE(ctx->u.rep);
1208
1209 if (ret) {
1210 RETURN_ON_ERROR(ret);
1211 RETURN_SUCCESS;
1212 }
1213 RETURN_FAILURE;
1214
1215 case SRE_OP_MAX_UNTIL:
1216 /* maximizing repeat */
1217 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1218
1219 /* FIXME: we probably need to deal with zero-width
1220 matches in here... */
1221
1222 ctx->u.rep = state->repeat;
1223 if (!ctx->u.rep)
1224 RETURN_ERROR(SRE_ERROR_STATE);
1225
1226 state->ptr = ctx->ptr;
1227
1228 ctx->count = ctx->u.rep->count+1;
1229
1230 TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
1231 ctx->ptr, ctx->count));
1232
1233 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1234 /* not enough matches */
1235 ctx->u.rep->count = ctx->count;
1236 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1237 ctx->u.rep->pattern+3);
1238 if (ret) {
1239 RETURN_ON_ERROR(ret);
1240 RETURN_SUCCESS;
1241 }
1242 ctx->u.rep->count = ctx->count-1;
1243 state->ptr = ctx->ptr;
1244 RETURN_FAILURE;
1245 }
1246
1247 if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1248 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1249 state->ptr != ctx->u.rep->last_ptr) {
1250 /* we may have enough matches, but if we can
1251 match another item, do so */
1252 ctx->u.rep->count = ctx->count;
1253 LASTMARK_SAVE();
1254 MARK_PUSH(ctx->lastmark);
1255 /* zero-width match protection */
1256 DATA_PUSH(&ctx->u.rep->last_ptr);
1257 ctx->u.rep->last_ptr = state->ptr;
1258 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1259 ctx->u.rep->pattern+3);
1260 DATA_POP(&ctx->u.rep->last_ptr);
1261 if (ret) {
1262 MARK_POP_DISCARD(ctx->lastmark);
1263 RETURN_ON_ERROR(ret);
1264 RETURN_SUCCESS;
1265 }
1266 MARK_POP(ctx->lastmark);
1267 LASTMARK_RESTORE();
1268 ctx->u.rep->count = ctx->count-1;
1269 state->ptr = ctx->ptr;
1270 }
1271
1272 /* cannot match more repeated items here. make sure the
1273 tail matches */
1274 state->repeat = ctx->u.rep->prev;
1275 DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
1276 RETURN_ON_SUCCESS(ret);
1277 state->repeat = ctx->u.rep;
1278 state->ptr = ctx->ptr;
1279 RETURN_FAILURE;
1280
1281 case SRE_OP_MIN_UNTIL:
1282 /* minimizing repeat */
1283 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1284
1285 ctx->u.rep = state->repeat;
1286 if (!ctx->u.rep)
1287 RETURN_ERROR(SRE_ERROR_STATE);
1288
1289 state->ptr = ctx->ptr;
1290
1291 ctx->count = ctx->u.rep->count+1;
1292
1293 TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern,
1294 ctx->ptr, ctx->count, ctx->u.rep->pattern));
1295
1296 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1297 /* not enough matches */
1298 ctx->u.rep->count = ctx->count;
1299 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1300 ctx->u.rep->pattern+3);
1301 if (ret) {
1302 RETURN_ON_ERROR(ret);
1303 RETURN_SUCCESS;
1304 }
1305 ctx->u.rep->count = ctx->count-1;
1306 state->ptr = ctx->ptr;
1307 RETURN_FAILURE;
1308 }
1309
1310 LASTMARK_SAVE();
1311
1312 /* see if the tail matches */
1313 state->repeat = ctx->u.rep->prev;
1314 DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
1315 if (ret) {
1316 RETURN_ON_ERROR(ret);
1317 RETURN_SUCCESS;
1318 }
1319
1320 state->repeat = ctx->u.rep;
1321 state->ptr = ctx->ptr;
1322
1323 LASTMARK_RESTORE();
1324
1325 if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1326 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1327 state->ptr == ctx->u.rep->last_ptr)
1328 RETURN_FAILURE;
1329
1330 ctx->u.rep->count = ctx->count;
1331 /* zero-width match protection */
1332 DATA_PUSH(&ctx->u.rep->last_ptr);
1333 ctx->u.rep->last_ptr = state->ptr;
1334 DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1335 ctx->u.rep->pattern+3);
1336 DATA_POP(&ctx->u.rep->last_ptr);
1337 if (ret) {
1338 RETURN_ON_ERROR(ret);
1339 RETURN_SUCCESS;
1340 }
1341 ctx->u.rep->count = ctx->count-1;
1342 state->ptr = ctx->ptr;
1343 RETURN_FAILURE;
1344
1345 case SRE_OP_GROUPREF:
1346 /* match backreference */
1347 TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1348 ctx->ptr, ctx->pattern[0]));
1349 i = ctx->pattern[0];
1350 {
1351 Py_ssize_t groupref = i+i;
1352 if (groupref >= state->lastmark) {
1353 RETURN_FAILURE;
1354 } else {
1355 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1356 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1357 if (!p || !e || e < p)
1358 RETURN_FAILURE;
1359 while (p < e) {
1360 if (ctx->ptr >= end || *ctx->ptr != *p)
1361 RETURN_FAILURE;
1362 p++; ctx->ptr++;
1363 }
1364 }
1365 }
1366 ctx->pattern++;
1367 break;
1368
1369 case SRE_OP_GROUPREF_IGNORE:
1370 /* match backreference */
1371 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1372 ctx->ptr, ctx->pattern[0]));
1373 i = ctx->pattern[0];
1374 {
1375 Py_ssize_t groupref = i+i;
1376 if (groupref >= state->lastmark) {
1377 RETURN_FAILURE;
1378 } else {
1379 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1380 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1381 if (!p || !e || e < p)
1382 RETURN_FAILURE;
1383 while (p < e) {
1384 if (ctx->ptr >= end ||
1385 state->lower(*ctx->ptr) != state->lower(*p))
1386 RETURN_FAILURE;
1387 p++; ctx->ptr++;
1388 }
1389 }
1390 }
1391 ctx->pattern++;
1392 break;
1393
1394 case SRE_OP_GROUPREF_EXISTS:
1395 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1396 ctx->ptr, ctx->pattern[0]));
1397 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1398 i = ctx->pattern[0];
1399 {
1400 Py_ssize_t groupref = i+i;
1401 if (groupref >= state->lastmark) {
1402 ctx->pattern += ctx->pattern[1];
1403 break;
1404 } else {
1405 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1406 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1407 if (!p || !e || e < p) {
1408 ctx->pattern += ctx->pattern[1];
1409 break;
1410 }
1411 }
1412 }
1413 ctx->pattern += 2;
1414 break;
1415
1416 case SRE_OP_ASSERT:
1417 /* assert subpattern */
1418 /* <ASSERT> <skip> <back> <pattern> */
1419 TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1420 ctx->ptr, ctx->pattern[1]));
1421 state->ptr = ctx->ptr - ctx->pattern[1];
1422 if (state->ptr < state->beginning)
1423 RETURN_FAILURE;
1424 DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
1425 RETURN_ON_FAILURE(ret);
1426 ctx->pattern += ctx->pattern[0];
1427 break;
1428
1429 case SRE_OP_ASSERT_NOT:
1430 /* assert not subpattern */
1431 /* <ASSERT_NOT> <skip> <back> <pattern> */
1432 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1433 ctx->ptr, ctx->pattern[1]));
1434 state->ptr = ctx->ptr - ctx->pattern[1];
1435 if (state->ptr >= state->beginning) {
1436 DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
1437 if (ret) {
1438 RETURN_ON_ERROR(ret);
1439 RETURN_FAILURE;
1440 }
1441 }
1442 ctx->pattern += ctx->pattern[0];
1443 break;
1444
1445 case SRE_OP_FAILURE:
1446 /* immediate failure */
1447 TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1448 RETURN_FAILURE;
1449
1450 default:
1451 TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1452 ctx->pattern[-1]));
1453 RETURN_ERROR(SRE_ERROR_ILLEGAL);
1454 }
1455 }
1456
1457 exit:
1458 ctx_pos = ctx->last_ctx_pos;
1459 jump = ctx->jump;
1460 DATA_POP_DISCARD(ctx);
1461 if (ctx_pos == -1)
1462 return ret;
1463 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1464
1465 switch (jump) {
1466 case JUMP_MAX_UNTIL_2:
1467 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1468 goto jump_max_until_2;
1469 case JUMP_MAX_UNTIL_3:
1470 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1471 goto jump_max_until_3;
1472 case JUMP_MIN_UNTIL_2:
1473 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1474 goto jump_min_until_2;
1475 case JUMP_MIN_UNTIL_3:
1476 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1477 goto jump_min_until_3;
1478 case JUMP_BRANCH:
1479 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1480 goto jump_branch;
1481 case JUMP_MAX_UNTIL_1:
1482 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1483 goto jump_max_until_1;
1484 case JUMP_MIN_UNTIL_1:
1485 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1486 goto jump_min_until_1;
1487 case JUMP_REPEAT:
1488 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1489 goto jump_repeat;
1490 case JUMP_REPEAT_ONE_1:
1491 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1492 goto jump_repeat_one_1;
1493 case JUMP_REPEAT_ONE_2:
1494 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1495 goto jump_repeat_one_2;
1496 case JUMP_MIN_REPEAT_ONE:
1497 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1498 goto jump_min_repeat_one;
1499 case JUMP_ASSERT:
1500 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1501 goto jump_assert;
1502 case JUMP_ASSERT_NOT:
1503 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1504 goto jump_assert_not;
1505 case JUMP_NONE:
1506 TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
1507 ctx->ptr, ret));
1508 break;
1509 }
1510
1511 return ret; /* should never get here */
1512 }
1513
1514 LOCAL(Py_ssize_t)
1515 SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1516 {
1517 SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1518 SRE_CHAR* end = (SRE_CHAR *)state->end;
1519 Py_ssize_t status = 0;
1520 Py_ssize_t prefix_len = 0;
1521 Py_ssize_t prefix_skip = 0;
1522 SRE_CODE* prefix = NULL;
1523 SRE_CODE* charset = NULL;
1524 SRE_CODE* overlap = NULL;
1525 int flags = 0;
1526
1527 if (pattern[0] == SRE_OP_INFO) {
1528 /* optimization info block */
1529 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
1530
1531 flags = pattern[2];
1532
1533 if (pattern[3] > 1) {
1534 /* adjust end point (but make sure we leave at least one
1535 character in there, so literal search will work) */
1536 end -= pattern[3]-1;
1537 if (end <= ptr)
1538 end = ptr+1;
1539 }
1540
1541 if (flags & SRE_INFO_PREFIX) {
1542 /* pattern starts with a known prefix */
1543 /* <length> <skip> <prefix data> <overlap data> */
1544 prefix_len = pattern[5];
1545 prefix_skip = pattern[6];
1546 prefix = pattern + 7;
1547 overlap = prefix + prefix_len - 1;
1548 } else if (flags & SRE_INFO_CHARSET)
1549 /* pattern starts with a character from a known set */
1550 /* <charset> */
1551 charset = pattern + 5;
1552
1553 pattern += 1 + pattern[1];
1554 }
1555
1556 TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
1557 prefix, prefix_len, prefix_skip));
1558 TRACE(("charset = %p\n", charset));
1559
1560 #if defined(USE_FAST_SEARCH)
1561 if (prefix_len > 1) {
1562 /* pattern starts with a known prefix. use the overlap
1563 table to skip forward as fast as we possibly can */
1564 Py_ssize_t i = 0;
1565 end = (SRE_CHAR *)state->end;
1566 while (ptr < end) {
1567 for (;;) {
1568 if ((SRE_CODE) ptr[0] != prefix[i]) {
1569 if (!i)
1570 break;
1571 else
1572 i = overlap[i];
1573 } else {
1574 if (++i == prefix_len) {
1575 /* found a potential match */
1576 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1577 state->start = ptr + 1 - prefix_len;
1578 state->ptr = ptr + 1 - prefix_len + prefix_skip;
1579 if (flags & SRE_INFO_LITERAL)
1580 return 1; /* we got all of it */
1581 status = SRE_MATCH(state, pattern + 2*prefix_skip);
1582 if (status != 0)
1583 return status;
1584 /* close but no cigar -- try again */
1585 i = overlap[i];
1586 }
1587 break;
1588 }
1589 }
1590 ptr++;
1591 }
1592 return 0;
1593 }
1594 #endif
1595
1596 if (pattern[0] == SRE_OP_LITERAL) {
1597 /* pattern starts with a literal character. this is used
1598 for short prefixes, and if fast search is disabled */
1599 SRE_CODE chr = pattern[1];
1600 end = (SRE_CHAR *)state->end;
1601 for (;;) {
1602 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1603 ptr++;
1604 if (ptr >= end)
1605 return 0;
1606 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1607 state->start = ptr;
1608 state->ptr = ++ptr;
1609 if (flags & SRE_INFO_LITERAL)
1610 return 1; /* we got all of it */
1611 status = SRE_MATCH(state, pattern + 2);
1612 if (status != 0)
1613 break;
1614 }
1615 } else if (charset) {
1616 /* pattern starts with a character from a known set */
1617 end = (SRE_CHAR *)state->end;
1618 for (;;) {
1619 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
1620 ptr++;
1621 if (ptr >= end)
1622 return 0;
1623 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1624 state->start = ptr;
1625 state->ptr = ptr;
1626 status = SRE_MATCH(state, pattern);
1627 if (status != 0)
1628 break;
1629 ptr++;
1630 }
1631 } else
1632 /* general case */
1633 while (ptr <= end) {
1634 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1635 state->start = state->ptr = ptr++;
1636 status = SRE_MATCH(state, pattern);
1637 if (status != 0)
1638 break;
1639 }
1640
1641 return status;
1642 }
1643
1644 LOCAL(int)
1645 SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)
1646 {
1647 /* check if given string is a literal template (i.e. no escapes) */
1648 while (len-- > 0)
1649 if (*ptr++ == '\\')
1650 return 0;
1651 return 1;
1652 }
1653
1654 #if !defined(SRE_RECURSIVE)
1655
1656 /* -------------------------------------------------------------------- */
1657 /* factories and destructors */
1658
1659 /* see sre.h for object declarations */
1660 static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
1661 static PyObject*pattern_scanner(PatternObject*, PyObject*);
1662
1663 static PyObject *
1664 sre_codesize(PyObject* self, PyObject *unused)
1665 {
1666 return PyInt_FromSize_t(sizeof(SRE_CODE));
1667 }
1668
1669 static PyObject *
1670 sre_getlower(PyObject* self, PyObject* args)
1671 {
1672 int character, flags;
1673 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
1674 return NULL;
1675 if (flags & SRE_FLAG_LOCALE)
1676 return Py_BuildValue("i", sre_lower_locale(character));
1677 if (flags & SRE_FLAG_UNICODE)
1678 #if defined(HAVE_UNICODE)
1679 return Py_BuildValue("i", sre_lower_unicode(character));
1680 #else
1681 return Py_BuildValue("i", sre_lower_locale(character));
1682 #endif
1683 return Py_BuildValue("i", sre_lower(character));
1684 }
1685
1686 LOCAL(void)
1687 state_reset(SRE_STATE* state)
1688 {
1689 /* FIXME: dynamic! */
1690 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
1691
1692 state->lastmark = -1;
1693 state->lastindex = -1;
1694
1695 state->repeat = NULL;
1696
1697 data_stack_dealloc(state);
1698 }
1699
1700 static void*
1701 getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
1702 {
1703 /* given a python object, return a data pointer, a length (in
1704 characters), and a character size. return NULL if the object
1705 is not a string (or not compatible) */
1706
1707 PyBufferProcs *buffer;
1708 Py_ssize_t size, bytes;
1709 int charsize;
1710 void* ptr;
1711
1712 #if defined(HAVE_UNICODE)
1713 if (PyUnicode_Check(string)) {
1714 /* unicode strings doesn't always support the buffer interface */
1715 ptr = (void*) PyUnicode_AS_DATA(string);
1716 /* bytes = PyUnicode_GET_DATA_SIZE(string); */
1717 size = PyUnicode_GET_SIZE(string);
1718 charsize = sizeof(Py_UNICODE);
1719
1720 } else {
1721 #endif
1722
1723 /* get pointer to string buffer */
1724 buffer = Py_TYPE(string)->tp_as_buffer;
1725 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1726 buffer->bf_getsegcount(string, NULL) != 1) {
1727 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
1728 return NULL;
1729 }
1730
1731 /* determine buffer size */
1732 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1733 if (bytes < 0) {
1734 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1735 return NULL;
1736 }
1737
1738 /* determine character size */
1739 #if PY_VERSION_HEX >= 0x01060000
1740 size = PyObject_Size(string);
1741 #else
1742 size = PyObject_Length(string);
1743 #endif
1744
1745 if (PyString_Check(string) || bytes == size)
1746 charsize = 1;
1747 #if defined(HAVE_UNICODE)
1748 else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
1749 charsize = sizeof(Py_UNICODE);
1750 #endif
1751 else {
1752 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1753 return NULL;
1754 }
1755
1756 #if defined(HAVE_UNICODE)
1757 }
1758 #endif
1759
1760 *p_length = size;
1761 *p_charsize = charsize;
1762
1763 return ptr;
1764 }
1765
1766 LOCAL(PyObject*)
1767 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1768 Py_ssize_t start, Py_ssize_t end)
1769 {
1770 /* prepare state object */
1771
1772 Py_ssize_t length;
1773 int charsize;
1774 void* ptr;
1775
1776 memset(state, 0, sizeof(SRE_STATE));
1777
1778 state->lastmark = -1;
1779 state->lastindex = -1;
1780
1781 ptr = getstring(string, &length, &charsize);
1782 if (!ptr)
1783 return NULL;
1784
1785 /* adjust boundaries */
1786 if (start < 0)
1787 start = 0;
1788 else if (start > length)
1789 start = length;
1790
1791 if (end < 0)
1792 end = 0;
1793 else if (end > length)
1794 end = length;
1795
1796 state->charsize = charsize;
1797
1798 state->beginning = ptr;
1799
1800 state->start = (void*) ((char*) ptr + start * state->charsize);
1801 state->end = (void*) ((char*) ptr + end * state->charsize);
1802
1803 Py_INCREF(string);
1804 state->string = string;
1805 state->pos = start;
1806 state->endpos = end;
1807
1808 if (pattern->flags & SRE_FLAG_LOCALE)
1809 state->lower = sre_lower_locale;
1810 else if (pattern->flags & SRE_FLAG_UNICODE)
1811 #if defined(HAVE_UNICODE)
1812 state->lower = sre_lower_unicode;
1813 #else
1814 state->lower = sre_lower_locale;
1815 #endif
1816 else
1817 state->lower = sre_lower;
1818
1819 return string;
1820 }
1821
1822 LOCAL(void)
1823 state_fini(SRE_STATE* state)
1824 {
1825 Py_XDECREF(state->string);
1826 data_stack_dealloc(state);
1827 }
1828
/* Character index of `member` within the subject: the byte distance from
   (state)->beginning divided by (state)->charsize. */
#define STATE_OFFSET(state, member) \
    (((const char*)(member) - (const char*)(state)->beginning) \
     / (state)->charsize)
1832
1833 LOCAL(PyObject*)
1834 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
1835 {
1836 Py_ssize_t i, j;
1837
1838 index = (index - 1) * 2;
1839
1840 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
1841 if (empty)
1842 /* want empty string */
1843 i = j = 0;
1844 else {
1845 Py_INCREF(Py_None);
1846 return Py_None;
1847 }
1848 } else {
1849 i = STATE_OFFSET(state, state->mark[index]);
1850 j = STATE_OFFSET(state, state->mark[index+1]);
1851 }
1852
1853 return PySequence_GetSlice(string, i, j);
1854 }
1855
1856 static void
1857 pattern_error(int status)
1858 {
1859 switch (status) {
1860 case SRE_ERROR_RECURSION_LIMIT:
1861 PyErr_SetString(
1862 PyExc_RuntimeError,
1863 "maximum recursion limit exceeded"
1864 );
1865 break;
1866 case SRE_ERROR_MEMORY:
1867 PyErr_NoMemory();
1868 break;
1869 case SRE_ERROR_INTERRUPTED:
1870 /* An exception has already been raised, so let it fly */
1871 break;
1872 default:
1873 /* other error codes indicate compiler/engine bugs */
1874 PyErr_SetString(
1875 PyExc_RuntimeError,
1876 "internal error in regular expression engine"
1877 );
1878 }
1879 }
1880
1881 static void
1882 pattern_dealloc(PatternObject* self)
1883 {
1884 if (self->weakreflist != NULL)
1885 PyObject_ClearWeakRefs((PyObject *) self);
1886 Py_XDECREF(self->pattern);
1887 Py_XDECREF(self->groupindex);
1888 Py_XDECREF(self->indexgroup);
1889 PyObject_DEL(self);
1890 }
1891
1892 static int
1893 check_args_size(const char *name, PyObject* args, PyObject* kw, int n)
1894 {
1895 Py_ssize_t m = PyTuple_GET_SIZE(args) + (kw ? PyDict_Size(kw) : 0);
1896 if (m <= n)
1897 return 1;
1898 PyErr_Format(PyExc_TypeError,
1899 "%s() takes at most %d positional arguments (%zd given)",
1900 name, n, m);
1901 return 0;
1902 }
1903
1904 static PyObject*
1905 fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
1906 {
1907 if (string2 != NULL) {
1908 char buf[100];
1909 if (string != NULL) {
1910 PyErr_Format(PyExc_TypeError,
1911 "Argument given by name ('%s') and position (1)",
1912 oldname);
1913 return NULL;
1914 }
1915 sprintf(buf, "The '%s' keyword parameter name is deprecated. "
1916 "Use 'string' instead.", oldname);
1917 if (PyErr_Warn(PyExc_DeprecationWarning, buf) < 0)
1918 return NULL;
1919 return string2;
1920 }
1921 if (string == NULL) {
1922 PyErr_SetString(PyExc_TypeError,
1923 "Required argument 'string' (pos 1) not found");
1924 return NULL;
1925 }
1926 return string;
1927 }
1928
1929 static PyObject*
1930 pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
1931 {
1932 SRE_STATE state;
1933 int status;
1934
1935 PyObject *string = NULL, *string2 = NULL;
1936 Py_ssize_t start = 0;
1937 Py_ssize_t end = PY_SSIZE_T_MAX;
1938 static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
1939 if (!check_args_size("match", args, kw, 3))
1940 return NULL;
1941
1942 if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnnO:match", kwlist,
1943 &string, &start, &end, &string2))
1944 return NULL;
1945
1946 string = fix_string_param(string, string2, "pattern");
1947 if (!string)
1948 return NULL;
1949
1950 string = state_init(&state, self, string, start, end);
1951 if (!string)
1952 return NULL;
1953
1954 state.ptr = state.start;
1955
1956 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1957
1958 if (state.charsize == 1) {
1959 status = sre_match(&state, PatternObject_GetCode(self));
1960 } else {
1961 #if defined(HAVE_UNICODE)
1962 status = sre_umatch(&state, PatternObject_GetCode(self));
1963 #endif
1964 }
1965
1966 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1967 if (PyErr_Occurred())
1968 return NULL;
1969
1970 state_fini(&state);
1971
1972 return pattern_new_match(self, &state, status);
1973 }
1974
1975 static PyObject*
1976 pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
1977 {
1978 SRE_STATE state;
1979 int status;
1980
1981 PyObject *string = NULL, *string2 = NULL;
1982 Py_ssize_t start = 0;
1983 Py_ssize_t end = PY_SSIZE_T_MAX;
1984 static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
1985 if (!check_args_size("search", args, kw, 3))
1986 return NULL;
1987
1988 if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnnO:search", kwlist,
1989 &string, &start, &end, &string2))
1990 return NULL;
1991
1992 string = fix_string_param(string, string2, "pattern");
1993 if (!string)
1994 return NULL;
1995
1996 string = state_init(&state, self, string, start, end);
1997 if (!string)
1998 return NULL;
1999
2000 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
2001
2002 if (state.charsize == 1) {
2003 status = sre_search(&state, PatternObject_GetCode(self));
2004 } else {
2005 #if defined(HAVE_UNICODE)
2006 status = sre_usearch(&state, PatternObject_GetCode(self));
2007 #endif
2008 }
2009
2010 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
2011
2012 state_fini(&state);
2013
2014 if (PyErr_Occurred())
2015 return NULL;
2016
2017 return pattern_new_match(self, &state, status);
2018 }
2019
2020 static PyObject*
2021 call(char* module, char* function, PyObject* args)
2022 {
2023 PyObject* name;
2024 PyObject* mod;
2025 PyObject* func;
2026 PyObject* result;
2027
2028 if (!args)
2029 return NULL;
2030 name = PyString_FromString(module);
2031 if (!name)
2032 return NULL;
2033 mod = PyImport_Import(name);
2034 Py_DECREF(name);
2035 if (!mod)
2036 return NULL;
2037 func = PyObject_GetAttrString(mod, function);
2038 Py_DECREF(mod);
2039 if (!func)
2040 return NULL;
2041 result = PyObject_CallObject(func, args);
2042 Py_DECREF(func);
2043 Py_DECREF(args);
2044 return result;
2045 }
2046
2047 #ifdef USE_BUILTIN_COPY
2048 static int
2049 deepcopy(PyObject** object, PyObject* memo)
2050 {
2051 PyObject* copy;
2052
2053 copy = call(
2054 "copy", "deepcopy",
2055 PyTuple_Pack(2, *object, memo)
2056 );
2057 if (!copy)
2058 return 0;
2059
2060 Py_DECREF(*object);
2061 *object = copy;
2062
2063 return 1; /* success */
2064 }
2065 #endif
2066
2067 static PyObject*
2068 join_list(PyObject* list, PyObject* string)
2069 {
2070 /* join list elements */
2071
2072 PyObject* joiner;
2073 #if PY_VERSION_HEX >= 0x01060000
2074 PyObject* function;
2075 PyObject* args;
2076 #endif
2077 PyObject* result;
2078
2079 joiner = PySequence_GetSlice(string, 0, 0);
2080 if (!joiner)
2081 return NULL;
2082
2083 if (PyList_GET_SIZE(list) == 0) {
2084 Py_DECREF(list);
2085 return joiner;
2086 }
2087
2088 #if PY_VERSION_HEX >= 0x01060000
2089 function = PyObject_GetAttrString(joiner, "join");
2090 if (!function) {
2091 Py_DECREF(joiner);
2092 return NULL;
2093 }
2094 args = PyTuple_New(1);
2095 if (!args) {
2096 Py_DECREF(function);
2097 Py_DECREF(joiner);
2098 return NULL;
2099 }
2100 PyTuple_SET_ITEM(args, 0, list);
2101 result = PyObject_CallObject(function, args);
2102 Py_DECREF(args); /* also removes list */
2103 Py_DECREF(function);
2104 #else
2105 result = call(
2106 "string", "join",
2107 PyTuple_Pack(2, list, joiner)
2108 );
2109 #endif
2110 Py_DECREF(joiner);
2111
2112 return result;
2113 }
2114
2115 static PyObject*
2116 pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
2117 {
2118 SRE_STATE state;
2119 PyObject* list;
2120 int status;
2121 Py_ssize_t i, b, e;
2122
2123 PyObject *string = NULL, *string2 = NULL;
2124 Py_ssize_t start = 0;
2125 Py_ssize_t end = PY_SSIZE_T_MAX;
2126 static char* kwlist[] = { "string", "pos", "endpos", "source", NULL };
2127 if (!check_args_size("findall", args, kw, 3))
2128 return NULL;
2129
2130 if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnnO:findall", kwlist,
2131 &string, &start, &end, &string2))
2132 return NULL;
2133
2134 string = fix_string_param(string, string2, "source");
2135 if (!string)
2136 return NULL;
2137
2138 string = state_init(&state, self, string, start, end);
2139 if (!string)
2140 return NULL;
2141
2142 list = PyList_New(0);
2143 if (!list) {
2144 state_fini(&state);
2145 return NULL;
2146 }
2147
2148 while (state.start <= state.end) {
2149
2150 PyObject* item;
2151
2152 state_reset(&state);
2153
2154 state.ptr = state.start;
2155
2156 if (state.charsize == 1) {
2157 status = sre_search(&state, PatternObject_GetCode(self));
2158 } else {
2159 #if defined(HAVE_UNICODE)
2160 status = sre_usearch(&state, PatternObject_GetCode(self));
2161 #endif
2162 }
2163
2164 if (PyErr_Occurred())
2165 goto error;
2166
2167 if (status <= 0) {
2168 if (status == 0)
2169 break;
2170 pattern_error(status);
2171 goto error;
2172 }
2173
2174 /* don't bother to build a match object */
2175 switch (self->groups) {
2176 case 0:
2177 b = STATE_OFFSET(&state, state.start);
2178 e = STATE_OFFSET(&state, state.ptr);
2179 item = PySequence_GetSlice(string, b, e);
2180 if (!item)
2181 goto error;
2182 break;
2183 case 1:
2184 item = state_getslice(&state, 1, string, 1);
2185 if (!item)
2186 goto error;
2187 break;
2188 default:
2189 item = PyTuple_New(self->groups);
2190 if (!item)
2191 goto error;
2192 for (i = 0; i < self->groups; i++) {
2193 PyObject* o = state_getslice(&state, i+1, string, 1);
2194 if (!o) {
2195 Py_DECREF(item);
2196 goto error;
2197 }
2198 PyTuple_SET_ITEM(item, i, o);
2199 }
2200 break;
2201 }
2202
2203 status = PyList_Append(list, item);
2204 Py_DECREF(item);
2205 if (status < 0)
2206 goto error;
2207
2208 if (state.ptr == state.start)
2209 state.start = (void*) ((char*) state.ptr + state.charsize);
2210 else
2211 state.start = state.ptr;
2212
2213 }
2214
2215 state_fini(&state);
2216 return list;
2217
2218 error:
2219 Py_DECREF(list);
2220 state_fini(&state);
2221 return NULL;
2222
2223 }
2224
#if PY_VERSION_HEX >= 0x02020000
/* finditer(...) --> iterator.
 *
 * Creates a scanner for the given arguments, looks up its "search"
 * attribute and wraps it with PyCallIter_New() using None as the sentinel.
 * Returns the new iterator, or NULL with an exception set.
 */
static PyObject*
pattern_finditer(PatternObject* pattern, PyObject* args)
{
    PyObject* iterator = NULL;
    PyObject* search;
    PyObject* scanner = pattern_scanner(pattern, args);

    if (scanner) {
        search = PyObject_GetAttrString(scanner, "search");
        /* the scanner reference held here is no longer needed */
        Py_DECREF(scanner);
        if (search) {
            iterator = PyCallIter_New(search, Py_None);
            Py_DECREF(search);
        }
    }

    return iterator;
}
#endif
2248
2249 static PyObject*
2250 pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
2251 {
2252 SRE_STATE state;
2253 PyObject* list;
2254 PyObject* item;
2255 int status;
2256 Py_ssize_t n;
2257 Py_ssize_t i;
2258 void* last;
2259
2260 PyObject *string = NULL, *string2 = NULL;
2261 Py_ssize_t maxsplit = 0;
2262 static char* kwlist[] = { "string", "maxsplit", "source", NULL };
2263 if (!check_args_size("split", args, kw, 2))
2264 return NULL;
2265
2266 if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnO:split", kwlist,
2267 &string, &maxsplit, &string2))
2268 return NULL;
2269
2270 string = fix_string_param(string, string2, "source");
2271 if (!string)
2272 return NULL;
2273
2274 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
2275 if (!string)
2276 return NULL;
2277
2278 list = PyList_New(0);
2279 if (!list) {
2280 state_fini(&state);
2281 return NULL;
2282 }
2283
2284 n = 0;
2285 last = state.start;
2286
2287 while (!maxsplit || n < maxsplit) {
2288
2289 state_reset(&state);
2290
2291 state.ptr = state.start;
2292
2293 if (state.charsize == 1) {
2294 status = sre_search(&state, PatternObject_GetCode(self));
2295 } else {
2296 #if defined(HAVE_UNICODE)
2297 status = sre_usearch(&state, PatternObject_GetCode(self));
2298 #endif
2299 }
2300
2301 if (PyErr_Occurred())
2302 goto error;
2303
2304 if (status <= 0) {
2305 if (status == 0)
2306 break;
2307 pattern_error(status);
2308 goto error;
2309 }
2310
2311 if (state.start == state.ptr) {
2312 if (last == state.end)
2313 break;
2314 /* skip one character */
2315 state.start = (void*) ((char*) state.ptr + state.charsize);
2316 continue;
2317 }
2318
2319 /* get segment before this match */
2320 item = PySequence_GetSlice(
2321 string, STATE_OFFSET(&state, last),
2322 STATE_OFFSET(&state, state.start)
2323 );
2324 if (!item)
2325 goto error;
2326 status = PyList_Append(list, item);
2327 Py_DECREF(item);
2328 if (status < 0)
2329 goto error;
2330
2331 /* add groups (if any) */
2332 for (i = 0; i < self->groups; i++) {
2333 item = state_getslice(&state, i+1, string, 0);
2334 if (!item)
2335 goto error;
2336 status = PyList_Append(list, item);
2337 Py_DECREF(item);
2338 if (status < 0)
2339 goto error;
2340 }
2341
2342 n = n + 1;
2343
2344 last = state.start = state.ptr;
2345
2346 }
2347
2348 /* get segment following last match (even if empty) */
2349 item = PySequence_GetSlice(
2350 string, STATE_OFFSET(&state, last), state.endpos
2351 );
2352 if (!item)
2353 goto error;
2354 status = PyList_Append(list, item);
2355 Py_DECREF(item);
2356 if (status < 0)
2357 goto error;
2358
2359 state_fini(&state);
2360 return list;
2361
2362 error:
2363 Py_DECREF(list);
2364 state_fini(&state);
2365 return NULL;
2366
2367 }
2368
2369 static PyObject*
2370 pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
2371 Py_ssize_t count, Py_ssize_t subn)
2372 {
2373 SRE_STATE state;
2374 PyObject* list;
2375 PyObject* item;
2376 PyObject* filter;
2377 PyObject* args;
2378 PyObject* match;
2379 void* ptr;
2380 int status;
2381 Py_ssize_t n;
2382 Py_ssize_t i, b, e;
2383 int bint;
2384 int filter_is_callable;
2385
2386 if (PyCallable_Check(ptemplate)) {
2387 /* sub/subn takes either a function or a template */
2388 filter = ptemplate;
2389 Py_INCREF(filter);
2390 filter_is_callable = 1;
2391 } else {
2392 /* if not callable, check if it's a literal string */
2393 int literal;
2394 ptr = getstring(ptemplate, &n, &bint);
2395 b = bint;
2396 if (ptr) {
2397 if (b == 1) {
2398 literal = sre_literal_template((unsigned char *)ptr, n);
2399 } else {
2400 #if defined(HAVE_UNICODE)
2401 literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
2402 #endif
2403 }
2404 } else {
2405 PyErr_Clear();
2406 literal = 0;
2407 }
2408 if (literal) {
2409 filter = ptemplate;
2410 Py_INCREF(filter);
2411 filter_is_callable = 0;
2412 } else {
2413 /* not a literal; hand it over to the template compiler */
2414 filter = call(
2415 SRE_PY_MODULE, "_subx",
2416 PyTuple_Pack(2, self, ptemplate)
2417 );
2418 if (!filter)
2419 return NULL;
2420 filter_is_callable = PyCallable_Check(filter);
2421 }
2422 }
2423
2424 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
2425 if (!string) {
2426 Py_DECREF(filter);
2427 return NULL;
2428 }
2429
2430 list = PyList_New(0);
2431 if (!list) {
2432 Py_DECREF(filter);
2433 state_fini(&state);
2434 return NULL;
2435 }
2436
2437 n = i = 0;
2438
2439 while (!count || n < count) {
2440
2441 state_reset(&state);
2442
2443 state.ptr = state.start;
2444
2445 if (state.charsize == 1) {
2446 status = sre_search(&state, PatternObject_GetCode(self));
2447 } else {
2448 #if defined(HAVE_UNICODE)
2449 status = sre_usearch(&state, PatternObject_GetCode(self));
2450 #endif
2451 }
2452
2453 if (PyErr_Occurred())
2454 goto error;
2455
2456 if (status <= 0) {
2457 if (status == 0)
2458 break;
2459 pattern_error(status);
2460 goto error;
2461 }
2462
2463 b = STATE_OFFSET(&state, state.start);
2464 e = STATE_OFFSET(&state, state.ptr);
2465
2466 if (i < b) {
2467 /* get segment before this match */
2468 item = PySequence_GetSlice(string, i, b);
2469 if (!item)
2470 goto error;
2471 status = PyList_Append(list, item);
2472 Py_DECREF(item);
2473 if (status < 0)
2474 goto error;
2475
2476 } else if (i == b && i == e && n > 0)
2477 /* ignore empty match on latest position */
2478 goto next;
2479
2480 if (filter_is_callable) {
2481 /* pass match object through filter */
2482 match = pattern_new_match(self, &state, 1);
2483 if (!match)
2484 goto error;
2485 args = PyTuple_Pack(1, match);
2486 if (!args) {
2487 Py_DECREF(match);
2488 goto error;
2489 }
2490 item = PyObject_CallObject(filter, args);
2491 Py_DECREF(args);
2492 Py_DECREF(match);
2493 if (!item)
2494 goto error;
2495 } else {
2496 /* filter is literal string */
2497 item = filter;
2498 Py_INCREF(item);
2499 }
2500
2501 /* add to list */
2502 if (item != Py_None) {
2503 status = PyList_Append(list, item);
2504 Py_DECREF(item);
2505 if (status < 0)
2506 goto error;
2507 }
2508
2509 i = e;
2510 n = n + 1;
2511
2512 next:
2513 /* move on */
2514 if (state.ptr == state.start)
2515 state.start = (void*) ((char*) state.ptr + state.charsize);
2516 else
2517 state.start = state.ptr;
2518
2519 }
2520
2521 /* get segment following last match */
2522 if (i < state.endpos) {
2523 item = PySequence_GetSlice(string, i, state.endpos);
2524 if (!item)
2525 goto error;
2526 status = PyList_Append(list, item);
2527 Py_DECREF(item);
2528 if (status < 0)
2529 goto error;
2530 }
2531
2532 state_fini(&state);
2533
2534 Py_DECREF(filter);
2535
2536 /* convert list to single string (also removes list) */
2537 item = join_list(list, string);
2538
2539 if (!item)
2540 return NULL;
2541
2542 if (subn)
2543 return Py_BuildValue("Nn", item, n);
2544
2545 return item;
2546
2547 error:
2548 Py_DECREF(list);
2549 state_fini(&state);
2550 Py_DECREF(filter);
2551 return NULL;
2552
2553 }
2554
2555 static PyObject*
2556 pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
2557 {
2558 PyObject* ptemplate;
2559 PyObject* string;
2560 Py_ssize_t count = 0;
2561 static char* kwlist[] = { "repl", "string", "count", NULL };
2562 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
2563 &ptemplate, &string, &count))
2564 return NULL;
2565
2566 return pattern_subx(self, ptemplate, string, count, 0);
2567 }
2568
2569 static PyObject*
2570 pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
2571 {
2572 PyObject* ptemplate;
2573 PyObject* string;
2574 Py_ssize_t count = 0;
2575 static char* kwlist[] = { "repl", "string", "count", NULL };
2576 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
2577 &ptemplate, &string, &count))
2578 return NULL;
2579
2580 return pattern_subx(self, ptemplate, string, count, 1);
2581 }
2582
2583 static PyObject*
2584 pattern_copy(PatternObject* self, PyObject *unused)
2585 {
2586 #ifdef USE_BUILTIN_COPY
2587 PatternObject* copy;
2588 int offset;
2589
2590 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
2591 if (!copy)
2592 return NULL;
2593
2594 offset = offsetof(PatternObject, groups);
2595
2596 Py_XINCREF(self->groupindex);
2597 Py_XINCREF(self->indexgroup);
2598 Py_XINCREF(self->pattern);
2599
2600 memcpy((char*) copy + offset, (char*) self + offset,
2601 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
2602 copy->weakreflist = NULL;
2603
2604 return (PyObject*) copy;
2605 #else
2606 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
2607 return NULL;
2608 #endif
2609 }
2610
2611 static PyObject*
2612 pattern_deepcopy(PatternObject* self, PyObject* memo)
2613 {
2614 #ifdef USE_BUILTIN_COPY
2615 PatternObject* copy;
2616
2617 copy = (PatternObject*) pattern_copy(self);
2618 if (!copy)
2619 return NULL;
2620
2621 if (!deepcopy(&copy->groupindex, memo) ||
2622 !deepcopy(&copy->indexgroup, memo) ||
2623 !deepcopy(&copy->pattern, memo)) {
2624 Py_DECREF(copy);
2625 return NULL;
2626 }
2627
2628 #else
2629 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
2630 return NULL;
2631 #endif
2632 }
2633
/* Docstrings for the pattern object.  The *_doc names defined here are the
   ones referenced by the pattern_methods table and (pattern_doc) by
   Pattern_Type below. */

/* match() */
2634 PyDoc_STRVAR(pattern_match_doc,
2635 "match(string[, pos[, endpos]]) --> match object or None.\n\
2636 Matches zero or more characters at the beginning of the string");
2637
/* search() */
2638 PyDoc_STRVAR(pattern_search_doc,
2639 "search(string[, pos[, endpos]]) --> match object or None.\n\
2640 Scan through string looking for a match, and return a corresponding\n\
2641 match object instance. Return None if no position in the string matches.");
2642
/* split() */
2643 PyDoc_STRVAR(pattern_split_doc,
2644 "split(string[, maxsplit = 0]) --> list.\n\
2645 Split string by the occurrences of pattern.");
2646
/* findall() */
2647 PyDoc_STRVAR(pattern_findall_doc,
2648 "findall(string[, pos[, endpos]]) --> list.\n\
2649 Return a list of all non-overlapping matches of pattern in string.");
2650
/* finditer() */
2651 PyDoc_STRVAR(pattern_finditer_doc,
2652 "finditer(string[, pos[, endpos]]) --> iterator.\n\
2653 Return an iterator over all non-overlapping matches for the \n\
2654 RE pattern in string. For each match, the iterator returns a\n\
2655 match object.");
2656
/* sub() */
2657 PyDoc_STRVAR(pattern_sub_doc,
2658 "sub(repl, string[, count = 0]) --> newstring\n\
2659 Return the string obtained by replacing the leftmost non-overlapping\n\
2660 occurrences of pattern in string by the replacement repl.");
2661
/* subn() */
2662 PyDoc_STRVAR(pattern_subn_doc,
2663 "subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
2664 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
2665 the leftmost non-overlapping occurrences of pattern with the\n\
2666 replacement repl.");
2667
/* type-level docstring */
2668 PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
2669
/* Method table for pattern objects (installed as tp_methods of
   Pattern_Type).  Each entry is {name, C function, calling convention,
   docstring}; the list ends with a NULL sentinel entry. */
2670 static PyMethodDef pattern_methods[] = {
/* methods taking positional and keyword arguments */
2671 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
2672 pattern_match_doc},
2673 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
2674 pattern_search_doc},
2675 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
2676 pattern_sub_doc},
2677 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
2678 pattern_subn_doc},
2679 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
2680 pattern_split_doc},
2681 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
2682 pattern_findall_doc},
/* finditer is only registered when the guard on pattern_finditer holds */
2683 #if PY_VERSION_HEX >= 0x02020000
2684 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,
2685 pattern_finditer_doc},
2686 #endif
/* the remaining entries carry no docstring */
2687 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
2688 {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
2689 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
/* sentinel */
2690 {NULL, NULL}
2691 };
2692
/* PAT_OFF(x): byte offset of member x inside PatternObject. */
2693 #define PAT_OFF(x) offsetof(PatternObject, x)
/* Read-only attributes exposed on pattern objects (installed as tp_members
   of Pattern_Type): pattern, flags, groups and groupindex are read straight
   from the PatternObject members of the same name. */
2694 static PyMemberDef pattern_members[] = {
2695 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
2696 {"flags", T_INT, PAT_OFF(flags), READONLY},
2697 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
2698 {"groupindex", T_OBJECT, PAT_OFF(groupindex), READONLY},
2699 {NULL} /* Sentinel */
2700 };
2701
/* Type object for compiled patterns.  It is a variable-size type: the
   basic size is sizeof(PatternObject) and each item is one SRE_CODE word.
   Slots not listed after tp_members are left to static zero
   initialisation. */
2702 statichere PyTypeObject Pattern_Type = {
2703 PyObject_HEAD_INIT(NULL)
/* ob_size, tp_name */
2704 0, "_" SRE_MODULE ".SRE_Pattern",
/* tp_basicsize, tp_itemsize */
2705 sizeof(PatternObject), sizeof(SRE_CODE),
2706 (destructor)pattern_dealloc, /*tp_dealloc*/
2707 0, /* tp_print */
2708 0, /* tp_getattr */
2709 0, /* tp_setattr */
2710 0, /* tp_compare */
2711 0, /* tp_repr */
2712 0, /* tp_as_number */
2713 0, /* tp_as_sequence */
2714 0, /* tp_as_mapping */
2715 0, /* tp_hash */
2716 0, /* tp_call */
2717 0, /* tp_str */
2718 0, /* tp_getattro */
2719 0, /* tp_setattro */
2720 0, /* tp_as_buffer */
2721 Py_TPFLAGS_DEFAULT, /* tp_flags */
2722 pattern_doc, /* tp_doc */
2723 0, /* tp_traverse */
2724 0, /* tp_clear */
2725 0, /* tp_richcompare */
2726 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2727 0, /* tp_iter */
2728 0, /* tp_iternext */
2729 pattern_methods, /* tp_methods */
2730 pattern_members, /* tp_members */
2731 };
2732
2733 static int _validate(PatternObject *self); /* Forward */
2734
2735 static PyObject *
2736 _compile(PyObject* self_, PyObject* args)
2737 {
2738 /* "compile" pattern descriptor to pattern object */
2739
2740 PatternObject* self;
2741 Py_ssize_t i, n;
2742
2743 PyObject* pattern;
2744 int flags = 0;
2745 PyObject* code;
2746 Py_ssize_t groups = 0;
2747 PyObject* groupindex = NULL;
2748 PyObject* indexgroup = NULL;
2749 if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
2750 &PyList_Type, &code, &groups,
2751 &groupindex, &indexgroup))
2752 return NULL;
2753
2754 n = PyList_GET_SIZE(code);
2755 /* coverity[ampersand_in_size] */
2756 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
2757 if (!self)
2758 return NULL;
2759 self->weakreflist = NULL;
2760 self->pattern = NULL;
2761 self->groupindex = NULL;
2762 self->indexgroup = NULL;
2763
2764 self->codesize = n;
2765
2766 for (i = 0; i < n; i++) {
2767 PyObject *o = PyList_GET_ITEM(code, i);
2768 unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o)
2769 : PyLong_AsUnsignedLong(o);
2770 if (value == (unsigned long)-1 && PyErr_Occurred()) {
2771 if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
2772 PyErr_SetString(PyExc_OverflowError,
2773 "regular expression code size limit exceeded");
2774 }
2775 break;
2776 }
2777 self->code[i] = (SRE_CODE) value;
2778 if ((unsigned long) self->code[i] != value) {
2779 PyErr_SetString(PyExc_OverflowError,
2780 "regular expression code size limit exceeded");
2781 break;
2782 }
2783 }
2784
2785 if (PyErr_Occurred()) {
2786 Py_DECREF(self);
2787 return NULL;
2788 }
2789
2790 Py_INCREF(pattern);
2791 self->pattern = pattern;
2792
2793 self->flags = flags;
2794
2795 self->groups = groups;
2796
2797 Py_XINCREF(groupindex);
2798 self->groupindex = groupindex;
2799
2800 Py_XINCREF(indexgroup);
2801 self->indexgroup = indexgroup;
2802
2803 self->weakreflist = NULL;
2804
2805 if (!_validate(self)) {
2806 Py_DECREF(self);
2807 return NULL;
2808 }
2809
2810 return (PyObject*) self;
2811 }
2812
2813 /* -------------------------------------------------------------------- */
2814 /* Code validation */
2815
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

   J---------J-------T--------T
    \         \_____/        /
     \______________________/

   but this is not:

   J---------J-------T--------T
    \_________\_____/        /
               \____________/

   It also helps that SRE_CODE is always an unsigned type.
*/
2837
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator; takes a parenthesised printf argument
   list, e.g. VTRACE(("x=%d\n", x)) */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns 0 from the enclosing function */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros expect the enclosing function to declare `code` and `end`
   pointers plus the variable they assign (`op`, `arg` or `skip`).  Each one
   FAILs if `code` has already reached `end`. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Reads a skip count and FAILs if skip-adj would reach past `end`.  The
   skip is measured from the skip word itself, which is why the bound is
   checked before `code` is advanced.  `adj` is parenthesised so that an
   expression argument is subtracted as a whole. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > end-code)                      \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
2878
2879 static int
2880 _validate_charset(SRE_CODE *code, SRE_CODE *end)
2881 {
2882 /* Some variables are manipulated by the macros above */
2883 SRE_CODE op;
2884 SRE_CODE arg;
2885 SRE_CODE offset;
2886 int i;
2887
2888 while (code < end) {
2889 GET_OP;
2890 switch (op) {
2891
2892 case SRE_OP_NEGATE:
2893 break;
2894
2895 case SRE_OP_LITERAL:
2896 GET_ARG;
2897 break;
2898
2899 case SRE_OP_RANGE:
2900 GET_ARG;
2901 GET_ARG;
2902 break;
2903
2904 case SRE_OP_CHARSET:
2905 offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */
2906 if (offset > end-code)
2907 FAIL;
2908 code += offset;
2909 break;
2910
2911 case SRE_OP_BIGCHARSET:
2912 GET_ARG; /* Number of blocks */
2913 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
2914 if (offset > end-code)
2915 FAIL;
2916 /* Make sure that each byte points to a valid block */
2917 for (i = 0; i < 256; i++) {
2918 if (((unsigned char *)code)[i] >= arg)
2919 FAIL;
2920 }
2921 code += offset;
2922 offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */
2923 if (offset > end-code)
2924 FAIL;
2925 code += offset;
2926 break;
2927
2928 case SRE_OP_CATEGORY:
2929 GET_ARG;
2930 switch (arg) {
2931 case SRE_CATEGORY_DIGIT:
2932 case SRE_CATEGORY_NOT_DIGIT:
2933 case SRE_CATEGORY_SPACE:
2934 case SRE_CATEGORY_NOT_SPACE:
2935 case SRE_CATEGORY_WORD:
2936 case SRE_CATEGORY_NOT_WORD:
2937 case SRE_CATEGORY_LINEBREAK:
2938 case SRE_CATEGORY_NOT_LINEBREAK:
2939 case SRE_CATEGORY_LOC_WORD:
2940 case SRE_CATEGORY_LOC_NOT_WORD:
2941 case SRE_CATEGORY_UNI_DIGIT:
2942 case SRE_CATEGORY_UNI_NOT_DIGIT:
2943 case SRE_CATEGORY_UNI_SPACE:
2944 case SRE_CATEGORY_UNI_NOT_SPACE:
2945 case SRE_CATEGORY_UNI_WORD:
2946 case SRE_CATEGORY_UNI_NOT_WORD:
2947 case SRE_CATEGORY_UNI_LINEBREAK:
2948 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
2949 break;
2950 default:
2951 FAIL;
2952 }
2953 break;
2954
2955 default:
2956 FAIL;
2957
2958 }
2959 }
2960
2961 return 1;
2962 }
2963
2964 static int
2965 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
2966 {
2967 /* Some variables are manipulated by the macros above */
2968 SRE_CODE op;
2969 SRE_CODE arg;
2970 SRE_CODE skip;
2971
2972 VTRACE(("code=%p, end=%p\n", code, end));
2973
2974 if (code > end)
2975 FAIL;
2976
2977 while (code < end) {
2978 GET_OP;
2979 switch (op) {
2980
2981 case SRE_OP_MARK:
2982 /* We don't check whether marks are properly nested; the
2983 sre_match() code is robust even if they don't, and the worst
2984 you can get is nonsensical match results. */
2985 GET_ARG;
2986 if (arg > 2*groups+1) {
2987 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
2988 FAIL;
2989 }
2990 break;
2991
2992 case SRE_OP_LITERAL:
2993 case SRE_OP_NOT_LITERAL:
2994 case SRE_OP_LITERAL_IGNORE:
2995 case SRE_OP_NOT_LITERAL_IGNORE:
2996 GET_ARG;
2997 /* The arg is just a character, nothing to check */
2998 break;
2999
3000 case SRE_OP_SUCCESS:
3001 case SRE_OP_FAILURE:
3002 /* Nothing to check; these normally end the matching process */
3003 break;
3004
3005 case SRE_OP_AT:
3006 GET_ARG;
3007 switch (arg) {
3008 case SRE_AT_BEGINNING:
3009 case SRE_AT_BEGINNING_STRING:
3010 case SRE_AT_BEGINNING_LINE:
3011 case SRE_AT_END:
3012 case SRE_AT_END_LINE:
3013 case SRE_AT_END_STRING:
3014 case SRE_AT_BOUNDARY:
3015 case SRE_AT_NON_BOUNDARY:
3016 case SRE_AT_LOC_BOUNDARY:
3017 case SRE_AT_LOC_NON_BOUNDARY:
3018 case SRE_AT_UNI_BOUNDARY:
3019 case SRE_AT_UNI_NON_BOUNDARY:
3020 break;
3021 default:
3022 FAIL;
3023 }
3024 break;
3025
3026 case SRE_OP_ANY:
3027 case SRE_OP_ANY_ALL:
3028 /* These have no operands */
3029 break;
3030
3031 case SRE_OP_IN:
3032 case SRE_OP_IN_IGNORE:
3033 GET_SKIP;
3034 /* Stop 1 before the end; we check the FAILURE below */
3035 if (!_validate_charset(code, code+skip-2))
3036 FAIL;
3037 if (code[skip-2] != SRE_OP_FAILURE)
3038 FAIL;
3039 code += skip-1;
3040 break;
3041
3042 case SRE_OP_INFO:
3043 {
3044 /* A minimal info field is
3045 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
3046 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
3047 more follows. */
3048 SRE_CODE flags, i;
3049 SRE_CODE *newcode;
3050 GET_SKIP;
3051 newcode = code+skip-1;
3052 GET_ARG; flags = arg;
3053 GET_ARG; /* min */
3054 GET_ARG; /* max */
3055 /* Check that only valid flags are present */
3056 if ((flags & ~(SRE_INFO_PREFIX |
3057 SRE_INFO_LITERAL |
3058 SRE_INFO_CHARSET)) != 0)
3059 FAIL;
3060 /* PREFIX and CHARSET are mutually exclusive */
3061 if ((flags & SRE_INFO_PREFIX) &&
3062 (flags & SRE_INFO_CHARSET))
3063 FAIL;
3064 /* LITERAL implies PREFIX */
3065 if ((flags & SRE_INFO_LITERAL) &&
3066 !(flags & SRE_INFO_PREFIX))
3067 FAIL;
3068 /* Validate the prefix */
3069 if (flags & SRE_INFO_PREFIX) {
3070 SRE_CODE prefix_len;
3071 GET_ARG; prefix_len = arg;
3072 GET_ARG; /* prefix skip */
3073 /* Here comes the prefix string */
3074 if (prefix_len > newcode-code)
3075 FAIL;
3076 code += prefix_len;
3077 /* And here comes the overlap table */
3078 if (prefix_len > newcode-code)
3079 FAIL;
3080 /* Each overlap value should be < prefix_len */
3081 for (i = 0; i < prefix_len; i++) {
3082 if (code[i] >= prefix_len)
3083 FAIL;
3084 }
3085 code += prefix_len;
3086 }
3087 /* Validate the charset */
3088 if (flags & SRE_INFO_CHARSET) {
3089 if (!_validate_charset(code, newcode-1))
3090 FAIL;
3091 if (newcode[-1] != SRE_OP_FAILURE)
3092 FAIL;
3093 code = newcode;
3094 }
3095 else if (code != newcode) {
3096 VTRACE(("code=%p, newcode=%p\n", code, newcode));
3097 FAIL;
3098 }
3099 }
3100 break;
3101
3102 case SRE_OP_BRANCH:
3103 {
3104 SRE_CODE *target = NULL;
3105 for (;;) {
3106 GET_SKIP;
3107 if (skip == 0)
3108 break;
3109 /* Stop 2 before the end; we check the JUMP below */
3110 if (!_validate_inner(code, code+skip-3, groups))
3111 FAIL;
3112 code += skip-3;
3113 /* Check that it ends with a JUMP, and that each JUMP
3114 has the same target */
3115 GET_OP;
3116 if (op != SRE_OP_JUMP)
3117 FAIL;
3118 GET_SKIP;
3119 if (target == NULL)
3120 target = code+skip-1;
3121 else if (code+skip-1 != target)
3122 FAIL;
3123 }
3124 }
3125 break;
3126
3127 case SRE_OP_REPEAT_ONE:
3128 case SRE_OP_MIN_REPEAT_ONE:
3129 {
3130 SRE_CODE min, max;
3131 GET_SKIP;
3132 GET_ARG; min = arg;
3133 GET_ARG; max = arg;
3134 if (min > max)
3135 FAIL;
3136 if (max > SRE_MAXREPEAT)
3137 FAIL;
3138 if (!_validate_inner(code, code+skip-4, groups))
3139 FAIL;
3140 code += skip-4;
3141 GET_OP;
3142 if (op != SRE_OP_SUCCESS)
3143 FAIL;
3144 }
3145 break;
3146
3147 case SRE_OP_REPEAT:
3148 {
3149 SRE_CODE min, max;
3150 GET_SKIP;
3151 GET_ARG; min = arg;
3152 GET_ARG; max = arg;
3153 if (min > max)
3154 FAIL;
3155 if (max > SRE_MAXREPEAT)
3156 FAIL;
3157 if (!_validate_inner(code, code+skip-3, groups))
3158 FAIL;
3159 code += skip-3;
3160 GET_OP;
3161 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
3162 FAIL;
3163 }
3164 break;
3165
3166 case SRE_OP_GROUPREF:
3167 case SRE_OP_GROUPREF_IGNORE:
3168 GET_ARG;
3169 if (arg >= groups)
3170 FAIL;
3171 break;
3172
3173 case SRE_OP_GROUPREF_EXISTS:
3174 /* The regex syntax for this is: '(?(group)then|else)', where
3175 'group' is either an integer group number or a group name,
3176 'then' and 'else' are sub-regexes, and 'else' is optional. */
3177 GET_ARG;
3178 if (arg >= groups)
3179 FAIL;
3180 GET_SKIP_ADJ(1);
3181 code--; /* The skip is relative to the first arg! */
3182 /* There are two possibilities here: if there is both a 'then'
3183 part and an 'else' part, the generated code looks like:
3184
3185 GROUPREF_EXISTS
3186 <group>
3187 <skipyes>
3188 ...then part...
3189 JUMP
3190 <skipno>
3191 (<skipyes> jumps here)
3192 ...else part...
3193 (<skipno> jumps here)
3194
3195 If there is only a 'then' part, it looks like:
3196
3197 GROUPREF_EXISTS
3198 <group>
3199 <skip>
3200 ...then part...
3201 (<skip> jumps here)
3202
3203 There is no direct way to decide which it is, and we don't want
3204 to allow arbitrary jumps anywhere in the code; so we just look
3205 for a JUMP opcode preceding our skip target.
3206 */
3207 if (skip >= 3 && skip-3 < end-code &&
3208 code[skip-3] == SRE_OP_JUMP)
3209 {
3210 VTRACE(("both then and else parts present\n"));
3211 if (!_validate_inner(code+1, code+skip-3, groups))
3212 FAIL;
3213 code += skip-2; /* Position after JUMP, at <skipno> */
3214 GET_SKIP;
3215 if (!_validate_inner(code, code+skip-1, groups))
3216 FAIL;
3217 code += skip-1;
3218 }
3219 else {
3220 VTRACE(("only a then part present\n"));
3221 if (!_validate_inner(code+1, code+skip-1, groups))
3222 FAIL;
3223 code += skip-1;
3224 }
3225 break;
3226
3227 case SRE_OP_ASSERT:
3228 case SRE_OP_ASSERT_NOT:
3229 GET_SKIP;
3230 GET_ARG; /* 0 for lookahead, width for lookbehind */
3231 code--; /* Back up over arg to simplify math below */
3232 if (arg & 0x80000000)
3233 FAIL; /* Width too large */
3234 /* Stop 1 before the end; we check the SUCCESS below */
3235 if (!_validate_inner(code+1, code+skip-2, groups))
3236 FAIL;
3237 code += skip-2;
3238 GET_OP;
3239 if (op != SRE_OP_SUCCESS)
3240 FAIL;
3241 break;
3242
3243 default:
3244 FAIL;
3245
3246 }
3247 }
3248
3249 VTRACE(("okay\n"));
3250 return 1;
3251 }
3252
3253 static int
3254 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
3255 {
3256 if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
3257 FAIL;
3258 if (groups == 0) /* fix for simplejson */
3259 groups = 100; /* 100 groups should always be safe */
3260 return _validate_inner(code, end-1, groups);
3261 }
3262
3263 static int
3264 _validate(PatternObject *self)
3265 {
3266 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
3267 {
3268 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
3269 return 0;
3270 }
3271 else
3272 VTRACE(("Success!\n"));
3273 return 1;
3274 }
3275
3276 /* -------------------------------------------------------------------- */
3277 /* match methods */
3278
3279 static void
3280 match_dealloc(MatchObject* self)
3281 {
3282 Py_XDECREF(self->regs);
3283 Py_XDECREF(self->string);
3284 Py_DECREF(self->pattern);
3285 PyObject_DEL(self);
3286 }
3287
3288 static PyObject*
3289 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
3290 {
3291 if (index < 0 || index >= self->groups) {
3292 /* raise IndexError if we were given a bad group number */
3293 PyErr_SetString(
3294 PyExc_IndexError,
3295 "no such group"
3296 );
3297 return NULL;
3298 }
3299
3300 index *= 2;
3301
3302 if (self->string == Py_None || self->mark[index] < 0) {
3303 /* return default value if the string or group is undefined */
3304 Py_INCREF(def);
3305 return def;
3306 }
3307
3308 return PySequence_GetSlice(
3309 self->string, self->mark[index], self->mark[index+1]
3310 );
3311 }
3312
3313 static Py_ssize_t
3314 match_getindex(MatchObject* self, PyObject* index)
3315 {
3316 Py_ssize_t i;
3317
3318 if (PyInt_Check(index) || PyLong_Check(index))
3319 return PyInt_AsSsize_t(index);
3320
3321 i = -1;
3322
3323 if (self->pattern->groupindex) {
3324 index = PyObject_GetItem(self->pattern->groupindex, index);
3325 if (index) {
3326 if (PyInt_Check(index) || PyLong_Check(index))
3327 i = PyInt_AsSsize_t(index);
3328 Py_DECREF(index);
3329 } else
3330 PyErr_Clear();
3331 }
3332
3333 return i;
3334 }
3335
3336 static PyObject*
3337 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
3338 {
3339 return match_getslice_by_index(self, match_getindex(self, index), def);
3340 }
3341
3342 static PyObject*
3343 match_expand(MatchObject* self, PyObject* ptemplate)
3344 {
3345 /* delegate to Python code */
3346 return call(
3347 SRE_PY_MODULE, "_expand",
3348 PyTuple_Pack(3, self->pattern, self, ptemplate)
3349 );
3350 }
3351
3352 static PyObject*
3353 match_group(MatchObject* self, PyObject* args)
3354 {
3355 PyObject* result;
3356 Py_ssize_t i, size;
3357
3358 size = PyTuple_GET_SIZE(args);
3359
3360 switch (size) {
3361 case 0:
3362 result = match_getslice(self, Py_False, Py_None);
3363 break;
3364 case 1:
3365 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
3366 break;
3367 default:
3368 /* fetch multiple items */
3369 result = PyTuple_New(size);
3370 if (!result)
3371 return NULL;
3372 for (i = 0; i < size; i++) {
3373 PyObject* item = match_getslice(
3374 self, PyTuple_GET_ITEM(args, i), Py_None
3375 );
3376 if (!item) {
3377 Py_DECREF(result);
3378 return NULL;
3379 }
3380 PyTuple_SET_ITEM(result, i, item);
3381 }
3382 break;
3383 }
3384 return result;
3385 }
3386
3387 static PyObject*
3388 match_groups(MatchObject* self, PyObject* args, PyObject* kw)
3389 {
3390 PyObject* result;
3391 Py_ssize_t index;
3392
3393 PyObject* def = Py_None;
3394 static char* kwlist[] = { "default", NULL };
3395 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
3396 return NULL;
3397
3398 result = PyTuple_New(self->groups-1);
3399 if (!result)
3400 return NULL;
3401
3402 for (index = 1; index < self->groups; index++) {
3403 PyObject* item;
3404 item = match_getslice_by_index(self, index, def);
3405 if (!item) {
3406 Py_DECREF(result);
3407 return NULL;
3408 }
3409 PyTuple_SET_ITEM(result, index-1, item);
3410 }
3411
3412 return result;
3413 }
3414
3415 static PyObject*
3416 match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
3417 {
3418 PyObject* result;
3419 PyObject* keys;
3420 Py_ssize_t index;
3421
3422 PyObject* def = Py_None;
3423 static char* kwlist[] = { "default", NULL };
3424 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
3425 return NULL;
3426
3427 result = PyDict_New();
3428 if (!result || !self->pattern->groupindex)
3429 return result;
3430
3431 keys = PyMapping_Keys(self->pattern->groupindex);
3432 if (!keys)
3433 goto failed;
3434
3435 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
3436 int status;
3437 PyObject* key;
3438 PyObject* value;
3439 key = PyList_GET_ITEM(keys, index);
3440 if (!key)
3441 goto failed;
3442 value = match_getslice(self, key, def);
3443 if (!value) {
3444 Py_DECREF(key);
3445 goto failed;
3446 }
3447 status = PyDict_SetItem(result, key, value);
3448 Py_DECREF(value);
3449 if (status < 0)
3450 goto failed;
3451 }
3452
3453 Py_DECREF(keys);
3454
3455 return result;
3456
3457 failed:
3458 Py_XDECREF(keys);
3459 Py_DECREF(result);
3460 return NULL;
3461 }
3462
3463 static PyObject*
3464 match_start(MatchObject* self, PyObject* args)
3465 {
3466 Py_ssize_t index;
3467
3468 PyObject* index_ = Py_False; /* zero */
3469 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
3470 return NULL;
3471
3472 index = match_getindex(self, index_);
3473
3474 if (index < 0 || index >= self->groups) {
3475 PyErr_SetString(
3476 PyExc_IndexError,
3477 "no such group"
3478 );
3479 return NULL;
3480 }
3481
3482 /* mark is -1 if group is undefined */
3483 return PyInt_FromSsize_t(self->mark[index*2]);
3484 }
3485
3486 static PyObject*
3487 match_end(MatchObject* self, PyObject* args)
3488 {
3489 Py_ssize_t index;
3490
3491 PyObject* index_ = Py_False; /* zero */
3492 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
3493 return NULL;
3494
3495 index = match_getindex(self, index_);
3496
3497 if (index < 0 || index >= self->groups) {
3498 PyErr_SetString(
3499 PyExc_IndexError,
3500 "no such group"
3501 );
3502 return NULL;
3503 }
3504
3505 /* mark is -1 if group is undefined */
3506 return PyInt_FromSsize_t(self->mark[index*2+1]);
3507 }
3508
3509 LOCAL(PyObject*)
3510 _pair(Py_ssize_t i1, Py_ssize_t i2)
3511 {
3512 PyObject* pair;
3513 PyObject* item;
3514
3515 pair = PyTuple_New(2);
3516 if (!pair)
3517 return NULL;
3518
3519 item = PyInt_FromSsize_t(i1);
3520 if (!item)
3521 goto error;
3522 PyTuple_SET_ITEM(pair, 0, item);
3523
3524 item = PyInt_FromSsize_t(i2);
3525 if (!item)
3526 goto error;
3527 PyTuple_SET_ITEM(pair, 1, item);
3528
3529 return pair;
3530
3531 error:
3532 Py_DECREF(pair);
3533 return NULL;
3534 }
3535
3536 static PyObject*
3537 match_span(MatchObject* self, PyObject* args)
3538 {
3539 Py_ssize_t index;
3540
3541 PyObject* index_ = Py_False; /* zero */
3542 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
3543 return NULL;
3544
3545 index = match_getindex(self, index_);
3546
3547 if (index < 0 || index >= self->groups) {
3548 PyErr_SetString(
3549 PyExc_IndexError,
3550 "no such group"
3551 );
3552 return NULL;
3553 }
3554
3555 /* marks are -1 if group is undefined */
3556 return _pair(self->mark[index*2], self->mark[index*2+1]);
3557 }
3558
3559 static PyObject*
3560 match_regs(MatchObject* self)
3561 {
3562 PyObject* regs;
3563 PyObject* item;
3564 Py_ssize_t index;
3565
3566 regs = PyTuple_New(self->groups);
3567 if (!regs)
3568 return NULL;
3569
3570 for (index = 0; index < self->groups; index++) {
3571 item = _pair(self->mark[index*2], self->mark[index*2+1]);
3572 if (!item) {
3573 Py_DECREF(regs);
3574 return NULL;
3575 }
3576 PyTuple_SET_ITEM(regs, index, item);
3577 }
3578
3579 Py_INCREF(regs);
3580 self->regs = regs;
3581
3582 return regs;
3583 }
3584
3585 static PyObject*
3586 match_copy(MatchObject* self, PyObject *unused)
3587 {
3588 #ifdef USE_BUILTIN_COPY
3589 MatchObject* copy;
3590 Py_ssize_t slots, offset;
3591
3592 slots = 2 * (self->pattern->groups+1);
3593
3594 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
3595 if (!copy)
3596 return NULL;
3597
3598 /* this value a constant, but any compiler should be able to
3599 figure that out all by itself */
3600 offset = offsetof(MatchObject, string);
3601
3602 Py_XINCREF(self->pattern);
3603 Py_XINCREF(self->string);
3604 Py_XINCREF(self->regs);
3605
3606 memcpy((char*) copy + offset, (char*) self + offset,
3607 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
3608
3609 return (PyObject*) copy;
3610 #else
3611 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
3612 return NULL;
3613 #endif
3614 }
3615
3616 static PyObject*
3617 match_deepcopy(MatchObject* self, PyObject* memo)
3618 {
3619 #ifdef USE_BUILTIN_COPY
3620 MatchObject* copy;
3621
3622 copy = (MatchObject*) match_copy(self);
3623 if (!copy)
3624 return NULL;
3625
3626 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
3627 !deepcopy(&copy->string, memo) ||
3628 !deepcopy(&copy->regs, memo)) {
3629 Py_DECREF(copy);
3630 return NULL;
3631 }
3632
3633 #else
3634 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
3635 return NULL;
3636 #endif
3637 }
3638
/* Docstrings for the match type (match_doc, used as Match_Type's tp_doc)
   and for the entries of match_methods.  The text is what users see at
   run time, so it is part of the program's behaviour. */
3639 PyDoc_STRVAR(match_doc,
3640 "The result of re.match() and re.search().\n\
3641 Match objects always have a boolean value of True.");
3642
/* docstring for match.group() */
3643 PyDoc_STRVAR(match_group_doc,
3644 "group([group1, ...]) -> str or tuple.\n\
3645 Return subgroup(s) of the match by indices or names.\n\
3646 For 0 returns the entire match.");
3647
/* docstring for match.start() */
3648 PyDoc_STRVAR(match_start_doc,
3649 "start([group=0]) -> int.\n\
3650 Return index of the start of the substring matched by group.");
3651
/* docstring for match.end() */
3652 PyDoc_STRVAR(match_end_doc,
3653 "end([group=0]) -> int.\n\
3654 Return index of the end of the substring matched by group.");
3655
/* docstring for match.span() */
3656 PyDoc_STRVAR(match_span_doc,
3657 "span([group]) -> tuple.\n\
3658 For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
3659
/* docstring for match.groups() */
3660 PyDoc_STRVAR(match_groups_doc,
3661 "groups([default=None]) -> tuple.\n\
3662 Return a tuple containing all the subgroups of the match, from 1.\n\
3663 The default argument is used for groups\n\
3664 that did not participate in the match");
3665
/* docstring for match.groupdict() */
3666 PyDoc_STRVAR(match_groupdict_doc,
3667 "groupdict([default=None]) -> dict.\n\
3668 Return a dictionary containing all the named subgroups of the match,\n\
3669 keyed by the subgroup name. The default argument is used for groups\n\
3670 that did not participate in the match");
3671
/* docstring for match.expand() */
3672 PyDoc_STRVAR(match_expand_doc,
3673 "expand(template) -> str.\n\
3674 Return the string obtained by doing backslash substitution\n\
3675 on the string template, as done by the sub() method.");
3676
3677 static PyMethodDef match_methods[] = {
3678 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
3679 {"start", (PyCFunction) match_start, METH_VARARGS, match_start_doc},
3680 {"end", (PyCFunction) match_end, METH_VARARGS, match_end_doc},
3681 {"span", (PyCFunction) match_span, METH_VARARGS, match_span_doc},
3682 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS,
3683 match_groups_doc},
3684 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS,
3685 match_groupdict_doc},
3686 {"expand", (PyCFunction) match_expand, METH_O, match_expand_doc},
3687 {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
3688 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
3689 {NULL, NULL}
3690 };
3691
3692 static PyObject *
3693 match_lastindex_get(MatchObject *self)
3694 {
3695 if (self->lastindex >= 0)
3696 return PyInt_FromSsize_t(self->lastindex);
3697 Py_INCREF(Py_None);
3698 return Py_None;
3699 }
3700
3701 static PyObject *
3702 match_lastgroup_get(MatchObject *self)
3703 {
3704 if (self->pattern->indexgroup && self->lastindex >= 0) {
3705 PyObject* result = PySequence_GetItem(
3706 self->pattern->indexgroup, self->lastindex
3707 );
3708 if (result)
3709 return result;
3710 PyErr_Clear();
3711 }
3712 Py_INCREF(Py_None);
3713 return Py_None;
3714 }
3715
3716 static PyObject *
3717 match_regs_get(MatchObject *self)
3718 {
3719 if (self->regs) {
3720 Py_INCREF(self->regs);
3721 return self->regs;
3722 } else
3723 return match_regs(self);
3724 }
3725
3726 static PyGetSetDef match_getset[] = {
3727 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
3728 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
3729 {"regs", (getter)match_regs_get, (setter)NULL},
3730 {NULL}
3731 };
3732
3733 #define MATCH_OFF(x) offsetof(MatchObject, x)
3734 static PyMemberDef match_members[] = {
3735 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
3736 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
3737 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
3738 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
3739 {NULL}
3740 };
3741
3742
3743 /* FIXME: implement setattr("string", None) as a special case (to
3744 detach the associated string, if any) */
3745
3746 static PyTypeObject Match_Type = {
3747 PyVarObject_HEAD_INIT(NULL, 0)
3748 "_" SRE_MODULE ".SRE_Match",
3749 sizeof(MatchObject), sizeof(Py_ssize_t),
3750 (destructor)match_dealloc, /* tp_dealloc */
3751 0, /* tp_print */
3752 0, /* tp_getattr */
3753 0, /* tp_setattr */
3754 0, /* tp_compare */
3755 0, /* tp_repr */
3756 0, /* tp_as_number */
3757 0, /* tp_as_sequence */
3758 0, /* tp_as_mapping */
3759 0, /* tp_hash */
3760 0, /* tp_call */
3761 0, /* tp_str */
3762 0, /* tp_getattro */
3763 0, /* tp_setattro */
3764 0, /* tp_as_buffer */
3765 Py_TPFLAGS_DEFAULT,
3766 match_doc, /* tp_doc */
3767 0, /* tp_traverse */
3768 0, /* tp_clear */
3769 0, /* tp_richcompare */
3770 0, /* tp_weaklistoffset */
3771 0, /* tp_iter */
3772 0, /* tp_iternext */
3773 match_methods, /* tp_methods */
3774 match_members, /* tp_members */
3775 match_getset, /* tp_getset */
3776 };
3777
3778 static PyObject*
3779 pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
3780 {
3781 /* create match object (from state object) */
3782
3783 MatchObject* match;
3784 Py_ssize_t i, j;
3785 char* base;
3786 int n;
3787
3788 if (status > 0) {
3789
3790 /* create match object (with room for extra group marks) */
3791 /* coverity[ampersand_in_size] */
3792 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
3793 2*(pattern->groups+1));
3794 if (!match)
3795 return NULL;
3796
3797 Py_INCREF(pattern);
3798 match->pattern = pattern;
3799
3800 Py_INCREF(state->string);
3801 match->string = state->string;
3802
3803 match->regs = NULL;
3804 match->groups = pattern->groups+1;
3805
3806 /* fill in group slices */
3807
3808 base = (char*) state->beginning;
3809 n = state->charsize;
3810
3811 match->mark[0] = ((char*) state->start - base) / n;
3812 match->mark[1] = ((char*) state->ptr - base) / n;
3813
3814 for (i = j = 0; i < pattern->groups; i++, j+=2)
3815 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
3816 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
3817 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
3818 } else
3819 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
3820
3821 match->pos = state->pos;
3822 match->endpos = state->endpos;
3823
3824 match->lastindex = state->lastindex;
3825
3826 return (PyObject*) match;
3827
3828 } else if (status == 0) {
3829
3830 /* no match */
3831 Py_INCREF(Py_None);
3832 return Py_None;
3833
3834 }
3835
3836 /* internal error */
3837 pattern_error(status);
3838 return NULL;
3839 }
3840
3841
3842 /* -------------------------------------------------------------------- */
3843 /* scanner methods (experimental) */
3844
3845 static void
3846 scanner_dealloc(ScannerObject* self)
3847 {
3848 state_fini(&self->state);
3849 Py_XDECREF(self->pattern);
3850 PyObject_DEL(self);
3851 }
3852
3853 static PyObject*
3854 scanner_match(ScannerObject* self, PyObject *unused)
3855 {
3856 SRE_STATE* state = &self->state;
3857 PyObject* match;
3858 int status;
3859
3860 state_reset(state);
3861
3862 state->ptr = state->start;
3863
3864 if (state->charsize == 1) {
3865 status = sre_match(state, PatternObject_GetCode(self->pattern));
3866 } else {
3867 #if defined(HAVE_UNICODE)
3868 status = sre_umatch(state, PatternObject_GetCode(self->pattern));
3869 #endif
3870 }
3871 if (PyErr_Occurred())
3872 return NULL;
3873
3874 match = pattern_new_match((PatternObject*) self->pattern,
3875 state, status);
3876
3877 if (status == 0 || state->ptr == state->start)
3878 state->start = (void*) ((char*) state->ptr + state->charsize);
3879 else
3880 state->start = state->ptr;
3881
3882 return match;
3883 }
3884
3885
3886 static PyObject*
3887 scanner_search(ScannerObject* self, PyObject *unused)
3888 {
3889 SRE_STATE* state = &self->state;
3890 PyObject* match;
3891 int status;
3892
3893 state_reset(state);
3894
3895 state->ptr = state->start;
3896
3897 if (state->charsize == 1) {
3898 status = sre_search(state, PatternObject_GetCode(self->pattern));
3899 } else {
3900 #if defined(HAVE_UNICODE)
3901 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
3902 #endif
3903 }
3904 if (PyErr_Occurred())
3905 return NULL;
3906
3907 match = pattern_new_match((PatternObject*) self->pattern,
3908 state, status);
3909
3910 if (status == 0 || state->ptr == state->start)
3911 state->start = (void*) ((char*) state->ptr + state->charsize);
3912 else
3913 state->start = state->ptr;
3914
3915 return match;
3916 }
3917
3918 static PyMethodDef scanner_methods[] = {
3919 {"match", (PyCFunction) scanner_match, METH_NOARGS},
3920 {"search", (PyCFunction) scanner_search, METH_NOARGS},
3921 {NULL, NULL}
3922 };
3923
3924 #define SCAN_OFF(x) offsetof(ScannerObject, x)
3925 static PyMemberDef scanner_members[] = {
3926 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
3927 {NULL} /* Sentinel */
3928 };
3929
3930 statichere PyTypeObject Scanner_Type = {
3931 PyObject_HEAD_INIT(NULL)
3932 0, "_" SRE_MODULE ".SRE_Scanner",
3933 sizeof(ScannerObject), 0,
3934 (destructor)scanner_dealloc, /*tp_dealloc*/
3935 0, /* tp_print */
3936 0, /* tp_getattr */
3937 0, /* tp_setattr */
3938 0, /* tp_reserved */
3939 0, /* tp_repr */
3940 0, /* tp_as_number */
3941 0, /* tp_as_sequence */
3942 0, /* tp_as_mapping */
3943 0, /* tp_hash */
3944 0, /* tp_call */
3945 0, /* tp_str */
3946 0, /* tp_getattro */
3947 0, /* tp_setattro */
3948 0, /* tp_as_buffer */
3949 Py_TPFLAGS_DEFAULT, /* tp_flags */
3950 0, /* tp_doc */
3951 0, /* tp_traverse */
3952 0, /* tp_clear */
3953 0, /* tp_richcompare */
3954 0, /* tp_weaklistoffset */
3955 0, /* tp_iter */
3956 0, /* tp_iternext */
3957 scanner_methods, /* tp_methods */
3958 scanner_members, /* tp_members */
3959 0, /* tp_getset */
3960 };
3961
3962 static PyObject*
3963 pattern_scanner(PatternObject* pattern, PyObject* args)
3964 {
3965 /* create search state object */
3966
3967 ScannerObject* self;
3968
3969 PyObject* string;
3970 Py_ssize_t start = 0;
3971 Py_ssize_t end = PY_SSIZE_T_MAX;
3972 if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end))
3973 return NULL;
3974
3975 /* create scanner object */
3976 self = PyObject_NEW(ScannerObject, &Scanner_Type);
3977 if (!self)
3978 return NULL;
3979 self->pattern = NULL;
3980
3981 string = state_init(&self->state, pattern, string, start, end);
3982 if (!string) {
3983 Py_DECREF(self);
3984 return NULL;
3985 }
3986
3987 Py_INCREF(pattern);
3988 self->pattern = (PyObject*) pattern;
3989
3990 return (PyObject*) self;
3991 }
3992
3993 static PyMethodDef _functions[] = {
3994 {"compile", _compile, METH_VARARGS},
3995 {"getcodesize", sre_codesize, METH_NOARGS},
3996 {"getlower", sre_getlower, METH_VARARGS},
3997 {NULL, NULL}
3998 };
3999
4000 #if PY_VERSION_HEX < 0x02030000
4001 DL_EXPORT(void) init_sre(void)
4002 #else
4003 PyMODINIT_FUNC init_sre(void)
4004 #endif
4005 {
4006 PyObject* m;
4007 PyObject* d;
4008 PyObject* x;
4009
4010 /* Patch object types */
4011 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
4012 PyType_Ready(&Scanner_Type))
4013 return;
4014
4015 m = Py_InitModule("_" SRE_MODULE, _functions);
4016 if (m == NULL)
4017 return;
4018 d = PyModule_GetDict(m);
4019
4020 x = PyInt_FromLong(SRE_MAGIC);
4021 if (x) {
4022 PyDict_SetItemString(d, "MAGIC", x);
4023 Py_DECREF(x);
4024 }
4025
4026 x = PyInt_FromLong(sizeof(SRE_CODE));
4027 if (x) {
4028 PyDict_SetItemString(d, "CODESIZE", x);
4029 Py_DECREF(x);
4030 }
4031
4032 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
4033 if (x) {
4034 PyDict_SetItemString(d, "MAXREPEAT", x);
4035 Py_DECREF(x);
4036 }
4037
4038 x = PyString_FromString(copyright);
4039 if (x) {
4040 PyDict_SetItemString(d, "copyright", x);
4041 Py_DECREF(x);
4042 }
4043 }
4044
4045 #endif /* !defined(SRE_RECURSIVE) */
4046
4047 /* vim:ts=4:sw=4:et
4048 */