]> git.proxmox.com Git - mirror_edk2.git/blob - AppPkg/Applications/Python/Python-2.7.10/PyMod-2.7.10/Modules/_sre.c
399ea742cfac246c7217218c2b949d1b2b102db6
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.10 / PyMod-2.7.10 / Modules / _sre.c
1 /*
2 * Secret Labs' Regular Expression Engine
3 *
4 * regular expression matching engine
5 *
6 * partial history:
7 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
24 * 2003-10-17 gn implemented non recursive scheme
25 *
26 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
27 *
28 * This version of the SRE library can be redistributed under CNRI's
29 * Python 1.6 license. For any other use, please contact Secret Labs
30 * AB (info@pythonware.com).
31 *
32 * Portions of this engine have been developed in cooperation with
33 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
34 * other compatibility work.
35 */
36
37 #ifndef SRE_RECURSIVE
38
39 static char copyright[] =
40 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
41
42 #define PY_SSIZE_T_CLEAN
43
44 #include "Python.h"
45 #include "structmember.h" /* offsetof */
46
47 #include "sre.h"
48
49 #include <ctype.h>
50
51 /* name of this module, minus the leading underscore */
52 #if !defined(SRE_MODULE)
53 #define SRE_MODULE "sre"
54 #endif
55
56 #define SRE_PY_MODULE "re"
57
58 /* defining this one enables tracing */
59 #undef VERBOSE
60
/* Unicode support is switched on for every interpreter from 1.6 onwards;
   from 2.2 onwards it is additionally required that the interpreter
   itself was built with Py_USING_UNICODE. */
#if (PY_VERSION_HEX >= 0x01060000) && \
    (PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE))
#define HAVE_UNICODE
#endif
67
68 /* -------------------------------------------------------------------- */
69 /* optional features */
70
71 /* enables fast searching */
72 #define USE_FAST_SEARCH
73
74 /* enables aggressive inlining (always on for Visual C) */
75 #undef USE_INLINE
76
77 /* enables copy/deepcopy handling (work in progress) */
78 #undef USE_BUILTIN_COPY
79
80 #if PY_VERSION_HEX < 0x01060000
81 #define PyObject_DEL(op) PyMem_DEL((op))
82 #endif
83
84 /* -------------------------------------------------------------------- */
85
/* LOCAL(type) introduces a file-private helper that returns `type`.
   The exact storage/calling-convention keywords depend on the compiler
   and on whether USE_INLINE was requested. */
#ifdef _MSC_VER
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* don't warn about functions left un-inlined */
/* cheapest local call available under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#ifdef USE_INLINE
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif
#endif
96
97 /* error codes */
/* negative status codes reported by the engine */
#define SRE_ERROR_ILLEGAL         (-1)  /* illegal opcode */
#define SRE_ERROR_STATE           (-2)  /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT (-3)  /* runaway recursion */
#define SRE_ERROR_MEMORY          (-9)  /* out of memory */
#define SRE_ERROR_INTERRUPTED     (-10) /* signal handler raised exception */

/* TRACE((fmt, ...)) forwards its parenthesised argument list to printf
   when VERBOSE is defined and expands to nothing otherwise */
#ifdef VERBOSE
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
109
110 /* -------------------------------------------------------------------- */
111 /* search engine state */
112
/* default (ASCII) character predicates; the two tables are generated data
   (run sre_chars.py to regenerate them) */

/* bit flags stored in each sre_char_info entry */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* FIXME: this assumes ASCII. create tables in init_sre() instead */

/* per-character class flags for code points 0..127 (read-only) */
static const char sre_char_info[128] = {
    /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /*  16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  32 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  48 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /*  64 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /*  96 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};

/* lower-case mapping for code points 0..127 (read-only) */
static const char sre_char_lower[128] = {
    /*   0 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /*  16 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /*  32 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /*  48 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /*  64 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /*  80 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
              93, 94, 95,
    /*  96 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /* 112 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
              124, 125, 126, 127
};

/* Each predicate yields the matching flag bit (non-zero) or 0.
 * !(c & ~127) is true exactly for 0 <= c <= 127: unlike a plain c < 128
 * it also rejects negative values of a signed argument, so the table is
 * never indexed out of bounds.  Note that ch is evaluated twice. */
#define SRE_IS_DIGIT(ch)\
    (!((ch) & ~127) ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    (!((ch) & ~127) ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    (!((ch) & ~127) ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    (!((ch) & ~127) ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    (!((ch) & ~127) ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)

/* Map ch through the ASCII lower-case table; values of 128 and above are
   returned unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
}
156
/* locale-specific character predicates, built on <ctype.h> */
/* (c & ~255) is non-zero exactly when an unsigned c is above 255; testing
 * it this way instead of c > 255 avoids warnings when c's type cannot
 * hold such values anyway */
#define SRE_LOC_IS_DIGIT(ch) (((ch) & ~255) ? 0 : isdigit((ch)))
#define SRE_LOC_IS_SPACE(ch) (((ch) & ~255) ? 0 : isspace((ch)))
#define SRE_LOC_IS_LINEBREAK(ch) ('\n' == (ch))
#define SRE_LOC_IS_ALNUM(ch) (((ch) & ~255) ? 0 : isalnum((ch)))
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || '_' == (ch))
165
/* Lower-case ch with the C library's tolower(); values of 256 and above
   are handed back untouched. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)tolower(ch);
}
170
171 /* unicode-specific character predicates */
172
173 #if defined(HAVE_UNICODE)
174
175 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL((Py_UNICODE)(ch))
176 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
177 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
178 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
179 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
180
181 static unsigned int sre_lower_unicode(unsigned int ch)
182 {
183 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
184 }
185
186 #endif
187
188 LOCAL(int)
189 sre_category(SRE_CODE category, unsigned int ch)
190 {
191 switch (category) {
192
193 case SRE_CATEGORY_DIGIT:
194 return SRE_IS_DIGIT(ch);
195 case SRE_CATEGORY_NOT_DIGIT:
196 return !SRE_IS_DIGIT(ch);
197 case SRE_CATEGORY_SPACE:
198 return SRE_IS_SPACE(ch);
199 case SRE_CATEGORY_NOT_SPACE:
200 return !SRE_IS_SPACE(ch);
201 case SRE_CATEGORY_WORD:
202 return SRE_IS_WORD(ch);
203 case SRE_CATEGORY_NOT_WORD:
204 return !SRE_IS_WORD(ch);
205 case SRE_CATEGORY_LINEBREAK:
206 return SRE_IS_LINEBREAK(ch);
207 case SRE_CATEGORY_NOT_LINEBREAK:
208 return !SRE_IS_LINEBREAK(ch);
209
210 case SRE_CATEGORY_LOC_WORD:
211 return SRE_LOC_IS_WORD(ch);
212 case SRE_CATEGORY_LOC_NOT_WORD:
213 return !SRE_LOC_IS_WORD(ch);
214
215 #if defined(HAVE_UNICODE)
216 case SRE_CATEGORY_UNI_DIGIT:
217 return SRE_UNI_IS_DIGIT(ch);
218 case SRE_CATEGORY_UNI_NOT_DIGIT:
219 return !SRE_UNI_IS_DIGIT(ch);
220 case SRE_CATEGORY_UNI_SPACE:
221 return SRE_UNI_IS_SPACE(ch);
222 case SRE_CATEGORY_UNI_NOT_SPACE:
223 return !SRE_UNI_IS_SPACE(ch);
224 case SRE_CATEGORY_UNI_WORD:
225 return SRE_UNI_IS_WORD(ch);
226 case SRE_CATEGORY_UNI_NOT_WORD:
227 return !SRE_UNI_IS_WORD(ch);
228 case SRE_CATEGORY_UNI_LINEBREAK:
229 return SRE_UNI_IS_LINEBREAK(ch);
230 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
231 return !SRE_UNI_IS_LINEBREAK(ch);
232 #else
233 case SRE_CATEGORY_UNI_DIGIT:
234 return SRE_IS_DIGIT(ch);
235 case SRE_CATEGORY_UNI_NOT_DIGIT:
236 return !SRE_IS_DIGIT(ch);
237 case SRE_CATEGORY_UNI_SPACE:
238 return SRE_IS_SPACE(ch);
239 case SRE_CATEGORY_UNI_NOT_SPACE:
240 return !SRE_IS_SPACE(ch);
241 case SRE_CATEGORY_UNI_WORD:
242 return SRE_LOC_IS_WORD(ch);
243 case SRE_CATEGORY_UNI_NOT_WORD:
244 return !SRE_LOC_IS_WORD(ch);
245 case SRE_CATEGORY_UNI_LINEBREAK:
246 return SRE_IS_LINEBREAK(ch);
247 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
248 return !SRE_IS_LINEBREAK(ch);
249 #endif
250 }
251 return 0;
252 }
253
254 /* helpers */
255
256 static void
257 data_stack_dealloc(SRE_STATE* state)
258 {
259 if (state->data_stack) {
260 PyMem_FREE(state->data_stack);
261 state->data_stack = NULL;
262 }
263 state->data_stack_size = state->data_stack_base = 0;
264 }
265
266 static int
267 data_stack_grow(SRE_STATE* state, Py_ssize_t size)
268 {
269 Py_ssize_t minsize, cursize;
270 minsize = state->data_stack_base+size;
271 cursize = state->data_stack_size;
272 if (cursize < minsize) {
273 void* stack;
274 cursize = minsize+minsize/4+1024;
275 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
276 stack = PyMem_REALLOC(state->data_stack, cursize);
277 if (!stack) {
278 data_stack_dealloc(state);
279 return SRE_ERROR_MEMORY;
280 }
281 state->data_stack = (char *)stack;
282 state->data_stack_size = cursize;
283 }
284 return 0;
285 }
286
/* 8-bit instantiation: bind the generic engine names to the byte-string
   variants */

#define SRE_CHAR unsigned char
#define SRE_AT sre_at
#define SRE_CHARSET sre_charset
#define SRE_COUNT sre_count
#define SRE_INFO sre_info
#define SRE_LITERAL_TEMPLATE sre_literal_template
#define SRE_MATCH sre_match
#define SRE_MATCH_CONTEXT sre_match_context
#define SRE_SEARCH sre_search

#ifdef HAVE_UNICODE

/* compile the engine with the 8-bit bindings by including this very file;
   with SRE_RECURSIVE defined, the part guarded by #ifndef SRE_RECURSIVE
   is skipped */
#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE

/* drop the 8-bit bindings again */
#undef SRE_SEARCH
#undef SRE_MATCH_CONTEXT
#undef SRE_MATCH
#undef SRE_LITERAL_TEMPLATE
#undef SRE_INFO
#undef SRE_COUNT
#undef SRE_CHARSET
#undef SRE_AT
#undef SRE_CHAR

/* Py_UNICODE instantiation: rebind the same names to the unicode variants
   for the code that follows */

#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
#define SRE_CHARSET sre_ucharset
#define SRE_COUNT sre_ucount
#define SRE_INFO sre_uinfo
#define SRE_LITERAL_TEMPLATE sre_uliteral_template
#define SRE_MATCH sre_umatch
#define SRE_MATCH_CONTEXT sre_umatch_context
#define SRE_SEARCH sre_usearch
#endif
327
328 #endif /* SRE_RECURSIVE */
329
330 /* -------------------------------------------------------------------- */
331 /* String matching engine */
332
333 /* the following section is compiled twice, with different character
334 settings */
335
336 LOCAL(int)
337 SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
338 {
339 /* check if pointer is at given position */
340
341 Py_ssize_t thisp, thatp;
342
343 switch (at) {
344
345 case SRE_AT_BEGINNING:
346 case SRE_AT_BEGINNING_STRING:
347 return ((void*) ptr == state->beginning);
348
349 case SRE_AT_BEGINNING_LINE:
350 return ((void*) ptr == state->beginning ||
351 SRE_IS_LINEBREAK((int) ptr[-1]));
352
353 case SRE_AT_END:
354 return (((void*) (ptr+1) == state->end &&
355 SRE_IS_LINEBREAK((int) ptr[0])) ||
356 ((void*) ptr == state->end));
357
358 case SRE_AT_END_LINE:
359 return ((void*) ptr == state->end ||
360 SRE_IS_LINEBREAK((int) ptr[0]));
361
362 case SRE_AT_END_STRING:
363 return ((void*) ptr == state->end);
364
365 case SRE_AT_BOUNDARY:
366 if (state->beginning == state->end)
367 return 0;
368 thatp = ((void*) ptr > state->beginning) ?
369 SRE_IS_WORD((int) ptr[-1]) : 0;
370 thisp = ((void*) ptr < state->end) ?
371 SRE_IS_WORD((int) ptr[0]) : 0;
372 return thisp != thatp;
373
374 case SRE_AT_NON_BOUNDARY:
375 if (state->beginning == state->end)
376 return 0;
377 thatp = ((void*) ptr > state->beginning) ?
378 SRE_IS_WORD((int) ptr[-1]) : 0;
379 thisp = ((void*) ptr < state->end) ?
380 SRE_IS_WORD((int) ptr[0]) : 0;
381 return thisp == thatp;
382
383 case SRE_AT_LOC_BOUNDARY:
384 if (state->beginning == state->end)
385 return 0;
386 thatp = ((void*) ptr > state->beginning) ?
387 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
388 thisp = ((void*) ptr < state->end) ?
389 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
390 return thisp != thatp;
391
392 case SRE_AT_LOC_NON_BOUNDARY:
393 if (state->beginning == state->end)
394 return 0;
395 thatp = ((void*) ptr > state->beginning) ?
396 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
397 thisp = ((void*) ptr < state->end) ?
398 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
399 return thisp == thatp;
400
401 #if defined(HAVE_UNICODE)
402 case SRE_AT_UNI_BOUNDARY:
403 if (state->beginning == state->end)
404 return 0;
405 thatp = ((void*) ptr > state->beginning) ?
406 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
407 thisp = ((void*) ptr < state->end) ?
408 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
409 return thisp != thatp;
410
411 case SRE_AT_UNI_NON_BOUNDARY:
412 if (state->beginning == state->end)
413 return 0;
414 thatp = ((void*) ptr > state->beginning) ?
415 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
416 thisp = ((void*) ptr < state->end) ?
417 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
418 return thisp == thatp;
419 #endif
420
421 }
422
423 return 0;
424 }
425
426 LOCAL(int)
427 SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
428 {
429 /* check if character is a member of the given set */
430
431 int ok = 1;
432
433 for (;;) {
434 switch (*set++) {
435
436 case SRE_OP_FAILURE:
437 return !ok;
438
439 case SRE_OP_LITERAL:
440 /* <LITERAL> <code> */
441 if (ch == set[0])
442 return ok;
443 set++;
444 break;
445
446 case SRE_OP_CATEGORY:
447 /* <CATEGORY> <code> */
448 if (sre_category(set[0], (int) ch))
449 return ok;
450 set += 1;
451 break;
452
453 case SRE_OP_CHARSET:
454 if (sizeof(SRE_CODE) == 2) {
455 /* <CHARSET> <bitmap> (16 bits per code word) */
456 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
457 return ok;
458 set += 16;
459 }
460 else {
461 /* <CHARSET> <bitmap> (32 bits per code word) */
462 if (ch < 256 && (set[ch >> 5] & (1u << (ch & 31))))
463 return ok;
464 set += 8;
465 }
466 break;
467
468 case SRE_OP_RANGE:
469 /* <RANGE> <lower> <upper> */
470 if (set[0] <= ch && ch <= set[1])
471 return ok;
472 set += 2;
473 break;
474
475 case SRE_OP_NEGATE:
476 ok = !ok;
477 break;
478
479 case SRE_OP_BIGCHARSET:
480 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
481 {
482 Py_ssize_t count, block;
483 count = *(set++);
484
485 if (sizeof(SRE_CODE) == 2) {
486 block = ((unsigned char*)set)[ch >> 8];
487 set += 128;
488 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
489 return ok;
490 set += count*16;
491 }
492 else {
493 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
494 * warnings when c's type supports only numbers < N+1 */
495 if (!(ch & ~65535))
496 block = ((unsigned char*)set)[ch >> 8];
497 else
498 block = -1;
499 set += 64;
500 if (block >=0 &&
501 (set[block*8 + ((ch & 255)>>5)] & (1u << (ch & 31))))
502 return ok;
503 set += count*8;
504 }
505 break;
506 }
507
508 default:
509 /* internal error -- there's not much we can do about it
510 here, so let's just pretend it didn't match... */
511 return 0;
512 }
513 }
514 }
515
516 LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
517
518 LOCAL(Py_ssize_t)
519 SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
520 {
521 SRE_CODE chr;
522 SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
523 SRE_CHAR* end = (SRE_CHAR *)state->end;
524 Py_ssize_t i;
525
526 /* adjust end */
527 if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
528 end = ptr + maxcount;
529
530 switch (pattern[0]) {
531
532 case SRE_OP_IN:
533 /* repeated set */
534 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
535 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
536 ptr++;
537 break;
538
539 case SRE_OP_ANY:
540 /* repeated dot wildcard. */
541 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
542 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
543 ptr++;
544 break;
545
546 case SRE_OP_ANY_ALL:
547 /* repeated dot wildcard. skip to the end of the target
548 string, and backtrack from there */
549 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
550 ptr = end;
551 break;
552
553 case SRE_OP_LITERAL:
554 /* repeated literal */
555 chr = pattern[1];
556 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
557 while (ptr < end && (SRE_CODE) *ptr == chr)
558 ptr++;
559 break;
560
561 case SRE_OP_LITERAL_IGNORE:
562 /* repeated literal */
563 chr = pattern[1];
564 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
565 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
566 ptr++;
567 break;
568
569 case SRE_OP_NOT_LITERAL:
570 /* repeated non-literal */
571 chr = pattern[1];
572 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
573 while (ptr < end && (SRE_CODE) *ptr != chr)
574 ptr++;
575 break;
576
577 case SRE_OP_NOT_LITERAL_IGNORE:
578 /* repeated non-literal */
579 chr = pattern[1];
580 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
581 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
582 ptr++;
583 break;
584
585 default:
586 /* repeated single character pattern */
587 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
588 while ((SRE_CHAR*) state->ptr < end) {
589 i = SRE_MATCH(state, pattern);
590 if (i < 0)
591 return i;
592 if (!i)
593 break;
594 }
595 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
596 (SRE_CHAR*) state->ptr - ptr));
597 return (SRE_CHAR*) state->ptr - ptr;
598 }
599
600 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
601 ptr - (SRE_CHAR*) state->ptr));
602 return ptr - (SRE_CHAR*) state->ptr;
603 }
604
#if 0 /* not used in this release */
/* Check whether an SRE_OP_INFO block matches at the current position.
   Returns the number of SRE_CODE objects to skip on success and 0 when
   there is no match. */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    SRE_CHAR* limit = state->end;
    SRE_CHAR* cursor = state->ptr;
    Py_ssize_t k;

    /* not enough input left for the minimal length */
    if (pattern[3] && (limit - cursor) < pattern[3])
        return 0;

    /* no usable literal prefix recorded: only the block itself is skipped */
    if (!((pattern[2] & SRE_INFO_PREFIX) && pattern[5] > 1))
        return pattern[0];

    /* <length> <skip> <prefix data> <overlap data> */
    for (k = 0; k < pattern[5]; k++) {
        if ((SRE_CODE) cursor[k] != pattern[7 + k])
            return 0;
    }
    return pattern[0] + 2 * pattern[6];
}
#endif
632
633 /* The macros below should be used to protect recursive SRE_MATCH()
634 * calls that *failed* and do *not* return immediately (IOW, those
635 * that will backtrack). Explaining:
636 *
637 * - Recursive SRE_MATCH() returned true: that's usually a success
638 * (besides atypical cases like ASSERT_NOT), therefore there's no
639 * reason to restore lastmark;
640 *
641 * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
642 * is returning to the caller: If the current SRE_MATCH() is the
643 * top function of the recursion, returning false will be a matching
644 * failure, and it doesn't matter where lastmark is pointing to.
645 * If it's *not* the top function, it will be a recursive SRE_MATCH()
646 * failure by itself, and the calling SRE_MATCH() will have to deal
647 * with the failure by the same rules explained here (it will restore
648 * lastmark by itself if necessary);
649 *
650 * - Recursive SRE_MATCH() returned false, and will continue the
651 * outside 'for' loop: must be protected when breaking, since the next
652 * OP could potentially depend on lastmark;
653 *
654 * - Recursive SRE_MATCH() returned false, and will be called again
655 * inside a local for/while loop: must be protected between each
656 * loop iteration, since the recursive SRE_MATCH() could do anything,
657 * and could potentially depend on lastmark.
658 *
659 * For more information, check the discussion at SF patch #712900.
660 */
/* remember state->lastmark / state->lastindex in the current context */
#define LASTMARK_SAVE() \
    do { \
        ctx->lastindex = state->lastindex; \
        ctx->lastmark = state->lastmark; \
    } while (0)
/* put back the values remembered by LASTMARK_SAVE() */
#define LASTMARK_RESTORE() \
    do { \
        state->lastindex = ctx->lastindex; \
        state->lastmark = ctx->lastmark; \
    } while (0)
671
/* leave the function at once with the given status */
#define RETURN_ERROR(status) \
    do { \
        return status; \
    } while(0)
/* record the result in `ret` and continue at the `exit` label */
#define RETURN_FAILURE \
    do { \
        ret = 0; \
        goto exit; \
    } while(0)
#define RETURN_SUCCESS \
    do { \
        ret = 1; \
        goto exit; \
    } while(0)

/* conditional forms: a negative status is always returned as an error */
#define RETURN_ON_ERROR(status) \
    do { \
        if (status < 0) \
            RETURN_ERROR(status); \
    } while (0)
#define RETURN_ON_SUCCESS(status) \
    do { \
        RETURN_ON_ERROR(status); \
        if (status > 0) \
            RETURN_SUCCESS; \
    } while (0)
#define RETURN_ON_FAILURE(status) \
    do { \
        RETURN_ON_ERROR(status); \
        if (status == 0) \
            RETURN_FAILURE; \
    } while (0)

/* turn the argument into a string literal */
#define SFY(text) #text
684
/* Reserve sizeof(type) bytes on top of the data stack and point ptr at
   them; alloc_pos receives the offset of the reserved area.  If the stack
   has to grow, the buffer may move, so ctx is looked up again from
   ctx_pos (unless ctx_pos is -1).  Returns from the enclosing function
   with the negative status when growing fails. */
#define DATA_STACK_ALLOC(state, type, ptr) \
do { \
    alloc_pos = state->data_stack_base; \
    TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \
           "(%" PY_FORMAT_SIZE_T "d)\n", \
           SFY(type), alloc_pos, sizeof(type))); \
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
        int grow_rc = data_stack_grow(state, sizeof(type)); \
        if (grow_rc < 0) return grow_rc; \
        if (ctx_pos != -1) \
            DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
    } \
    ptr = (type*)(state->data_stack+alloc_pos); \
    state->data_stack_base += sizeof(type); \
} while (0)

/* Point ptr at the object of the given type stored at offset pos. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
do { \
    TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", SFY(type), pos)); \
    ptr = (type*)(state->data_stack+pos); \
} while (0)

/* Copy size bytes from data onto the top of the data stack, growing the
   stack first when needed (with the same ctx re-lookup and error return
   as DATA_STACK_ALLOC). */
#define DATA_STACK_PUSH(state, data, size) \
do { \
    TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \
           "(%" PY_FORMAT_SIZE_T "d)\n", \
           data, state->data_stack_base, size)); \
    if (size > state->data_stack_size - state->data_stack_base) { \
        int grow_rc = data_stack_grow(state, size); \
        if (grow_rc < 0) return grow_rc; \
        if (ctx_pos != -1) \
            DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
    } \
    memcpy(state->data_stack+state->data_stack_base, data, size); \
    state->data_stack_base += size; \
} while (0)

/* Copy the topmost size bytes of the data stack into data; the bytes are
   removed from the stack only when discard is non-zero. */
#define DATA_STACK_POP(state, data, size, discard) \
do { \
    TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \
           "(%" PY_FORMAT_SIZE_T "d)\n", \
           data, state->data_stack_base-size, size)); \
    memcpy(data, state->data_stack+state->data_stack_base-size, size); \
    if (discard) \
        state->data_stack_base -= size; \
} while (0)

/* Remove the topmost size bytes of the data stack without copying them. */
#define DATA_STACK_POP_DISCARD(state, size) \
do { \
    TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \
           "(%" PY_FORMAT_SIZE_T "d)\n", \
           state->data_stack_base-size, size)); \
    state->data_stack_base -= size; \
} while(0)

/* shorthands that operate on the `state` variable in scope; x is a
   pointer whose pointee size decides how many bytes are moved */
#define DATA_PUSH(x) \
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
#define DATA_POP(x) \
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
#define DATA_POP_DISCARD(x) \
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
#define DATA_ALLOC(t,p) \
    DATA_STACK_ALLOC(state, t, p)
#define DATA_LOOKUP_AT(t,p,pos) \
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
750
/* The MARK_* macros move state->mark[0..lastmark] to and from the data
   stack; all of them do nothing unless lastmark is greater than zero. */

/* save the marks on the data stack */
#define MARK_PUSH(lastmark) \
    do { \
        if (lastmark > 0) { \
            i = lastmark; /* ctx->lastmark may change if reallocated */ \
            DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
        } \
    } while (0)
/* reload the marks and remove them from the data stack */
#define MARK_POP(lastmark) \
    do { \
        if (lastmark > 0) { \
            DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
        } \
    } while (0)
/* reload the marks but leave the saved copy on the data stack */
#define MARK_POP_KEEP(lastmark) \
    do { \
        if (lastmark > 0) { \
            DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
        } \
    } while (0)
/* remove the saved copy from the data stack without reloading it */
#define MARK_POP_DISCARD(lastmark) \
    do { \
        if (lastmark > 0) { \
            DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
        } \
    } while (0)
768
/* values stored in a context's `jump` field; JUMP_NONE is what the
   outermost context gets, each other value is passed by one DO_JUMP()
   call site */
#define JUMP_NONE            0
#define JUMP_MAX_UNTIL_1     1
#define JUMP_MAX_UNTIL_2     2
#define JUMP_MAX_UNTIL_3     3
#define JUMP_MIN_UNTIL_1     4
#define JUMP_MIN_UNTIL_2     5
#define JUMP_MIN_UNTIL_3     6
#define JUMP_REPEAT          7
#define JUMP_REPEAT_ONE_1    8
#define JUMP_REPEAT_ONE_2    9
#define JUMP_MIN_REPEAT_ONE  10
#define JUMP_BRANCH          11
#define JUMP_ASSERT          12
#define JUMP_ASSERT_NOT      13

/* Start matching `target` in a fresh context allocated on the data stack:
   the new context records the offset of the current one and the jump
   kind, becomes the current context, and control goes to `entrance`.
   `resume_label` is placed right behind the jump. */
#define DO_JUMP(kind, resume_label, target) \
    DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
    nextctx->pattern = target; \
    nextctx->jump = kind; \
    nextctx->last_ctx_pos = ctx_pos; \
    ctx_pos = alloc_pos; \
    ctx = nextctx; \
    goto entrance; \
    resume_label: \
    while (0) /* gcc doesn't like labels at end of scopes */
794
795 typedef struct {
796 Py_ssize_t last_ctx_pos;
797 Py_ssize_t jump;
798 SRE_CHAR* ptr;
799 SRE_CODE* pattern;
800 Py_ssize_t count;
801 Py_ssize_t lastmark;
802 Py_ssize_t lastindex;
803 union {
804 SRE_CODE chr;
805 SRE_REPEAT* rep;
806 } u;
807 } SRE_MATCH_CONTEXT;
808
809 /* check if string matches the given pattern. returns <0 for
810 error, 0 for failure, and 1 for success */
811 LOCAL(Py_ssize_t)
812 SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
813 {
814 SRE_CHAR* end = (SRE_CHAR *)state->end;
815 Py_ssize_t alloc_pos, ctx_pos = -1;
816 Py_ssize_t i, ret = 0;
817 Py_ssize_t jump;
818 unsigned int sigcount=0;
819
820 SRE_MATCH_CONTEXT* ctx;
821 SRE_MATCH_CONTEXT* nextctx;
822
823 TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
824
825 DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
826 ctx->last_ctx_pos = -1;
827 ctx->jump = JUMP_NONE;
828 ctx->pattern = pattern;
829 ctx_pos = alloc_pos;
830
831 entrance:
832
833 ctx->ptr = (SRE_CHAR *)state->ptr;
834
835 if (ctx->pattern[0] == SRE_OP_INFO) {
836 /* optimization info block */
837 /* <INFO> <1=skip> <2=flags> <3=min> ... */
838 if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
839 TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, "
840 "need %" PY_FORMAT_SIZE_T "d)\n",
841 (end - ctx->ptr), (Py_ssize_t) ctx->pattern[3]));
842 RETURN_FAILURE;
843 }
844 ctx->pattern += ctx->pattern[1] + 1;
845 }
846
847 for (;;) {
848 ++sigcount;
849 if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
850 RETURN_ERROR(SRE_ERROR_INTERRUPTED);
851
852 switch (*ctx->pattern++) {
853
854 case SRE_OP_MARK:
855 /* set mark */
856 /* <MARK> <gid> */
857 TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
858 ctx->ptr, ctx->pattern[0]));
859 i = ctx->pattern[0];
860 if (i & 1)
861 state->lastindex = i/2 + 1;
862 if (i > state->lastmark) {
863 /* state->lastmark is the highest valid index in the
864 state->mark array. If it is increased by more than 1,
865 the intervening marks must be set to NULL to signal
866 that these marks have not been encountered. */
867 Py_ssize_t j = state->lastmark + 1;
868 while (j < i)
869 state->mark[j++] = NULL;
870 state->lastmark = i;
871 }
872 state->mark[i] = ctx->ptr;
873 ctx->pattern++;
874 break;
875
876 case SRE_OP_LITERAL:
877 /* match literal string */
878 /* <LITERAL> <code> */
879 TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
880 ctx->ptr, *ctx->pattern));
881 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
882 RETURN_FAILURE;
883 ctx->pattern++;
884 ctx->ptr++;
885 break;
886
887 case SRE_OP_NOT_LITERAL:
888 /* match anything that is not literal character */
889 /* <NOT_LITERAL> <code> */
890 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
891 ctx->ptr, *ctx->pattern));
892 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
893 RETURN_FAILURE;
894 ctx->pattern++;
895 ctx->ptr++;
896 break;
897
898 case SRE_OP_SUCCESS:
899 /* end of pattern */
900 TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
901 state->ptr = ctx->ptr;
902 RETURN_SUCCESS;
903
904 case SRE_OP_AT:
905 /* match at given position */
906 /* <AT> <code> */
907 TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
908 if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
909 RETURN_FAILURE;
910 ctx->pattern++;
911 break;
912
913 case SRE_OP_CATEGORY:
914 /* match at given category */
915 /* <CATEGORY> <code> */
916 TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
917 ctx->ptr, *ctx->pattern));
918 if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
919 RETURN_FAILURE;
920 ctx->pattern++;
921 ctx->ptr++;
922 break;
923
924 case SRE_OP_ANY:
925 /* match anything (except a newline) */
926 /* <ANY> */
927 TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
928 if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
929 RETURN_FAILURE;
930 ctx->ptr++;
931 break;
932
933 case SRE_OP_ANY_ALL:
934 /* match anything */
935 /* <ANY_ALL> */
936 TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
937 if (ctx->ptr >= end)
938 RETURN_FAILURE;
939 ctx->ptr++;
940 break;
941
942 case SRE_OP_IN:
943 /* match set member (or non_member) */
944 /* <IN> <skip> <set> */
945 TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
946 if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
947 RETURN_FAILURE;
948 ctx->pattern += ctx->pattern[0];
949 ctx->ptr++;
950 break;
951
952 case SRE_OP_LITERAL_IGNORE:
953 TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
954 ctx->pattern, ctx->ptr, ctx->pattern[0]));
955 if (ctx->ptr >= end ||
956 state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
957 RETURN_FAILURE;
958 ctx->pattern++;
959 ctx->ptr++;
960 break;
961
962 case SRE_OP_NOT_LITERAL_IGNORE:
963 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
964 ctx->pattern, ctx->ptr, *ctx->pattern));
965 if (ctx->ptr >= end ||
966 state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
967 RETURN_FAILURE;
968 ctx->pattern++;
969 ctx->ptr++;
970 break;
971
972 case SRE_OP_IN_IGNORE:
973 TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
974 if (ctx->ptr >= end
975 || !SRE_CHARSET(ctx->pattern+1,
976 (SRE_CODE)state->lower(*ctx->ptr)))
977 RETURN_FAILURE;
978 ctx->pattern += ctx->pattern[0];
979 ctx->ptr++;
980 break;
981
982 case SRE_OP_JUMP:
983 case SRE_OP_INFO:
984 /* jump forward */
985 /* <JUMP> <offset> */
986 TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
987 ctx->ptr, ctx->pattern[0]));
988 ctx->pattern += ctx->pattern[0];
989 break;
990
991 case SRE_OP_BRANCH:
992 /* alternation */
993 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
994 TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
995 LASTMARK_SAVE();
996 ctx->u.rep = state->repeat;
997 if (ctx->u.rep)
998 MARK_PUSH(ctx->lastmark);
999 for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
1000 if (ctx->pattern[1] == SRE_OP_LITERAL &&
1001 (ctx->ptr >= end ||
1002 (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
1003 continue;
1004 if (ctx->pattern[1] == SRE_OP_IN &&
1005 (ctx->ptr >= end ||
1006 !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
1007 continue;
1008 state->ptr = ctx->ptr;
1009 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
1010 if (ret) {
1011 if (ctx->u.rep)
1012 MARK_POP_DISCARD(ctx->lastmark);
1013 RETURN_ON_ERROR(ret);
1014 RETURN_SUCCESS;
1015 }
1016 if (ctx->u.rep)
1017 MARK_POP_KEEP(ctx->lastmark);
1018 LASTMARK_RESTORE();
1019 }
1020 if (ctx->u.rep)
1021 MARK_POP_DISCARD(ctx->lastmark);
1022 RETURN_FAILURE;
1023
1024 case SRE_OP_REPEAT_ONE:
1025 /* match repeated sequence (maximizing regexp) */
1026
1027 /* this operator only works if the repeated item is
1028 exactly one character wide, and we're not already
1029 collecting backtracking points. for other cases,
1030 use the MAX_REPEAT operator */
1031
1032 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1033
1034 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1035 ctx->pattern[1], ctx->pattern[2]));
1036
1037 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
1038 RETURN_FAILURE; /* cannot match */
1039
1040 state->ptr = ctx->ptr;
1041
1042 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
1043 RETURN_ON_ERROR(ret);
1044 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1045 ctx->count = ret;
1046 ctx->ptr += ctx->count;
1047
1048 /* when we arrive here, count contains the number of
1049 matches, and ctx->ptr points to the tail of the target
1050 string. check if the rest of the pattern matches,
1051 and backtrack if not. */
1052
1053 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
1054 RETURN_FAILURE;
1055
1056 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
1057 /* tail is empty. we're finished */
1058 state->ptr = ctx->ptr;
1059 RETURN_SUCCESS;
1060 }
1061
1062 LASTMARK_SAVE();
1063
1064 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
1065 /* tail starts with a literal. skip positions where
1066 the rest of the pattern cannot possibly match */
1067 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
1068 for (;;) {
1069 while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
1070 (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
1071 ctx->ptr--;
1072 ctx->count--;
1073 }
1074 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
1075 break;
1076 state->ptr = ctx->ptr;
1077 DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
1078 ctx->pattern+ctx->pattern[0]);
1079 if (ret) {
1080 RETURN_ON_ERROR(ret);
1081 RETURN_SUCCESS;
1082 }
1083
1084 LASTMARK_RESTORE();
1085
1086 ctx->ptr--;
1087 ctx->count--;
1088 }
1089
1090 } else {
1091 /* general case */
1092 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
1093 state->ptr = ctx->ptr;
1094 DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
1095 ctx->pattern+ctx->pattern[0]);
1096 if (ret) {
1097 RETURN_ON_ERROR(ret);
1098 RETURN_SUCCESS;
1099 }
1100 ctx->ptr--;
1101 ctx->count--;
1102 LASTMARK_RESTORE();
1103 }
1104 }
1105 RETURN_FAILURE;
1106
1107 case SRE_OP_MIN_REPEAT_ONE:
1108 /* match repeated sequence (minimizing regexp) */
1109
1110 /* this operator only works if the repeated item is
1111 exactly one character wide, and we're not already
1112 collecting backtracking points. for other cases,
1113 use the MIN_REPEAT operator */
1114
1115 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1116
1117 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1118 ctx->pattern[1], ctx->pattern[2]));
1119
1120 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
1121 RETURN_FAILURE; /* cannot match */
1122
1123 state->ptr = ctx->ptr;
1124
1125 if (ctx->pattern[1] == 0)
1126 ctx->count = 0;
1127 else {
1128 /* count using pattern min as the maximum */
1129 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
1130 RETURN_ON_ERROR(ret);
1131 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1132 if (ret < (Py_ssize_t) ctx->pattern[1])
1133 /* didn't match minimum number of times */
1134 RETURN_FAILURE;
1135 /* advance past minimum matches of repeat */
1136 ctx->count = ret;
1137 ctx->ptr += ctx->count;
1138 }
1139
1140 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
1141 /* tail is empty. we're finished */
1142 state->ptr = ctx->ptr;
1143 RETURN_SUCCESS;
1144
1145 } else {
1146 /* general case */
1147 LASTMARK_SAVE();
1148 while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
1149 || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
1150 state->ptr = ctx->ptr;
1151 DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1152 ctx->pattern+ctx->pattern[0]);
1153 if (ret) {
1154 RETURN_ON_ERROR(ret);
1155 RETURN_SUCCESS;
1156 }
1157 state->ptr = ctx->ptr;
1158 ret = SRE_COUNT(state, ctx->pattern+3, 1);
1159 RETURN_ON_ERROR(ret);
1160 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1161 if (ret == 0)
1162 break;
1163 assert(ret == 1);
1164 ctx->ptr++;
1165 ctx->count++;
1166 LASTMARK_RESTORE();
1167 }
1168 }
1169 RETURN_FAILURE;
1170
1171 case SRE_OP_REPEAT:
1172 /* create repeat context. all the hard work is done
1173 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1174 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
1175 TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
1176 ctx->pattern[1], ctx->pattern[2]));
1177
1178 /* install new repeat context */
1179 ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
1180 if (!ctx->u.rep) {
1181 PyErr_NoMemory();
1182 RETURN_FAILURE;
1183 }
1184 ctx->u.rep->count = -1;
1185 ctx->u.rep->pattern = ctx->pattern;
1186 ctx->u.rep->prev = state->repeat;
1187 ctx->u.rep->last_ptr = NULL;
1188 state->repeat = ctx->u.rep;
1189
1190 state->ptr = ctx->ptr;
1191 DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
1192 state->repeat = ctx->u.rep->prev;
1193 PyObject_FREE(ctx->u.rep);
1194
1195 if (ret) {
1196 RETURN_ON_ERROR(ret);
1197 RETURN_SUCCESS;
1198 }
1199 RETURN_FAILURE;
1200
1201 case SRE_OP_MAX_UNTIL:
1202 /* maximizing repeat */
1203 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1204
1205 /* FIXME: we probably need to deal with zero-width
1206 matches in here... */
1207
1208 ctx->u.rep = state->repeat;
1209 if (!ctx->u.rep)
1210 RETURN_ERROR(SRE_ERROR_STATE);
1211
1212 state->ptr = ctx->ptr;
1213
1214 ctx->count = ctx->u.rep->count+1;
1215
1216 TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
1217 ctx->ptr, ctx->count));
1218
1219 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1220 /* not enough matches */
1221 ctx->u.rep->count = ctx->count;
1222 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1223 ctx->u.rep->pattern+3);
1224 if (ret) {
1225 RETURN_ON_ERROR(ret);
1226 RETURN_SUCCESS;
1227 }
1228 ctx->u.rep->count = ctx->count-1;
1229 state->ptr = ctx->ptr;
1230 RETURN_FAILURE;
1231 }
1232
1233 if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1234 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1235 state->ptr != ctx->u.rep->last_ptr) {
1236 /* we may have enough matches, but if we can
1237 match another item, do so */
1238 ctx->u.rep->count = ctx->count;
1239 LASTMARK_SAVE();
1240 MARK_PUSH(ctx->lastmark);
1241 /* zero-width match protection */
1242 DATA_PUSH(&ctx->u.rep->last_ptr);
1243 ctx->u.rep->last_ptr = state->ptr;
1244 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1245 ctx->u.rep->pattern+3);
1246 DATA_POP(&ctx->u.rep->last_ptr);
1247 if (ret) {
1248 MARK_POP_DISCARD(ctx->lastmark);
1249 RETURN_ON_ERROR(ret);
1250 RETURN_SUCCESS;
1251 }
1252 MARK_POP(ctx->lastmark);
1253 LASTMARK_RESTORE();
1254 ctx->u.rep->count = ctx->count-1;
1255 state->ptr = ctx->ptr;
1256 }
1257
1258 /* cannot match more repeated items here. make sure the
1259 tail matches */
1260 state->repeat = ctx->u.rep->prev;
1261 DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
1262 RETURN_ON_SUCCESS(ret);
1263 state->repeat = ctx->u.rep;
1264 state->ptr = ctx->ptr;
1265 RETURN_FAILURE;
1266
1267 case SRE_OP_MIN_UNTIL:
1268 /* minimizing repeat */
1269 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1270
1271 ctx->u.rep = state->repeat;
1272 if (!ctx->u.rep)
1273 RETURN_ERROR(SRE_ERROR_STATE);
1274
1275 state->ptr = ctx->ptr;
1276
1277 ctx->count = ctx->u.rep->count+1;
1278
1279 TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern,
1280 ctx->ptr, ctx->count, ctx->u.rep->pattern));
1281
1282 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1283 /* not enough matches */
1284 ctx->u.rep->count = ctx->count;
1285 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1286 ctx->u.rep->pattern+3);
1287 if (ret) {
1288 RETURN_ON_ERROR(ret);
1289 RETURN_SUCCESS;
1290 }
1291 ctx->u.rep->count = ctx->count-1;
1292 state->ptr = ctx->ptr;
1293 RETURN_FAILURE;
1294 }
1295
1296 LASTMARK_SAVE();
1297
1298 /* see if the tail matches */
1299 state->repeat = ctx->u.rep->prev;
1300 DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
1301 if (ret) {
1302 RETURN_ON_ERROR(ret);
1303 RETURN_SUCCESS;
1304 }
1305
1306 state->repeat = ctx->u.rep;
1307 state->ptr = ctx->ptr;
1308
1309 LASTMARK_RESTORE();
1310
1311 if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1312 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1313 state->ptr == ctx->u.rep->last_ptr)
1314 RETURN_FAILURE;
1315
1316 ctx->u.rep->count = ctx->count;
1317 /* zero-width match protection */
1318 DATA_PUSH(&ctx->u.rep->last_ptr);
1319 ctx->u.rep->last_ptr = state->ptr;
1320 DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1321 ctx->u.rep->pattern+3);
1322 DATA_POP(&ctx->u.rep->last_ptr);
1323 if (ret) {
1324 RETURN_ON_ERROR(ret);
1325 RETURN_SUCCESS;
1326 }
1327 ctx->u.rep->count = ctx->count-1;
1328 state->ptr = ctx->ptr;
1329 RETURN_FAILURE;
1330
1331 case SRE_OP_GROUPREF:
1332 /* match backreference */
1333 TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1334 ctx->ptr, ctx->pattern[0]));
1335 i = ctx->pattern[0];
1336 {
1337 Py_ssize_t groupref = i+i;
1338 if (groupref >= state->lastmark) {
1339 RETURN_FAILURE;
1340 } else {
1341 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1342 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1343 if (!p || !e || e < p)
1344 RETURN_FAILURE;
1345 while (p < e) {
1346 if (ctx->ptr >= end || *ctx->ptr != *p)
1347 RETURN_FAILURE;
1348 p++; ctx->ptr++;
1349 }
1350 }
1351 }
1352 ctx->pattern++;
1353 break;
1354
1355 case SRE_OP_GROUPREF_IGNORE:
1356 /* match backreference */
1357 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1358 ctx->ptr, ctx->pattern[0]));
1359 i = ctx->pattern[0];
1360 {
1361 Py_ssize_t groupref = i+i;
1362 if (groupref >= state->lastmark) {
1363 RETURN_FAILURE;
1364 } else {
1365 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1366 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1367 if (!p || !e || e < p)
1368 RETURN_FAILURE;
1369 while (p < e) {
1370 if (ctx->ptr >= end ||
1371 state->lower(*ctx->ptr) != state->lower(*p))
1372 RETURN_FAILURE;
1373 p++; ctx->ptr++;
1374 }
1375 }
1376 }
1377 ctx->pattern++;
1378 break;
1379
1380 case SRE_OP_GROUPREF_EXISTS:
1381 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1382 ctx->ptr, ctx->pattern[0]));
1383 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1384 i = ctx->pattern[0];
1385 {
1386 Py_ssize_t groupref = i+i;
1387 if (groupref >= state->lastmark) {
1388 ctx->pattern += ctx->pattern[1];
1389 break;
1390 } else {
1391 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1392 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1393 if (!p || !e || e < p) {
1394 ctx->pattern += ctx->pattern[1];
1395 break;
1396 }
1397 }
1398 }
1399 ctx->pattern += 2;
1400 break;
1401
1402 case SRE_OP_ASSERT:
1403 /* assert subpattern */
1404 /* <ASSERT> <skip> <back> <pattern> */
1405 TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1406 ctx->ptr, ctx->pattern[1]));
1407 state->ptr = ctx->ptr - ctx->pattern[1];
1408 if (state->ptr < state->beginning)
1409 RETURN_FAILURE;
1410 DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
1411 RETURN_ON_FAILURE(ret);
1412 ctx->pattern += ctx->pattern[0];
1413 break;
1414
1415 case SRE_OP_ASSERT_NOT:
1416 /* assert not subpattern */
1417 /* <ASSERT_NOT> <skip> <back> <pattern> */
1418 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1419 ctx->ptr, ctx->pattern[1]));
1420 state->ptr = ctx->ptr - ctx->pattern[1];
1421 if (state->ptr >= state->beginning) {
1422 DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
1423 if (ret) {
1424 RETURN_ON_ERROR(ret);
1425 RETURN_FAILURE;
1426 }
1427 }
1428 ctx->pattern += ctx->pattern[0];
1429 break;
1430
1431 case SRE_OP_FAILURE:
1432 /* immediate failure */
1433 TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1434 RETURN_FAILURE;
1435
1436 default:
1437 TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1438 ctx->pattern[-1]));
1439 RETURN_ERROR(SRE_ERROR_ILLEGAL);
1440 }
1441 }
1442
1443 exit:
1444 ctx_pos = ctx->last_ctx_pos;
1445 jump = ctx->jump;
1446 DATA_POP_DISCARD(ctx);
1447 if (ctx_pos == -1)
1448 return ret;
1449 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1450
1451 switch (jump) {
1452 case JUMP_MAX_UNTIL_2:
1453 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1454 goto jump_max_until_2;
1455 case JUMP_MAX_UNTIL_3:
1456 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1457 goto jump_max_until_3;
1458 case JUMP_MIN_UNTIL_2:
1459 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1460 goto jump_min_until_2;
1461 case JUMP_MIN_UNTIL_3:
1462 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1463 goto jump_min_until_3;
1464 case JUMP_BRANCH:
1465 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1466 goto jump_branch;
1467 case JUMP_MAX_UNTIL_1:
1468 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1469 goto jump_max_until_1;
1470 case JUMP_MIN_UNTIL_1:
1471 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1472 goto jump_min_until_1;
1473 case JUMP_REPEAT:
1474 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1475 goto jump_repeat;
1476 case JUMP_REPEAT_ONE_1:
1477 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1478 goto jump_repeat_one_1;
1479 case JUMP_REPEAT_ONE_2:
1480 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1481 goto jump_repeat_one_2;
1482 case JUMP_MIN_REPEAT_ONE:
1483 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1484 goto jump_min_repeat_one;
1485 case JUMP_ASSERT:
1486 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1487 goto jump_assert;
1488 case JUMP_ASSERT_NOT:
1489 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1490 goto jump_assert_not;
1491 case JUMP_NONE:
1492 TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
1493 ctx->ptr, ret));
1494 break;
1495 }
1496
1497 return ret; /* should never get here */
1498 }
1499
1500 LOCAL(Py_ssize_t)
1501 SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1502 {
1503 SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1504 SRE_CHAR* end = (SRE_CHAR *)state->end;
1505 Py_ssize_t status = 0;
1506 Py_ssize_t prefix_len = 0;
1507 Py_ssize_t prefix_skip = 0;
1508 SRE_CODE* prefix = NULL;
1509 SRE_CODE* charset = NULL;
1510 SRE_CODE* overlap = NULL;
1511 int flags = 0;
1512
1513 if (pattern[0] == SRE_OP_INFO) {
1514 /* optimization info block */
1515 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
1516
1517 flags = pattern[2];
1518
1519 if (pattern[3] > 1) {
1520 /* adjust end point (but make sure we leave at least one
1521 character in there, so literal search will work) */
1522 end -= pattern[3]-1;
1523 if (end <= ptr)
1524 end = ptr+1;
1525 }
1526
1527 if (flags & SRE_INFO_PREFIX) {
1528 /* pattern starts with a known prefix */
1529 /* <length> <skip> <prefix data> <overlap data> */
1530 prefix_len = pattern[5];
1531 prefix_skip = pattern[6];
1532 prefix = pattern + 7;
1533 overlap = prefix + prefix_len - 1;
1534 } else if (flags & SRE_INFO_CHARSET)
1535 /* pattern starts with a character from a known set */
1536 /* <charset> */
1537 charset = pattern + 5;
1538
1539 pattern += 1 + pattern[1];
1540 }
1541
1542 TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
1543 prefix, prefix_len, prefix_skip));
1544 TRACE(("charset = %p\n", charset));
1545
1546 #if defined(USE_FAST_SEARCH)
1547 if (prefix_len > 1) {
1548 /* pattern starts with a known prefix. use the overlap
1549 table to skip forward as fast as we possibly can */
1550 Py_ssize_t i = 0;
1551 end = (SRE_CHAR *)state->end;
1552 while (ptr < end) {
1553 for (;;) {
1554 if ((SRE_CODE) ptr[0] != prefix[i]) {
1555 if (!i)
1556 break;
1557 else
1558 i = overlap[i];
1559 } else {
1560 if (++i == prefix_len) {
1561 /* found a potential match */
1562 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1563 state->start = ptr + 1 - prefix_len;
1564 state->ptr = ptr + 1 - prefix_len + prefix_skip;
1565 if (flags & SRE_INFO_LITERAL)
1566 return 1; /* we got all of it */
1567 status = SRE_MATCH(state, pattern + 2*prefix_skip);
1568 if (status != 0)
1569 return status;
1570 /* close but no cigar -- try again */
1571 i = overlap[i];
1572 }
1573 break;
1574 }
1575 }
1576 ptr++;
1577 }
1578 return 0;
1579 }
1580 #endif
1581
1582 if (pattern[0] == SRE_OP_LITERAL) {
1583 /* pattern starts with a literal character. this is used
1584 for short prefixes, and if fast search is disabled */
1585 SRE_CODE chr = pattern[1];
1586 end = (SRE_CHAR *)state->end;
1587 for (;;) {
1588 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1589 ptr++;
1590 if (ptr >= end)
1591 return 0;
1592 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1593 state->start = ptr;
1594 state->ptr = ++ptr;
1595 if (flags & SRE_INFO_LITERAL)
1596 return 1; /* we got all of it */
1597 status = SRE_MATCH(state, pattern + 2);
1598 if (status != 0)
1599 break;
1600 }
1601 } else if (charset) {
1602 /* pattern starts with a character from a known set */
1603 end = (SRE_CHAR *)state->end;
1604 for (;;) {
1605 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
1606 ptr++;
1607 if (ptr >= end)
1608 return 0;
1609 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1610 state->start = ptr;
1611 state->ptr = ptr;
1612 status = SRE_MATCH(state, pattern);
1613 if (status != 0)
1614 break;
1615 ptr++;
1616 }
1617 } else
1618 /* general case */
1619 while (ptr <= end) {
1620 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1621 state->start = state->ptr = ptr++;
1622 status = SRE_MATCH(state, pattern);
1623 if (status != 0)
1624 break;
1625 }
1626
1627 return status;
1628 }
1629
1630 LOCAL(int)
1631 SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)
1632 {
1633 /* check if given string is a literal template (i.e. no escapes) */
1634 while (len-- > 0)
1635 if (*ptr++ == '\\')
1636 return 0;
1637 return 1;
1638 }
1639
1640 #if !defined(SRE_RECURSIVE)
1641
1642 /* -------------------------------------------------------------------- */
1643 /* factories and destructors */
1644
1645 /* see sre.h for object declarations */
1646 static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
1647 static PyObject*pattern_scanner(PatternObject*, PyObject*);
1648
1649 static PyObject *
1650 sre_codesize(PyObject* self, PyObject *unused)
1651 {
1652 return PyInt_FromSize_t(sizeof(SRE_CODE));
1653 }
1654
1655 static PyObject *
1656 sre_getlower(PyObject* self, PyObject* args)
1657 {
1658 int character, flags;
1659 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
1660 return NULL;
1661 if (flags & SRE_FLAG_LOCALE)
1662 return Py_BuildValue("i", sre_lower_locale(character));
1663 if (flags & SRE_FLAG_UNICODE)
1664 #if defined(HAVE_UNICODE)
1665 return Py_BuildValue("i", sre_lower_unicode(character));
1666 #else
1667 return Py_BuildValue("i", sre_lower_locale(character));
1668 #endif
1669 return Py_BuildValue("i", sre_lower(character));
1670 }
1671
1672 LOCAL(void)
1673 state_reset(SRE_STATE* state)
1674 {
1675 /* FIXME: dynamic! */
1676 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
1677
1678 state->lastmark = -1;
1679 state->lastindex = -1;
1680
1681 state->repeat = NULL;
1682
1683 data_stack_dealloc(state);
1684 }
1685
1686 static void*
1687 getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
1688 {
1689 /* given a python object, return a data pointer, a length (in
1690 characters), and a character size. return NULL if the object
1691 is not a string (or not compatible) */
1692
1693 PyBufferProcs *buffer;
1694 Py_ssize_t size, bytes;
1695 int charsize;
1696 void* ptr;
1697
1698 #if defined(HAVE_UNICODE)
1699 if (PyUnicode_Check(string)) {
1700 /* unicode strings doesn't always support the buffer interface */
1701 ptr = (void*) PyUnicode_AS_DATA(string);
1702 /* bytes = PyUnicode_GET_DATA_SIZE(string); */
1703 size = PyUnicode_GET_SIZE(string);
1704 charsize = sizeof(Py_UNICODE);
1705
1706 } else {
1707 #endif
1708
1709 /* get pointer to string buffer */
1710 buffer = Py_TYPE(string)->tp_as_buffer;
1711 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1712 buffer->bf_getsegcount(string, NULL) != 1) {
1713 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
1714 return NULL;
1715 }
1716
1717 /* determine buffer size */
1718 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1719 if (bytes < 0) {
1720 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1721 return NULL;
1722 }
1723
1724 /* determine character size */
1725 #if PY_VERSION_HEX >= 0x01060000
1726 size = PyObject_Size(string);
1727 #else
1728 size = PyObject_Length(string);
1729 #endif
1730
1731 if (PyString_Check(string) || bytes == size)
1732 charsize = 1;
1733 #if defined(HAVE_UNICODE)
1734 else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
1735 charsize = sizeof(Py_UNICODE);
1736 #endif
1737 else {
1738 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1739 return NULL;
1740 }
1741
1742 #if defined(HAVE_UNICODE)
1743 }
1744 #endif
1745
1746 *p_length = size;
1747 *p_charsize = charsize;
1748
1749 return ptr;
1750 }
1751
1752 LOCAL(PyObject*)
1753 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1754 Py_ssize_t start, Py_ssize_t end)
1755 {
1756 /* prepare state object */
1757
1758 Py_ssize_t length;
1759 int charsize;
1760 void* ptr;
1761
1762 memset(state, 0, sizeof(SRE_STATE));
1763
1764 state->lastmark = -1;
1765 state->lastindex = -1;
1766
1767 ptr = getstring(string, &length, &charsize);
1768 if (!ptr)
1769 return NULL;
1770
1771 /* adjust boundaries */
1772 if (start < 0)
1773 start = 0;
1774 else if (start > length)
1775 start = length;
1776
1777 if (end < 0)
1778 end = 0;
1779 else if (end > length)
1780 end = length;
1781
1782 state->charsize = charsize;
1783
1784 state->beginning = ptr;
1785
1786 state->start = (void*) ((char*) ptr + start * state->charsize);
1787 state->end = (void*) ((char*) ptr + end * state->charsize);
1788
1789 Py_INCREF(string);
1790 state->string = string;
1791 state->pos = start;
1792 state->endpos = end;
1793
1794 if (pattern->flags & SRE_FLAG_LOCALE)
1795 state->lower = sre_lower_locale;
1796 else if (pattern->flags & SRE_FLAG_UNICODE)
1797 #if defined(HAVE_UNICODE)
1798 state->lower = sre_lower_unicode;
1799 #else
1800 state->lower = sre_lower_locale;
1801 #endif
1802 else
1803 state->lower = sre_lower;
1804
1805 return string;
1806 }
1807
1808 LOCAL(void)
1809 state_fini(SRE_STATE* state)
1810 {
1811 Py_XDECREF(state->string);
1812 data_stack_dealloc(state);
1813 }
1814
1815 /* calculate offset from start of string */
1816 #define STATE_OFFSET(state, member)\
1817 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1818
1819 LOCAL(PyObject*)
1820 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
1821 {
1822 Py_ssize_t i, j;
1823
1824 index = (index - 1) * 2;
1825
1826 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
1827 if (empty)
1828 /* want empty string */
1829 i = j = 0;
1830 else {
1831 Py_INCREF(Py_None);
1832 return Py_None;
1833 }
1834 } else {
1835 i = STATE_OFFSET(state, state->mark[index]);
1836 j = STATE_OFFSET(state, state->mark[index+1]);
1837 }
1838
1839 return PySequence_GetSlice(string, i, j);
1840 }
1841
1842 static void
1843 pattern_error(int status)
1844 {
1845 switch (status) {
1846 case SRE_ERROR_RECURSION_LIMIT:
1847 PyErr_SetString(
1848 PyExc_RuntimeError,
1849 "maximum recursion limit exceeded"
1850 );
1851 break;
1852 case SRE_ERROR_MEMORY:
1853 PyErr_NoMemory();
1854 break;
1855 case SRE_ERROR_INTERRUPTED:
1856 /* An exception has already been raised, so let it fly */
1857 break;
1858 default:
1859 /* other error codes indicate compiler/engine bugs */
1860 PyErr_SetString(
1861 PyExc_RuntimeError,
1862 "internal error in regular expression engine"
1863 );
1864 }
1865 }
1866
1867 static void
1868 pattern_dealloc(PatternObject* self)
1869 {
1870 if (self->weakreflist != NULL)
1871 PyObject_ClearWeakRefs((PyObject *) self);
1872 Py_XDECREF(self->pattern);
1873 Py_XDECREF(self->groupindex);
1874 Py_XDECREF(self->indexgroup);
1875 PyObject_DEL(self);
1876 }
1877
1878 static int
1879 check_args_size(const char *name, PyObject* args, PyObject* kw, int n)
1880 {
1881 Py_ssize_t m = PyTuple_GET_SIZE(args) + (kw ? PyDict_Size(kw) : 0);
1882 if (m <= n)
1883 return 1;
1884 PyErr_Format(PyExc_TypeError,
1885 "%s() takes at most %d positional arguments (%zd given)",
1886 name, n, m);
1887 return 0;
1888 }
1889
1890 static PyObject*
1891 fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
1892 {
1893 if (string2 != NULL) {
1894 char buf[100];
1895 if (string != NULL) {
1896 PyErr_Format(PyExc_TypeError,
1897 "Argument given by name ('%s') and position (1)",
1898 oldname);
1899 return NULL;
1900 }
1901 sprintf(buf, "The '%s' keyword parameter name is deprecated. "
1902 "Use 'string' instead.", oldname);
1903 if (PyErr_Warn(PyExc_DeprecationWarning, buf) < 0)
1904 return NULL;
1905 return string2;
1906 }
1907 if (string == NULL) {
1908 PyErr_SetString(PyExc_TypeError,
1909 "Required argument 'string' (pos 1) not found");
1910 return NULL;
1911 }
1912 return string;
1913 }
1914
1915 static PyObject*
1916 pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
1917 {
1918 SRE_STATE state;
1919 int status;
1920
1921 PyObject *string = NULL, *string2 = NULL;
1922 Py_ssize_t start = 0;
1923 Py_ssize_t end = PY_SSIZE_T_MAX;
1924 static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
1925 if (!check_args_size("match", args, kw, 3))
1926 return NULL;
1927
1928 if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnnO:match", kwlist,
1929 &string, &start, &end, &string2))
1930 return NULL;
1931
1932 string = fix_string_param(string, string2, "pattern");
1933 if (!string)
1934 return NULL;
1935
1936 string = state_init(&state, self, string, start, end);
1937 if (!string)
1938 return NULL;
1939
1940 state.ptr = state.start;
1941
1942 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1943
1944 if (state.charsize == 1) {
1945 status = sre_match(&state, PatternObject_GetCode(self));
1946 } else {
1947 #if defined(HAVE_UNICODE)
1948 status = sre_umatch(&state, PatternObject_GetCode(self));
1949 #endif
1950 }
1951
1952 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1953 if (PyErr_Occurred())
1954 return NULL;
1955
1956 state_fini(&state);
1957
1958 return pattern_new_match(self, &state, status);
1959 }
1960
1961 static PyObject*
1962 pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
1963 {
1964 SRE_STATE state;
1965 int status;
1966
1967 PyObject *string = NULL, *string2 = NULL;
1968 Py_ssize_t start = 0;
1969 Py_ssize_t end = PY_SSIZE_T_MAX;
1970 static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
1971 if (!check_args_size("search", args, kw, 3))
1972 return NULL;
1973
1974 if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnnO:search", kwlist,
1975 &string, &start, &end, &string2))
1976 return NULL;
1977
1978 string = fix_string_param(string, string2, "pattern");
1979 if (!string)
1980 return NULL;
1981
1982 string = state_init(&state, self, string, start, end);
1983 if (!string)
1984 return NULL;
1985
1986 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1987
1988 if (state.charsize == 1) {
1989 status = sre_search(&state, PatternObject_GetCode(self));
1990 } else {
1991 #if defined(HAVE_UNICODE)
1992 status = sre_usearch(&state, PatternObject_GetCode(self));
1993 #endif
1994 }
1995
1996 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1997
1998 state_fini(&state);
1999
2000 if (PyErr_Occurred())
2001 return NULL;
2002
2003 return pattern_new_match(self, &state, status);
2004 }
2005
2006 static PyObject*
2007 call(char* module, char* function, PyObject* args)
2008 {
2009 PyObject* name;
2010 PyObject* mod;
2011 PyObject* func;
2012 PyObject* result;
2013
2014 if (!args)
2015 return NULL;
2016 name = PyString_FromString(module);
2017 if (!name)
2018 return NULL;
2019 mod = PyImport_Import(name);
2020 Py_DECREF(name);
2021 if (!mod)
2022 return NULL;
2023 func = PyObject_GetAttrString(mod, function);
2024 Py_DECREF(mod);
2025 if (!func)
2026 return NULL;
2027 result = PyObject_CallObject(func, args);
2028 Py_DECREF(func);
2029 Py_DECREF(args);
2030 return result;
2031 }
2032
#ifdef USE_BUILTIN_COPY
/* Replace *object with the result of copy.deepcopy(*object, memo).
   Returns 1 on success.  Returns 0 if the copy could not be made, in
   which case *object is left unchanged. */
static int
deepcopy(PyObject** object, PyObject* memo)
{
    PyObject* call_args;
    PyObject* duplicate;

    /* call() copes with a NULL tuple and releases a non-NULL one */
    call_args = PyTuple_Pack(2, *object, memo);
    duplicate = call("copy", "deepcopy", call_args);
    if (!duplicate)
        return 0;

    /* swap the copy in for the original */
    Py_DECREF(*object);
    *object = duplicate;

    return 1; /* success */
}
#endif
2052
2053 static PyObject*
2054 join_list(PyObject* list, PyObject* string)
2055 {
2056 /* join list elements */
2057
2058 PyObject* joiner;
2059 #if PY_VERSION_HEX >= 0x01060000
2060 PyObject* function;
2061 PyObject* args;
2062 #endif
2063 PyObject* result;
2064
2065 joiner = PySequence_GetSlice(string, 0, 0);
2066 if (!joiner)
2067 return NULL;
2068
2069 if (PyList_GET_SIZE(list) == 0) {
2070 Py_DECREF(list);
2071 return joiner;
2072 }
2073
2074 #if PY_VERSION_HEX >= 0x01060000
2075 function = PyObject_GetAttrString(joiner, "join");
2076 if (!function) {
2077 Py_DECREF(joiner);
2078 return NULL;
2079 }
2080 args = PyTuple_New(1);
2081 if (!args) {
2082 Py_DECREF(function);
2083 Py_DECREF(joiner);
2084 return NULL;
2085 }
2086 PyTuple_SET_ITEM(args, 0, list);
2087 result = PyObject_CallObject(function, args);
2088 Py_DECREF(args); /* also removes list */
2089 Py_DECREF(function);
2090 #else
2091 result = call(
2092 "string", "join",
2093 PyTuple_Pack(2, list, joiner)
2094 );
2095 #endif
2096 Py_DECREF(joiner);
2097
2098 return result;
2099 }
2100
2101 static PyObject*
2102 pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
2103 {
2104 SRE_STATE state;
2105 PyObject* list;
2106 int status;
2107 Py_ssize_t i, b, e;
2108
2109 PyObject *string = NULL, *string2 = NULL;
2110 Py_ssize_t start = 0;
2111 Py_ssize_t end = PY_SSIZE_T_MAX;
2112 static char* kwlist[] = { "string", "pos", "endpos", "source", NULL };
2113 if (!check_args_size("findall", args, kw, 3))
2114 return NULL;
2115
2116 if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnnO:findall", kwlist,
2117 &string, &start, &end, &string2))
2118 return NULL;
2119
2120 string = fix_string_param(string, string2, "source");
2121 if (!string)
2122 return NULL;
2123
2124 string = state_init(&state, self, string, start, end);
2125 if (!string)
2126 return NULL;
2127
2128 list = PyList_New(0);
2129 if (!list) {
2130 state_fini(&state);
2131 return NULL;
2132 }
2133
2134 while (state.start <= state.end) {
2135
2136 PyObject* item;
2137
2138 state_reset(&state);
2139
2140 state.ptr = state.start;
2141
2142 if (state.charsize == 1) {
2143 status = sre_search(&state, PatternObject_GetCode(self));
2144 } else {
2145 #if defined(HAVE_UNICODE)
2146 status = sre_usearch(&state, PatternObject_GetCode(self));
2147 #endif
2148 }
2149
2150 if (PyErr_Occurred())
2151 goto error;
2152
2153 if (status <= 0) {
2154 if (status == 0)
2155 break;
2156 pattern_error(status);
2157 goto error;
2158 }
2159
2160 /* don't bother to build a match object */
2161 switch (self->groups) {
2162 case 0:
2163 b = STATE_OFFSET(&state, state.start);
2164 e = STATE_OFFSET(&state, state.ptr);
2165 item = PySequence_GetSlice(string, b, e);
2166 if (!item)
2167 goto error;
2168 break;
2169 case 1:
2170 item = state_getslice(&state, 1, string, 1);
2171 if (!item)
2172 goto error;
2173 break;
2174 default:
2175 item = PyTuple_New(self->groups);
2176 if (!item)
2177 goto error;
2178 for (i = 0; i < self->groups; i++) {
2179 PyObject* o = state_getslice(&state, i+1, string, 1);
2180 if (!o) {
2181 Py_DECREF(item);
2182 goto error;
2183 }
2184 PyTuple_SET_ITEM(item, i, o);
2185 }
2186 break;
2187 }
2188
2189 status = PyList_Append(list, item);
2190 Py_DECREF(item);
2191 if (status < 0)
2192 goto error;
2193
2194 if (state.ptr == state.start)
2195 state.start = (void*) ((char*) state.ptr + state.charsize);
2196 else
2197 state.start = state.ptr;
2198
2199 }
2200
2201 state_fini(&state);
2202 return list;
2203
2204 error:
2205 Py_DECREF(list);
2206 state_fini(&state);
2207 return NULL;
2208
2209 }
2210
#if PY_VERSION_HEX >= 0x02020000
/* pattern.finditer(...): build a scanner from `args` and return a
   callable-iterator over its bound "search" method, with None as the
   sentinel value.  Returns NULL with an exception set on failure. */
static PyObject*
pattern_finditer(PatternObject* pattern, PyObject* args)
{
    PyObject* bound_search;
    PyObject* result;
    PyObject* scanner = pattern_scanner(pattern, args);

    if (!scanner)
        return NULL;

    /* the bound method holds the scanner from here on */
    bound_search = PyObject_GetAttrString(scanner, "search");
    Py_DECREF(scanner);
    if (!bound_search)
        return NULL;

    result = PyCallIter_New(bound_search, Py_None);
    Py_DECREF(bound_search);

    return result;
}
#endif
2234
/* split(string[, maxsplit]) -> list.

   Searches the string repeatedly with the compiled pattern and appends to
   the result list, for every non-empty match, the text between the previous
   match and this one followed by one entry per group of the pattern.  The
   text after the last match is always appended, even if empty.

   maxsplit == 0 (the default) means "no limit"; otherwise at most maxsplit
   matches are consumed.  "source" is accepted as a third keyword and is
   reconciled with "string" by fix_string_param() (defined elsewhere in this
   file -- exact semantics not visible here).

   Returns a new list, or NULL with an exception set. */
2235 static PyObject*
2236 pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
2237 {
2238 SRE_STATE state;
2239 PyObject* list;
2240 PyObject* item;
2241 int status;
2242 Py_ssize_t n; /* number of splits performed so far */
2243 Py_ssize_t i;
2244 void* last; /* end of the previous non-empty match */
2245
2246 PyObject *string = NULL, *string2 = NULL;
2247 Py_ssize_t maxsplit = 0;
2248 static char* kwlist[] = { "string", "maxsplit", "source", NULL };
2249 if (!check_args_size("split", args, kw, 2))
2250 return NULL;
2251
2252 if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnO:split", kwlist,
2253 &string, &maxsplit, &string2))
2254 return NULL;
2255
2256 string = fix_string_param(string, string2, "source");
2257 if (!string)
2258 return NULL;
2259
/* from here on every exit path must call state_fini(&state) */
2260 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
2261 if (!string)
2262 return NULL;
2263
2264 list = PyList_New(0);
2265 if (!list) {
2266 state_fini(&state);
2267 return NULL;
2268 }
2269
2270 n = 0;
2271 last = state.start;
2272
2273 while (!maxsplit || n < maxsplit) {
2274
2275 state_reset(&state);
2276
2277 state.ptr = state.start;
2278
/* NOTE(review): if charsize != 1 and HAVE_UNICODE is not defined, status
   is left unassigned by this if/else -- confirm that configuration cannot
   occur. */
2279 if (state.charsize == 1) {
2280 status = sre_search(&state, PatternObject_GetCode(self));
2281 } else {
2282 #if defined(HAVE_UNICODE)
2283 status = sre_usearch(&state, PatternObject_GetCode(self));
2284 #endif
2285 }
2286
2287 if (PyErr_Occurred())
2288 goto error;
2289
/* status == 0: no further match; status < 0: engine error */
2290 if (status <= 0) {
2291 if (status == 0)
2292 break;
2293 pattern_error(status);
2294 goto error;
2295 }
2296
/* empty match: never split on it; stop if nothing is left after the
   previous match, otherwise retry one character further on */
2297 if (state.start == state.ptr) {
2298 if (last == state.end)
2299 break;
2300 /* skip one character */
2301 state.start = (void*) ((char*) state.ptr + state.charsize);
2302 continue;
2303 }
2304
2305 /* get segment before this match */
2306 item = PySequence_GetSlice(
2307 string, STATE_OFFSET(&state, last),
2308 STATE_OFFSET(&state, state.start)
2309 );
2310 if (!item)
2311 goto error;
2312 status = PyList_Append(list, item);
2313 Py_DECREF(item);
2314 if (status < 0)
2315 goto error;
2316
2317 /* add groups (if any) */
2318 for (i = 0; i < self->groups; i++) {
2319 item = state_getslice(&state, i+1, string, 0);
2320 if (!item)
2321 goto error;
2322 status = PyList_Append(list, item);
2323 Py_DECREF(item);
2324 if (status < 0)
2325 goto error;
2326 }
2327
2328 n = n + 1;
2329
/* continue searching right after this match */
2330 last = state.start = state.ptr;
2331
2332 }
2333
2334 /* get segment following last match (even if empty) */
2335 item = PySequence_GetSlice(
2336 string, STATE_OFFSET(&state, last), state.endpos
2337 );
2338 if (!item)
2339 goto error;
2340 status = PyList_Append(list, item);
2341 Py_DECREF(item);
2342 if (status < 0)
2343 goto error;
2344
2345 state_fini(&state);
2346 return list;
2347
/* common failure exit: drop the partial result and release the state */
2348 error:
2349 Py_DECREF(list);
2350 state_fini(&state);
2351 return NULL;
2352
2353 }
2354
2355 static PyObject*
2356 pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
2357 Py_ssize_t count, Py_ssize_t subn)
2358 {
2359 SRE_STATE state;
2360 PyObject* list;
2361 PyObject* item;
2362 PyObject* filter;
2363 PyObject* args;
2364 PyObject* match;
2365 void* ptr;
2366 int status;
2367 Py_ssize_t n;
2368 Py_ssize_t i, b, e;
2369 int bint;
2370 int filter_is_callable;
2371
2372 if (PyCallable_Check(ptemplate)) {
2373 /* sub/subn takes either a function or a template */
2374 filter = ptemplate;
2375 Py_INCREF(filter);
2376 filter_is_callable = 1;
2377 } else {
2378 /* if not callable, check if it's a literal string */
2379 int literal;
2380 ptr = getstring(ptemplate, &n, &bint);
2381 b = bint;
2382 if (ptr) {
2383 if (b == 1) {
2384 literal = sre_literal_template((unsigned char *)ptr, n);
2385 } else {
2386 #if defined(HAVE_UNICODE)
2387 literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
2388 #endif
2389 }
2390 } else {
2391 PyErr_Clear();
2392 literal = 0;
2393 }
2394 if (literal) {
2395 filter = ptemplate;
2396 Py_INCREF(filter);
2397 filter_is_callable = 0;
2398 } else {
2399 /* not a literal; hand it over to the template compiler */
2400 filter = call(
2401 SRE_PY_MODULE, "_subx",
2402 PyTuple_Pack(2, self, ptemplate)
2403 );
2404 if (!filter)
2405 return NULL;
2406 filter_is_callable = PyCallable_Check(filter);
2407 }
2408 }
2409
2410 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
2411 if (!string) {
2412 Py_DECREF(filter);
2413 return NULL;
2414 }
2415
2416 list = PyList_New(0);
2417 if (!list) {
2418 Py_DECREF(filter);
2419 state_fini(&state);
2420 return NULL;
2421 }
2422
2423 n = i = 0;
2424
2425 while (!count || n < count) {
2426
2427 state_reset(&state);
2428
2429 state.ptr = state.start;
2430
2431 if (state.charsize == 1) {
2432 status = sre_search(&state, PatternObject_GetCode(self));
2433 } else {
2434 #if defined(HAVE_UNICODE)
2435 status = sre_usearch(&state, PatternObject_GetCode(self));
2436 #endif
2437 }
2438
2439 if (PyErr_Occurred())
2440 goto error;
2441
2442 if (status <= 0) {
2443 if (status == 0)
2444 break;
2445 pattern_error(status);
2446 goto error;
2447 }
2448
2449 b = STATE_OFFSET(&state, state.start);
2450 e = STATE_OFFSET(&state, state.ptr);
2451
2452 if (i < b) {
2453 /* get segment before this match */
2454 item = PySequence_GetSlice(string, i, b);
2455 if (!item)
2456 goto error;
2457 status = PyList_Append(list, item);
2458 Py_DECREF(item);
2459 if (status < 0)
2460 goto error;
2461
2462 } else if (i == b && i == e && n > 0)
2463 /* ignore empty match on latest position */
2464 goto next;
2465
2466 if (filter_is_callable) {
2467 /* pass match object through filter */
2468 match = pattern_new_match(self, &state, 1);
2469 if (!match)
2470 goto error;
2471 args = PyTuple_Pack(1, match);
2472 if (!args) {
2473 Py_DECREF(match);
2474 goto error;
2475 }
2476 item = PyObject_CallObject(filter, args);
2477 Py_DECREF(args);
2478 Py_DECREF(match);
2479 if (!item)
2480 goto error;
2481 } else {
2482 /* filter is literal string */
2483 item = filter;
2484 Py_INCREF(item);
2485 }
2486
2487 /* add to list */
2488 if (item != Py_None) {
2489 status = PyList_Append(list, item);
2490 Py_DECREF(item);
2491 if (status < 0)
2492 goto error;
2493 }
2494
2495 i = e;
2496 n = n + 1;
2497
2498 next:
2499 /* move on */
2500 if (state.ptr == state.start)
2501 state.start = (void*) ((char*) state.ptr + state.charsize);
2502 else
2503 state.start = state.ptr;
2504
2505 }
2506
2507 /* get segment following last match */
2508 if (i < state.endpos) {
2509 item = PySequence_GetSlice(string, i, state.endpos);
2510 if (!item)
2511 goto error;
2512 status = PyList_Append(list, item);
2513 Py_DECREF(item);
2514 if (status < 0)
2515 goto error;
2516 }
2517
2518 state_fini(&state);
2519
2520 Py_DECREF(filter);
2521
2522 /* convert list to single string (also removes list) */
2523 item = join_list(list, string);
2524
2525 if (!item)
2526 return NULL;
2527
2528 if (subn)
2529 return Py_BuildValue("Nn", item, n);
2530
2531 return item;
2532
2533 error:
2534 Py_DECREF(list);
2535 state_fini(&state);
2536 Py_DECREF(filter);
2537 return NULL;
2538
2539 }
2540
2541 static PyObject*
2542 pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
2543 {
2544 PyObject* ptemplate;
2545 PyObject* string;
2546 Py_ssize_t count = 0;
2547 static char* kwlist[] = { "repl", "string", "count", NULL };
2548 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
2549 &ptemplate, &string, &count))
2550 return NULL;
2551
2552 return pattern_subx(self, ptemplate, string, count, 0);
2553 }
2554
2555 static PyObject*
2556 pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
2557 {
2558 PyObject* ptemplate;
2559 PyObject* string;
2560 Py_ssize_t count = 0;
2561 static char* kwlist[] = { "repl", "string", "count", NULL };
2562 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
2563 &ptemplate, &string, &count))
2564 return NULL;
2565
2566 return pattern_subx(self, ptemplate, string, count, 1);
2567 }
2568
2569 static PyObject*
2570 pattern_copy(PatternObject* self, PyObject *unused)
2571 {
2572 #ifdef USE_BUILTIN_COPY
2573 PatternObject* copy;
2574 int offset;
2575
2576 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
2577 if (!copy)
2578 return NULL;
2579
2580 offset = offsetof(PatternObject, groups);
2581
2582 Py_XINCREF(self->groupindex);
2583 Py_XINCREF(self->indexgroup);
2584 Py_XINCREF(self->pattern);
2585
2586 memcpy((char*) copy + offset, (char*) self + offset,
2587 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
2588 copy->weakreflist = NULL;
2589
2590 return (PyObject*) copy;
2591 #else
2592 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
2593 return NULL;
2594 #endif
2595 }
2596
2597 static PyObject*
2598 pattern_deepcopy(PatternObject* self, PyObject* memo)
2599 {
2600 #ifdef USE_BUILTIN_COPY
2601 PatternObject* copy;
2602
2603 copy = (PatternObject*) pattern_copy(self);
2604 if (!copy)
2605 return NULL;
2606
2607 if (!deepcopy(&copy->groupindex, memo) ||
2608 !deepcopy(&copy->indexgroup, memo) ||
2609 !deepcopy(&copy->pattern, memo)) {
2610 Py_DECREF(copy);
2611 return NULL;
2612 }
2613
2614 #else
2615 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
2616 return NULL;
2617 #endif
2618 }
2619
/* Docstrings for the SRE_Pattern methods and type; they are referenced
   from pattern_methods[] and Pattern_Type below. */
2620 PyDoc_STRVAR(pattern_match_doc,
2621 "match(string[, pos[, endpos]]) --> match object or None.\n\
2622 Matches zero or more characters at the beginning of the string");
2623
2624 PyDoc_STRVAR(pattern_search_doc,
2625 "search(string[, pos[, endpos]]) --> match object or None.\n\
2626 Scan through string looking for a match, and return a corresponding\n\
2627 match object instance. Return None if no position in the string matches.");
2628
2629 PyDoc_STRVAR(pattern_split_doc,
2630 "split(string[, maxsplit = 0]) --> list.\n\
2631 Split string by the occurrences of pattern.");
2632
2633 PyDoc_STRVAR(pattern_findall_doc,
2634 "findall(string[, pos[, endpos]]) --> list.\n\
2635 Return a list of all non-overlapping matches of pattern in string.");
2636
2637 PyDoc_STRVAR(pattern_finditer_doc,
2638 "finditer(string[, pos[, endpos]]) --> iterator.\n\
2639 Return an iterator over all non-overlapping matches for the \n\
2640 RE pattern in string. For each match, the iterator returns a\n\
2641 match object.");
2642
2643 PyDoc_STRVAR(pattern_sub_doc,
2644 "sub(repl, string[, count = 0]) --> newstring\n\
2645 Return the string obtained by replacing the leftmost non-overlapping\n\
2646 occurrences of pattern in string by the replacement repl.");
2647
2648 PyDoc_STRVAR(pattern_subn_doc,
2649 "subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
2650 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
2651 the leftmost non-overlapping occurrences of pattern with the\n\
2652 replacement repl.");
2653
/* tp_doc of Pattern_Type */
2654 PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
2655
/* Method table of SRE_Pattern objects (installed as tp_methods of
   Pattern_Type).  Each entry is {name, C function, calling convention,
   docstring}; "scanner", "__copy__" and "__deepcopy__" omit the docstring.
   "finditer" is only present for Python 2.2 and later. */
2656 static PyMethodDef pattern_methods[] = {
2657 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
2658 pattern_match_doc},
2659 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
2660 pattern_search_doc},
2661 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
2662 pattern_sub_doc},
2663 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
2664 pattern_subn_doc},
2665 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
2666 pattern_split_doc},
2667 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
2668 pattern_findall_doc},
2669 #if PY_VERSION_HEX >= 0x02020000
2670 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,
2671 pattern_finditer_doc},
2672 #endif
2673 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
2674 {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
2675 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
2676 {NULL, NULL} /* Sentinel */
2677 };
2678
/* Byte offset of member x inside PatternObject */
2679 #define PAT_OFF(x) offsetof(PatternObject, x)
/* Read-only attributes exposed directly from the PatternObject struct
   (installed as tp_members of Pattern_Type) */
2680 static PyMemberDef pattern_members[] = {
2681 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
2682 {"flags", T_INT, PAT_OFF(flags), READONLY},
2683 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
2684 {"groupindex", T_OBJECT, PAT_OFF(groupindex), READONLY},
2685 {NULL} /* Sentinel */
2686 };
2687
/* Type object for compiled patterns.  It is a variable-size type: the
   item size is sizeof(SRE_CODE), so each instance carries its own code
   array.  Weak references are supported through the weakreflist member. */
2688 statichere PyTypeObject Pattern_Type = {
2689 PyObject_HEAD_INIT(NULL)
2690 0, "_" SRE_MODULE ".SRE_Pattern", /* ob_size, tp_name */
2691 sizeof(PatternObject), sizeof(SRE_CODE), /* tp_basicsize, tp_itemsize */
2692 (destructor)pattern_dealloc, /*tp_dealloc*/
2693 0, /* tp_print */
2694 0, /* tp_getattr */
2695 0, /* tp_setattr */
2696 0, /* tp_compare */
2697 0, /* tp_repr */
2698 0, /* tp_as_number */
2699 0, /* tp_as_sequence */
2700 0, /* tp_as_mapping */
2701 0, /* tp_hash */
2702 0, /* tp_call */
2703 0, /* tp_str */
2704 0, /* tp_getattro */
2705 0, /* tp_setattro */
2706 0, /* tp_as_buffer */
2707 Py_TPFLAGS_DEFAULT, /* tp_flags */
2708 pattern_doc, /* tp_doc */
2709 0, /* tp_traverse */
2710 0, /* tp_clear */
2711 0, /* tp_richcompare */
2712 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2713 0, /* tp_iter */
2714 0, /* tp_iternext */
2715 pattern_methods, /* tp_methods */
2716 pattern_members, /* tp_members */
2717 };
2718
2719 static int _validate(PatternObject *self); /* Forward */
2720
2721 static PyObject *
2722 _compile(PyObject* self_, PyObject* args)
2723 {
2724 /* "compile" pattern descriptor to pattern object */
2725
2726 PatternObject* self;
2727 Py_ssize_t i, n;
2728
2729 PyObject* pattern;
2730 int flags = 0;
2731 PyObject* code;
2732 Py_ssize_t groups = 0;
2733 PyObject* groupindex = NULL;
2734 PyObject* indexgroup = NULL;
2735 if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
2736 &PyList_Type, &code, &groups,
2737 &groupindex, &indexgroup))
2738 return NULL;
2739
2740 n = PyList_GET_SIZE(code);
2741 /* coverity[ampersand_in_size] */
2742 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
2743 if (!self)
2744 return NULL;
2745 self->weakreflist = NULL;
2746 self->pattern = NULL;
2747 self->groupindex = NULL;
2748 self->indexgroup = NULL;
2749
2750 self->codesize = n;
2751
2752 for (i = 0; i < n; i++) {
2753 PyObject *o = PyList_GET_ITEM(code, i);
2754 unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o)
2755 : PyLong_AsUnsignedLong(o);
2756 if (value == (unsigned long)-1 && PyErr_Occurred()) {
2757 if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
2758 PyErr_SetString(PyExc_OverflowError,
2759 "regular expression code size limit exceeded");
2760 }
2761 break;
2762 }
2763 self->code[i] = (SRE_CODE) value;
2764 if ((unsigned long) self->code[i] != value) {
2765 PyErr_SetString(PyExc_OverflowError,
2766 "regular expression code size limit exceeded");
2767 break;
2768 }
2769 }
2770
2771 if (PyErr_Occurred()) {
2772 Py_DECREF(self);
2773 return NULL;
2774 }
2775
2776 Py_INCREF(pattern);
2777 self->pattern = pattern;
2778
2779 self->flags = flags;
2780
2781 self->groups = groups;
2782
2783 Py_XINCREF(groupindex);
2784 self->groupindex = groupindex;
2785
2786 Py_XINCREF(indexgroup);
2787 self->indexgroup = indexgroup;
2788
2789 self->weakreflist = NULL;
2790
2791 if (!_validate(self)) {
2792 Py_DECREF(self);
2793 return NULL;
2794 }
2795
2796 return (PyObject*) self;
2797 }
2798
2799 /* -------------------------------------------------------------------- */
2800 /* Code validation */
2801
2802 /* To learn more about this code, have a look at the _compile() function in
2803 Lib/sre_compile.py. The validation functions below check the code array
2804 for conformance with the code patterns generated there.
2805
2806 The nice thing about the generated code is that it is position-independent:
2807 all jumps are relative jumps forward. Also, jumps don't cross each other:
2808 the target of a later jump is always earlier than the target of an earlier
2809 jump. IOW, this is okay:
2810
2811 J---------J-------T--------T
2812 \ \_____/ /
2813 \______________________/
2814
2815 but this is not:
2816
2817 J---------J-------T--------T
2818 \_________\_____/ /
2819 \____________/
2820
2821 It also helps that SRE_CODE is always an unsigned type.
2822 */
2823
2824 /* Defining this one enables tracing of the validator (it is explicitly
   undefined here, so VTRACE expands to a no-op below) */
2825 #undef VVERBOSE
2826
2827 /* Trace macro for the validator; v is a parenthesised printf argument
   list, e.g. VTRACE(("x=%d\n", x)) */
2828 #if defined(VVERBOSE)
2829 #define VTRACE(v) printf v
2830 #else
2831 #define VTRACE(v) do {} while(0) /* do nothing */
2832 #endif
2833
2834 /* Report failure: returns 0 from the function the macro is expanded in */
2835 #define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
2836
2837 /* Extract opcode, argument, or skip count from code array.
   All of these expand inside a validator function and rely on its locals:
   they read from `code`, bounds-check against `end`, store into `op`,
   `arg` or `skip`, advance `code` by one word, and FAIL (return 0) when
   `code` has already reached `end`. */
2838 #define GET_OP \
2839 do { \
2840 VTRACE(("%p: ", code)); \
2841 if (code >= end) FAIL; \
2842 op = *code++; \
2843 VTRACE(("%lu (op)\n", (unsigned long)op)); \
2844 } while (0)
2845 #define GET_ARG \
2846 do { \
2847 VTRACE(("%p= ", code)); \
2848 if (code >= end) FAIL; \
2849 arg = *code++; \
2850 VTRACE(("%lu (arg)\n", (unsigned long)arg)); \
2851 } while (0)
/* GET_SKIP_ADJ additionally FAILs when skip-adj is larger than the number
   of words left between `code` (still pointing at the skip word) and
   `end`; only then is the skip word consumed. */
2852 #define GET_SKIP_ADJ(adj) \
2853 do { \
2854 VTRACE(("%p= ", code)); \
2855 if (code >= end) FAIL; \
2856 skip = *code; \
2857 VTRACE(("%lu (skip to %p)\n", \
2858 (unsigned long)skip, code+skip)); \
2859 if (skip-adj > end-code) \
2860 FAIL; \
2861 code++; \
2862 } while (0)
2863 #define GET_SKIP GET_SKIP_ADJ(0)
2864
/* Validate the charset items stored in [code, end).

   Accepted items are NEGATE, LITERAL <ch>, RANGE <lo> <hi>, CHARSET
   <32-byte bitmap>, BIGCHARSET <nblocks> <256-byte table> <nblocks
   32-byte bitmaps> and CATEGORY <known category code>.  Every item must
   lie completely inside the range.

   Returns 1 if the whole range is well-formed, 0 otherwise (via FAIL). */
2865 static int
2866 _validate_charset(SRE_CODE *code, SRE_CODE *end)
2867 {
2868 /* Some variables are manipulated by the macros above */
2869 SRE_CODE op;
2870 SRE_CODE arg;
2871 SRE_CODE offset; /* size of the current item's payload, in code words */
2872 int i;
2873
2874 while (code < end) {
2875 GET_OP;
2876 switch (op) {
2877
2878 case SRE_OP_NEGATE:
/* no operands */
2879 break;
2880
2881 case SRE_OP_LITERAL:
/* one operand; its value is not checked */
2882 GET_ARG;
2883 break;
2884
2885 case SRE_OP_RANGE:
/* two operands; their values are not checked */
2886 GET_ARG;
2887 GET_ARG;
2888 break;
2889
2890 case SRE_OP_CHARSET:
2891 offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */
2892 if (offset > end-code)
2893 FAIL;
2894 code += offset;
2895 break;
2896
2897 case SRE_OP_BIGCHARSET:
2898 GET_ARG; /* Number of blocks */
2899 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
2900 if (offset > end-code)
2901 FAIL;
2902 /* Make sure that each byte points to a valid block */
2903 for (i = 0; i < 256; i++) {
2904 if (((unsigned char *)code)[i] >= arg)
2905 FAIL;
2906 }
2907 code += offset;
/* NOTE(review): arg*32 is computed in SRE_CODE arithmetic and nothing
   above bounds arg, so this product could wrap for a huge block count --
   confirm whether that needs a guard. */
2908 offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */
2909 if (offset > end-code)
2910 FAIL;
2911 code += offset;
2912 break;
2913
2914 case SRE_OP_CATEGORY:
/* one operand, which must be one of the known category codes */
2915 GET_ARG;
2916 switch (arg) {
2917 case SRE_CATEGORY_DIGIT:
2918 case SRE_CATEGORY_NOT_DIGIT:
2919 case SRE_CATEGORY_SPACE:
2920 case SRE_CATEGORY_NOT_SPACE:
2921 case SRE_CATEGORY_WORD:
2922 case SRE_CATEGORY_NOT_WORD:
2923 case SRE_CATEGORY_LINEBREAK:
2924 case SRE_CATEGORY_NOT_LINEBREAK:
2925 case SRE_CATEGORY_LOC_WORD:
2926 case SRE_CATEGORY_LOC_NOT_WORD:
2927 case SRE_CATEGORY_UNI_DIGIT:
2928 case SRE_CATEGORY_UNI_NOT_DIGIT:
2929 case SRE_CATEGORY_UNI_SPACE:
2930 case SRE_CATEGORY_UNI_NOT_SPACE:
2931 case SRE_CATEGORY_UNI_WORD:
2932 case SRE_CATEGORY_UNI_NOT_WORD:
2933 case SRE_CATEGORY_UNI_LINEBREAK:
2934 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
2935 break;
2936 default:
2937 FAIL;
2938 }
2939 break;
2940
2941 default:
/* anything else is not a valid charset item */
2942 FAIL;
2943
2944 }
2945 }
2946
2947 return 1;
2948 }
2949
/* Validate the opcode sequence stored in [code, end).

   Walks the range one opcode at a time, checks each opcode's operands and
   recurses into nested sub-patterns (branches, repeats, assertions,
   conditional groups) using the skip counts stored in the code.  `groups`
   is the group count used to bound MARK and GROUPREF arguments.

   Returns 1 if the whole range is well-formed, 0 otherwise (via FAIL). */
2950 static int
2951 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
2952 {
2953 /* Some variables are manipulated by the macros above */
2954 SRE_CODE op;
2955 SRE_CODE arg;
2956 SRE_CODE skip;
2957
2958 VTRACE(("code=%p, end=%p\n", code, end));
2959
/* callers compute `end` from skip counts; reject an inverted range */
2960 if (code > end)
2961 FAIL;
2962
2963 while (code < end) {
2964 GET_OP;
2965 switch (op) {
2966
2967 case SRE_OP_MARK:
2968 /* We don't check whether marks are properly nested; the
2969 sre_match() code is robust even if they don't, and the worst
2970 you can get is nonsensical match results. */
2971 GET_ARG;
2972 if (arg > 2*groups+1) {
2973 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
2974 FAIL;
2975 }
2976 break;
2977
2978 case SRE_OP_LITERAL:
2979 case SRE_OP_NOT_LITERAL:
2980 case SRE_OP_LITERAL_IGNORE:
2981 case SRE_OP_NOT_LITERAL_IGNORE:
2982 GET_ARG;
2983 /* The arg is just a character, nothing to check */
2984 break;
2985
2986 case SRE_OP_SUCCESS:
2987 case SRE_OP_FAILURE:
2988 /* Nothing to check; these normally end the matching process */
2989 break;
2990
2991 case SRE_OP_AT:
/* one operand, which must be one of the known position codes */
2992 GET_ARG;
2993 switch (arg) {
2994 case SRE_AT_BEGINNING:
2995 case SRE_AT_BEGINNING_STRING:
2996 case SRE_AT_BEGINNING_LINE:
2997 case SRE_AT_END:
2998 case SRE_AT_END_LINE:
2999 case SRE_AT_END_STRING:
3000 case SRE_AT_BOUNDARY:
3001 case SRE_AT_NON_BOUNDARY:
3002 case SRE_AT_LOC_BOUNDARY:
3003 case SRE_AT_LOC_NON_BOUNDARY:
3004 case SRE_AT_UNI_BOUNDARY:
3005 case SRE_AT_UNI_NON_BOUNDARY:
3006 break;
3007 default:
3008 FAIL;
3009 }
3010 break;
3011
3012 case SRE_OP_ANY:
3013 case SRE_OP_ANY_ALL:
3014 /* These have no operands */
3015 break;
3016
3017 case SRE_OP_IN:
3018 case SRE_OP_IN_IGNORE:
/* <IN> <skip> charset items... <FAILURE> */
3019 GET_SKIP;
3020 /* Stop 1 before the end; we check the FAILURE below */
3021 if (!_validate_charset(code, code+skip-2))
3022 FAIL;
3023 if (code[skip-2] != SRE_OP_FAILURE)
3024 FAIL;
3025 code += skip-1;
3026 break;
3027
3028 case SRE_OP_INFO:
3029 {
3030 /* A minimal info field is
3031 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
3032 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
3033 more follows. */
3034 SRE_CODE flags, i;
3035 SRE_CODE *newcode;
3036 GET_SKIP;
/* newcode: first word after the whole INFO block */
3037 newcode = code+skip-1;
3038 GET_ARG; flags = arg;
3039 GET_ARG; /* min */
3040 GET_ARG; /* max */
3041 /* Check that only valid flags are present */
3042 if ((flags & ~(SRE_INFO_PREFIX |
3043 SRE_INFO_LITERAL |
3044 SRE_INFO_CHARSET)) != 0)
3045 FAIL;
3046 /* PREFIX and CHARSET are mutually exclusive */
3047 if ((flags & SRE_INFO_PREFIX) &&
3048 (flags & SRE_INFO_CHARSET))
3049 FAIL;
3050 /* LITERAL implies PREFIX */
3051 if ((flags & SRE_INFO_LITERAL) &&
3052 !(flags & SRE_INFO_PREFIX))
3053 FAIL;
3054 /* Validate the prefix */
3055 if (flags & SRE_INFO_PREFIX) {
3056 SRE_CODE prefix_len;
3057 GET_ARG; prefix_len = arg;
3058 GET_ARG; /* prefix skip */
3059 /* Here comes the prefix string */
3060 if (prefix_len > newcode-code)
3061 FAIL;
3062 code += prefix_len;
3063 /* And here comes the overlap table */
3064 if (prefix_len > newcode-code)
3065 FAIL;
3066 /* Each overlap value should be < prefix_len */
3067 for (i = 0; i < prefix_len; i++) {
3068 if (code[i] >= prefix_len)
3069 FAIL;
3070 }
3071 code += prefix_len;
3072 }
3073 /* Validate the charset */
3074 if (flags & SRE_INFO_CHARSET) {
3075 if (!_validate_charset(code, newcode-1))
3076 FAIL;
3077 if (newcode[-1] != SRE_OP_FAILURE)
3078 FAIL;
3079 code = newcode;
3080 }
/* without a charset the block must end exactly at newcode */
3081 else if (code != newcode) {
3082 VTRACE(("code=%p, newcode=%p\n", code, newcode));
3083 FAIL;
3084 }
3085 }
3086 break;
3087
3088 case SRE_OP_BRANCH:
3089 {
/* a list of alternatives, each "<skip> body <JUMP> <skip>", terminated
   by a skip of 0 */
3090 SRE_CODE *target = NULL;
3091 for (;;) {
3092 GET_SKIP;
3093 if (skip == 0)
3094 break;
3095 /* Stop 2 before the end; we check the JUMP below */
3096 if (!_validate_inner(code, code+skip-3, groups))
3097 FAIL;
3098 code += skip-3;
3099 /* Check that it ends with a JUMP, and that each JUMP
3100 has the same target */
3101 GET_OP;
3102 if (op != SRE_OP_JUMP)
3103 FAIL;
3104 GET_SKIP;
3105 if (target == NULL)
3106 target = code+skip-1;
3107 else if (code+skip-1 != target)
3108 FAIL;
3109 }
3110 }
3111 break;
3112
3113 case SRE_OP_REPEAT_ONE:
3114 case SRE_OP_MIN_REPEAT_ONE:
3115 {
/* <op> <skip> <min> <max> body <SUCCESS> */
3116 SRE_CODE min, max;
3117 GET_SKIP;
3118 GET_ARG; min = arg;
3119 GET_ARG; max = arg;
3120 if (min > max)
3121 FAIL;
3122 if (max > SRE_MAXREPEAT)
3123 FAIL;
3124 if (!_validate_inner(code, code+skip-4, groups))
3125 FAIL;
3126 code += skip-4;
3127 GET_OP;
3128 if (op != SRE_OP_SUCCESS)
3129 FAIL;
3130 }
3131 break;
3132
3133 case SRE_OP_REPEAT:
3134 {
/* <REPEAT> <skip> <min> <max> body <MAX_UNTIL or MIN_UNTIL> */
3135 SRE_CODE min, max;
3136 GET_SKIP;
3137 GET_ARG; min = arg;
3138 GET_ARG; max = arg;
3139 if (min > max)
3140 FAIL;
3141 if (max > SRE_MAXREPEAT)
3142 FAIL;
3143 if (!_validate_inner(code, code+skip-3, groups))
3144 FAIL;
3145 code += skip-3;
3146 GET_OP;
3147 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
3148 FAIL;
3149 }
3150 break;
3151
3152 case SRE_OP_GROUPREF:
3153 case SRE_OP_GROUPREF_IGNORE:
/* one operand: the referenced group, which must be below `groups` */
3154 GET_ARG;
3155 if (arg >= groups)
3156 FAIL;
3157 break;
3158
3159 case SRE_OP_GROUPREF_EXISTS:
3160 /* The regex syntax for this is: '(?(group)then|else)', where
3161 'group' is either an integer group number or a group name,
3162 'then' and 'else' are sub-regexes, and 'else' is optional. */
3163 GET_ARG;
3164 if (arg >= groups)
3165 FAIL;
3166 GET_SKIP_ADJ(1);
3167 code--; /* The skip is relative to the first arg! */
3168 /* There are two possibilities here: if there is both a 'then'
3169 part and an 'else' part, the generated code looks like:
3170
3171 GROUPREF_EXISTS
3172 <group>
3173 <skipyes>
3174 ...then part...
3175 JUMP
3176 <skipno>
3177 (<skipyes> jumps here)
3178 ...else part...
3179 (<skipno> jumps here)
3180
3181 If there is only a 'then' part, it looks like:
3182
3183 GROUPREF_EXISTS
3184 <group>
3185 <skip>
3186 ...then part...
3187 (<skip> jumps here)
3188
3189 There is no direct way to decide which it is, and we don't want
3190 to allow arbitrary jumps anywhere in the code; so we just look
3191 for a JUMP opcode preceding our skip target.
3192 */
3193 if (skip >= 3 && skip-3 < end-code &&
3194 code[skip-3] == SRE_OP_JUMP)
3195 {
3196 VTRACE(("both then and else parts present\n"));
3197 if (!_validate_inner(code+1, code+skip-3, groups))
3198 FAIL;
3199 code += skip-2; /* Position after JUMP, at <skipno> */
3200 GET_SKIP;
3201 if (!_validate_inner(code, code+skip-1, groups))
3202 FAIL;
3203 code += skip-1;
3204 }
3205 else {
3206 VTRACE(("only a then part present\n"));
3207 if (!_validate_inner(code+1, code+skip-1, groups))
3208 FAIL;
3209 code += skip-1;
3210 }
3211 break;
3212
3213 case SRE_OP_ASSERT:
3214 case SRE_OP_ASSERT_NOT:
/* <op> <skip> <width> body <SUCCESS> */
3215 GET_SKIP;
3216 GET_ARG; /* 0 for lookahead, width for lookbehind */
3217 code--; /* Back up over arg to simplify math below */
3218 if (arg & 0x80000000)
3219 FAIL; /* Width too large */
3220 /* Stop 1 before the end; we check the SUCCESS below */
3221 if (!_validate_inner(code+1, code+skip-2, groups))
3222 FAIL;
3223 code += skip-2;
3224 GET_OP;
3225 if (op != SRE_OP_SUCCESS)
3226 FAIL;
3227 break;
3228
3229 default:
/* unknown or unexpected opcode */
3230 FAIL;
3231
3232 }
3233 }
3234
3235 VTRACE(("okay\n"));
3236 return 1;
3237 }
3238
3239 static int
3240 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
3241 {
3242 if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
3243 FAIL;
3244 if (groups == 0) /* fix for simplejson */
3245 groups = 100; /* 100 groups should always be safe */
3246 return _validate_inner(code, end-1, groups);
3247 }
3248
3249 static int
3250 _validate(PatternObject *self)
3251 {
3252 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
3253 {
3254 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
3255 return 0;
3256 }
3257 else
3258 VTRACE(("Success!\n"));
3259 return 1;
3260 }
3261
3262 /* -------------------------------------------------------------------- */
3263 /* match methods */
3264
/* Destructor for match objects: drops the references the match holds and
   frees the object itself. */
3265 static void
3266 match_dealloc(MatchObject* self)
3267 {
/* regs and string are released with XDECREF, i.e. they may be NULL */
3268 Py_XDECREF(self->regs);
3269 Py_XDECREF(self->string);
/* pattern is released with plain DECREF, i.e. it is expected to be set */
3270 Py_DECREF(self->pattern);
3271 PyObject_DEL(self);
3272 }
3273
3274 static PyObject*
3275 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
3276 {
3277 if (index < 0 || index >= self->groups) {
3278 /* raise IndexError if we were given a bad group number */
3279 PyErr_SetString(
3280 PyExc_IndexError,
3281 "no such group"
3282 );
3283 return NULL;
3284 }
3285
3286 index *= 2;
3287
3288 if (self->string == Py_None || self->mark[index] < 0) {
3289 /* return default value if the string or group is undefined */
3290 Py_INCREF(def);
3291 return def;
3292 }
3293
3294 return PySequence_GetSlice(
3295 self->string, self->mark[index], self->mark[index+1]
3296 );
3297 }
3298
3299 static Py_ssize_t
3300 match_getindex(MatchObject* self, PyObject* index)
3301 {
3302 Py_ssize_t i;
3303
3304 if (PyInt_Check(index) || PyLong_Check(index))
3305 return PyInt_AsSsize_t(index);
3306
3307 i = -1;
3308
3309 if (self->pattern->groupindex) {
3310 index = PyObject_GetItem(self->pattern->groupindex, index);
3311 if (index) {
3312 if (PyInt_Check(index) || PyLong_Check(index))
3313 i = PyInt_AsSsize_t(index);
3314 Py_DECREF(index);
3315 } else
3316 PyErr_Clear();
3317 }
3318
3319 return i;
3320 }
3321
3322 static PyObject*
3323 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
3324 {
3325 return match_getslice_by_index(self, match_getindex(self, index), def);
3326 }
3327
3328 static PyObject*
3329 match_expand(MatchObject* self, PyObject* ptemplate)
3330 {
3331 /* delegate to Python code */
3332 return call(
3333 SRE_PY_MODULE, "_expand",
3334 PyTuple_Pack(3, self->pattern, self, ptemplate)
3335 );
3336 }
3337
3338 static PyObject*
3339 match_group(MatchObject* self, PyObject* args)
3340 {
3341 PyObject* result;
3342 Py_ssize_t i, size;
3343
3344 size = PyTuple_GET_SIZE(args);
3345
3346 switch (size) {
3347 case 0:
3348 result = match_getslice(self, Py_False, Py_None);
3349 break;
3350 case 1:
3351 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
3352 break;
3353 default:
3354 /* fetch multiple items */
3355 result = PyTuple_New(size);
3356 if (!result)
3357 return NULL;
3358 for (i = 0; i < size; i++) {
3359 PyObject* item = match_getslice(
3360 self, PyTuple_GET_ITEM(args, i), Py_None
3361 );
3362 if (!item) {
3363 Py_DECREF(result);
3364 return NULL;
3365 }
3366 PyTuple_SET_ITEM(result, i, item);
3367 }
3368 break;
3369 }
3370 return result;
3371 }
3372
3373 static PyObject*
3374 match_groups(MatchObject* self, PyObject* args, PyObject* kw)
3375 {
3376 PyObject* result;
3377 Py_ssize_t index;
3378
3379 PyObject* def = Py_None;
3380 static char* kwlist[] = { "default", NULL };
3381 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
3382 return NULL;
3383
3384 result = PyTuple_New(self->groups-1);
3385 if (!result)
3386 return NULL;
3387
3388 for (index = 1; index < self->groups; index++) {
3389 PyObject* item;
3390 item = match_getslice_by_index(self, index, def);
3391 if (!item) {
3392 Py_DECREF(result);
3393 return NULL;
3394 }
3395 PyTuple_SET_ITEM(result, index-1, item);
3396 }
3397
3398 return result;
3399 }
3400
3401 static PyObject*
3402 match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
3403 {
3404 PyObject* result;
3405 PyObject* keys;
3406 Py_ssize_t index;
3407
3408 PyObject* def = Py_None;
3409 static char* kwlist[] = { "default", NULL };
3410 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
3411 return NULL;
3412
3413 result = PyDict_New();
3414 if (!result || !self->pattern->groupindex)
3415 return result;
3416
3417 keys = PyMapping_Keys(self->pattern->groupindex);
3418 if (!keys)
3419 goto failed;
3420
3421 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
3422 int status;
3423 PyObject* key;
3424 PyObject* value;
3425 key = PyList_GET_ITEM(keys, index);
3426 if (!key)
3427 goto failed;
3428 value = match_getslice(self, key, def);
3429 if (!value) {
3430 Py_DECREF(key);
3431 goto failed;
3432 }
3433 status = PyDict_SetItem(result, key, value);
3434 Py_DECREF(value);
3435 if (status < 0)
3436 goto failed;
3437 }
3438
3439 Py_DECREF(keys);
3440
3441 return result;
3442
3443 failed:
3444 Py_XDECREF(keys);
3445 Py_DECREF(result);
3446 return NULL;
3447 }
3448
3449 static PyObject*
3450 match_start(MatchObject* self, PyObject* args)
3451 {
3452 Py_ssize_t index;
3453
3454 PyObject* index_ = Py_False; /* zero */
3455 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
3456 return NULL;
3457
3458 index = match_getindex(self, index_);
3459
3460 if (index < 0 || index >= self->groups) {
3461 PyErr_SetString(
3462 PyExc_IndexError,
3463 "no such group"
3464 );
3465 return NULL;
3466 }
3467
3468 /* mark is -1 if group is undefined */
3469 return PyInt_FromSsize_t(self->mark[index*2]);
3470 }
3471
3472 static PyObject*
3473 match_end(MatchObject* self, PyObject* args)
3474 {
3475 Py_ssize_t index;
3476
3477 PyObject* index_ = Py_False; /* zero */
3478 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
3479 return NULL;
3480
3481 index = match_getindex(self, index_);
3482
3483 if (index < 0 || index >= self->groups) {
3484 PyErr_SetString(
3485 PyExc_IndexError,
3486 "no such group"
3487 );
3488 return NULL;
3489 }
3490
3491 /* mark is -1 if group is undefined */
3492 return PyInt_FromSsize_t(self->mark[index*2+1]);
3493 }
3494
3495 LOCAL(PyObject*)
3496 _pair(Py_ssize_t i1, Py_ssize_t i2)
3497 {
3498 PyObject* pair;
3499 PyObject* item;
3500
3501 pair = PyTuple_New(2);
3502 if (!pair)
3503 return NULL;
3504
3505 item = PyInt_FromSsize_t(i1);
3506 if (!item)
3507 goto error;
3508 PyTuple_SET_ITEM(pair, 0, item);
3509
3510 item = PyInt_FromSsize_t(i2);
3511 if (!item)
3512 goto error;
3513 PyTuple_SET_ITEM(pair, 1, item);
3514
3515 return pair;
3516
3517 error:
3518 Py_DECREF(pair);
3519 return NULL;
3520 }
3521
3522 static PyObject*
3523 match_span(MatchObject* self, PyObject* args)
3524 {
3525 Py_ssize_t index;
3526
3527 PyObject* index_ = Py_False; /* zero */
3528 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
3529 return NULL;
3530
3531 index = match_getindex(self, index_);
3532
3533 if (index < 0 || index >= self->groups) {
3534 PyErr_SetString(
3535 PyExc_IndexError,
3536 "no such group"
3537 );
3538 return NULL;
3539 }
3540
3541 /* marks are -1 if group is undefined */
3542 return _pair(self->mark[index*2], self->mark[index*2+1]);
3543 }
3544
3545 static PyObject*
3546 match_regs(MatchObject* self)
3547 {
3548 PyObject* regs;
3549 PyObject* item;
3550 Py_ssize_t index;
3551
3552 regs = PyTuple_New(self->groups);
3553 if (!regs)
3554 return NULL;
3555
3556 for (index = 0; index < self->groups; index++) {
3557 item = _pair(self->mark[index*2], self->mark[index*2+1]);
3558 if (!item) {
3559 Py_DECREF(regs);
3560 return NULL;
3561 }
3562 PyTuple_SET_ITEM(regs, index, item);
3563 }
3564
3565 Py_INCREF(regs);
3566 self->regs = regs;
3567
3568 return regs;
3569 }
3570
3571 static PyObject*
3572 match_copy(MatchObject* self, PyObject *unused)
3573 {
3574 #ifdef USE_BUILTIN_COPY
3575 MatchObject* copy;
3576 Py_ssize_t slots, offset;
3577
3578 slots = 2 * (self->pattern->groups+1);
3579
3580 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
3581 if (!copy)
3582 return NULL;
3583
3584 /* this value a constant, but any compiler should be able to
3585 figure that out all by itself */
3586 offset = offsetof(MatchObject, string);
3587
3588 Py_XINCREF(self->pattern);
3589 Py_XINCREF(self->string);
3590 Py_XINCREF(self->regs);
3591
3592 memcpy((char*) copy + offset, (char*) self + offset,
3593 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
3594
3595 return (PyObject*) copy;
3596 #else
3597 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
3598 return NULL;
3599 #endif
3600 }
3601
3602 static PyObject*
3603 match_deepcopy(MatchObject* self, PyObject* memo)
3604 {
3605 #ifdef USE_BUILTIN_COPY
3606 MatchObject* copy;
3607
3608 copy = (MatchObject*) match_copy(self);
3609 if (!copy)
3610 return NULL;
3611
3612 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
3613 !deepcopy(&copy->string, memo) ||
3614 !deepcopy(&copy->regs, memo)) {
3615 Py_DECREF(copy);
3616 return NULL;
3617 }
3618
3619 #else
3620 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
3621 return NULL;
3622 #endif
3623 }
3624
3625 PyDoc_STRVAR(match_doc,
3626 "The result of re.match() and re.search().\n\
3627 Match objects always have a boolean value of True.");
3628
3629 PyDoc_STRVAR(match_group_doc,
3630 "group([group1, ...]) -> str or tuple.\n\
3631 Return subgroup(s) of the match by indices or names.\n\
3632 For 0 returns the entire match.");
3633
3634 PyDoc_STRVAR(match_start_doc,
3635 "start([group=0]) -> int.\n\
3636 Return index of the start of the substring matched by group.");
3637
3638 PyDoc_STRVAR(match_end_doc,
3639 "end([group=0]) -> int.\n\
3640 Return index of the end of the substring matched by group.");
3641
3642 PyDoc_STRVAR(match_span_doc,
3643 "span([group]) -> tuple.\n\
3644 For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
3645
3646 PyDoc_STRVAR(match_groups_doc,
3647 "groups([default=None]) -> tuple.\n\
3648 Return a tuple containing all the subgroups of the match, from 1.\n\
3649 The default argument is used for groups\n\
3650 that did not participate in the match");
3651
3652 PyDoc_STRVAR(match_groupdict_doc,
3653 "groupdict([default=None]) -> dict.\n\
3654 Return a dictionary containing all the named subgroups of the match,\n\
3655 keyed by the subgroup name. The default argument is used for groups\n\
3656 that did not participate in the match");
3657
3658 PyDoc_STRVAR(match_expand_doc,
3659 "expand(template) -> str.\n\
3660 Return the string obtained by doing backslash substitution\n\
3661 on the string template, as done by the sub() method.");
3662
3663 static PyMethodDef match_methods[] = {
3664 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
3665 {"start", (PyCFunction) match_start, METH_VARARGS, match_start_doc},
3666 {"end", (PyCFunction) match_end, METH_VARARGS, match_end_doc},
3667 {"span", (PyCFunction) match_span, METH_VARARGS, match_span_doc},
3668 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS,
3669 match_groups_doc},
3670 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS,
3671 match_groupdict_doc},
3672 {"expand", (PyCFunction) match_expand, METH_O, match_expand_doc},
3673 {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
3674 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
3675 {NULL, NULL}
3676 };
3677
3678 static PyObject *
3679 match_lastindex_get(MatchObject *self)
3680 {
3681 if (self->lastindex >= 0)
3682 return PyInt_FromSsize_t(self->lastindex);
3683 Py_INCREF(Py_None);
3684 return Py_None;
3685 }
3686
3687 static PyObject *
3688 match_lastgroup_get(MatchObject *self)
3689 {
3690 if (self->pattern->indexgroup && self->lastindex >= 0) {
3691 PyObject* result = PySequence_GetItem(
3692 self->pattern->indexgroup, self->lastindex
3693 );
3694 if (result)
3695 return result;
3696 PyErr_Clear();
3697 }
3698 Py_INCREF(Py_None);
3699 return Py_None;
3700 }
3701
3702 static PyObject *
3703 match_regs_get(MatchObject *self)
3704 {
3705 if (self->regs) {
3706 Py_INCREF(self->regs);
3707 return self->regs;
3708 } else
3709 return match_regs(self);
3710 }
3711
3712 static PyGetSetDef match_getset[] = {
3713 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
3714 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
3715 {"regs", (getter)match_regs_get, (setter)NULL},
3716 {NULL}
3717 };
3718
3719 #define MATCH_OFF(x) offsetof(MatchObject, x)
3720 static PyMemberDef match_members[] = {
3721 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
3722 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
3723 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
3724 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
3725 {NULL}
3726 };
3727
3728
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
3731
3732 static PyTypeObject Match_Type = {
3733 PyVarObject_HEAD_INIT(NULL, 0)
3734 "_" SRE_MODULE ".SRE_Match",
3735 sizeof(MatchObject), sizeof(Py_ssize_t),
3736 (destructor)match_dealloc, /* tp_dealloc */
3737 0, /* tp_print */
3738 0, /* tp_getattr */
3739 0, /* tp_setattr */
3740 0, /* tp_compare */
3741 0, /* tp_repr */
3742 0, /* tp_as_number */
3743 0, /* tp_as_sequence */
3744 0, /* tp_as_mapping */
3745 0, /* tp_hash */
3746 0, /* tp_call */
3747 0, /* tp_str */
3748 0, /* tp_getattro */
3749 0, /* tp_setattro */
3750 0, /* tp_as_buffer */
3751 Py_TPFLAGS_DEFAULT,
3752 match_doc, /* tp_doc */
3753 0, /* tp_traverse */
3754 0, /* tp_clear */
3755 0, /* tp_richcompare */
3756 0, /* tp_weaklistoffset */
3757 0, /* tp_iter */
3758 0, /* tp_iternext */
3759 match_methods, /* tp_methods */
3760 match_members, /* tp_members */
3761 match_getset, /* tp_getset */
3762 };
3763
3764 static PyObject*
3765 pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
3766 {
3767 /* create match object (from state object) */
3768
3769 MatchObject* match;
3770 Py_ssize_t i, j;
3771 char* base;
3772 int n;
3773
3774 if (status > 0) {
3775
3776 /* create match object (with room for extra group marks) */
3777 /* coverity[ampersand_in_size] */
3778 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
3779 2*(pattern->groups+1));
3780 if (!match)
3781 return NULL;
3782
3783 Py_INCREF(pattern);
3784 match->pattern = pattern;
3785
3786 Py_INCREF(state->string);
3787 match->string = state->string;
3788
3789 match->regs = NULL;
3790 match->groups = pattern->groups+1;
3791
3792 /* fill in group slices */
3793
3794 base = (char*) state->beginning;
3795 n = state->charsize;
3796
3797 match->mark[0] = ((char*) state->start - base) / n;
3798 match->mark[1] = ((char*) state->ptr - base) / n;
3799
3800 for (i = j = 0; i < pattern->groups; i++, j+=2)
3801 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
3802 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
3803 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
3804 } else
3805 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
3806
3807 match->pos = state->pos;
3808 match->endpos = state->endpos;
3809
3810 match->lastindex = state->lastindex;
3811
3812 return (PyObject*) match;
3813
3814 } else if (status == 0) {
3815
3816 /* no match */
3817 Py_INCREF(Py_None);
3818 return Py_None;
3819
3820 }
3821
3822 /* internal error */
3823 pattern_error(status);
3824 return NULL;
3825 }
3826
3827
3828 /* -------------------------------------------------------------------- */
3829 /* scanner methods (experimental) */
3830
3831 static void
3832 scanner_dealloc(ScannerObject* self)
3833 {
3834 state_fini(&self->state);
3835 Py_XDECREF(self->pattern);
3836 PyObject_DEL(self);
3837 }
3838
3839 static PyObject*
3840 scanner_match(ScannerObject* self, PyObject *unused)
3841 {
3842 SRE_STATE* state = &self->state;
3843 PyObject* match;
3844 int status;
3845
3846 state_reset(state);
3847
3848 state->ptr = state->start;
3849
3850 if (state->charsize == 1) {
3851 status = sre_match(state, PatternObject_GetCode(self->pattern));
3852 } else {
3853 #if defined(HAVE_UNICODE)
3854 status = sre_umatch(state, PatternObject_GetCode(self->pattern));
3855 #endif
3856 }
3857 if (PyErr_Occurred())
3858 return NULL;
3859
3860 match = pattern_new_match((PatternObject*) self->pattern,
3861 state, status);
3862
3863 if (status == 0 || state->ptr == state->start)
3864 state->start = (void*) ((char*) state->ptr + state->charsize);
3865 else
3866 state->start = state->ptr;
3867
3868 return match;
3869 }
3870
3871
3872 static PyObject*
3873 scanner_search(ScannerObject* self, PyObject *unused)
3874 {
3875 SRE_STATE* state = &self->state;
3876 PyObject* match;
3877 int status;
3878
3879 state_reset(state);
3880
3881 state->ptr = state->start;
3882
3883 if (state->charsize == 1) {
3884 status = sre_search(state, PatternObject_GetCode(self->pattern));
3885 } else {
3886 #if defined(HAVE_UNICODE)
3887 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
3888 #endif
3889 }
3890 if (PyErr_Occurred())
3891 return NULL;
3892
3893 match = pattern_new_match((PatternObject*) self->pattern,
3894 state, status);
3895
3896 if (status == 0 || state->ptr == state->start)
3897 state->start = (void*) ((char*) state->ptr + state->charsize);
3898 else
3899 state->start = state->ptr;
3900
3901 return match;
3902 }
3903
3904 static PyMethodDef scanner_methods[] = {
3905 {"match", (PyCFunction) scanner_match, METH_NOARGS},
3906 {"search", (PyCFunction) scanner_search, METH_NOARGS},
3907 {NULL, NULL}
3908 };
3909
3910 #define SCAN_OFF(x) offsetof(ScannerObject, x)
3911 static PyMemberDef scanner_members[] = {
3912 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
3913 {NULL} /* Sentinel */
3914 };
3915
3916 statichere PyTypeObject Scanner_Type = {
3917 PyObject_HEAD_INIT(NULL)
3918 0, "_" SRE_MODULE ".SRE_Scanner",
3919 sizeof(ScannerObject), 0,
3920 (destructor)scanner_dealloc, /*tp_dealloc*/
3921 0, /* tp_print */
3922 0, /* tp_getattr */
3923 0, /* tp_setattr */
3924 0, /* tp_reserved */
3925 0, /* tp_repr */
3926 0, /* tp_as_number */
3927 0, /* tp_as_sequence */
3928 0, /* tp_as_mapping */
3929 0, /* tp_hash */
3930 0, /* tp_call */
3931 0, /* tp_str */
3932 0, /* tp_getattro */
3933 0, /* tp_setattro */
3934 0, /* tp_as_buffer */
3935 Py_TPFLAGS_DEFAULT, /* tp_flags */
3936 0, /* tp_doc */
3937 0, /* tp_traverse */
3938 0, /* tp_clear */
3939 0, /* tp_richcompare */
3940 0, /* tp_weaklistoffset */
3941 0, /* tp_iter */
3942 0, /* tp_iternext */
3943 scanner_methods, /* tp_methods */
3944 scanner_members, /* tp_members */
3945 0, /* tp_getset */
3946 };
3947
3948 static PyObject*
3949 pattern_scanner(PatternObject* pattern, PyObject* args)
3950 {
3951 /* create search state object */
3952
3953 ScannerObject* self;
3954
3955 PyObject* string;
3956 Py_ssize_t start = 0;
3957 Py_ssize_t end = PY_SSIZE_T_MAX;
3958 if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end))
3959 return NULL;
3960
3961 /* create scanner object */
3962 self = PyObject_NEW(ScannerObject, &Scanner_Type);
3963 if (!self)
3964 return NULL;
3965 self->pattern = NULL;
3966
3967 string = state_init(&self->state, pattern, string, start, end);
3968 if (!string) {
3969 Py_DECREF(self);
3970 return NULL;
3971 }
3972
3973 Py_INCREF(pattern);
3974 self->pattern = (PyObject*) pattern;
3975
3976 return (PyObject*) self;
3977 }
3978
3979 static PyMethodDef _functions[] = {
3980 {"compile", _compile, METH_VARARGS},
3981 {"getcodesize", sre_codesize, METH_NOARGS},
3982 {"getlower", sre_getlower, METH_VARARGS},
3983 {NULL, NULL}
3984 };
3985
3986 #if PY_VERSION_HEX < 0x02030000
3987 DL_EXPORT(void) init_sre(void)
3988 #else
3989 PyMODINIT_FUNC init_sre(void)
3990 #endif
3991 {
3992 PyObject* m;
3993 PyObject* d;
3994 PyObject* x;
3995
3996 /* Patch object types */
3997 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
3998 PyType_Ready(&Scanner_Type))
3999 return;
4000
4001 m = Py_InitModule("_" SRE_MODULE, _functions);
4002 if (m == NULL)
4003 return;
4004 d = PyModule_GetDict(m);
4005
4006 x = PyInt_FromLong(SRE_MAGIC);
4007 if (x) {
4008 PyDict_SetItemString(d, "MAGIC", x);
4009 Py_DECREF(x);
4010 }
4011
4012 x = PyInt_FromLong(sizeof(SRE_CODE));
4013 if (x) {
4014 PyDict_SetItemString(d, "CODESIZE", x);
4015 Py_DECREF(x);
4016 }
4017
4018 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
4019 if (x) {
4020 PyDict_SetItemString(d, "MAXREPEAT", x);
4021 Py_DECREF(x);
4022 }
4023
4024 x = PyString_FromString(copyright);
4025 if (x) {
4026 PyDict_SetItemString(d, "copyright", x);
4027 Py_DECREF(x);
4028 }
4029 }
4030
4031 #endif /* !defined(SRE_RECURSIVE) */
4032
4033 /* vim:ts=4:sw=4:et
4034 */