From 6c0ebd5f2f53d5c4d6a91fd0b9148ba23e730c39 Mon Sep 17 00:00:00 2001 From: darylm503 Date: Sun, 11 Sep 2011 20:04:18 +0000 Subject: [PATCH] AppPkg|Python: Files from the Python 2.7.2 distribution that must be modified to build under EDK II. Signed-off-by: duanev@gmail.com Reviewed-by: darylm503 git-svn-id: https://edk2.svn.sourceforge.net/svnroot/edk2/trunk/edk2@12322 6f19259b-4bc3-4df7-8a09-765794883524 --- .../Python/PyMod-2.7.2/Include/fileobject.h | 90 + .../Python/PyMod-2.7.2/Include/pyport.h | 905 ++++ .../Python/PyMod-2.7.2/Modules/_sre.c | 3912 +++++++++++++++ .../Python/PyMod-2.7.2/Modules/zlib/zutil.h | 269 + .../Python/PyMod-2.7.2/Objects/longobject.c | 4370 +++++++++++++++++ .../Objects/stringlib/localeutil.h | 216 + .../Python/PyMod-2.7.2/Python/marshal.c | 1412 ++++++ 7 files changed, 11174 insertions(+) create mode 100644 AppPkg/Applications/Python/PyMod-2.7.2/Include/fileobject.h create mode 100644 AppPkg/Applications/Python/PyMod-2.7.2/Include/pyport.h create mode 100644 AppPkg/Applications/Python/PyMod-2.7.2/Modules/_sre.c create mode 100644 AppPkg/Applications/Python/PyMod-2.7.2/Modules/zlib/zutil.h create mode 100644 AppPkg/Applications/Python/PyMod-2.7.2/Objects/longobject.c create mode 100644 AppPkg/Applications/Python/PyMod-2.7.2/Objects/stringlib/localeutil.h create mode 100644 AppPkg/Applications/Python/PyMod-2.7.2/Python/marshal.c diff --git a/AppPkg/Applications/Python/PyMod-2.7.2/Include/fileobject.h b/AppPkg/Applications/Python/PyMod-2.7.2/Include/fileobject.h new file mode 100644 index 0000000000..c5d15b1352 --- /dev/null +++ b/AppPkg/Applications/Python/PyMod-2.7.2/Include/fileobject.h @@ -0,0 +1,90 @@ + +/* File object interface */ + +#ifndef Py_FILEOBJECT_H +#define Py_FILEOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + PyObject_HEAD + FILE *f_fp; + PyObject *f_name; + PyObject *f_mode; + int (*f_close)(FILE *); + int f_softspace; /* Flag used by 'print' command */ + int f_binary; /* Flag which indicates whether the file is + open in binary (1) or text (0) mode */ + char* f_buf; /* Allocated readahead buffer */ + char* f_bufend; /* Points after last occupied position */ + char* f_bufptr; /* Current buffer position */ + char *f_setbuf; /* Buffer for setbuf(3) and setvbuf(3) */ + int f_univ_newline; /* Handle any newline convention */ + int f_newlinetypes; /* Types of newlines seen */ + int f_skipnextlf; /* Skip next \n */ + PyObject *f_encoding; + PyObject *f_errors; + PyObject *weakreflist; /* List of weak references */ + int unlocked_count; /* Num. currently running sections of code + using f_fp with the GIL released. */ + int readable; + int writable; +} PyFileObject; + +PyAPI_DATA(PyTypeObject) PyFile_Type; + +#define PyFile_Check(op) PyObject_TypeCheck(op, &PyFile_Type) +#define PyFile_CheckExact(op) (Py_TYPE(op) == &PyFile_Type) + +PyAPI_FUNC(PyObject *) PyFile_FromString(char *, char *); +PyAPI_FUNC(void) PyFile_SetBufSize(PyObject *, int); +PyAPI_FUNC(int) PyFile_SetEncoding(PyObject *, const char *); +PyAPI_FUNC(int) PyFile_SetEncodingAndErrors(PyObject *, const char *, char *errors); +PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *, + int (*)(FILE *)); +PyAPI_FUNC(FILE *) PyFile_AsFile(PyObject *); +PyAPI_FUNC(void) PyFile_IncUseCount(PyFileObject *); +PyAPI_FUNC(void) PyFile_DecUseCount(PyFileObject *); +PyAPI_FUNC(PyObject *) PyFile_Name(PyObject *); +PyAPI_FUNC(PyObject *) PyFile_GetLine(PyObject *, int); +PyAPI_FUNC(int) PyFile_WriteObject(PyObject *, PyObject *, int); +PyAPI_FUNC(int) PyFile_SoftSpace(PyObject *, int); +PyAPI_FUNC(int) PyFile_WriteString(const char *, PyObject *); +PyAPI_FUNC(int) PyObject_AsFileDescriptor(PyObject *); + +/* The default encoding used by the platform file system APIs + If non-NULL, this is different than the default encoding for strings +*/ +PyAPI_DATA(const char *) Py_FileSystemDefaultEncoding; + +/* Routines to replace fread() and fgets() which accept any of \r, \n + or \r\n as line terminators. +*/ +#define PY_STDIOTEXTMODE "b" +char *Py_UniversalNewlineFgets(char *, int, FILE*, PyObject *); +size_t Py_UniversalNewlineFread(char *, size_t, FILE *, PyObject *); + +/* A routine to do sanity checking on the file mode string. returns + non-zero on if an exception occurred +*/ +int _PyFile_SanitizeMode(char *mode); + +//#if defined _MSC_VER && _MSC_VER >= 1400 +/* A routine to check if a file descriptor is valid on Windows. Returns 0 + * and sets errno to EBADF if it isn't. This is to avoid Assertions + * from various functions in the Windows CRT beginning with + * Visual Studio 2005 + */ +//int _PyVerify_fd(int fd); +//#elif defined _MSC_VER && _MSC_VER >= 1200 +/* fdopen doesn't set errno EBADF and crashes for large fd on debug build */ +//#define _PyVerify_fd(fd) (_get_osfhandle(fd) >= 0) +//#else +#define _PyVerify_fd(A) (1) /* dummy */ +//#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_FILEOBJECT_H */ diff --git a/AppPkg/Applications/Python/PyMod-2.7.2/Include/pyport.h b/AppPkg/Applications/Python/PyMod-2.7.2/Include/pyport.h new file mode 100644 index 0000000000..4c6eef0aef --- /dev/null +++ b/AppPkg/Applications/Python/PyMod-2.7.2/Include/pyport.h @@ -0,0 +1,905 @@ +#ifndef Py_PYPORT_H +#define Py_PYPORT_H + +#include "pyconfig.h" /* include for defines */ + +/* Some versions of HP-UX & Solaris need inttypes.h for int32_t, + INT32_MAX, etc. */ +#ifdef HAVE_INTTYPES_H +#include +#endif + +#ifdef HAVE_STDINT_H +#include +#endif + +/************************************************************************** +Symbols and macros to supply platform-independent interfaces to basic +C language & library operations whose spellings vary across platforms. + +Please try to make documentation here as clear as possible: by definition, +the stuff here is trying to illuminate C's darkest corners. + +Config #defines referenced here: + +SIGNED_RIGHT_SHIFT_ZERO_FILLS +Meaning: To be defined iff i>>j does not extend the sign bit when i is a + signed integral type and i < 0. +Used in: Py_ARITHMETIC_RIGHT_SHIFT + +Py_DEBUG +Meaning: Extra checks compiled in for debug mode. +Used in: Py_SAFE_DOWNCAST + +HAVE_UINTPTR_T +Meaning: The C9X type uintptr_t is supported by the compiler +Used in: Py_uintptr_t + +HAVE_LONG_LONG +Meaning: The compiler supports the C type "long long" +Used in: PY_LONG_LONG + +**************************************************************************/ + + +/* For backward compatibility only. Obsolete, do not use. */ +#ifdef HAVE_PROTOTYPES +#define Py_PROTO(x) x +#else +#define Py_PROTO(x) () +#endif +#ifndef Py_FPROTO +#define Py_FPROTO(x) Py_PROTO(x) +#endif + +/* typedefs for some C9X-defined synonyms for integral types. + * + * The names in Python are exactly the same as the C9X names, except with a + * Py_ prefix. Until C9X is universally implemented, this is the only way + * to ensure that Python gets reliable names that don't conflict with names + * in non-Python code that are playing their own tricks to define the C9X + * names. + * + * NOTE: don't go nuts here! Python has no use for *most* of the C9X + * integral synonyms. Only define the ones we actually need. + */ + +#ifdef HAVE_LONG_LONG +#ifndef PY_LONG_LONG +#define PY_LONG_LONG long long +#if defined(LLONG_MAX) +/* If LLONG_MAX is defined in limits.h, use that. */ +#define PY_LLONG_MIN LLONG_MIN +#define PY_LLONG_MAX LLONG_MAX +#define PY_ULLONG_MAX ULLONG_MAX +#elif defined(__LONG_LONG_MAX__) +/* Otherwise, if GCC has a builtin define, use that. */ +#define PY_LLONG_MAX __LONG_LONG_MAX__ +#define PY_LLONG_MIN (-PY_LLONG_MAX-1) +#define PY_ULLONG_MAX (__LONG_LONG_MAX__*2ULL + 1ULL) +#else +/* Otherwise, rely on two's complement. */ +#define PY_ULLONG_MAX (~0ULL) +#define PY_LLONG_MAX ((long long)(PY_ULLONG_MAX>>1)) +#define PY_LLONG_MIN (-PY_LLONG_MAX-1) +#endif /* LLONG_MAX */ +#endif +#endif /* HAVE_LONG_LONG */ + +/* a build with 30-bit digits for Python long integers needs an exact-width + * 32-bit unsigned integer type to store those digits. (We could just use + * type 'unsigned long', but that would be wasteful on a system where longs + * are 64-bits.) On Unix systems, the autoconf macro AC_TYPE_UINT32_T defines + * uint32_t to be such a type unless stdint.h or inttypes.h defines uint32_t. + * However, it doesn't set HAVE_UINT32_T, so we do that here. + */ +#if (defined UINT32_MAX || defined uint32_t) +#ifndef PY_UINT32_T +#define HAVE_UINT32_T 1 +#define PY_UINT32_T uint32_t +#endif +#endif + +/* Macros for a 64-bit unsigned integer type; used for type 'twodigits' in the + * long integer implementation, when 30-bit digits are enabled. + */ +#if (defined UINT64_MAX || defined uint64_t) +#ifndef PY_UINT64_T +#define HAVE_UINT64_T 1 +#define PY_UINT64_T uint64_t +#endif +#endif + +/* Signed variants of the above */ +#if (defined INT32_MAX || defined int32_t) +#ifndef PY_INT32_T +#define HAVE_INT32_T 1 +#define PY_INT32_T int32_t +#endif +#endif +#if (defined INT64_MAX || defined int64_t) +#ifndef PY_INT64_T +#define HAVE_INT64_T 1 +#define PY_INT64_T int64_t +#endif +#endif + +/* If PYLONG_BITS_IN_DIGIT is not defined then we'll use 30-bit digits if all + the necessary integer types are available, and we're on a 64-bit platform + (as determined by SIZEOF_VOID_P); otherwise we use 15-bit digits. */ + +#ifndef PYLONG_BITS_IN_DIGIT +#if (defined HAVE_UINT64_T && defined HAVE_INT64_T && \ + defined HAVE_UINT32_T && defined HAVE_INT32_T && SIZEOF_VOID_P >= 8) +#define PYLONG_BITS_IN_DIGIT 30 +#else +#define PYLONG_BITS_IN_DIGIT 15 +#endif +#endif + +/* uintptr_t is the C9X name for an unsigned integral type such that a + * legitimate void* can be cast to uintptr_t and then back to void* again + * without loss of information. Similarly for intptr_t, wrt a signed + * integral type. + */ +#ifdef HAVE_UINTPTR_T +typedef uintptr_t Py_uintptr_t; +typedef intptr_t Py_intptr_t; + +#elif SIZEOF_VOID_P <= SIZEOF_INT +typedef unsigned int Py_uintptr_t; +typedef int Py_intptr_t; + +#elif SIZEOF_VOID_P <= SIZEOF_LONG +typedef unsigned long Py_uintptr_t; +typedef long Py_intptr_t; + +#elif defined(HAVE_LONG_LONG) && (SIZEOF_VOID_P <= SIZEOF_LONG_LONG) +typedef unsigned PY_LONG_LONG Py_uintptr_t; +typedef PY_LONG_LONG Py_intptr_t; + +#else +# error "Python needs a typedef for Py_uintptr_t in pyport.h." +#endif /* HAVE_UINTPTR_T */ + +/* Py_ssize_t is a signed integral type such that sizeof(Py_ssize_t) == + * sizeof(size_t). C99 doesn't define such a thing directly (size_t is an + * unsigned integral type). See PEP 353 for details. + */ +#ifdef HAVE_SSIZE_T +typedef ssize_t Py_ssize_t; +#elif SIZEOF_VOID_P == SIZEOF_SIZE_T +typedef Py_intptr_t Py_ssize_t; +#else +# error "Python needs a typedef for Py_ssize_t in pyport.h." +#endif + +/* Largest possible value of size_t. + SIZE_MAX is part of C99, so it might be defined on some + platforms. If it is not defined, (size_t)-1 is a portable + definition for C89, due to the way signed->unsigned + conversion is defined. */ +#ifdef SIZE_MAX +#define PY_SIZE_MAX SIZE_MAX +#else +#define PY_SIZE_MAX ((size_t)-1) +#endif + +/* Largest positive value of type Py_ssize_t. */ +#define PY_SSIZE_T_MAX ((Py_ssize_t)(((size_t)-1)>>1)) +/* Smallest negative value of type Py_ssize_t. */ +#define PY_SSIZE_T_MIN (-PY_SSIZE_T_MAX-1) + +#if SIZEOF_PID_T > SIZEOF_LONG +# error "Python doesn't support sizeof(pid_t) > sizeof(long)" +#endif + +/* PY_FORMAT_SIZE_T is a platform-specific modifier for use in a printf + * format to convert an argument with the width of a size_t or Py_ssize_t. + * C99 introduced "z" for this purpose, but not all platforms support that; + * e.g., MS compilers use "I" instead. + * + * These "high level" Python format functions interpret "z" correctly on + * all platforms (Python interprets the format string itself, and does whatever + * the platform C requires to convert a size_t/Py_ssize_t argument): + * + * PyString_FromFormat + * PyErr_Format + * PyString_FromFormatV + * + * Lower-level uses require that you interpolate the correct format modifier + * yourself (e.g., calling printf, fprintf, sprintf, PyOS_snprintf); for + * example, + * + * Py_ssize_t index; + * fprintf(stderr, "index %" PY_FORMAT_SIZE_T "d sucks\n", index); + * + * That will expand to %ld, or %Id, or to something else correct for a + * Py_ssize_t on the platform. + */ +#ifndef PY_FORMAT_SIZE_T +# if SIZEOF_SIZE_T == SIZEOF_INT && !defined(__APPLE__) +# define PY_FORMAT_SIZE_T "" +# elif SIZEOF_SIZE_T == SIZEOF_LONG +# define PY_FORMAT_SIZE_T "l" +# elif defined(MS_WINDOWS) +# define PY_FORMAT_SIZE_T "I" +# else +# error "This platform's pyconfig.h needs to define PY_FORMAT_SIZE_T" +# endif +#endif + +/* PY_FORMAT_LONG_LONG is analogous to PY_FORMAT_SIZE_T above, but for + * the long long type instead of the size_t type. It's only available + * when HAVE_LONG_LONG is defined. The "high level" Python format + * functions listed above will interpret "lld" or "llu" correctly on + * all platforms. + */ +#ifdef HAVE_LONG_LONG +# ifndef PY_FORMAT_LONG_LONG +# if defined(MS_WIN64) || defined(MS_WINDOWS) +# define PY_FORMAT_LONG_LONG "I64" +# else +# error "This platform's pyconfig.h needs to define PY_FORMAT_LONG_LONG" +# endif +# endif +#endif + +/* Py_LOCAL can be used instead of static to get the fastest possible calling + * convention for functions that are local to a given module. + * + * Py_LOCAL_INLINE does the same thing, and also explicitly requests inlining, + * for platforms that support that. + * + * If PY_LOCAL_AGGRESSIVE is defined before python.h is included, more + * "aggressive" inlining/optimizaion is enabled for the entire module. This + * may lead to code bloat, and may slow things down for those reasons. It may + * also lead to errors, if the code relies on pointer aliasing. Use with + * care. + * + * NOTE: You can only use this for functions that are entirely local to a + * module; functions that are exported via method tables, callbacks, etc, + * should keep using static. + */ + +#undef USE_INLINE /* XXX - set via configure? */ + +#if defined(_MSC_VER) +#if defined(PY_LOCAL_AGGRESSIVE) +/* enable more aggressive optimization for visual studio */ +//#pragma optimize("agtw", on) +#pragma optimize("gt", on) // a and w are not legal for VS2005 +#endif +/* ignore warnings if the compiler decides not to inline a function */ +#pragma warning(disable: 4710) +/* fastest possible local call under MSVC */ +#define Py_LOCAL(type) static type __fastcall +#define Py_LOCAL_INLINE(type) static __inline type __fastcall +#elif defined(USE_INLINE) +#define Py_LOCAL(type) static type +#define Py_LOCAL_INLINE(type) static inline type +#else +#define Py_LOCAL(type) static type +#define Py_LOCAL_INLINE(type) static type +#endif + +/* Py_MEMCPY can be used instead of memcpy in cases where the copied blocks + * are often very short. While most platforms have highly optimized code for + * large transfers, the setup costs for memcpy are often quite high. MEMCPY + * solves this by doing short copies "in line". + */ + +#if defined(_MSC_VER) +#define Py_MEMCPY(target, source, length) do { \ + size_t i_, n_ = (length); \ + char *t_ = (void*) (target); \ + const char *s_ = (void*) (source); \ + if (n_ >= 16) \ + memcpy(t_, s_, n_); \ + else \ + for (i_ = 0; i_ < n_; i_++) \ + t_[i_] = s_[i_]; \ + } while (0) +#else +#define Py_MEMCPY memcpy +#endif + +#include + +#ifdef HAVE_IEEEFP_H +#include /* needed for 'finite' declaration on some platforms */ +#endif + +#include /* Moved here from the math section, before extern "C" */ + +/******************************************** + * WRAPPER FOR and/or * + ********************************************/ + +#ifdef TIME_WITH_SYS_TIME +#include +#include +#else /* !TIME_WITH_SYS_TIME */ +#ifdef HAVE_SYS_TIME_H +#include +#else /* !HAVE_SYS_TIME_H */ +#include +#endif /* !HAVE_SYS_TIME_H */ +#endif /* !TIME_WITH_SYS_TIME */ + + +/****************************** + * WRAPPER FOR * + ******************************/ + +/* NB caller must include */ + +#ifdef HAVE_SYS_SELECT_H + +#include + +#endif /* !HAVE_SYS_SELECT_H */ + +/******************************* + * stat() and fstat() fiddling * + *******************************/ + +/* We expect that stat and fstat exist on most systems. + * It's confirmed on Unix, Mac and Windows. + * If you don't have them, add + * #define DONT_HAVE_STAT + * and/or + * #define DONT_HAVE_FSTAT + * to your pyconfig.h. Python code beyond this should check HAVE_STAT and + * HAVE_FSTAT instead. + * Also + * #define HAVE_SYS_STAT_H + * if exists on your platform, and + * #define HAVE_STAT_H + * if does. + */ +#ifndef DONT_HAVE_STAT +#define HAVE_STAT +#endif + +#ifndef DONT_HAVE_FSTAT +#define HAVE_FSTAT +#endif + +#ifdef RISCOS +#include +#include "unixstuff.h" +#endif + +#ifdef HAVE_SYS_STAT_H +#if defined(PYOS_OS2) && defined(PYCC_GCC) +#include +#endif +#include +#elif defined(HAVE_STAT_H) +#include +#endif + +#if defined(PYCC_VACPP) +/* VisualAge C/C++ Failed to Define MountType Field in sys/stat.h */ +#define S_IFMT (S_IFDIR|S_IFCHR|S_IFREG) +#endif + +#ifndef S_ISREG +#define S_ISREG(x) (((x) & S_IFMT) == S_IFREG) +#endif + +#ifndef S_ISDIR +#define S_ISDIR(x) (((x) & S_IFMT) == S_IFDIR) +#endif + + +#ifdef __cplusplus +/* Move this down here since some C++ #include's don't like to be included + inside an extern "C" */ +extern "C" { +#endif + + +/* Py_ARITHMETIC_RIGHT_SHIFT + * C doesn't define whether a right-shift of a signed integer sign-extends + * or zero-fills. Here a macro to force sign extension: + * Py_ARITHMETIC_RIGHT_SHIFT(TYPE, I, J) + * Return I >> J, forcing sign extension. Arithmetically, return the + * floor of I/2**J. + * Requirements: + * I should have signed integer type. In the terminology of C99, this can + * be either one of the five standard signed integer types (signed char, + * short, int, long, long long) or an extended signed integer type. + * J is an integer >= 0 and strictly less than the number of bits in the + * type of I (because C doesn't define what happens for J outside that + * range either). + * TYPE used to specify the type of I, but is now ignored. It's been left + * in for backwards compatibility with versions <= 2.6 or 3.0. + * Caution: + * I may be evaluated more than once. + */ +#ifdef SIGNED_RIGHT_SHIFT_ZERO_FILLS +#define Py_ARITHMETIC_RIGHT_SHIFT(TYPE, I, J) \ + ((I) < 0 ? -1-((-1-(I)) >> (J)) : (I) >> (J)) +#else +#define Py_ARITHMETIC_RIGHT_SHIFT(TYPE, I, J) ((I) >> (J)) +#endif + +/* Py_FORCE_EXPANSION(X) + * "Simply" returns its argument. However, macro expansions within the + * argument are evaluated. This unfortunate trickery is needed to get + * token-pasting to work as desired in some cases. + */ +#define Py_FORCE_EXPANSION(X) X + +/* Py_SAFE_DOWNCAST(VALUE, WIDE, NARROW) + * Cast VALUE to type NARROW from type WIDE. In Py_DEBUG mode, this + * assert-fails if any information is lost. + * Caution: + * VALUE may be evaluated more than once. + */ +#ifdef Py_DEBUG +#define Py_SAFE_DOWNCAST(VALUE, WIDE, NARROW) \ + (assert((WIDE)(NARROW)(VALUE) == (VALUE)), (NARROW)(VALUE)) +#else +#define Py_SAFE_DOWNCAST(VALUE, WIDE, NARROW) (NARROW)(VALUE) +#endif + +/* Py_SET_ERRNO_ON_MATH_ERROR(x) + * If a libm function did not set errno, but it looks like the result + * overflowed or not-a-number, set errno to ERANGE or EDOM. Set errno + * to 0 before calling a libm function, and invoke this macro after, + * passing the function result. + * Caution: + * This isn't reliable. See Py_OVERFLOWED comments. + * X is evaluated more than once. + */ +#if defined(__FreeBSD__) || defined(__OpenBSD__) || (defined(__hpux) && defined(__ia64)) +#define _Py_SET_EDOM_FOR_NAN(X) if (isnan(X)) errno = EDOM; +#else +#define _Py_SET_EDOM_FOR_NAN(X) ; +#endif +#define Py_SET_ERRNO_ON_MATH_ERROR(X) \ + do { \ + if (errno == 0) { \ + if ((X) == Py_HUGE_VAL || (X) == -Py_HUGE_VAL) \ + errno = ERANGE; \ + else _Py_SET_EDOM_FOR_NAN(X) \ + } \ + } while(0) + +/* Py_SET_ERANGE_ON_OVERFLOW(x) + * An alias of Py_SET_ERRNO_ON_MATH_ERROR for backward-compatibility. + */ +#define Py_SET_ERANGE_IF_OVERFLOW(X) Py_SET_ERRNO_ON_MATH_ERROR(X) + +/* Py_ADJUST_ERANGE1(x) + * Py_ADJUST_ERANGE2(x, y) + * Set errno to 0 before calling a libm function, and invoke one of these + * macros after, passing the function result(s) (Py_ADJUST_ERANGE2 is useful + * for functions returning complex results). This makes two kinds of + * adjustments to errno: (A) If it looks like the platform libm set + * errno=ERANGE due to underflow, clear errno. (B) If it looks like the + * platform libm overflowed but didn't set errno, force errno to ERANGE. In + * effect, we're trying to force a useful implementation of C89 errno + * behavior. + * Caution: + * This isn't reliable. See Py_OVERFLOWED comments. + * X and Y may be evaluated more than once. + */ +#define Py_ADJUST_ERANGE1(X) \ + do { \ + if (errno == 0) { \ + if ((X) == Py_HUGE_VAL || (X) == -Py_HUGE_VAL) \ + errno = ERANGE; \ + } \ + else if (errno == ERANGE && (X) == 0.0) \ + errno = 0; \ + } while(0) + +#define Py_ADJUST_ERANGE2(X, Y) \ + do { \ + if ((X) == Py_HUGE_VAL || (X) == -Py_HUGE_VAL || \ + (Y) == Py_HUGE_VAL || (Y) == -Py_HUGE_VAL) { \ + if (errno == 0) \ + errno = ERANGE; \ + } \ + else if (errno == ERANGE) \ + errno = 0; \ + } while(0) + +/* The functions _Py_dg_strtod and _Py_dg_dtoa in Python/dtoa.c (which are + * required to support the short float repr introduced in Python 3.1) require + * that the floating-point unit that's being used for arithmetic operations + * on C doubles is set to use 53-bit precision. It also requires that the + * FPU rounding mode is round-half-to-even, but that's less often an issue. + * + * If your FPU isn't already set to 53-bit precision/round-half-to-even, and + * you want to make use of _Py_dg_strtod and _Py_dg_dtoa, then you should + * + * #define HAVE_PY_SET_53BIT_PRECISION 1 + * + * and also give appropriate definitions for the following three macros: + * + * _PY_SET_53BIT_PRECISION_START : store original FPU settings, and + * set FPU to 53-bit precision/round-half-to-even + * _PY_SET_53BIT_PRECISION_END : restore original FPU settings + * _PY_SET_53BIT_PRECISION_HEADER : any variable declarations needed to + * use the two macros above. + * + * The macros are designed to be used within a single C function: see + * Python/pystrtod.c for an example of their use. + */ + +/* get and set x87 control word for gcc/x86 */ +#ifdef HAVE_GCC_ASM_FOR_X87 +#define HAVE_PY_SET_53BIT_PRECISION 1 +/* _Py_get/set_387controlword functions are defined in Python/pymath.c */ +#define _Py_SET_53BIT_PRECISION_HEADER \ + unsigned short old_387controlword, new_387controlword +#define _Py_SET_53BIT_PRECISION_START \ + do { \ + old_387controlword = _Py_get_387controlword(); \ + new_387controlword = (old_387controlword & ~0x0f00) | 0x0200; \ + if (new_387controlword != old_387controlword) \ + _Py_set_387controlword(new_387controlword); \ + } while (0) +#define _Py_SET_53BIT_PRECISION_END \ + if (new_387controlword != old_387controlword) \ + _Py_set_387controlword(old_387controlword) +#endif + +/* default definitions are empty */ +#ifndef HAVE_PY_SET_53BIT_PRECISION +#define _Py_SET_53BIT_PRECISION_HEADER +#define _Py_SET_53BIT_PRECISION_START +#define _Py_SET_53BIT_PRECISION_END +#endif + +/* If we can't guarantee 53-bit precision, don't use the code + in Python/dtoa.c, but fall back to standard code. This + means that repr of a float will be long (17 sig digits). + + Realistically, there are two things that could go wrong: + + (1) doubles aren't IEEE 754 doubles, or + (2) we're on x86 with the rounding precision set to 64-bits + (extended precision), and we don't know how to change + the rounding precision. + */ + +#if !defined(DOUBLE_IS_LITTLE_ENDIAN_IEEE754) && \ + !defined(DOUBLE_IS_BIG_ENDIAN_IEEE754) && \ + !defined(DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754) +#define PY_NO_SHORT_FLOAT_REPR +#endif + +/* double rounding is symptomatic of use of extended precision on x86. If + we're seeing double rounding, and we don't have any mechanism available for + changing the FPU rounding precision, then don't use Python/dtoa.c. */ +#if defined(X87_DOUBLE_ROUNDING) && !defined(HAVE_PY_SET_53BIT_PRECISION) +#define PY_NO_SHORT_FLOAT_REPR +#endif + +/* Py_DEPRECATED(version) + * Declare a variable, type, or function deprecated. + * Usage: + * extern int old_var Py_DEPRECATED(2.3); + * typedef int T1 Py_DEPRECATED(2.4); + * extern int x() Py_DEPRECATED(2.5); + */ +#if defined(__GNUC__) && ((__GNUC__ >= 4) || \ + (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1)) +#define Py_DEPRECATED(VERSION_UNUSED) __attribute__((__deprecated__)) +#else +#define Py_DEPRECATED(VERSION_UNUSED) +#endif + +/************************************************************************** +Prototypes that are missing from the standard include files on some systems +(and possibly only some versions of such systems.) + +Please be conservative with adding new ones, document them and enclose them +in platform-specific #ifdefs. +**************************************************************************/ + +#ifdef SOLARIS +/* Unchecked */ +extern int gethostname(char *, int); +#endif + +#ifdef __BEOS__ +/* Unchecked */ +/* It's in the libs, but not the headers... - [cjh] */ +int shutdown( int, int ); +#endif + +#ifdef HAVE__GETPTY +#include /* we need to import mode_t */ +extern char * _getpty(int *, int, mode_t, int); +#endif + +/* On QNX 6, struct termio must be declared by including sys/termio.h + if TCGETA, TCSETA, TCSETAW, or TCSETAF are used. sys/termio.h must + be included before termios.h or it will generate an error. */ +#ifdef HAVE_SYS_TERMIO_H +#include +#endif + +#if defined(HAVE_OPENPTY) || defined(HAVE_FORKPTY) +#if !defined(HAVE_PTY_H) && !defined(HAVE_LIBUTIL_H) && !defined(HAVE_UTIL_H) +/* BSDI does not supply a prototype for the 'openpty' and 'forkpty' + functions, even though they are included in libutil. */ +#include +extern int openpty(int *, int *, char *, struct termios *, struct winsize *); +extern pid_t forkpty(int *, char *, struct termios *, struct winsize *); +#endif /* !defined(HAVE_PTY_H) && !defined(HAVE_LIBUTIL_H) */ +#endif /* defined(HAVE_OPENPTY) || defined(HAVE_FORKPTY) */ + + +/* These are pulled from various places. It isn't obvious on what platforms + they are necessary, nor what the exact prototype should look like (which + is likely to vary between platforms!) If you find you need one of these + declarations, please move them to a platform-specific block and include + proper prototypes. */ +#if 0 + +/* From Modules/resource.c */ +extern int getrusage(); +extern int getpagesize(); + +/* From Python/sysmodule.c and Modules/posixmodule.c */ +extern int fclose(FILE *); + +/* From Modules/posixmodule.c */ +extern int fdatasync(int); +#endif /* 0 */ + + +/* On 4.4BSD-descendants, ctype functions serves the whole range of + * wchar_t character set rather than single byte code points only. + * This characteristic can break some operations of string object + * including str.upper() and str.split() on UTF-8 locales. This + * workaround was provided by Tim Robbins of FreeBSD project. + */ + +#ifdef __FreeBSD__ +#include +#if __FreeBSD_version > 500039 +# define _PY_PORT_CTYPE_UTF8_ISSUE +#endif +#endif + + +#if defined(__APPLE__) +# define _PY_PORT_CTYPE_UTF8_ISSUE +#endif + +#ifdef _PY_PORT_CTYPE_UTF8_ISSUE +#include +#include +#undef isalnum +#define isalnum(c) iswalnum(btowc(c)) +#undef isalpha +#define isalpha(c) iswalpha(btowc(c)) +#undef islower +#define islower(c) iswlower(btowc(c)) +#undef isspace +#define isspace(c) iswspace(btowc(c)) +#undef isupper +#define isupper(c) iswupper(btowc(c)) +#undef tolower +#define tolower(c) towlower(btowc(c)) +#undef toupper +#define toupper(c) towupper(btowc(c)) +#endif + + +/* Declarations for symbol visibility. + + PyAPI_FUNC(type): Declares a public Python API function and return type + PyAPI_DATA(type): Declares public Python data and its type + PyMODINIT_FUNC: A Python module init function. If these functions are + inside the Python core, they are private to the core. + If in an extension module, it may be declared with + external linkage depending on the platform. + + As a number of platforms support/require "__declspec(dllimport/dllexport)", + we support a HAVE_DECLSPEC_DLL macro to save duplication. +*/ + +/* + All windows ports, except cygwin, are handled in PC/pyconfig.h. + + BeOS and cygwin are the only other autoconf platform requiring special + linkage handling and both of these use __declspec(). +*/ +#if defined(__CYGWIN__) || defined(__BEOS__) +# define HAVE_DECLSPEC_DLL +#endif + +/* only get special linkage if built as shared or platform is Cygwin */ +#if defined(Py_ENABLE_SHARED) || defined(__CYGWIN__) +# if defined(HAVE_DECLSPEC_DLL) +# ifdef Py_BUILD_CORE +# define PyAPI_FUNC(RTYPE) __declspec(dllexport) RTYPE +# define PyAPI_DATA(RTYPE) extern __declspec(dllexport) RTYPE + /* module init functions inside the core need no external linkage */ + /* except for Cygwin to handle embedding (FIXME: BeOS too?) */ +# if defined(__CYGWIN__) +# define PyMODINIT_FUNC __declspec(dllexport) void +# else /* __CYGWIN__ */ +# define PyMODINIT_FUNC void +# endif /* __CYGWIN__ */ +# else /* Py_BUILD_CORE */ + /* Building an extension module, or an embedded situation */ + /* public Python functions and data are imported */ + /* Under Cygwin, auto-import functions to prevent compilation */ + /* failures similar to those described at the bottom of 4.1: */ + /* http://docs.python.org/extending/windows.html#a-cookbook-approach */ +# if !defined(__CYGWIN__) +# define PyAPI_FUNC(RTYPE) __declspec(dllimport) RTYPE +# endif /* !__CYGWIN__ */ +# define PyAPI_DATA(RTYPE) extern __declspec(dllimport) RTYPE + /* module init functions outside the core must be exported */ +# if defined(__cplusplus) +# define PyMODINIT_FUNC extern "C" __declspec(dllexport) void +# else /* __cplusplus */ +# define PyMODINIT_FUNC __declspec(dllexport) void +# endif /* __cplusplus */ +# endif /* Py_BUILD_CORE */ +# endif /* HAVE_DECLSPEC */ +#endif /* Py_ENABLE_SHARED */ + +/* If no external linkage macros defined by now, create defaults */ +#ifndef PyAPI_FUNC +# define PyAPI_FUNC(RTYPE) RTYPE +#endif +#ifndef PyAPI_DATA +# define PyAPI_DATA(RTYPE) extern RTYPE +#endif +#ifndef PyMODINIT_FUNC +# if defined(__cplusplus) +# define PyMODINIT_FUNC extern "C" void +# else /* __cplusplus */ +# define PyMODINIT_FUNC void +# endif /* __cplusplus */ +#endif + +/* Deprecated DL_IMPORT and DL_EXPORT macros */ +#if defined(Py_ENABLE_SHARED) && defined (HAVE_DECLSPEC_DLL) +# if defined(Py_BUILD_CORE) +# define DL_IMPORT(RTYPE) __declspec(dllexport) RTYPE +# define DL_EXPORT(RTYPE) __declspec(dllexport) RTYPE +# else +# define DL_IMPORT(RTYPE) __declspec(dllimport) RTYPE +# define DL_EXPORT(RTYPE) __declspec(dllexport) RTYPE +# endif +#endif +#ifndef DL_EXPORT +# define DL_EXPORT(RTYPE) RTYPE +#endif +#ifndef DL_IMPORT +# define DL_IMPORT(RTYPE) RTYPE +#endif +/* End of deprecated DL_* macros */ + +/* If the fd manipulation macros aren't defined, + here is a set that should do the job */ + +#if 0 /* disabled and probably obsolete */ + +#ifndef FD_SETSIZE +#define FD_SETSIZE 256 +#endif + +#ifndef FD_SET + +typedef long fd_mask; + +#define NFDBITS (sizeof(fd_mask) * NBBY) /* bits per mask */ +#ifndef howmany +#define howmany(x, y) (((x)+((y)-1))/(y)) +#endif /* howmany */ + +typedef struct fd_set { + fd_mask fds_bits[howmany(FD_SETSIZE, NFDBITS)]; +} fd_set; + +#define FD_SET(n, p) ((p)->fds_bits[(n)/NFDBITS] |= (1 << ((n) % NFDBITS))) +#define FD_CLR(n, p) ((p)->fds_bits[(n)/NFDBITS] &= ~(1 << ((n) % NFDBITS))) +#define FD_ISSET(n, p) ((p)->fds_bits[(n)/NFDBITS] & (1 << ((n) % NFDBITS))) +#define FD_ZERO(p) memset((char *)(p), '\0', sizeof(*(p))) + +#endif /* FD_SET */ + +#endif /* fd manipulation macros */ + + +/* limits.h constants that may be missing */ + +#ifndef INT_MAX +#define INT_MAX 2147483647 +#endif + +#ifndef LONG_MAX +#if SIZEOF_LONG == 4 +#define LONG_MAX 0X7FFFFFFFL +#elif SIZEOF_LONG == 8 +#define LONG_MAX 0X7FFFFFFFFFFFFFFFL +#else +#error "could not set LONG_MAX in pyport.h" +#endif +#endif + +#ifndef LONG_MIN +#define LONG_MIN (-LONG_MAX-1) +#endif + +#ifndef LONG_BIT +#define LONG_BIT (8 * SIZEOF_LONG) +#endif + +#if LONG_BIT != 8 * SIZEOF_LONG +/* 04-Oct-2000 LONG_BIT is apparently (mis)defined as 64 on some recent + * 32-bit platforms using gcc. We try to catch that here at compile-time + * rather than waiting for integer multiplication to trigger bogus + * overflows. + */ +#error "LONG_BIT definition appears wrong for platform (bad gcc/glibc config?)." +#endif + +#ifdef __cplusplus +} +#endif + +/* + * Hide GCC attributes from compilers that don't support them. + */ +#if (!defined(__GNUC__) || __GNUC__ < 2 || \ + (__GNUC__ == 2 && __GNUC_MINOR__ < 7) ) && \ + !defined(RISCOS) +#define Py_GCC_ATTRIBUTE(x) +#else +#define Py_GCC_ATTRIBUTE(x) __attribute__(x) +#endif + +/* + * Add PyArg_ParseTuple format where available. + */ +#ifdef HAVE_ATTRIBUTE_FORMAT_PARSETUPLE +#define Py_FORMAT_PARSETUPLE(func,p1,p2) __attribute__((format(func,p1,p2))) +#else +#define Py_FORMAT_PARSETUPLE(func,p1,p2) +#endif + +/* + * Specify alignment on compilers that support it. + */ +#if defined(__GNUC__) && __GNUC__ >= 3 +#define Py_ALIGNED(x) __attribute__((aligned(x))) +#else +#define Py_ALIGNED(x) +#endif + +/* Eliminate end-of-loop code not reached warnings from SunPro C + * when using do{...}while(0) macros + */ +#ifdef __SUNPRO_C +#pragma error_messages (off,E_END_OF_LOOP_CODE_NOT_REACHED) +#endif + +/* + * Older Microsoft compilers don't support the C99 long long literal suffixes, + * so these will be defined in PC/pyconfig.h for those compilers. + */ +#ifndef Py_LL +#define Py_LL(x) x##LL +#endif + +#ifndef Py_ULL +#define Py_ULL(x) Py_LL(x##U) +#endif + +#endif /* Py_PYPORT_H */ diff --git a/AppPkg/Applications/Python/PyMod-2.7.2/Modules/_sre.c b/AppPkg/Applications/Python/PyMod-2.7.2/Modules/_sre.c new file mode 100644 index 0000000000..ec723cc675 --- /dev/null +++ b/AppPkg/Applications/Python/PyMod-2.7.2/Modules/_sre.c @@ -0,0 +1,3912 @@ +/* + * Secret Labs' Regular Expression Engine + * + * regular expression matching engine + * + * partial history: + * 1999-10-24 fl created (based on existing template matcher code) + * 2000-03-06 fl first alpha, sort of + * 2000-08-01 fl fixes for 1.6b1 + * 2000-08-07 fl use PyOS_CheckStack() if available + * 2000-09-20 fl added expand method + * 2001-03-20 fl lots of fixes for 2.1b2 + * 2001-04-15 fl export copyright as Python attribute, not global + * 2001-04-28 fl added __copy__ methods (work in progress) + * 2001-05-14 fl fixes for 1.5.2 compatibility + * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis) + * 2001-10-18 fl fixed group reset issue (from Matthew Mueller) + * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1 + * 2001-10-21 fl added sub/subn primitive + * 2001-10-24 fl added finditer primitive (for 2.2 only) + * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum) + * 2002-11-09 fl fixed empty sub/subn return type + * 2003-04-18 mvl fully support 4-byte codes + * 2003-10-17 gn implemented non recursive scheme + * + * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. + * + * This version of the SRE library can be redistributed under CNRI's + * Python 1.6 license. For any other use, please contact Secret Labs + * AB (info@pythonware.com). + * + * Portions of this engine have been developed in cooperation with + * CNRI. Hewlett-Packard provided funding for 1.6 integration and + * other compatibility work. + */ + +/* Get rid of these macros to prevent collisions between EFI and Python in this file. */ +#undef RETURN_ERROR +#undef RETURN_SUCCESS + +#ifndef SRE_RECURSIVE + +static char copyright[] = + " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB "; + +#define PY_SSIZE_T_CLEAN + +#include "Python.h" +#include "structmember.h" /* offsetof */ + +#include "sre.h" + +#include + +/* name of this module, minus the leading underscore */ +#if !defined(SRE_MODULE) +#define SRE_MODULE "sre" +#endif + +#define SRE_PY_MODULE "re" + +/* defining this one enables tracing */ +#undef VERBOSE + +#if PY_VERSION_HEX >= 0x01060000 +#if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE) +/* defining this enables unicode support (default under 1.6a1 and later) */ +#define HAVE_UNICODE +#endif +#endif + +/* -------------------------------------------------------------------- */ +/* optional features */ + +/* enables fast searching */ +#define USE_FAST_SEARCH + +/* enables aggressive inlining (always on for Visual C) */ +#undef USE_INLINE + +/* enables copy/deepcopy handling (work in progress) */ +#undef USE_BUILTIN_COPY + +#if PY_VERSION_HEX < 0x01060000 +#define PyObject_DEL(op) PyMem_DEL((op)) +#endif + +/* -------------------------------------------------------------------- */ + +#if defined(_MSC_VER) +#pragma optimize("gt", on) /* doesn't seem to make much difference... */ +#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */ +/* fastest possible local call under MSVC */ +#define LOCAL(type) static __inline type __fastcall +#elif defined(USE_INLINE) +#define LOCAL(type) static inline type +#else +#define LOCAL(type) static type +#endif + +/* error codes */ +#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */ +#define SRE_ERROR_STATE -2 /* illegal state */ +#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */ +#define SRE_ERROR_MEMORY -9 /* out of memory */ +#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */ + +#if defined(VERBOSE) +#define TRACE(v) printf v +#else +#define TRACE(v) +#endif + +/* -------------------------------------------------------------------- */ +/* search engine state */ + +/* default character predicates (run sre_chars.py to regenerate tables) */ + +#define SRE_DIGIT_MASK 1 +#define SRE_SPACE_MASK 2 +#define SRE_LINEBREAK_MASK 4 +#define SRE_ALNUM_MASK 8 +#define SRE_WORD_MASK 16 + +/* FIXME: this assumes ASCII. create tables in init_sre() instead */ + +static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, +2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, +25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, +0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 }; + +static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, +10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, +27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, +44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, +61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, +108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, +122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, +106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, +120, 121, 122, 123, 124, 125, 126, 127 }; + +#define SRE_IS_DIGIT(ch)\ + ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0) +#define SRE_IS_SPACE(ch)\ + ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0) +#define SRE_IS_LINEBREAK(ch)\ + ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0) +#define SRE_IS_ALNUM(ch)\ + ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0) +#define SRE_IS_WORD(ch)\ + ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0) + +static unsigned int sre_lower(unsigned int ch) +{ + return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch); +} + +/* locale-specific character predicates */ +/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids + * warnings when c's type supports only numbers < N+1 */ +#define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0) +#define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0) +#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n') +#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0) +#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_') + +static unsigned int sre_lower_locale(unsigned int ch) +{ + return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch); +} + +/* unicode-specific character predicates */ + +#if defined(HAVE_UNICODE) + +#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL((Py_UNICODE)(ch)) +#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch)) +#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch)) +#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch)) +#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_') + +static unsigned int sre_lower_unicode(unsigned int ch) +{ + return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch)); +} + +#endif + +LOCAL(int) +sre_category(SRE_CODE category, unsigned int ch) +{ + switch (category) { + + case SRE_CATEGORY_DIGIT: + return SRE_IS_DIGIT(ch); + case SRE_CATEGORY_NOT_DIGIT: + return !SRE_IS_DIGIT(ch); + case SRE_CATEGORY_SPACE: + return SRE_IS_SPACE(ch); + case SRE_CATEGORY_NOT_SPACE: + return !SRE_IS_SPACE(ch); + case SRE_CATEGORY_WORD: + return SRE_IS_WORD(ch); + case SRE_CATEGORY_NOT_WORD: + return !SRE_IS_WORD(ch); + case SRE_CATEGORY_LINEBREAK: + return SRE_IS_LINEBREAK(ch); + case SRE_CATEGORY_NOT_LINEBREAK: + return !SRE_IS_LINEBREAK(ch); + + case SRE_CATEGORY_LOC_WORD: + return SRE_LOC_IS_WORD(ch); + case SRE_CATEGORY_LOC_NOT_WORD: + return !SRE_LOC_IS_WORD(ch); + +#if defined(HAVE_UNICODE) + case SRE_CATEGORY_UNI_DIGIT: + return SRE_UNI_IS_DIGIT(ch); + case SRE_CATEGORY_UNI_NOT_DIGIT: + return !SRE_UNI_IS_DIGIT(ch); + case SRE_CATEGORY_UNI_SPACE: + return SRE_UNI_IS_SPACE(ch); + case SRE_CATEGORY_UNI_NOT_SPACE: + return !SRE_UNI_IS_SPACE(ch); + case SRE_CATEGORY_UNI_WORD: + return SRE_UNI_IS_WORD(ch); + case SRE_CATEGORY_UNI_NOT_WORD: + return !SRE_UNI_IS_WORD(ch); + case SRE_CATEGORY_UNI_LINEBREAK: + return SRE_UNI_IS_LINEBREAK(ch); + case SRE_CATEGORY_UNI_NOT_LINEBREAK: + return !SRE_UNI_IS_LINEBREAK(ch); +#else + case SRE_CATEGORY_UNI_DIGIT: + return SRE_IS_DIGIT(ch); + case SRE_CATEGORY_UNI_NOT_DIGIT: + return !SRE_IS_DIGIT(ch); + case SRE_CATEGORY_UNI_SPACE: + return SRE_IS_SPACE(ch); + case SRE_CATEGORY_UNI_NOT_SPACE: + return !SRE_IS_SPACE(ch); + case SRE_CATEGORY_UNI_WORD: + return SRE_LOC_IS_WORD(ch); + case SRE_CATEGORY_UNI_NOT_WORD: + return !SRE_LOC_IS_WORD(ch); + case SRE_CATEGORY_UNI_LINEBREAK: + return SRE_IS_LINEBREAK(ch); + case SRE_CATEGORY_UNI_NOT_LINEBREAK: + return !SRE_IS_LINEBREAK(ch); +#endif + } + return 0; +} + +/* helpers */ + +static void +data_stack_dealloc(SRE_STATE* state) +{ + if (state->data_stack) { + PyMem_FREE(state->data_stack); + state->data_stack = NULL; + } + state->data_stack_size = state->data_stack_base = 0; +} + +static int +data_stack_grow(SRE_STATE* state, Py_ssize_t size) +{ + Py_ssize_t minsize, cursize; + minsize = state->data_stack_base+size; + cursize = state->data_stack_size; + if (cursize < minsize) { + void* stack; + cursize = minsize+minsize/4+1024; + TRACE(("allocate/grow stack %d\n", cursize)); + stack = PyMem_REALLOC(state->data_stack, cursize); + if (!stack) { + data_stack_dealloc(state); + return SRE_ERROR_MEMORY; + } + state->data_stack = (char *)stack; + state->data_stack_size = cursize; + } + return 0; +} + +/* generate 8-bit version */ + +#define SRE_CHAR unsigned char +#define SRE_AT sre_at +#define SRE_COUNT sre_count +#define SRE_CHARSET sre_charset +#define SRE_INFO sre_info +#define SRE_MATCH sre_match +#define SRE_MATCH_CONTEXT sre_match_context +#define SRE_SEARCH sre_search +#define SRE_LITERAL_TEMPLATE sre_literal_template + +#if defined(HAVE_UNICODE) + +#define SRE_RECURSIVE +#include "_sre.c" +#undef SRE_RECURSIVE + +#undef SRE_LITERAL_TEMPLATE +#undef SRE_SEARCH +#undef SRE_MATCH +#undef SRE_MATCH_CONTEXT +#undef SRE_INFO +#undef SRE_CHARSET +#undef SRE_COUNT +#undef SRE_AT +#undef SRE_CHAR + +/* generate 16-bit unicode version */ + +#define SRE_CHAR Py_UNICODE +#define SRE_AT sre_uat +#define SRE_COUNT sre_ucount +#define SRE_CHARSET sre_ucharset +#define SRE_INFO sre_uinfo +#define SRE_MATCH sre_umatch +#define SRE_MATCH_CONTEXT sre_umatch_context +#define SRE_SEARCH sre_usearch +#define SRE_LITERAL_TEMPLATE sre_uliteral_template +#endif + +#endif /* SRE_RECURSIVE */ + +/* -------------------------------------------------------------------- */ +/* String matching engine */ + +/* the following section is compiled twice, with different character + settings */ + +LOCAL(int) +SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at) +{ + /* check if pointer is at given position */ + + Py_ssize_t thisp, thatp; + + switch (at) { + + case SRE_AT_BEGINNING: + case SRE_AT_BEGINNING_STRING: + return ((void*) ptr == state->beginning); + + case SRE_AT_BEGINNING_LINE: + return ((void*) ptr == state->beginning || + SRE_IS_LINEBREAK((int) ptr[-1])); + + case SRE_AT_END: + return (((void*) (ptr+1) == state->end && + SRE_IS_LINEBREAK((int) ptr[0])) || + ((void*) ptr == state->end)); + + case SRE_AT_END_LINE: + return ((void*) ptr == state->end || + SRE_IS_LINEBREAK((int) ptr[0])); + + case SRE_AT_END_STRING: + return ((void*) ptr == state->end); + + case SRE_AT_BOUNDARY: + if (state->beginning == state->end) + return 0; + thatp = ((void*) ptr > state->beginning) ? + SRE_IS_WORD((int) ptr[-1]) : 0; + thisp = ((void*) ptr < state->end) ? + SRE_IS_WORD((int) ptr[0]) : 0; + return thisp != thatp; + + case SRE_AT_NON_BOUNDARY: + if (state->beginning == state->end) + return 0; + thatp = ((void*) ptr > state->beginning) ? + SRE_IS_WORD((int) ptr[-1]) : 0; + thisp = ((void*) ptr < state->end) ? + SRE_IS_WORD((int) ptr[0]) : 0; + return thisp == thatp; + + case SRE_AT_LOC_BOUNDARY: + if (state->beginning == state->end) + return 0; + thatp = ((void*) ptr > state->beginning) ? + SRE_LOC_IS_WORD((int) ptr[-1]) : 0; + thisp = ((void*) ptr < state->end) ? + SRE_LOC_IS_WORD((int) ptr[0]) : 0; + return thisp != thatp; + + case SRE_AT_LOC_NON_BOUNDARY: + if (state->beginning == state->end) + return 0; + thatp = ((void*) ptr > state->beginning) ? + SRE_LOC_IS_WORD((int) ptr[-1]) : 0; + thisp = ((void*) ptr < state->end) ? + SRE_LOC_IS_WORD((int) ptr[0]) : 0; + return thisp == thatp; + +#if defined(HAVE_UNICODE) + case SRE_AT_UNI_BOUNDARY: + if (state->beginning == state->end) + return 0; + thatp = ((void*) ptr > state->beginning) ? + SRE_UNI_IS_WORD((int) ptr[-1]) : 0; + thisp = ((void*) ptr < state->end) ? + SRE_UNI_IS_WORD((int) ptr[0]) : 0; + return thisp != thatp; + + case SRE_AT_UNI_NON_BOUNDARY: + if (state->beginning == state->end) + return 0; + thatp = ((void*) ptr > state->beginning) ? + SRE_UNI_IS_WORD((int) ptr[-1]) : 0; + thisp = ((void*) ptr < state->end) ? + SRE_UNI_IS_WORD((int) ptr[0]) : 0; + return thisp == thatp; +#endif + + } + + return 0; +} + +LOCAL(int) +SRE_CHARSET(SRE_CODE* set, SRE_CODE ch) +{ + /* check if character is a member of the given set */ + + int ok = 1; + + for (;;) { + switch (*set++) { + + case SRE_OP_FAILURE: + return !ok; + + case SRE_OP_LITERAL: + /* */ + if (ch == set[0]) + return ok; + set++; + break; + + case SRE_OP_CATEGORY: + /* */ + if (sre_category(set[0], (int) ch)) + return ok; + set += 1; + break; + + case SRE_OP_CHARSET: + if (sizeof(SRE_CODE) == 2) { + /* (16 bits per code word) */ + if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15)))) + return ok; + set += 16; + } + else { + /* (32 bits per code word) */ + if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31)))) + return ok; + set += 8; + } + break; + + case SRE_OP_RANGE: + /* */ + if (set[0] <= ch && ch <= set[1]) + return ok; + set += 2; + break; + + case SRE_OP_NEGATE: + ok = !ok; + break; + + case SRE_OP_BIGCHARSET: + /* <256 blockindices> */ + { + Py_ssize_t count, block; + count = *(set++); + + if (sizeof(SRE_CODE) == 2) { + block = ((unsigned char*)set)[ch >> 8]; + set += 128; + if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15))) + return ok; + set += count*16; + } + else { + /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids + * warnings when c's type supports only numbers < N+1 */ + if (!(ch & ~65535)) + block = ((unsigned char*)set)[ch >> 8]; + else + block = -1; + set += 64; + if (block >=0 && + (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31)))) + return ok; + set += count*8; + } + break; + } + + default: + /* internal error -- there's not much we can do about it + here, so let's just pretend it didn't match... */ + return 0; + } + } +} + +LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern); + +LOCAL(Py_ssize_t) +SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount) +{ + SRE_CODE chr; + SRE_CHAR* ptr = (SRE_CHAR *)state->ptr; + SRE_CHAR* end = (SRE_CHAR *)state->end; + Py_ssize_t i; + + /* adjust end */ + if (maxcount < end - ptr && maxcount != 65535) + end = ptr + maxcount; + + switch (pattern[0]) { + + case SRE_OP_IN: + /* repeated set */ + TRACE(("|%p|%p|COUNT IN\n", pattern, ptr)); + while (ptr < end && SRE_CHARSET(pattern + 2, *ptr)) + ptr++; + break; + + case SRE_OP_ANY: + /* repeated dot wildcard. */ + TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); + while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) + ptr++; + break; + + case SRE_OP_ANY_ALL: + /* repeated dot wildcard. skip to the end of the target + string, and backtrack from there */ + TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr)); + ptr = end; + break; + + case SRE_OP_LITERAL: + /* repeated literal */ + chr = pattern[1]; + TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr)); + while (ptr < end && (SRE_CODE) *ptr == chr) + ptr++; + break; + + case SRE_OP_LITERAL_IGNORE: + /* repeated literal */ + chr = pattern[1]; + TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr)); + while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr) + ptr++; + break; + + case SRE_OP_NOT_LITERAL: + /* repeated non-literal */ + chr = pattern[1]; + TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr)); + while (ptr < end && (SRE_CODE) *ptr != chr) + ptr++; + break; + + case SRE_OP_NOT_LITERAL_IGNORE: + /* repeated non-literal */ + chr = pattern[1]; + TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr)); + while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr) + ptr++; + break; + + default: + /* repeated single character pattern */ + TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); + while ((SRE_CHAR*) state->ptr < end) { + i = SRE_MATCH(state, pattern); + if (i < 0) + return i; + if (!i) + break; + } + TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, + (SRE_CHAR*) state->ptr - ptr)); + return (SRE_CHAR*) state->ptr - ptr; + } + + TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr)); + return ptr - (SRE_CHAR*) state->ptr; +} + +#if 0 /* not used in this release */ +LOCAL(int) +SRE_INFO(SRE_STATE* state, SRE_CODE* pattern) +{ + /* check if an SRE_OP_INFO block matches at the current position. + returns the number of SRE_CODE objects to skip if successful, 0 + if no match */ + + SRE_CHAR* end = state->end; + SRE_CHAR* ptr = state->ptr; + Py_ssize_t i; + + /* check minimal length */ + if (pattern[3] && (end - ptr) < pattern[3]) + return 0; + + /* check known prefix */ + if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) { + /* */ + for (i = 0; i < pattern[5]; i++) + if ((SRE_CODE) ptr[i] != pattern[7 + i]) + return 0; + return pattern[0] + 2 * pattern[6]; + } + return pattern[0]; +} +#endif + +/* The macros below should be used to protect recursive SRE_MATCH() + * calls that *failed* and do *not* return immediately (IOW, those + * that will backtrack). Explaining: + * + * - Recursive SRE_MATCH() returned true: that's usually a success + * (besides atypical cases like ASSERT_NOT), therefore there's no + * reason to restore lastmark; + * + * - Recursive SRE_MATCH() returned false but the current SRE_MATCH() + * is returning to the caller: If the current SRE_MATCH() is the + * top function of the recursion, returning false will be a matching + * failure, and it doesn't matter where lastmark is pointing to. + * If it's *not* the top function, it will be a recursive SRE_MATCH() + * failure by itself, and the calling SRE_MATCH() will have to deal + * with the failure by the same rules explained here (it will restore + * lastmark by itself if necessary); + * + * - Recursive SRE_MATCH() returned false, and will continue the + * outside 'for' loop: must be protected when breaking, since the next + * OP could potentially depend on lastmark; + * + * - Recursive SRE_MATCH() returned false, and will be called again + * inside a local for/while loop: must be protected between each + * loop iteration, since the recursive SRE_MATCH() could do anything, + * and could potentially depend on lastmark. + * + * For more information, check the discussion at SF patch #712900. + */ +#define LASTMARK_SAVE() \ + do { \ + ctx->lastmark = state->lastmark; \ + ctx->lastindex = state->lastindex; \ + } while (0) +#define LASTMARK_RESTORE() \ + do { \ + state->lastmark = ctx->lastmark; \ + state->lastindex = ctx->lastindex; \ + } while (0) + +#define RETURN_ERROR(i) do { return i; } while(0) +#define RETURN_FAILURE do { ret = 0; goto exit; } while(0) +#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0) + +#define RETURN_ON_ERROR(i) \ + do { if (i < 0) RETURN_ERROR(i); } while (0) +#define RETURN_ON_SUCCESS(i) \ + do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0) +#define RETURN_ON_FAILURE(i) \ + do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0) + +#define SFY(x) #x + +#define DATA_STACK_ALLOC(state, type, ptr) \ +do { \ + alloc_pos = state->data_stack_base; \ + TRACE(("allocating %s in %d (%d)\n", \ + SFY(type), alloc_pos, sizeof(type))); \ + if (state->data_stack_size < alloc_pos+sizeof(type)) { \ + int j = data_stack_grow(state, sizeof(type)); \ + if (j < 0) return j; \ + if (ctx_pos != -1) \ + DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \ + } \ + ptr = (type*)(state->data_stack+alloc_pos); \ + state->data_stack_base += sizeof(type); \ +} while (0) + +#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \ +do { \ + TRACE(("looking up %s at %d\n", SFY(type), pos)); \ + ptr = (type*)(state->data_stack+pos); \ +} while (0) + +#define DATA_STACK_PUSH(state, data, size) \ +do { \ + TRACE(("copy data in %p to %d (%d)\n", \ + data, state->data_stack_base, size)); \ + if (state->data_stack_size < state->data_stack_base+size) { \ + int j = data_stack_grow(state, size); \ + if (j < 0) return j; \ + if (ctx_pos != -1) \ + DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \ + } \ + memcpy(state->data_stack+state->data_stack_base, data, size); \ + state->data_stack_base += size; \ +} while (0) + +#define DATA_STACK_POP(state, data, size, discard) \ +do { \ + TRACE(("copy data to %p from %d (%d)\n", \ + data, state->data_stack_base-size, size)); \ + memcpy(data, state->data_stack+state->data_stack_base-size, size); \ + if (discard) \ + state->data_stack_base -= size; \ +} while (0) + +#define DATA_STACK_POP_DISCARD(state, size) \ +do { \ + TRACE(("discard data from %d (%d)\n", \ + state->data_stack_base-size, size)); \ + state->data_stack_base -= size; \ +} while(0) + +#define DATA_PUSH(x) \ + DATA_STACK_PUSH(state, (x), sizeof(*(x))) +#define DATA_POP(x) \ + DATA_STACK_POP(state, (x), sizeof(*(x)), 1) +#define DATA_POP_DISCARD(x) \ + DATA_STACK_POP_DISCARD(state, sizeof(*(x))) +#define DATA_ALLOC(t,p) \ + DATA_STACK_ALLOC(state, t, p) +#define DATA_LOOKUP_AT(t,p,pos) \ + DATA_STACK_LOOKUP_AT(state,t,p,pos) + +#define MARK_PUSH(lastmark) \ + do if (lastmark > 0) { \ + i = lastmark; /* ctx->lastmark may change if reallocated */ \ + DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \ + } while (0) +#define MARK_POP(lastmark) \ + do if (lastmark > 0) { \ + DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \ + } while (0) +#define MARK_POP_KEEP(lastmark) \ + do if (lastmark > 0) { \ + DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \ + } while (0) +#define MARK_POP_DISCARD(lastmark) \ + do if (lastmark > 0) { \ + DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \ + } while (0) + +#define JUMP_NONE 0 +#define JUMP_MAX_UNTIL_1 1 +#define JUMP_MAX_UNTIL_2 2 +#define JUMP_MAX_UNTIL_3 3 +#define JUMP_MIN_UNTIL_1 4 +#define JUMP_MIN_UNTIL_2 5 +#define JUMP_MIN_UNTIL_3 6 +#define JUMP_REPEAT 7 +#define JUMP_REPEAT_ONE_1 8 +#define JUMP_REPEAT_ONE_2 9 +#define JUMP_MIN_REPEAT_ONE 10 +#define JUMP_BRANCH 11 +#define JUMP_ASSERT 12 +#define JUMP_ASSERT_NOT 13 + +#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \ + DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \ + nextctx->last_ctx_pos = ctx_pos; \ + nextctx->jump = jumpvalue; \ + nextctx->pattern = nextpattern; \ + ctx_pos = alloc_pos; \ + ctx = nextctx; \ + goto entrance; \ + jumplabel: \ + while (0) /* gcc doesn't like labels at end of scopes */ \ + +typedef struct { + Py_ssize_t last_ctx_pos; + Py_ssize_t jump; + SRE_CHAR* ptr; + SRE_CODE* pattern; + Py_ssize_t count; + Py_ssize_t lastmark; + Py_ssize_t lastindex; + union { + SRE_CODE chr; + SRE_REPEAT* rep; + } u; +} SRE_MATCH_CONTEXT; + +/* check if string matches the given pattern. returns <0 for + error, 0 for failure, and 1 for success */ +LOCAL(Py_ssize_t) +SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern) +{ + SRE_CHAR* end = (SRE_CHAR *)state->end; + Py_ssize_t alloc_pos, ctx_pos = -1; + Py_ssize_t i, ret = 0; + Py_ssize_t jump; + unsigned int sigcount=0; + + SRE_MATCH_CONTEXT* ctx; + SRE_MATCH_CONTEXT* nextctx; + + TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); + + DATA_ALLOC(SRE_MATCH_CONTEXT, ctx); + ctx->last_ctx_pos = -1; + ctx->jump = JUMP_NONE; + ctx->pattern = pattern; + ctx_pos = alloc_pos; + +entrance: + + ctx->ptr = (SRE_CHAR *)state->ptr; + + if (ctx->pattern[0] == SRE_OP_INFO) { + /* optimization info block */ + /* <1=skip> <2=flags> <3=min> ... */ + if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) { + TRACE(("reject (got %d chars, need %d)\n", + (end - ctx->ptr), ctx->pattern[3])); + RETURN_FAILURE; + } + ctx->pattern += ctx->pattern[1] + 1; + } + + for (;;) { + ++sigcount; + if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals()) + RETURN_ERROR(SRE_ERROR_INTERRUPTED); + + switch (*ctx->pattern++) { + + case SRE_OP_MARK: + /* set mark */ + /* */ + TRACE(("|%p|%p|MARK %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[0])); + i = ctx->pattern[0]; + if (i & 1) + state->lastindex = i/2 + 1; + if (i > state->lastmark) { + /* state->lastmark is the highest valid index in the + state->mark array. If it is increased by more than 1, + the intervening marks must be set to NULL to signal + that these marks have not been encountered. */ + Py_ssize_t j = state->lastmark + 1; + while (j < i) + state->mark[j++] = NULL; + state->lastmark = i; + } + state->mark[i] = ctx->ptr; + ctx->pattern++; + break; + + case SRE_OP_LITERAL: + /* match literal string */ + /* */ + TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern, + ctx->ptr, *ctx->pattern)); + if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0]) + RETURN_FAILURE; + ctx->pattern++; + ctx->ptr++; + break; + + case SRE_OP_NOT_LITERAL: + /* match anything that is not literal character */ + /* */ + TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern, + ctx->ptr, *ctx->pattern)); + if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0]) + RETURN_FAILURE; + ctx->pattern++; + ctx->ptr++; + break; + + case SRE_OP_SUCCESS: + /* end of pattern */ + TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr)); + state->ptr = ctx->ptr; + RETURN_SUCCESS; + + case SRE_OP_AT: + /* match at given position */ + /* */ + TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern)); + if (!SRE_AT(state, ctx->ptr, *ctx->pattern)) + RETURN_FAILURE; + ctx->pattern++; + break; + + case SRE_OP_CATEGORY: + /* match at given category */ + /* */ + TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern, + ctx->ptr, *ctx->pattern)); + if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0])) + RETURN_FAILURE; + ctx->pattern++; + ctx->ptr++; + break; + + case SRE_OP_ANY: + /* match anything (except a newline) */ + /* */ + TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr)); + if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0])) + RETURN_FAILURE; + ctx->ptr++; + break; + + case SRE_OP_ANY_ALL: + /* match anything */ + /* */ + TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr)); + if (ctx->ptr >= end) + RETURN_FAILURE; + ctx->ptr++; + break; + + case SRE_OP_IN: + /* match set member (or non_member) */ + /* */ + TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr)); + if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr)) + RETURN_FAILURE; + ctx->pattern += ctx->pattern[0]; + ctx->ptr++; + break; + + case SRE_OP_LITERAL_IGNORE: + TRACE(("|%p|%p|LITERAL_IGNORE %d\n", + ctx->pattern, ctx->ptr, ctx->pattern[0])); + if (ctx->ptr >= end || + state->lower(*ctx->ptr) != state->lower(*ctx->pattern)) + RETURN_FAILURE; + ctx->pattern++; + ctx->ptr++; + break; + + case SRE_OP_NOT_LITERAL_IGNORE: + TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", + ctx->pattern, ctx->ptr, *ctx->pattern)); + if (ctx->ptr >= end || + state->lower(*ctx->ptr) == state->lower(*ctx->pattern)) + RETURN_FAILURE; + ctx->pattern++; + ctx->ptr++; + break; + + case SRE_OP_IN_IGNORE: + TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr)); + if (ctx->ptr >= end + || !SRE_CHARSET(ctx->pattern+1, + (SRE_CODE)state->lower(*ctx->ptr))) + RETURN_FAILURE; + ctx->pattern += ctx->pattern[0]; + ctx->ptr++; + break; + + case SRE_OP_JUMP: + case SRE_OP_INFO: + /* jump forward */ + /* */ + TRACE(("|%p|%p|JUMP %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[0])); + ctx->pattern += ctx->pattern[0]; + break; + + case SRE_OP_BRANCH: + /* alternation */ + /* <0=skip> code ... */ + TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr)); + LASTMARK_SAVE(); + ctx->u.rep = state->repeat; + if (ctx->u.rep) + MARK_PUSH(ctx->lastmark); + for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) { + if (ctx->pattern[1] == SRE_OP_LITERAL && + (ctx->ptr >= end || + (SRE_CODE) *ctx->ptr != ctx->pattern[2])) + continue; + if (ctx->pattern[1] == SRE_OP_IN && + (ctx->ptr >= end || + !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr))) + continue; + state->ptr = ctx->ptr; + DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1); + if (ret) { + if (ctx->u.rep) + MARK_POP_DISCARD(ctx->lastmark); + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + if (ctx->u.rep) + MARK_POP_KEEP(ctx->lastmark); + LASTMARK_RESTORE(); + } + if (ctx->u.rep) + MARK_POP_DISCARD(ctx->lastmark); + RETURN_FAILURE; + + case SRE_OP_REPEAT_ONE: + /* match repeated sequence (maximizing regexp) */ + + /* this operator only works if the repeated item is + exactly one character wide, and we're not already + collecting backtracking points. for other cases, + use the MAX_REPEAT operator */ + + /* <1=min> <2=max> item tail */ + + TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, + ctx->pattern[1], ctx->pattern[2])); + + if (ctx->ptr + ctx->pattern[1] > end) + RETURN_FAILURE; /* cannot match */ + + state->ptr = ctx->ptr; + + ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]); + RETURN_ON_ERROR(ret); + DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos); + ctx->count = ret; + ctx->ptr += ctx->count; + + /* when we arrive here, count contains the number of + matches, and ctx->ptr points to the tail of the target + string. check if the rest of the pattern matches, + and backtrack if not. */ + + if (ctx->count < (Py_ssize_t) ctx->pattern[1]) + RETURN_FAILURE; + + if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) { + /* tail is empty. we're finished */ + state->ptr = ctx->ptr; + RETURN_SUCCESS; + } + + LASTMARK_SAVE(); + + if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) { + /* tail starts with a literal. skip positions where + the rest of the pattern cannot possibly match */ + ctx->u.chr = ctx->pattern[ctx->pattern[0]+1]; + for (;;) { + while (ctx->count >= (Py_ssize_t) ctx->pattern[1] && + (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) { + ctx->ptr--; + ctx->count--; + } + if (ctx->count < (Py_ssize_t) ctx->pattern[1]) + break; + state->ptr = ctx->ptr; + DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, + ctx->pattern+ctx->pattern[0]); + if (ret) { + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + + LASTMARK_RESTORE(); + + ctx->ptr--; + ctx->count--; + } + + } else { + /* general case */ + while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) { + state->ptr = ctx->ptr; + DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, + ctx->pattern+ctx->pattern[0]); + if (ret) { + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + ctx->ptr--; + ctx->count--; + LASTMARK_RESTORE(); + } + } + RETURN_FAILURE; + + case SRE_OP_MIN_REPEAT_ONE: + /* match repeated sequence (minimizing regexp) */ + + /* this operator only works if the repeated item is + exactly one character wide, and we're not already + collecting backtracking points. for other cases, + use the MIN_REPEAT operator */ + + /* <1=min> <2=max> item tail */ + + TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, + ctx->pattern[1], ctx->pattern[2])); + + if (ctx->ptr + ctx->pattern[1] > end) + RETURN_FAILURE; /* cannot match */ + + state->ptr = ctx->ptr; + + if (ctx->pattern[1] == 0) + ctx->count = 0; + else { + /* count using pattern min as the maximum */ + ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]); + RETURN_ON_ERROR(ret); + DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos); + if (ret < (Py_ssize_t) ctx->pattern[1]) + /* didn't match minimum number of times */ + RETURN_FAILURE; + /* advance past minimum matches of repeat */ + ctx->count = ret; + ctx->ptr += ctx->count; + } + + if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) { + /* tail is empty. we're finished */ + state->ptr = ctx->ptr; + RETURN_SUCCESS; + + } else { + /* general case */ + LASTMARK_SAVE(); + while ((Py_ssize_t)ctx->pattern[2] == 65535 + || ctx->count <= (Py_ssize_t)ctx->pattern[2]) { + state->ptr = ctx->ptr; + DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, + ctx->pattern+ctx->pattern[0]); + if (ret) { + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + state->ptr = ctx->ptr; + ret = SRE_COUNT(state, ctx->pattern+3, 1); + RETURN_ON_ERROR(ret); + DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos); + if (ret == 0) + break; + assert(ret == 1); + ctx->ptr++; + ctx->count++; + LASTMARK_RESTORE(); + } + } + RETURN_FAILURE; + + case SRE_OP_REPEAT: + /* create repeat context. all the hard work is done + by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ + /* <1=min> <2=max> item tail */ + TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr, + ctx->pattern[1], ctx->pattern[2])); + + /* install new repeat context */ + ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep)); + if (!ctx->u.rep) { + PyErr_NoMemory(); + RETURN_FAILURE; + } + ctx->u.rep->count = -1; + ctx->u.rep->pattern = ctx->pattern; + ctx->u.rep->prev = state->repeat; + ctx->u.rep->last_ptr = NULL; + state->repeat = ctx->u.rep; + + state->ptr = ctx->ptr; + DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]); + state->repeat = ctx->u.rep->prev; + PyObject_FREE(ctx->u.rep); + + if (ret) { + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + RETURN_FAILURE; + + case SRE_OP_MAX_UNTIL: + /* maximizing repeat */ + /* <1=min> <2=max> item tail */ + + /* FIXME: we probably need to deal with zero-width + matches in here... */ + + ctx->u.rep = state->repeat; + if (!ctx->u.rep) + RETURN_ERROR(SRE_ERROR_STATE); + + state->ptr = ctx->ptr; + + ctx->count = ctx->u.rep->count+1; + + TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern, + ctx->ptr, ctx->count)); + + if (ctx->count < ctx->u.rep->pattern[1]) { + /* not enough matches */ + ctx->u.rep->count = ctx->count; + DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, + ctx->u.rep->pattern+3); + if (ret) { + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + ctx->u.rep->count = ctx->count-1; + state->ptr = ctx->ptr; + RETURN_FAILURE; + } + + if ((ctx->count < ctx->u.rep->pattern[2] || + ctx->u.rep->pattern[2] == 65535) && + state->ptr != ctx->u.rep->last_ptr) { + /* we may have enough matches, but if we can + match another item, do so */ + ctx->u.rep->count = ctx->count; + LASTMARK_SAVE(); + MARK_PUSH(ctx->lastmark); + /* zero-width match protection */ + DATA_PUSH(&ctx->u.rep->last_ptr); + ctx->u.rep->last_ptr = state->ptr; + DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, + ctx->u.rep->pattern+3); + DATA_POP(&ctx->u.rep->last_ptr); + if (ret) { + MARK_POP_DISCARD(ctx->lastmark); + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + MARK_POP(ctx->lastmark); + LASTMARK_RESTORE(); + ctx->u.rep->count = ctx->count-1; + state->ptr = ctx->ptr; + } + + /* cannot match more repeated items here. make sure the + tail matches */ + state->repeat = ctx->u.rep->prev; + DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern); + RETURN_ON_SUCCESS(ret); + state->repeat = ctx->u.rep; + state->ptr = ctx->ptr; + RETURN_FAILURE; + + case SRE_OP_MIN_UNTIL: + /* minimizing repeat */ + /* <1=min> <2=max> item tail */ + + ctx->u.rep = state->repeat; + if (!ctx->u.rep) + RETURN_ERROR(SRE_ERROR_STATE); + + state->ptr = ctx->ptr; + + ctx->count = ctx->u.rep->count+1; + + TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern, + ctx->ptr, ctx->count, ctx->u.rep->pattern)); + + if (ctx->count < ctx->u.rep->pattern[1]) { + /* not enough matches */ + ctx->u.rep->count = ctx->count; + DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, + ctx->u.rep->pattern+3); + if (ret) { + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + ctx->u.rep->count = ctx->count-1; + state->ptr = ctx->ptr; + RETURN_FAILURE; + } + + LASTMARK_SAVE(); + + /* see if the tail matches */ + state->repeat = ctx->u.rep->prev; + DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern); + if (ret) { + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + + state->repeat = ctx->u.rep; + state->ptr = ctx->ptr; + + LASTMARK_RESTORE(); + + if (ctx->count >= ctx->u.rep->pattern[2] + && ctx->u.rep->pattern[2] != 65535) + RETURN_FAILURE; + + ctx->u.rep->count = ctx->count; + DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, + ctx->u.rep->pattern+3); + if (ret) { + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + ctx->u.rep->count = ctx->count-1; + state->ptr = ctx->ptr; + RETURN_FAILURE; + + case SRE_OP_GROUPREF: + /* match backreference */ + TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[0])); + i = ctx->pattern[0]; + { + Py_ssize_t groupref = i+i; + if (groupref >= state->lastmark) { + RETURN_FAILURE; + } else { + SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; + SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; + if (!p || !e || e < p) + RETURN_FAILURE; + while (p < e) { + if (ctx->ptr >= end || *ctx->ptr != *p) + RETURN_FAILURE; + p++; ctx->ptr++; + } + } + } + ctx->pattern++; + break; + + case SRE_OP_GROUPREF_IGNORE: + /* match backreference */ + TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[0])); + i = ctx->pattern[0]; + { + Py_ssize_t groupref = i+i; + if (groupref >= state->lastmark) { + RETURN_FAILURE; + } else { + SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; + SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; + if (!p || !e || e < p) + RETURN_FAILURE; + while (p < e) { + if (ctx->ptr >= end || + state->lower(*ctx->ptr) != state->lower(*p)) + RETURN_FAILURE; + p++; ctx->ptr++; + } + } + } + ctx->pattern++; + break; + + case SRE_OP_GROUPREF_EXISTS: + TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[0])); + /* codeyes codeno ... */ + i = ctx->pattern[0]; + { + Py_ssize_t groupref = i+i; + if (groupref >= state->lastmark) { + ctx->pattern += ctx->pattern[1]; + break; + } else { + SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; + SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; + if (!p || !e || e < p) { + ctx->pattern += ctx->pattern[1]; + break; + } + } + } + ctx->pattern += 2; + break; + + case SRE_OP_ASSERT: + /* assert subpattern */ + /* */ + TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[1])); + state->ptr = ctx->ptr - ctx->pattern[1]; + if (state->ptr < state->beginning) + RETURN_FAILURE; + DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2); + RETURN_ON_FAILURE(ret); + ctx->pattern += ctx->pattern[0]; + break; + + case SRE_OP_ASSERT_NOT: + /* assert not subpattern */ + /* */ + TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[1])); + state->ptr = ctx->ptr - ctx->pattern[1]; + if (state->ptr >= state->beginning) { + DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2); + if (ret) { + RETURN_ON_ERROR(ret); + RETURN_FAILURE; + } + } + ctx->pattern += ctx->pattern[0]; + break; + + case SRE_OP_FAILURE: + /* immediate failure */ + TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr)); + RETURN_FAILURE; + + default: + TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr, + ctx->pattern[-1])); + RETURN_ERROR(SRE_ERROR_ILLEGAL); + } + } + +exit: + ctx_pos = ctx->last_ctx_pos; + jump = ctx->jump; + DATA_POP_DISCARD(ctx); + if (ctx_pos == -1) + return ret; + DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos); + + switch (jump) { + case JUMP_MAX_UNTIL_2: + TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr)); + goto jump_max_until_2; + case JUMP_MAX_UNTIL_3: + TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr)); + goto jump_max_until_3; + case JUMP_MIN_UNTIL_2: + TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr)); + goto jump_min_until_2; + case JUMP_MIN_UNTIL_3: + TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr)); + goto jump_min_until_3; + case JUMP_BRANCH: + TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr)); + goto jump_branch; + case JUMP_MAX_UNTIL_1: + TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr)); + goto jump_max_until_1; + case JUMP_MIN_UNTIL_1: + TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr)); + goto jump_min_until_1; + case JUMP_REPEAT: + TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr)); + goto jump_repeat; + case JUMP_REPEAT_ONE_1: + TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr)); + goto jump_repeat_one_1; + case JUMP_REPEAT_ONE_2: + TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr)); + goto jump_repeat_one_2; + case JUMP_MIN_REPEAT_ONE: + TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr)); + goto jump_min_repeat_one; + case JUMP_ASSERT: + TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr)); + goto jump_assert; + case JUMP_ASSERT_NOT: + TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr)); + goto jump_assert_not; + case JUMP_NONE: + TRACE(("|%p|%p|RETURN %d\n", ctx->pattern, ctx->ptr, ret)); + break; + } + + return ret; /* should never get here */ +} + +LOCAL(Py_ssize_t) +SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern) +{ + SRE_CHAR* ptr = (SRE_CHAR *)state->start; + SRE_CHAR* end = (SRE_CHAR *)state->end; + Py_ssize_t status = 0; + Py_ssize_t prefix_len = 0; + Py_ssize_t prefix_skip = 0; + SRE_CODE* prefix = NULL; + SRE_CODE* charset = NULL; + SRE_CODE* overlap = NULL; + int flags = 0; + + if (pattern[0] == SRE_OP_INFO) { + /* optimization info block */ + /* <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ + + flags = pattern[2]; + + if (pattern[3] > 1) { + /* adjust end point (but make sure we leave at least one + character in there, so literal search will work) */ + end -= pattern[3]-1; + if (end <= ptr) + end = ptr+1; + } + + if (flags & SRE_INFO_PREFIX) { + /* pattern starts with a known prefix */ + /* */ + prefix_len = pattern[5]; + prefix_skip = pattern[6]; + prefix = pattern + 7; + overlap = prefix + prefix_len - 1; + } else if (flags & SRE_INFO_CHARSET) + /* pattern starts with a character from a known set */ + /* */ + charset = pattern + 5; + + pattern += 1 + pattern[1]; + } + + TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip)); + TRACE(("charset = %p\n", charset)); + +#if defined(USE_FAST_SEARCH) + if (prefix_len > 1) { + /* pattern starts with a known prefix. use the overlap + table to skip forward as fast as we possibly can */ + Py_ssize_t i = 0; + end = (SRE_CHAR *)state->end; + while (ptr < end) { + for (;;) { + if ((SRE_CODE) ptr[0] != prefix[i]) { + if (!i) + break; + else + i = overlap[i]; + } else { + if (++i == prefix_len) { + /* found a potential match */ + TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr)); + state->start = ptr + 1 - prefix_len; + state->ptr = ptr + 1 - prefix_len + prefix_skip; + if (flags & SRE_INFO_LITERAL) + return 1; /* we got all of it */ + status = SRE_MATCH(state, pattern + 2*prefix_skip); + if (status != 0) + return status; + /* close but no cigar -- try again */ + i = overlap[i]; + } + break; + } + } + ptr++; + } + return 0; + } +#endif + + if (pattern[0] == SRE_OP_LITERAL) { + /* pattern starts with a literal character. this is used + for short prefixes, and if fast search is disabled */ + SRE_CODE chr = pattern[1]; + end = (SRE_CHAR *)state->end; + for (;;) { + while (ptr < end && (SRE_CODE) ptr[0] != chr) + ptr++; + if (ptr >= end) + return 0; + TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); + state->start = ptr; + state->ptr = ++ptr; + if (flags & SRE_INFO_LITERAL) + return 1; /* we got all of it */ + status = SRE_MATCH(state, pattern + 2); + if (status != 0) + break; + } + } else if (charset) { + /* pattern starts with a character from a known set */ + end = (SRE_CHAR *)state->end; + for (;;) { + while (ptr < end && !SRE_CHARSET(charset, ptr[0])) + ptr++; + if (ptr >= end) + return 0; + TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr)); + state->start = ptr; + state->ptr = ptr; + status = SRE_MATCH(state, pattern); + if (status != 0) + break; + ptr++; + } + } else + /* general case */ + while (ptr <= end) { + TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); + state->start = state->ptr = ptr++; + status = SRE_MATCH(state, pattern); + if (status != 0) + break; + } + + return status; +} + +LOCAL(int) +SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len) +{ + /* check if given string is a literal template (i.e. no escapes) */ + while (len-- > 0) + if (*ptr++ == '\\') + return 0; + return 1; +} + +#if !defined(SRE_RECURSIVE) + +/* -------------------------------------------------------------------- */ +/* factories and destructors */ + +/* see sre.h for object declarations */ +static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int); +static PyObject*pattern_scanner(PatternObject*, PyObject*); + +static PyObject * +sre_codesize(PyObject* self, PyObject *unused) +{ + return Py_BuildValue("l", sizeof(SRE_CODE)); +} + +static PyObject * +sre_getlower(PyObject* self, PyObject* args) +{ + int character, flags; + if (!PyArg_ParseTuple(args, "ii", &character, &flags)) + return NULL; + if (flags & SRE_FLAG_LOCALE) + return Py_BuildValue("i", sre_lower_locale(character)); + if (flags & SRE_FLAG_UNICODE) +#if defined(HAVE_UNICODE) + return Py_BuildValue("i", sre_lower_unicode(character)); +#else + return Py_BuildValue("i", sre_lower_locale(character)); +#endif + return Py_BuildValue("i", sre_lower(character)); +} + +LOCAL(void) +state_reset(SRE_STATE* state) +{ + /* FIXME: dynamic! */ + /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/ + + state->lastmark = -1; + state->lastindex = -1; + + state->repeat = NULL; + + data_stack_dealloc(state); +} + +static void* +getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize) +{ + /* given a python object, return a data pointer, a length (in + characters), and a character size. return NULL if the object + is not a string (or not compatible) */ + + PyBufferProcs *buffer; + Py_ssize_t size, bytes; + int charsize; + void* ptr; + +#if defined(HAVE_UNICODE) + if (PyUnicode_Check(string)) { + /* unicode strings doesn't always support the buffer interface */ + ptr = (void*) PyUnicode_AS_DATA(string); + /* bytes = PyUnicode_GET_DATA_SIZE(string); */ + size = PyUnicode_GET_SIZE(string); + charsize = sizeof(Py_UNICODE); + + } else { +#endif + + /* get pointer to string buffer */ + buffer = Py_TYPE(string)->tp_as_buffer; + if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount || + buffer->bf_getsegcount(string, NULL) != 1) { + PyErr_SetString(PyExc_TypeError, "expected string or buffer"); + return NULL; + } + + /* determine buffer size */ + bytes = buffer->bf_getreadbuffer(string, 0, &ptr); + if (bytes < 0) { + PyErr_SetString(PyExc_TypeError, "buffer has negative size"); + return NULL; + } + + /* determine character size */ +#if PY_VERSION_HEX >= 0x01060000 + size = PyObject_Size(string); +#else + size = PyObject_Length(string); +#endif + + if (PyString_Check(string) || bytes == size) + charsize = 1; +#if defined(HAVE_UNICODE) + else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE))) + charsize = sizeof(Py_UNICODE); +#endif + else { + PyErr_SetString(PyExc_TypeError, "buffer size mismatch"); + return NULL; + } + +#if defined(HAVE_UNICODE) + } +#endif + + *p_length = size; + *p_charsize = charsize; + + return ptr; +} + +LOCAL(PyObject*) +state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, + Py_ssize_t start, Py_ssize_t end) +{ + /* prepare state object */ + + Py_ssize_t length; + int charsize; + void* ptr; + + memset(state, 0, sizeof(SRE_STATE)); + + state->lastmark = -1; + state->lastindex = -1; + + ptr = getstring(string, &length, &charsize); + if (!ptr) + return NULL; + + /* adjust boundaries */ + if (start < 0) + start = 0; + else if (start > length) + start = length; + + if (end < 0) + end = 0; + else if (end > length) + end = length; + + state->charsize = charsize; + + state->beginning = ptr; + + state->start = (void*) ((char*) ptr + start * state->charsize); + state->end = (void*) ((char*) ptr + end * state->charsize); + + Py_INCREF(string); + state->string = string; + state->pos = start; + state->endpos = end; + + if (pattern->flags & SRE_FLAG_LOCALE) + state->lower = sre_lower_locale; + else if (pattern->flags & SRE_FLAG_UNICODE) +#if defined(HAVE_UNICODE) + state->lower = sre_lower_unicode; +#else + state->lower = sre_lower_locale; +#endif + else + state->lower = sre_lower; + + return string; +} + +LOCAL(void) +state_fini(SRE_STATE* state) +{ + Py_XDECREF(state->string); + data_stack_dealloc(state); +} + +/* calculate offset from start of string */ +#define STATE_OFFSET(state, member)\ + (((char*)(member) - (char*)(state)->beginning) / (state)->charsize) + +LOCAL(PyObject*) +state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty) +{ + Py_ssize_t i, j; + + index = (index - 1) * 2; + + if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) { + if (empty) + /* want empty string */ + i = j = 0; + else { + Py_INCREF(Py_None); + return Py_None; + } + } else { + i = STATE_OFFSET(state, state->mark[index]); + j = STATE_OFFSET(state, state->mark[index+1]); + } + + return PySequence_GetSlice(string, i, j); +} + +static void +pattern_error(int status) +{ + switch (status) { + case SRE_ERROR_RECURSION_LIMIT: + PyErr_SetString( + PyExc_RuntimeError, + "maximum recursion limit exceeded" + ); + break; + case SRE_ERROR_MEMORY: + PyErr_NoMemory(); + break; + case SRE_ERROR_INTERRUPTED: + /* An exception has already been raised, so let it fly */ + break; + default: + /* other error codes indicate compiler/engine bugs */ + PyErr_SetString( + PyExc_RuntimeError, + "internal error in regular expression engine" + ); + } +} + +static void +pattern_dealloc(PatternObject* self) +{ + if (self->weakreflist != NULL) + PyObject_ClearWeakRefs((PyObject *) self); + Py_XDECREF(self->pattern); + Py_XDECREF(self->groupindex); + Py_XDECREF(self->indexgroup); + PyObject_DEL(self); +} + +static PyObject* +pattern_match(PatternObject* self, PyObject* args, PyObject* kw) +{ + SRE_STATE state; + int status; + + PyObject* string; + Py_ssize_t start = 0; + Py_ssize_t end = PY_SSIZE_T_MAX; + static char* kwlist[] = { "pattern", "pos", "endpos", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:match", kwlist, + &string, &start, &end)) + return NULL; + + string = state_init(&state, self, string, start, end); + if (!string) + return NULL; + + state.ptr = state.start; + + TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr)); + + if (state.charsize == 1) { + status = sre_match(&state, PatternObject_GetCode(self)); + } else { +#if defined(HAVE_UNICODE) + status = sre_umatch(&state, PatternObject_GetCode(self)); +#endif + } + + TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); + if (PyErr_Occurred()) + return NULL; + + state_fini(&state); + + return pattern_new_match(self, &state, status); +} + +static PyObject* +pattern_search(PatternObject* self, PyObject* args, PyObject* kw) +{ + SRE_STATE state; + int status; + + PyObject* string; + Py_ssize_t start = 0; + Py_ssize_t end = PY_SSIZE_T_MAX; + static char* kwlist[] = { "pattern", "pos", "endpos", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist, + &string, &start, &end)) + return NULL; + + string = state_init(&state, self, string, start, end); + if (!string) + return NULL; + + TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr)); + + if (state.charsize == 1) { + status = sre_search(&state, PatternObject_GetCode(self)); + } else { +#if defined(HAVE_UNICODE) + status = sre_usearch(&state, PatternObject_GetCode(self)); +#endif + } + + TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); + + state_fini(&state); + + if (PyErr_Occurred()) + return NULL; + + return pattern_new_match(self, &state, status); +} + +static PyObject* +call(char* module, char* function, PyObject* args) +{ + PyObject* name; + PyObject* mod; + PyObject* func; + PyObject* result; + + if (!args) + return NULL; + name = PyString_FromString(module); + if (!name) + return NULL; + mod = PyImport_Import(name); + Py_DECREF(name); + if (!mod) + return NULL; + func = PyObject_GetAttrString(mod, function); + Py_DECREF(mod); + if (!func) + return NULL; + result = PyObject_CallObject(func, args); + Py_DECREF(func); + Py_DECREF(args); + return result; +} + +#ifdef USE_BUILTIN_COPY +static int +deepcopy(PyObject** object, PyObject* memo) +{ + PyObject* copy; + + copy = call( + "copy", "deepcopy", + PyTuple_Pack(2, *object, memo) + ); + if (!copy) + return 0; + + Py_DECREF(*object); + *object = copy; + + return 1; /* success */ +} +#endif + +static PyObject* +join_list(PyObject* list, PyObject* string) +{ + /* join list elements */ + + PyObject* joiner; +#if PY_VERSION_HEX >= 0x01060000 + PyObject* function; + PyObject* args; +#endif + PyObject* result; + + joiner = PySequence_GetSlice(string, 0, 0); + if (!joiner) + return NULL; + + if (PyList_GET_SIZE(list) == 0) { + Py_DECREF(list); + return joiner; + } + +#if PY_VERSION_HEX >= 0x01060000 + function = PyObject_GetAttrString(joiner, "join"); + if (!function) { + Py_DECREF(joiner); + return NULL; + } + args = PyTuple_New(1); + if (!args) { + Py_DECREF(function); + Py_DECREF(joiner); + return NULL; + } + PyTuple_SET_ITEM(args, 0, list); + result = PyObject_CallObject(function, args); + Py_DECREF(args); /* also removes list */ + Py_DECREF(function); +#else + result = call( + "string", "join", + PyTuple_Pack(2, list, joiner) + ); +#endif + Py_DECREF(joiner); + + return result; +} + +static PyObject* +pattern_findall(PatternObject* self, PyObject* args, PyObject* kw) +{ + SRE_STATE state; + PyObject* list; + int status; + Py_ssize_t i, b, e; + + PyObject* string; + Py_ssize_t start = 0; + Py_ssize_t end = PY_SSIZE_T_MAX; + static char* kwlist[] = { "source", "pos", "endpos", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist, + &string, &start, &end)) + return NULL; + + string = state_init(&state, self, string, start, end); + if (!string) + return NULL; + + list = PyList_New(0); + if (!list) { + state_fini(&state); + return NULL; + } + + while (state.start <= state.end) { + + PyObject* item; + + state_reset(&state); + + state.ptr = state.start; + + if (state.charsize == 1) { + status = sre_search(&state, PatternObject_GetCode(self)); + } else { +#if defined(HAVE_UNICODE) + status = sre_usearch(&state, PatternObject_GetCode(self)); +#endif + } + + if (PyErr_Occurred()) + goto error; + + if (status <= 0) { + if (status == 0) + break; + pattern_error(status); + goto error; + } + + /* don't bother to build a match object */ + switch (self->groups) { + case 0: + b = STATE_OFFSET(&state, state.start); + e = STATE_OFFSET(&state, state.ptr); + item = PySequence_GetSlice(string, b, e); + if (!item) + goto error; + break; + case 1: + item = state_getslice(&state, 1, string, 1); + if (!item) + goto error; + break; + default: + item = PyTuple_New(self->groups); + if (!item) + goto error; + for (i = 0; i < self->groups; i++) { + PyObject* o = state_getslice(&state, i+1, string, 1); + if (!o) { + Py_DECREF(item); + goto error; + } + PyTuple_SET_ITEM(item, i, o); + } + break; + } + + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + + if (state.ptr == state.start) + state.start = (void*) ((char*) state.ptr + state.charsize); + else + state.start = state.ptr; + + } + + state_fini(&state); + return list; + +error: + Py_DECREF(list); + state_fini(&state); + return NULL; + +} + +#if PY_VERSION_HEX >= 0x02020000 +static PyObject* +pattern_finditer(PatternObject* pattern, PyObject* args) +{ + PyObject* scanner; + PyObject* search; + PyObject* iterator; + + scanner = pattern_scanner(pattern, args); + if (!scanner) + return NULL; + + search = PyObject_GetAttrString(scanner, "search"); + Py_DECREF(scanner); + if (!search) + return NULL; + + iterator = PyCallIter_New(search, Py_None); + Py_DECREF(search); + + return iterator; +} +#endif + +static PyObject* +pattern_split(PatternObject* self, PyObject* args, PyObject* kw) +{ + SRE_STATE state; + PyObject* list; + PyObject* item; + int status; + Py_ssize_t n; + Py_ssize_t i; + void* last; + + PyObject* string; + Py_ssize_t maxsplit = 0; + static char* kwlist[] = { "source", "maxsplit", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist, + &string, &maxsplit)) + return NULL; + + string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX); + if (!string) + return NULL; + + list = PyList_New(0); + if (!list) { + state_fini(&state); + return NULL; + } + + n = 0; + last = state.start; + + while (!maxsplit || n < maxsplit) { + + state_reset(&state); + + state.ptr = state.start; + + if (state.charsize == 1) { + status = sre_search(&state, PatternObject_GetCode(self)); + } else { +#if defined(HAVE_UNICODE) + status = sre_usearch(&state, PatternObject_GetCode(self)); +#endif + } + + if (PyErr_Occurred()) + goto error; + + if (status <= 0) { + if (status == 0) + break; + pattern_error(status); + goto error; + } + + if (state.start == state.ptr) { + if (last == state.end) + break; + /* skip one character */ + state.start = (void*) ((char*) state.ptr + state.charsize); + continue; + } + + /* get segment before this match */ + item = PySequence_GetSlice( + string, STATE_OFFSET(&state, last), + STATE_OFFSET(&state, state.start) + ); + if (!item) + goto error; + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + + /* add groups (if any) */ + for (i = 0; i < self->groups; i++) { + item = state_getslice(&state, i+1, string, 0); + if (!item) + goto error; + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + } + + n = n + 1; + + last = state.start = state.ptr; + + } + + /* get segment following last match (even if empty) */ + item = PySequence_GetSlice( + string, STATE_OFFSET(&state, last), state.endpos + ); + if (!item) + goto error; + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + + state_fini(&state); + return list; + +error: + Py_DECREF(list); + state_fini(&state); + return NULL; + +} + +static PyObject* +pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string, + Py_ssize_t count, Py_ssize_t subn) +{ + SRE_STATE state; + PyObject* list; + PyObject* item; + PyObject* filter; + PyObject* args; + PyObject* match; + void* ptr; + int status; + Py_ssize_t n; + Py_ssize_t i, b, e; + int bint; + int filter_is_callable; + + if (PyCallable_Check(ptemplate)) { + /* sub/subn takes either a function or a template */ + filter = ptemplate; + Py_INCREF(filter); + filter_is_callable = 1; + } else { + /* if not callable, check if it's a literal string */ + int literal; + ptr = getstring(ptemplate, &n, &bint); + b = bint; + if (ptr) { + if (b == 1) { + literal = sre_literal_template((unsigned char *)ptr, n); + } else { +#if defined(HAVE_UNICODE) + literal = sre_uliteral_template((Py_UNICODE *)ptr, n); +#endif + } + } else { + PyErr_Clear(); + literal = 0; + } + if (literal) { + filter = ptemplate; + Py_INCREF(filter); + filter_is_callable = 0; + } else { + /* not a literal; hand it over to the template compiler */ + filter = call( + SRE_PY_MODULE, "_subx", + PyTuple_Pack(2, self, ptemplate) + ); + if (!filter) + return NULL; + filter_is_callable = PyCallable_Check(filter); + } + } + + string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX); + if (!string) { + Py_DECREF(filter); + return NULL; + } + + list = PyList_New(0); + if (!list) { + Py_DECREF(filter); + state_fini(&state); + return NULL; + } + + n = i = 0; + + while (!count || n < count) { + + state_reset(&state); + + state.ptr = state.start; + + if (state.charsize == 1) { + status = sre_search(&state, PatternObject_GetCode(self)); + } else { +#if defined(HAVE_UNICODE) + status = sre_usearch(&state, PatternObject_GetCode(self)); +#endif + } + + if (PyErr_Occurred()) + goto error; + + if (status <= 0) { + if (status == 0) + break; + pattern_error(status); + goto error; + } + + b = STATE_OFFSET(&state, state.start); + e = STATE_OFFSET(&state, state.ptr); + + if (i < b) { + /* get segment before this match */ + item = PySequence_GetSlice(string, i, b); + if (!item) + goto error; + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + + } else if (i == b && i == e && n > 0) + /* ignore empty match on latest position */ + goto next; + + if (filter_is_callable) { + /* pass match object through filter */ + match = pattern_new_match(self, &state, 1); + if (!match) + goto error; + args = PyTuple_Pack(1, match); + if (!args) { + Py_DECREF(match); + goto error; + } + item = PyObject_CallObject(filter, args); + Py_DECREF(args); + Py_DECREF(match); + if (!item) + goto error; + } else { + /* filter is literal string */ + item = filter; + Py_INCREF(item); + } + + /* add to list */ + if (item != Py_None) { + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + } + + i = e; + n = n + 1; + +next: + /* move on */ + if (state.ptr == state.start) + state.start = (void*) ((char*) state.ptr + state.charsize); + else + state.start = state.ptr; + + } + + /* get segment following last match */ + if (i < state.endpos) { + item = PySequence_GetSlice(string, i, state.endpos); + if (!item) + goto error; + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + } + + state_fini(&state); + + Py_DECREF(filter); + + /* convert list to single string (also removes list) */ + item = join_list(list, string); + + if (!item) + return NULL; + + if (subn) + return Py_BuildValue("Ni", item, n); + + return item; + +error: + Py_DECREF(list); + state_fini(&state); + Py_DECREF(filter); + return NULL; + +} + +static PyObject* +pattern_sub(PatternObject* self, PyObject* args, PyObject* kw) +{ + PyObject* ptemplate; + PyObject* string; + Py_ssize_t count = 0; + static char* kwlist[] = { "repl", "string", "count", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist, + &ptemplate, &string, &count)) + return NULL; + + return pattern_subx(self, ptemplate, string, count, 0); +} + +static PyObject* +pattern_subn(PatternObject* self, PyObject* args, PyObject* kw) +{ + PyObject* ptemplate; + PyObject* string; + Py_ssize_t count = 0; + static char* kwlist[] = { "repl", "string", "count", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist, + &ptemplate, &string, &count)) + return NULL; + + return pattern_subx(self, ptemplate, string, count, 1); +} + +static PyObject* +pattern_copy(PatternObject* self, PyObject *unused) +{ +#ifdef USE_BUILTIN_COPY + PatternObject* copy; + int offset; + + copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize); + if (!copy) + return NULL; + + offset = offsetof(PatternObject, groups); + + Py_XINCREF(self->groupindex); + Py_XINCREF(self->indexgroup); + Py_XINCREF(self->pattern); + + memcpy((char*) copy + offset, (char*) self + offset, + sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset); + copy->weakreflist = NULL; + + return (PyObject*) copy; +#else + PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object"); + return NULL; +#endif +} + +static PyObject* +pattern_deepcopy(PatternObject* self, PyObject* memo) +{ +#ifdef USE_BUILTIN_COPY + PatternObject* copy; + + copy = (PatternObject*) pattern_copy(self); + if (!copy) + return NULL; + + if (!deepcopy(©->groupindex, memo) || + !deepcopy(©->indexgroup, memo) || + !deepcopy(©->pattern, memo)) { + Py_DECREF(copy); + return NULL; + } + +#else + PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object"); + return NULL; +#endif +} + +PyDoc_STRVAR(pattern_match_doc, +"match(string[, pos[, endpos]]) --> match object or None.\n\ + Matches zero or more characters at the beginning of the string"); + +PyDoc_STRVAR(pattern_search_doc, +"search(string[, pos[, endpos]]) --> match object or None.\n\ + Scan through string looking for a match, and return a corresponding\n\ + MatchObject instance. Return None if no position in the string matches."); + +PyDoc_STRVAR(pattern_split_doc, +"split(string[, maxsplit = 0]) --> list.\n\ + Split string by the occurrences of pattern."); + +PyDoc_STRVAR(pattern_findall_doc, +"findall(string[, pos[, endpos]]) --> list.\n\ + Return a list of all non-overlapping matches of pattern in string."); + +PyDoc_STRVAR(pattern_finditer_doc, +"finditer(string[, pos[, endpos]]) --> iterator.\n\ + Return an iterator over all non-overlapping matches for the \n\ + RE pattern in string. For each match, the iterator returns a\n\ + match object."); + +PyDoc_STRVAR(pattern_sub_doc, +"sub(repl, string[, count = 0]) --> newstring\n\ + Return the string obtained by replacing the leftmost non-overlapping\n\ + occurrences of pattern in string by the replacement repl."); + +PyDoc_STRVAR(pattern_subn_doc, +"subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\ + Return the tuple (new_string, number_of_subs_made) found by replacing\n\ + the leftmost non-overlapping occurrences of pattern with the\n\ + replacement repl."); + +PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects"); + +static PyMethodDef pattern_methods[] = { + {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS, + pattern_match_doc}, + {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS, + pattern_search_doc}, + {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS, + pattern_sub_doc}, + {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS, + pattern_subn_doc}, + {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS, + pattern_split_doc}, + {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS, + pattern_findall_doc}, +#if PY_VERSION_HEX >= 0x02020000 + {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS, + pattern_finditer_doc}, +#endif + {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS}, + {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS}, + {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O}, + {NULL, NULL} +}; + +#define PAT_OFF(x) offsetof(PatternObject, x) +static PyMemberDef pattern_members[] = { + {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY}, + {"flags", T_INT, PAT_OFF(flags), READONLY}, + {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY}, + {"groupindex", T_OBJECT, PAT_OFF(groupindex), READONLY}, + {NULL} /* Sentinel */ +}; + +statichere PyTypeObject Pattern_Type = { + PyObject_HEAD_INIT(NULL) + 0, "_" SRE_MODULE ".SRE_Pattern", + sizeof(PatternObject), sizeof(SRE_CODE), + (destructor)pattern_dealloc, /*tp_dealloc*/ + 0, /* tp_print */ + 0, /* tp_getattrn */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + pattern_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + pattern_methods, /* tp_methods */ + pattern_members, /* tp_members */ +}; + +static int _validate(PatternObject *self); /* Forward */ + +static PyObject * +_compile(PyObject* self_, PyObject* args) +{ + /* "compile" pattern descriptor to pattern object */ + + PatternObject* self; + Py_ssize_t i, n; + + PyObject* pattern; + int flags = 0; + PyObject* code; + Py_ssize_t groups = 0; + PyObject* groupindex = NULL; + PyObject* indexgroup = NULL; + if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags, + &PyList_Type, &code, &groups, + &groupindex, &indexgroup)) + return NULL; + + n = PyList_GET_SIZE(code); + /* coverity[ampersand_in_size] */ + self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n); + if (!self) + return NULL; + self->weakreflist = NULL; + self->pattern = NULL; + self->groupindex = NULL; + self->indexgroup = NULL; + + self->codesize = n; + + for (i = 0; i < n; i++) { + PyObject *o = PyList_GET_ITEM(code, i); + unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o) + : PyLong_AsUnsignedLong(o); + self->code[i] = (SRE_CODE) value; + if ((unsigned long) self->code[i] != value) { + PyErr_SetString(PyExc_OverflowError, + "regular expression code size limit exceeded"); + break; + } + } + + if (PyErr_Occurred()) { + Py_DECREF(self); + return NULL; + } + + Py_INCREF(pattern); + self->pattern = pattern; + + self->flags = flags; + + self->groups = groups; + + Py_XINCREF(groupindex); + self->groupindex = groupindex; + + Py_XINCREF(indexgroup); + self->indexgroup = indexgroup; + + self->weakreflist = NULL; + + if (!_validate(self)) { + Py_DECREF(self); + return NULL; + } + + return (PyObject*) self; +} + +/* -------------------------------------------------------------------- */ +/* Code validation */ + +/* To learn more about this code, have a look at the _compile() function in + Lib/sre_compile.py. The validation functions below checks the code array + for conformance with the code patterns generated there. + + The nice thing about the generated code is that it is position-independent: + all jumps are relative jumps forward. Also, jumps don't cross each other: + the target of a later jump is always earlier than the target of an earlier + jump. IOW, this is okay: + + J---------J-------T--------T + \ \_____/ / + \______________________/ + + but this is not: + + J---------J-------T--------T + \_________\_____/ / + \____________/ + + It also helps that SRE_CODE is always an unsigned type, either 2 bytes or 4 + bytes wide (the latter if Python is compiled for "wide" unicode support). +*/ + +/* Defining this one enables tracing of the validator */ +#undef VVERBOSE + +/* Trace macro for the validator */ +#if defined(VVERBOSE) +#define VTRACE(v) printf v +#else +#define VTRACE(v) +#endif + +/* Report failure */ +#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0) + +/* Extract opcode, argument, or skip count from code array */ +#define GET_OP \ + do { \ + VTRACE(("%p: ", code)); \ + if (code >= end) FAIL; \ + op = *code++; \ + VTRACE(("%lu (op)\n", (unsigned long)op)); \ + } while (0) +#define GET_ARG \ + do { \ + VTRACE(("%p= ", code)); \ + if (code >= end) FAIL; \ + arg = *code++; \ + VTRACE(("%lu (arg)\n", (unsigned long)arg)); \ + } while (0) +#define GET_SKIP_ADJ(adj) \ + do { \ + VTRACE(("%p= ", code)); \ + if (code >= end) FAIL; \ + skip = *code; \ + VTRACE(("%lu (skip to %p)\n", \ + (unsigned long)skip, code+skip)); \ + if (code+skip-adj < code || code+skip-adj > end)\ + FAIL; \ + code++; \ + } while (0) +#define GET_SKIP GET_SKIP_ADJ(0) + +static int +_validate_charset(SRE_CODE *code, SRE_CODE *end) +{ + /* Some variables are manipulated by the macros above */ + SRE_CODE op; + SRE_CODE arg; + SRE_CODE offset; + int i; + + while (code < end) { + GET_OP; + switch (op) { + + case SRE_OP_NEGATE: + break; + + case SRE_OP_LITERAL: + GET_ARG; + break; + + case SRE_OP_RANGE: + GET_ARG; + GET_ARG; + break; + + case SRE_OP_CHARSET: + offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */ + if (code+offset < code || code+offset > end) + FAIL; + code += offset; + break; + + case SRE_OP_BIGCHARSET: + GET_ARG; /* Number of blocks */ + offset = 256/sizeof(SRE_CODE); /* 256-byte table */ + if (code+offset < code || code+offset > end) + FAIL; + /* Make sure that each byte points to a valid block */ + for (i = 0; i < 256; i++) { + if (((unsigned char *)code)[i] >= arg) + FAIL; + } + code += offset; + offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */ + if (code+offset < code || code+offset > end) + FAIL; + code += offset; + break; + + case SRE_OP_CATEGORY: + GET_ARG; + switch (arg) { + case SRE_CATEGORY_DIGIT: + case SRE_CATEGORY_NOT_DIGIT: + case SRE_CATEGORY_SPACE: + case SRE_CATEGORY_NOT_SPACE: + case SRE_CATEGORY_WORD: + case SRE_CATEGORY_NOT_WORD: + case SRE_CATEGORY_LINEBREAK: + case SRE_CATEGORY_NOT_LINEBREAK: + case SRE_CATEGORY_LOC_WORD: + case SRE_CATEGORY_LOC_NOT_WORD: + case SRE_CATEGORY_UNI_DIGIT: + case SRE_CATEGORY_UNI_NOT_DIGIT: + case SRE_CATEGORY_UNI_SPACE: + case SRE_CATEGORY_UNI_NOT_SPACE: + case SRE_CATEGORY_UNI_WORD: + case SRE_CATEGORY_UNI_NOT_WORD: + case SRE_CATEGORY_UNI_LINEBREAK: + case SRE_CATEGORY_UNI_NOT_LINEBREAK: + break; + default: + FAIL; + } + break; + + default: + FAIL; + + } + } + + return 1; +} + +static int +_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) +{ + /* Some variables are manipulated by the macros above */ + SRE_CODE op; + SRE_CODE arg; + SRE_CODE skip; + + VTRACE(("code=%p, end=%p\n", code, end)); + + if (code > end) + FAIL; + + while (code < end) { + GET_OP; + switch (op) { + + case SRE_OP_MARK: + /* We don't check whether marks are properly nested; the + sre_match() code is robust even if they don't, and the worst + you can get is nonsensical match results. */ + GET_ARG; + if (arg > 2*groups+1) { + VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups)); + FAIL; + } + break; + + case SRE_OP_LITERAL: + case SRE_OP_NOT_LITERAL: + case SRE_OP_LITERAL_IGNORE: + case SRE_OP_NOT_LITERAL_IGNORE: + GET_ARG; + /* The arg is just a character, nothing to check */ + break; + + case SRE_OP_SUCCESS: + case SRE_OP_FAILURE: + /* Nothing to check; these normally end the matching process */ + break; + + case SRE_OP_AT: + GET_ARG; + switch (arg) { + case SRE_AT_BEGINNING: + case SRE_AT_BEGINNING_STRING: + case SRE_AT_BEGINNING_LINE: + case SRE_AT_END: + case SRE_AT_END_LINE: + case SRE_AT_END_STRING: + case SRE_AT_BOUNDARY: + case SRE_AT_NON_BOUNDARY: + case SRE_AT_LOC_BOUNDARY: + case SRE_AT_LOC_NON_BOUNDARY: + case SRE_AT_UNI_BOUNDARY: + case SRE_AT_UNI_NON_BOUNDARY: + break; + default: + FAIL; + } + break; + + case SRE_OP_ANY: + case SRE_OP_ANY_ALL: + /* These have no operands */ + break; + + case SRE_OP_IN: + case SRE_OP_IN_IGNORE: + GET_SKIP; + /* Stop 1 before the end; we check the FAILURE below */ + if (!_validate_charset(code, code+skip-2)) + FAIL; + if (code[skip-2] != SRE_OP_FAILURE) + FAIL; + code += skip-1; + break; + + case SRE_OP_INFO: + { + /* A minimal info field is + <1=skip> <2=flags> <3=min> <4=max>; + If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags, + more follows. */ + SRE_CODE flags, i; + SRE_CODE *newcode; + GET_SKIP; + newcode = code+skip-1; + GET_ARG; flags = arg; + GET_ARG; /* min */ + GET_ARG; /* max */ + /* Check that only valid flags are present */ + if ((flags & ~(SRE_INFO_PREFIX | + SRE_INFO_LITERAL | + SRE_INFO_CHARSET)) != 0) + FAIL; + /* PREFIX and CHARSET are mutually exclusive */ + if ((flags & SRE_INFO_PREFIX) && + (flags & SRE_INFO_CHARSET)) + FAIL; + /* LITERAL implies PREFIX */ + if ((flags & SRE_INFO_LITERAL) && + !(flags & SRE_INFO_PREFIX)) + FAIL; + /* Validate the prefix */ + if (flags & SRE_INFO_PREFIX) { + SRE_CODE prefix_len; + GET_ARG; prefix_len = arg; + GET_ARG; /* prefix skip */ + /* Here comes the prefix string */ + if (code+prefix_len < code || code+prefix_len > newcode) + FAIL; + code += prefix_len; + /* And here comes the overlap table */ + if (code+prefix_len < code || code+prefix_len > newcode) + FAIL; + /* Each overlap value should be < prefix_len */ + for (i = 0; i < prefix_len; i++) { + if (code[i] >= prefix_len) + FAIL; + } + code += prefix_len; + } + /* Validate the charset */ + if (flags & SRE_INFO_CHARSET) { + if (!_validate_charset(code, newcode-1)) + FAIL; + if (newcode[-1] != SRE_OP_FAILURE) + FAIL; + code = newcode; + } + else if (code != newcode) { + VTRACE(("code=%p, newcode=%p\n", code, newcode)); + FAIL; + } + } + break; + + case SRE_OP_BRANCH: + { + SRE_CODE *target = NULL; + for (;;) { + GET_SKIP; + if (skip == 0) + break; + /* Stop 2 before the end; we check the JUMP below */ + if (!_validate_inner(code, code+skip-3, groups)) + FAIL; + code += skip-3; + /* Check that it ends with a JUMP, and that each JUMP + has the same target */ + GET_OP; + if (op != SRE_OP_JUMP) + FAIL; + GET_SKIP; + if (target == NULL) + target = code+skip-1; + else if (code+skip-1 != target) + FAIL; + } + } + break; + + case SRE_OP_REPEAT_ONE: + case SRE_OP_MIN_REPEAT_ONE: + { + SRE_CODE min, max; + GET_SKIP; + GET_ARG; min = arg; + GET_ARG; max = arg; + if (min > max) + FAIL; +#ifdef Py_UNICODE_WIDE + if (max > 65535) + FAIL; +#endif + if (!_validate_inner(code, code+skip-4, groups)) + FAIL; + code += skip-4; + GET_OP; + if (op != SRE_OP_SUCCESS) + FAIL; + } + break; + + case SRE_OP_REPEAT: + { + SRE_CODE min, max; + GET_SKIP; + GET_ARG; min = arg; + GET_ARG; max = arg; + if (min > max) + FAIL; +#ifdef Py_UNICODE_WIDE + if (max > 65535) + FAIL; +#endif + if (!_validate_inner(code, code+skip-3, groups)) + FAIL; + code += skip-3; + GET_OP; + if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL) + FAIL; + } + break; + + case SRE_OP_GROUPREF: + case SRE_OP_GROUPREF_IGNORE: + GET_ARG; + if (arg >= groups) + FAIL; + break; + + case SRE_OP_GROUPREF_EXISTS: + /* The regex syntax for this is: '(?(group)then|else)', where + 'group' is either an integer group number or a group name, + 'then' and 'else' are sub-regexes, and 'else' is optional. */ + GET_ARG; + if (arg >= groups) + FAIL; + GET_SKIP_ADJ(1); + code--; /* The skip is relative to the first arg! */ + /* There are two possibilities here: if there is both a 'then' + part and an 'else' part, the generated code looks like: + + GROUPREF_EXISTS + + + ...then part... + JUMP + + ( jumps here) + ...else part... + ( jumps here) + + If there is only a 'then' part, it looks like: + + GROUPREF_EXISTS + + + ...then part... + ( jumps here) + + There is no direct way to decide which it is, and we don't want + to allow arbitrary jumps anywhere in the code; so we just look + for a JUMP opcode preceding our skip target. + */ + if (skip >= 3 && code+skip-3 >= code && + code[skip-3] == SRE_OP_JUMP) + { + VTRACE(("both then and else parts present\n")); + if (!_validate_inner(code+1, code+skip-3, groups)) + FAIL; + code += skip-2; /* Position after JUMP, at */ + GET_SKIP; + if (!_validate_inner(code, code+skip-1, groups)) + FAIL; + code += skip-1; + } + else { + VTRACE(("only a then part present\n")); + if (!_validate_inner(code+1, code+skip-1, groups)) + FAIL; + code += skip-1; + } + break; + + case SRE_OP_ASSERT: + case SRE_OP_ASSERT_NOT: + GET_SKIP; + GET_ARG; /* 0 for lookahead, width for lookbehind */ + code--; /* Back up over arg to simplify math below */ + if (arg & 0x80000000) + FAIL; /* Width too large */ + /* Stop 1 before the end; we check the SUCCESS below */ + if (!_validate_inner(code+1, code+skip-2, groups)) + FAIL; + code += skip-2; + GET_OP; + if (op != SRE_OP_SUCCESS) + FAIL; + break; + + default: + FAIL; + + } + } + + VTRACE(("okay\n")); + return 1; +} + +static int +_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) +{ + if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS) + FAIL; + if (groups == 0) /* fix for simplejson */ + groups = 100; /* 100 groups should always be safe */ + return _validate_inner(code, end-1, groups); +} + +static int +_validate(PatternObject *self) +{ + if (!_validate_outer(self->code, self->code+self->codesize, self->groups)) + { + PyErr_SetString(PyExc_RuntimeError, "invalid SRE code"); + return 0; + } + else + VTRACE(("Success!\n")); + return 1; +} + +/* -------------------------------------------------------------------- */ +/* match methods */ + +static void +match_dealloc(MatchObject* self) +{ + Py_XDECREF(self->regs); + Py_XDECREF(self->string); + Py_DECREF(self->pattern); + PyObject_DEL(self); +} + +static PyObject* +match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def) +{ + if (index < 0 || index >= self->groups) { + /* raise IndexError if we were given a bad group number */ + PyErr_SetString( + PyExc_IndexError, + "no such group" + ); + return NULL; + } + + index *= 2; + + if (self->string == Py_None || self->mark[index] < 0) { + /* return default value if the string or group is undefined */ + Py_INCREF(def); + return def; + } + + return PySequence_GetSlice( + self->string, self->mark[index], self->mark[index+1] + ); +} + +static Py_ssize_t +match_getindex(MatchObject* self, PyObject* index) +{ + Py_ssize_t i; + + if (PyInt_Check(index)) + return PyInt_AsSsize_t(index); + + i = -1; + + if (self->pattern->groupindex) { + index = PyObject_GetItem(self->pattern->groupindex, index); + if (index) { + if (PyInt_Check(index) || PyLong_Check(index)) + i = PyInt_AsSsize_t(index); + Py_DECREF(index); + } else + PyErr_Clear(); + } + + return i; +} + +static PyObject* +match_getslice(MatchObject* self, PyObject* index, PyObject* def) +{ + return match_getslice_by_index(self, match_getindex(self, index), def); +} + +static PyObject* +match_expand(MatchObject* self, PyObject* ptemplate) +{ + /* delegate to Python code */ + return call( + SRE_PY_MODULE, "_expand", + PyTuple_Pack(3, self->pattern, self, ptemplate) + ); +} + +static PyObject* +match_group(MatchObject* self, PyObject* args) +{ + PyObject* result; + Py_ssize_t i, size; + + size = PyTuple_GET_SIZE(args); + + switch (size) { + case 0: + result = match_getslice(self, Py_False, Py_None); + break; + case 1: + result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None); + break; + default: + /* fetch multiple items */ + result = PyTuple_New(size); + if (!result) + return NULL; + for (i = 0; i < size; i++) { + PyObject* item = match_getslice( + self, PyTuple_GET_ITEM(args, i), Py_None + ); + if (!item) { + Py_DECREF(result); + return NULL; + } + PyTuple_SET_ITEM(result, i, item); + } + break; + } + return result; +} + +static PyObject* +match_groups(MatchObject* self, PyObject* args, PyObject* kw) +{ + PyObject* result; + Py_ssize_t index; + + PyObject* def = Py_None; + static char* kwlist[] = { "default", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def)) + return NULL; + + result = PyTuple_New(self->groups-1); + if (!result) + return NULL; + + for (index = 1; index < self->groups; index++) { + PyObject* item; + item = match_getslice_by_index(self, index, def); + if (!item) { + Py_DECREF(result); + return NULL; + } + PyTuple_SET_ITEM(result, index-1, item); + } + + return result; +} + +static PyObject* +match_groupdict(MatchObject* self, PyObject* args, PyObject* kw) +{ + PyObject* result; + PyObject* keys; + Py_ssize_t index; + + PyObject* def = Py_None; + static char* kwlist[] = { "default", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def)) + return NULL; + + result = PyDict_New(); + if (!result || !self->pattern->groupindex) + return result; + + keys = PyMapping_Keys(self->pattern->groupindex); + if (!keys) + goto failed; + + for (index = 0; index < PyList_GET_SIZE(keys); index++) { + int status; + PyObject* key; + PyObject* value; + key = PyList_GET_ITEM(keys, index); + if (!key) + goto failed; + value = match_getslice(self, key, def); + if (!value) { + Py_DECREF(key); + goto failed; + } + status = PyDict_SetItem(result, key, value); + Py_DECREF(value); + if (status < 0) + goto failed; + } + + Py_DECREF(keys); + + return result; + +failed: + Py_XDECREF(keys); + Py_DECREF(result); + return NULL; +} + +static PyObject* +match_start(MatchObject* self, PyObject* args) +{ + Py_ssize_t index; + + PyObject* index_ = Py_False; /* zero */ + if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_)) + return NULL; + + index = match_getindex(self, index_); + + if (index < 0 || index >= self->groups) { + PyErr_SetString( + PyExc_IndexError, + "no such group" + ); + return NULL; + } + + /* mark is -1 if group is undefined */ + return Py_BuildValue("i", self->mark[index*2]); +} + +static PyObject* +match_end(MatchObject* self, PyObject* args) +{ + Py_ssize_t index; + + PyObject* index_ = Py_False; /* zero */ + if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_)) + return NULL; + + index = match_getindex(self, index_); + + if (index < 0 || index >= self->groups) { + PyErr_SetString( + PyExc_IndexError, + "no such group" + ); + return NULL; + } + + /* mark is -1 if group is undefined */ + return Py_BuildValue("i", self->mark[index*2+1]); +} + +LOCAL(PyObject*) +_pair(Py_ssize_t i1, Py_ssize_t i2) +{ + PyObject* pair; + PyObject* item; + + pair = PyTuple_New(2); + if (!pair) + return NULL; + + item = PyInt_FromSsize_t(i1); + if (!item) + goto error; + PyTuple_SET_ITEM(pair, 0, item); + + item = PyInt_FromSsize_t(i2); + if (!item) + goto error; + PyTuple_SET_ITEM(pair, 1, item); + + return pair; + + error: + Py_DECREF(pair); + return NULL; +} + +static PyObject* +match_span(MatchObject* self, PyObject* args) +{ + Py_ssize_t index; + + PyObject* index_ = Py_False; /* zero */ + if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_)) + return NULL; + + index = match_getindex(self, index_); + + if (index < 0 || index >= self->groups) { + PyErr_SetString( + PyExc_IndexError, + "no such group" + ); + return NULL; + } + + /* marks are -1 if group is undefined */ + return _pair(self->mark[index*2], self->mark[index*2+1]); +} + +static PyObject* +match_regs(MatchObject* self) +{ + PyObject* regs; + PyObject* item; + Py_ssize_t index; + + regs = PyTuple_New(self->groups); + if (!regs) + return NULL; + + for (index = 0; index < self->groups; index++) { + item = _pair(self->mark[index*2], self->mark[index*2+1]); + if (!item) { + Py_DECREF(regs); + return NULL; + } + PyTuple_SET_ITEM(regs, index, item); + } + + Py_INCREF(regs); + self->regs = regs; + + return regs; +} + +static PyObject* +match_copy(MatchObject* self, PyObject *unused) +{ +#ifdef USE_BUILTIN_COPY + MatchObject* copy; + Py_ssize_t slots, offset; + + slots = 2 * (self->pattern->groups+1); + + copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots); + if (!copy) + return NULL; + + /* this value a constant, but any compiler should be able to + figure that out all by itself */ + offset = offsetof(MatchObject, string); + + Py_XINCREF(self->pattern); + Py_XINCREF(self->string); + Py_XINCREF(self->regs); + + memcpy((char*) copy + offset, (char*) self + offset, + sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset); + + return (PyObject*) copy; +#else + PyErr_SetString(PyExc_TypeError, "cannot copy this match object"); + return NULL; +#endif +} + +static PyObject* +match_deepcopy(MatchObject* self, PyObject* memo) +{ +#ifdef USE_BUILTIN_COPY + MatchObject* copy; + + copy = (MatchObject*) match_copy(self); + if (!copy) + return NULL; + + if (!deepcopy((PyObject**) ©->pattern, memo) || + !deepcopy(©->string, memo) || + !deepcopy(©->regs, memo)) { + Py_DECREF(copy); + return NULL; + } + +#else + PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object"); + return NULL; +#endif +} + +static struct PyMethodDef match_methods[] = { + {"group", (PyCFunction) match_group, METH_VARARGS}, + {"start", (PyCFunction) match_start, METH_VARARGS}, + {"end", (PyCFunction) match_end, METH_VARARGS}, + {"span", (PyCFunction) match_span, METH_VARARGS}, + {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS}, + {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS}, + {"expand", (PyCFunction) match_expand, METH_O}, + {"__copy__", (PyCFunction) match_copy, METH_NOARGS}, + {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O}, + {NULL, NULL} +}; + +static PyObject * +match_lastindex_get(MatchObject *self) +{ + if (self->lastindex >= 0) + return Py_BuildValue("i", self->lastindex); + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject * +match_lastgroup_get(MatchObject *self) +{ + if (self->pattern->indexgroup && self->lastindex >= 0) { + PyObject* result = PySequence_GetItem( + self->pattern->indexgroup, self->lastindex + ); + if (result) + return result; + PyErr_Clear(); + } + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject * +match_regs_get(MatchObject *self) +{ + if (self->regs) { + Py_INCREF(self->regs); + return self->regs; + } else + return match_regs(self); +} + +static PyGetSetDef match_getset[] = { + {"lastindex", (getter)match_lastindex_get, (setter)NULL}, + {"lastgroup", (getter)match_lastgroup_get, (setter)NULL}, + {"regs", (getter)match_regs_get, (setter)NULL}, + {NULL} +}; + +#define MATCH_OFF(x) offsetof(MatchObject, x) +static PyMemberDef match_members[] = { + {"string", T_OBJECT, MATCH_OFF(string), READONLY}, + {"re", T_OBJECT, MATCH_OFF(pattern), READONLY}, + {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY}, + {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY}, + {NULL} +}; + + +/* FIXME: implement setattr("string", None) as a special case (to + detach the associated string, if any */ + +static PyTypeObject Match_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_" SRE_MODULE ".SRE_Match", + sizeof(MatchObject), sizeof(Py_ssize_t), + (destructor)match_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + match_methods, /* tp_methods */ + match_members, /* tp_members */ + match_getset, /* tp_getset */ +}; + +static PyObject* +pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status) +{ + /* create match object (from state object) */ + + MatchObject* match; + Py_ssize_t i, j; + char* base; + int n; + + if (status > 0) { + + /* create match object (with room for extra group marks) */ + /* coverity[ampersand_in_size] */ + match = PyObject_NEW_VAR(MatchObject, &Match_Type, + 2*(pattern->groups+1)); + if (!match) + return NULL; + + Py_INCREF(pattern); + match->pattern = pattern; + + Py_INCREF(state->string); + match->string = state->string; + + match->regs = NULL; + match->groups = pattern->groups+1; + + /* fill in group slices */ + + base = (char*) state->beginning; + n = state->charsize; + + match->mark[0] = ((char*) state->start - base) / n; + match->mark[1] = ((char*) state->ptr - base) / n; + + for (i = j = 0; i < pattern->groups; i++, j+=2) + if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) { + match->mark[j+2] = ((char*) state->mark[j] - base) / n; + match->mark[j+3] = ((char*) state->mark[j+1] - base) / n; + } else + match->mark[j+2] = match->mark[j+3] = -1; /* undefined */ + + match->pos = state->pos; + match->endpos = state->endpos; + + match->lastindex = state->lastindex; + + return (PyObject*) match; + + } else if (status == 0) { + + /* no match */ + Py_INCREF(Py_None); + return Py_None; + + } + + /* internal error */ + pattern_error(status); + return NULL; +} + + +/* -------------------------------------------------------------------- */ +/* scanner methods (experimental) */ + +static void +scanner_dealloc(ScannerObject* self) +{ + state_fini(&self->state); + Py_XDECREF(self->pattern); + PyObject_DEL(self); +} + +static PyObject* +scanner_match(ScannerObject* self, PyObject *unused) +{ + SRE_STATE* state = &self->state; + PyObject* match; + int status; + + state_reset(state); + + state->ptr = state->start; + + if (state->charsize == 1) { + status = sre_match(state, PatternObject_GetCode(self->pattern)); + } else { +#if defined(HAVE_UNICODE) + status = sre_umatch(state, PatternObject_GetCode(self->pattern)); +#endif + } + if (PyErr_Occurred()) + return NULL; + + match = pattern_new_match((PatternObject*) self->pattern, + state, status); + + if (status == 0 || state->ptr == state->start) + state->start = (void*) ((char*) state->ptr + state->charsize); + else + state->start = state->ptr; + + return match; +} + + +static PyObject* +scanner_search(ScannerObject* self, PyObject *unused) +{ + SRE_STATE* state = &self->state; + PyObject* match; + int status; + + state_reset(state); + + state->ptr = state->start; + + if (state->charsize == 1) { + status = sre_search(state, PatternObject_GetCode(self->pattern)); + } else { +#if defined(HAVE_UNICODE) + status = sre_usearch(state, PatternObject_GetCode(self->pattern)); +#endif + } + if (PyErr_Occurred()) + return NULL; + + match = pattern_new_match((PatternObject*) self->pattern, + state, status); + + if (status == 0 || state->ptr == state->start) + state->start = (void*) ((char*) state->ptr + state->charsize); + else + state->start = state->ptr; + + return match; +} + +static PyMethodDef scanner_methods[] = { + {"match", (PyCFunction) scanner_match, METH_NOARGS}, + {"search", (PyCFunction) scanner_search, METH_NOARGS}, + {NULL, NULL} +}; + +#define SCAN_OFF(x) offsetof(ScannerObject, x) +static PyMemberDef scanner_members[] = { + {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY}, + {NULL} /* Sentinel */ +}; + +statichere PyTypeObject Scanner_Type = { + PyObject_HEAD_INIT(NULL) + 0, "_" SRE_MODULE ".SRE_Scanner", + sizeof(ScannerObject), 0, + (destructor)scanner_dealloc, /*tp_dealloc*/ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + scanner_methods, /* tp_methods */ + scanner_members, /* tp_members */ + 0, /* tp_getset */ +}; + +static PyObject* +pattern_scanner(PatternObject* pattern, PyObject* args) +{ + /* create search state object */ + + ScannerObject* self; + + PyObject* string; + Py_ssize_t start = 0; + Py_ssize_t end = PY_SSIZE_T_MAX; + if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end)) + return NULL; + + /* create scanner object */ + self = PyObject_NEW(ScannerObject, &Scanner_Type); + if (!self) + return NULL; + self->pattern = NULL; + + string = state_init(&self->state, pattern, string, start, end); + if (!string) { + Py_DECREF(self); + return NULL; + } + + Py_INCREF(pattern); + self->pattern = (PyObject*) pattern; + + return (PyObject*) self; +} + +static PyMethodDef _functions[] = { + {"compile", _compile, METH_VARARGS}, + {"getcodesize", sre_codesize, METH_NOARGS}, + {"getlower", sre_getlower, METH_VARARGS}, + {NULL, NULL} +}; + +#if PY_VERSION_HEX < 0x02030000 +DL_EXPORT(void) init_sre(void) +#else +PyMODINIT_FUNC init_sre(void) +#endif +{ + PyObject* m; + PyObject* d; + PyObject* x; + + /* Patch object types */ + if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) || + PyType_Ready(&Scanner_Type)) + return; + + m = Py_InitModule("_" SRE_MODULE, _functions); + if (m == NULL) + return; + d = PyModule_GetDict(m); + + x = PyInt_FromLong(SRE_MAGIC); + if (x) { + PyDict_SetItemString(d, "MAGIC", x); + Py_DECREF(x); + } + + x = PyInt_FromLong(sizeof(SRE_CODE)); + if (x) { + PyDict_SetItemString(d, "CODESIZE", x); + Py_DECREF(x); + } + + x = PyString_FromString(copyright); + if (x) { + PyDict_SetItemString(d, "copyright", x); + Py_DECREF(x); + } +} + +#endif /* !defined(SRE_RECURSIVE) */ + +/* vim:ts=4:sw=4:et +*/ diff --git a/AppPkg/Applications/Python/PyMod-2.7.2/Modules/zlib/zutil.h b/AppPkg/Applications/Python/PyMod-2.7.2/Modules/zlib/zutil.h new file mode 100644 index 0000000000..4cee6218b8 --- /dev/null +++ b/AppPkg/Applications/Python/PyMod-2.7.2/Modules/zlib/zutil.h @@ -0,0 +1,269 @@ +/* zutil.h -- internal interface and configuration of the compression library + * Copyright (C) 1995-2005 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id$ */ + +#ifndef ZUTIL_H +#define ZUTIL_H + +#define ZLIB_INTERNAL +#include "zlib.h" + +#ifdef STDC +# ifndef _WIN32_WCE +# include +# endif +# include +# include +#endif +#ifdef NO_ERRNO_H +# ifdef _WIN32_WCE + /* The Microsoft C Run-Time Library for Windows CE doesn't have + * errno. We define it as a global variable to simplify porting. + * Its value is always 0 and should not be used. We rename it to + * avoid conflict with other libraries that use the same workaround. + */ +# define errno z_errno +# endif + extern int errno; +#else +# ifndef _WIN32_WCE +# include +# endif +#endif + +#ifndef local +# define local static +#endif +/* compile with -Dlocal if your debugger can't find static symbols */ + +typedef unsigned char uch; +typedef uch FAR uchf; +typedef unsigned short ush; +typedef ush FAR ushf; +typedef unsigned long ulg; + +extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ +/* (size given to avoid silly warnings with Visual C++) */ + +#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)] + +#define ERR_RETURN(strm,err) \ + return (strm->msg = (char*)ERR_MSG(err), (err)) +/* To be used only when the state is known to be valid */ + + /* common constants */ + +#ifndef DEF_WBITS +# define DEF_WBITS MAX_WBITS +#endif +/* default windowBits for decompression. MAX_WBITS is for compression only */ + +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif +/* default memLevel */ + +#define STORED_BLOCK 0 +#define STATIC_TREES 1 +#define DYN_TREES 2 +/* The three kinds of block type */ + +#define MIN_MATCH 3 +#define MAX_MATCH 258 +/* The minimum and maximum match lengths */ + +#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ + + /* target dependencies */ + +#if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32)) +# define OS_CODE 0x00 +# if defined(__TURBOC__) || defined(__BORLANDC__) +# if(__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__)) + /* Allow compilation with ANSI keywords only enabled */ + void _Cdecl farfree( void *block ); + void *_Cdecl farmalloc( unsigned long nbytes ); +# else +# include +# endif +# else /* MSC or DJGPP */ +# include +# endif +#endif + +#ifdef AMIGA +# define OS_CODE 0x01 +#endif + +#if defined(VAXC) || defined(VMS) +# define OS_CODE 0x02 +# define F_OPEN(name, mode) \ + fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512") +#endif + +#if defined(ATARI) || defined(atarist) +# define OS_CODE 0x05 +#endif + +#ifdef OS2 +# define OS_CODE 0x06 +# ifdef M_I86 + #include +# endif +#endif + +#if defined(MACOS) || defined(TARGET_OS_MAC) +# define OS_CODE 0x07 +# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os +# include /* for fdopen */ +# else +# ifndef fdopen +# define fdopen(fd,mode) NULL /* No fdopen() */ +# endif +# endif +#endif + +#ifdef TOPS20 +# define OS_CODE 0x0a +#endif + +#ifdef WIN32 +# ifndef __CYGWIN__ /* Cygwin is Unix, not Win32 */ +# define OS_CODE 0x0b +# endif +#endif + +#ifdef __50SERIES /* Prime/PRIMOS */ +# define OS_CODE 0x0f +#endif + +#if defined(_BEOS_) || defined(RISCOS) +# define fdopen(fd,mode) NULL /* No fdopen() */ +#endif + +#if (defined(_MSC_VER) && (_MSC_VER > 600)) +# if defined(_WIN32_WCE) || defined(_EFI_STDLIB) +# define fdopen(fd,mode) NULL /* No fdopen() */ +# ifndef _PTRDIFF_T_DEFINED + typedef int ptrdiff_t; +# define _PTRDIFF_T_DEFINED +# endif +# else +# define fdopen(fd,type) _fdopen(fd,type) +# endif +#endif + + /* common defaults */ + +#ifndef OS_CODE +# define OS_CODE 0x03 /* assume Unix */ +#endif + +#ifndef F_OPEN +# define F_OPEN(name, mode) fopen((name), (mode)) +#endif + + /* functions */ + +#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif +#if defined(__CYGWIN__) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif +#ifndef HAVE_VSNPRINTF +# ifdef MSDOS + /* vsnprintf may exist on some MS-DOS compilers (DJGPP?), + but for now we just assume it doesn't. */ +# define NO_vsnprintf +# endif +# ifdef __TURBOC__ +# define NO_vsnprintf +# endif +# ifdef WIN32 + /* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */ +# if !defined(vsnprintf) && !defined(NO_vsnprintf) +# define vsnprintf _vsnprintf +# endif +# endif +# ifdef __SASC +# define NO_vsnprintf +# endif +#endif +#ifdef VMS +# define NO_vsnprintf +#endif + +#if defined(pyr) +# define NO_MEMCPY +#endif +#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__) + /* Use our own functions for small and medium model with MSC <= 5.0. + * You may have to use the same strategy for Borland C (untested). + * The __SC__ check is for Symantec. + */ +# define NO_MEMCPY +#endif +#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY) +# define HAVE_MEMCPY +#endif +#ifdef HAVE_MEMCPY +# ifdef SMALL_MEDIUM /* MSDOS small or medium model */ +# define zmemcpy _fmemcpy +# define zmemcmp _fmemcmp +# define zmemzero(dest, len) _fmemset(dest, 0, len) +# else +# define zmemcpy memcpy +# define zmemcmp memcmp +# define zmemzero(dest, len) memset(dest, 0, len) +# endif +#else + extern void zmemcpy OF((Bytef* dest, const Bytef* source, uInt len)); + extern int zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len)); + extern void zmemzero OF((Bytef* dest, uInt len)); +#endif + +/* Diagnostic functions */ +#ifdef DEBUG +# include + extern int z_verbose; + extern void z_error OF((char *m)); +# define Assert(cond,msg) {if(!(cond)) z_error(msg);} +# define Trace(x) {if (z_verbose>=0) fprintf x ;} +# define Tracev(x) {if (z_verbose>0) fprintf x ;} +# define Tracevv(x) {if (z_verbose>1) fprintf x ;} +# define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;} +# define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;} +#else +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + + +voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size)); +void zcfree OF((voidpf opaque, voidpf ptr)); + +#define ZALLOC(strm, items, size) \ + (*((strm)->zalloc))((strm)->opaque, (items), (size)) +#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) +#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} + +#endif /* ZUTIL_H */ diff --git a/AppPkg/Applications/Python/PyMod-2.7.2/Objects/longobject.c b/AppPkg/Applications/Python/PyMod-2.7.2/Objects/longobject.c new file mode 100644 index 0000000000..1fbc675f13 --- /dev/null +++ b/AppPkg/Applications/Python/PyMod-2.7.2/Objects/longobject.c @@ -0,0 +1,4370 @@ +/* Long (arbitrary precision) integer object implementation */ + +/* XXX The functional organization of this file is terrible */ + +#include "Python.h" +#include "longintrepr.h" +#include "structseq.h" + +#include +#include +#include + +/* For long multiplication, use the O(N**2) school algorithm unless + * both operands contain more than KARATSUBA_CUTOFF digits (this + * being an internal Python long digit, in base PyLong_BASE). + */ +#define KARATSUBA_CUTOFF 70 +#define KARATSUBA_SQUARE_CUTOFF (2 * KARATSUBA_CUTOFF) + +/* For exponentiation, use the binary left-to-right algorithm + * unless the exponent contains more than FIVEARY_CUTOFF digits. + * In that case, do 5 bits at a time. The potential drawback is that + * a table of 2**5 intermediate results is computed. + */ +#define FIVEARY_CUTOFF 8 + +#ifndef ABS + #define ABS(x) ((x) < 0 ? -(x) : (x)) +#endif + +#ifndef MAX + #define MAX(x, y) ((x) < (y) ? (y) : (x)) +#endif + +#ifndef MIN + #define MIN(x, y) ((x) > (y) ? (y) : (x)) +#endif + +#define SIGCHECK(PyTryBlock) \ + do { \ + if (--_Py_Ticker < 0) { \ + _Py_Ticker = _Py_CheckInterval; \ + if (PyErr_CheckSignals()) PyTryBlock \ + } \ + } while(0) + +/* Normalize (remove leading zeros from) a long int object. + Doesn't attempt to free the storage--in most cases, due to the nature + of the algorithms used, this could save at most be one word anyway. */ + +static PyLongObject * +long_normalize(register PyLongObject *v) +{ + Py_ssize_t j = ABS(Py_SIZE(v)); + Py_ssize_t i = j; + + while (i > 0 && v->ob_digit[i-1] == 0) + --i; + if (i != j) + Py_SIZE(v) = (Py_SIZE(v) < 0) ? -(i) : i; + return v; +} + +/* Allocate a new long int object with size digits. + Return NULL and set exception if we run out of memory. */ + +#define MAX_LONG_DIGITS \ + ((PY_SSIZE_T_MAX - offsetof(PyLongObject, ob_digit))/sizeof(digit)) + +PyLongObject * +_PyLong_New(Py_ssize_t size) +{ + if (size > (Py_ssize_t)MAX_LONG_DIGITS) { + PyErr_SetString(PyExc_OverflowError, + "too many digits in integer"); + return NULL; + } + /* coverity[ampersand_in_size] */ + /* XXX(nnorwitz): PyObject_NEW_VAR / _PyObject_VAR_SIZE need to detect + overflow */ + return PyObject_NEW_VAR(PyLongObject, &PyLong_Type, size); +} + +PyObject * +_PyLong_Copy(PyLongObject *src) +{ + PyLongObject *result; + Py_ssize_t i; + + assert(src != NULL); + i = src->ob_size; + if (i < 0) + i = -(i); + result = _PyLong_New(i); + if (result != NULL) { + result->ob_size = src->ob_size; + while (--i >= 0) + result->ob_digit[i] = src->ob_digit[i]; + } + return (PyObject *)result; +} + +/* Create a new long int object from a C long int */ + +PyObject * +PyLong_FromLong(long ival) +{ + PyLongObject *v; + unsigned long abs_ival; + unsigned long t; /* unsigned so >> doesn't propagate sign bit */ + int ndigits = 0; + int negative = 0; + + if (ival < 0) { + /* if LONG_MIN == -LONG_MAX-1 (true on most platforms) then + ANSI C says that the result of -ival is undefined when ival + == LONG_MIN. Hence the following workaround. */ + abs_ival = (unsigned long)(-1-ival) + 1; + negative = 1; + } + else { + abs_ival = (unsigned long)ival; + } + + /* Count the number of Python digits. + We used to pick 5 ("big enough for anything"), but that's a + waste of time and space given that 5*15 = 75 bits are rarely + needed. */ + t = abs_ival; + while (t) { + ++ndigits; + t >>= PyLong_SHIFT; + } + v = _PyLong_New(ndigits); + if (v != NULL) { + digit *p = v->ob_digit; + v->ob_size = negative ? -ndigits : ndigits; + t = abs_ival; + while (t) { + *p++ = (digit)(t & PyLong_MASK); + t >>= PyLong_SHIFT; + } + } + return (PyObject *)v; +} + +/* Create a new long int object from a C unsigned long int */ + +PyObject * +PyLong_FromUnsignedLong(unsigned long ival) +{ + PyLongObject *v; + unsigned long t; + int ndigits = 0; + + /* Count the number of Python digits. */ + t = (unsigned long)ival; + while (t) { + ++ndigits; + t >>= PyLong_SHIFT; + } + v = _PyLong_New(ndigits); + if (v != NULL) { + digit *p = v->ob_digit; + Py_SIZE(v) = ndigits; + while (ival) { + *p++ = (digit)(ival & PyLong_MASK); + ival >>= PyLong_SHIFT; + } + } + return (PyObject *)v; +} + +/* Create a new long int object from a C double */ + +PyObject * +PyLong_FromDouble(double dval) +{ + PyLongObject *v; + double frac; + int i, ndig, expo, neg; + neg = 0; + if (Py_IS_INFINITY(dval)) { + PyErr_SetString(PyExc_OverflowError, + "cannot convert float infinity to integer"); + return NULL; + } + if (Py_IS_NAN(dval)) { + PyErr_SetString(PyExc_ValueError, + "cannot convert float NaN to integer"); + return NULL; + } + if (dval < 0.0) { + neg = 1; + dval = -dval; + } + frac = frexp(dval, &expo); /* dval = frac*2**expo; 0.0 <= frac < 1.0 */ + if (expo <= 0) + return PyLong_FromLong(0L); + ndig = (expo-1) / PyLong_SHIFT + 1; /* Number of 'digits' in result */ + v = _PyLong_New(ndig); + if (v == NULL) + return NULL; + frac = ldexp(frac, (expo-1) % PyLong_SHIFT + 1); + for (i = ndig; --i >= 0; ) { + digit bits = (digit)frac; + v->ob_digit[i] = bits; + frac = frac - (double)bits; + frac = ldexp(frac, PyLong_SHIFT); + } + if (neg) + Py_SIZE(v) = -(Py_SIZE(v)); + return (PyObject *)v; +} + +/* Checking for overflow in PyLong_AsLong is a PITA since C doesn't define + * anything about what happens when a signed integer operation overflows, + * and some compilers think they're doing you a favor by being "clever" + * then. The bit pattern for the largest postive signed long is + * (unsigned long)LONG_MAX, and for the smallest negative signed long + * it is abs(LONG_MIN), which we could write -(unsigned long)LONG_MIN. + * However, some other compilers warn about applying unary minus to an + * unsigned operand. Hence the weird "0-". + */ +#define PY_ABS_LONG_MIN (0-(unsigned long)LONG_MIN) +#define PY_ABS_SSIZE_T_MIN (0-(size_t)PY_SSIZE_T_MIN) + +/* Get a C long int from a Python long or Python int object. + On overflow, returns -1 and sets *overflow to 1 or -1 depending + on the sign of the result. Otherwise *overflow is 0. + + For other errors (e.g., type error), returns -1 and sets an error + condition. +*/ + +long +PyLong_AsLongAndOverflow(PyObject *vv, int *overflow) +{ + /* This version by Tim Peters */ + register PyLongObject *v; + unsigned long x, prev; + long res; + Py_ssize_t i; + int sign; + int do_decref = 0; /* if nb_int was called */ + + *overflow = 0; + if (vv == NULL) { + PyErr_BadInternalCall(); + return -1; + } + + if(PyInt_Check(vv)) + return PyInt_AsLong(vv); + + if (!PyLong_Check(vv)) { + PyNumberMethods *nb; + nb = vv->ob_type->tp_as_number; + if (nb == NULL || nb->nb_int == NULL) { + PyErr_SetString(PyExc_TypeError, + "an integer is required"); + return -1; + } + vv = (*nb->nb_int) (vv); + if (vv == NULL) + return -1; + do_decref = 1; + if(PyInt_Check(vv)) { + res = PyInt_AsLong(vv); + goto exit; + } + if (!PyLong_Check(vv)) { + Py_DECREF(vv); + PyErr_SetString(PyExc_TypeError, + "nb_int should return int object"); + return -1; + } + } + + res = -1; + v = (PyLongObject *)vv; + i = Py_SIZE(v); + + switch (i) { + case -1: + res = -(sdigit)v->ob_digit[0]; + break; + case 0: + res = 0; + break; + case 1: + res = v->ob_digit[0]; + break; + default: + sign = 1; + x = 0; + if (i < 0) { + sign = -1; + i = -(i); + } + while (--i >= 0) { + prev = x; + x = (x << PyLong_SHIFT) + v->ob_digit[i]; + if ((x >> PyLong_SHIFT) != prev) { + *overflow = sign; + goto exit; + } + } + /* Haven't lost any bits, but casting to long requires extra + * care (see comment above). + */ + if (x <= (unsigned long)LONG_MAX) { + res = (long)x * sign; + } + else if (sign < 0 && x == PY_ABS_LONG_MIN) { + res = LONG_MIN; + } + else { + *overflow = sign; + /* res is already set to -1 */ + } + } + exit: + if (do_decref) { + Py_DECREF(vv); + } + return res; +} + +/* Get a C long int from a long int object. + Returns -1 and sets an error condition if overflow occurs. */ + +long +PyLong_AsLong(PyObject *obj) +{ + int overflow; + long result = PyLong_AsLongAndOverflow(obj, &overflow); + if (overflow) { + /* XXX: could be cute and give a different + message for overflow == -1 */ + PyErr_SetString(PyExc_OverflowError, + "Python int too large to convert to C long"); + } + return result; +} + +/* Get a Py_ssize_t from a long int object. + Returns -1 and sets an error condition if overflow occurs. */ + +Py_ssize_t +PyLong_AsSsize_t(PyObject *vv) { + register PyLongObject *v; + size_t x, prev; + Py_ssize_t i; + int sign; + + if (vv == NULL || !PyLong_Check(vv)) { + PyErr_BadInternalCall(); + return -1; + } + v = (PyLongObject *)vv; + i = v->ob_size; + sign = 1; + x = 0; + if (i < 0) { + sign = -1; + i = -(i); + } + while (--i >= 0) { + prev = x; + x = (x << PyLong_SHIFT) | v->ob_digit[i]; + if ((x >> PyLong_SHIFT) != prev) + goto overflow; + } + /* Haven't lost any bits, but casting to a signed type requires + * extra care (see comment above). + */ + if (x <= (size_t)PY_SSIZE_T_MAX) { + return (Py_ssize_t)x * sign; + } + else if (sign < 0 && x == PY_ABS_SSIZE_T_MIN) { + return PY_SSIZE_T_MIN; + } + /* else overflow */ + + overflow: + PyErr_SetString(PyExc_OverflowError, + "long int too large to convert to int"); + return -1; +} + +/* Get a C unsigned long int from a long int object. + Returns -1 and sets an error condition if overflow occurs. */ + +unsigned long +PyLong_AsUnsignedLong(PyObject *vv) +{ + register PyLongObject *v; + unsigned long x, prev; + Py_ssize_t i; + + if (vv == NULL || !PyLong_Check(vv)) { + if (vv != NULL && PyInt_Check(vv)) { + long val = PyInt_AsLong(vv); + if (val < 0) { + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value " + "to unsigned long"); + return (unsigned long) -1; + } + return val; + } + PyErr_BadInternalCall(); + return (unsigned long) -1; + } + v = (PyLongObject *)vv; + i = Py_SIZE(v); + x = 0; + if (i < 0) { + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to unsigned long"); + return (unsigned long) -1; + } + while (--i >= 0) { + prev = x; + x = (x << PyLong_SHIFT) | v->ob_digit[i]; + if ((x >> PyLong_SHIFT) != prev) { + PyErr_SetString(PyExc_OverflowError, + "long int too large to convert"); + return (unsigned long) -1; + } + } + return x; +} + +/* Get a C unsigned long int from a long int object, ignoring the high bits. + Returns -1 and sets an error condition if an error occurs. */ + +unsigned long +PyLong_AsUnsignedLongMask(PyObject *vv) +{ + register PyLongObject *v; + unsigned long x; + Py_ssize_t i; + int sign; + + if (vv == NULL || !PyLong_Check(vv)) { + if (vv != NULL && PyInt_Check(vv)) + return PyInt_AsUnsignedLongMask(vv); + PyErr_BadInternalCall(); + return (unsigned long) -1; + } + v = (PyLongObject *)vv; + i = v->ob_size; + sign = 1; + x = 0; + if (i < 0) { + sign = -1; + i = -i; + } + while (--i >= 0) { + x = (x << PyLong_SHIFT) | v->ob_digit[i]; + } + return x * sign; +} + +int +_PyLong_Sign(PyObject *vv) +{ + PyLongObject *v = (PyLongObject *)vv; + + assert(v != NULL); + assert(PyLong_Check(v)); + + return Py_SIZE(v) == 0 ? 0 : (Py_SIZE(v) < 0 ? -1 : 1); +} + +size_t +_PyLong_NumBits(PyObject *vv) +{ + PyLongObject *v = (PyLongObject *)vv; + size_t result = 0; + Py_ssize_t ndigits; + + assert(v != NULL); + assert(PyLong_Check(v)); + ndigits = ABS(Py_SIZE(v)); + assert(ndigits == 0 || v->ob_digit[ndigits - 1] != 0); + if (ndigits > 0) { + digit msd = v->ob_digit[ndigits - 1]; + + result = (ndigits - 1) * PyLong_SHIFT; + if (result / PyLong_SHIFT != (size_t)(ndigits - 1)) + goto Overflow; + do { + ++result; + if (result == 0) + goto Overflow; + msd >>= 1; + } while (msd); + } + return result; + + Overflow: + PyErr_SetString(PyExc_OverflowError, "long has too many bits " + "to express in a platform size_t"); + return (size_t)-1; +} + +PyObject * +_PyLong_FromByteArray(const unsigned char* bytes, size_t n, + int little_endian, int is_signed) +{ + const unsigned char* pstartbyte; /* LSB of bytes */ + int incr; /* direction to move pstartbyte */ + const unsigned char* pendbyte; /* MSB of bytes */ + size_t numsignificantbytes; /* number of bytes that matter */ + Py_ssize_t ndigits; /* number of Python long digits */ + PyLongObject* v; /* result */ + Py_ssize_t idigit = 0; /* next free index in v->ob_digit */ + + if (n == 0) + return PyLong_FromLong(0L); + + if (little_endian) { + pstartbyte = bytes; + pendbyte = bytes + n - 1; + incr = 1; + } + else { + pstartbyte = bytes + n - 1; + pendbyte = bytes; + incr = -1; + } + + if (is_signed) + is_signed = *pendbyte >= 0x80; + + /* Compute numsignificantbytes. This consists of finding the most + significant byte. Leading 0 bytes are insignificant if the number + is positive, and leading 0xff bytes if negative. */ + { + size_t i; + const unsigned char* p = pendbyte; + const int pincr = -incr; /* search MSB to LSB */ + const unsigned char insignficant = is_signed ? 0xff : 0x00; + + for (i = 0; i < n; ++i, p += pincr) { + if (*p != insignficant) + break; + } + numsignificantbytes = n - i; + /* 2's-comp is a bit tricky here, e.g. 0xff00 == -0x0100, so + actually has 2 significant bytes. OTOH, 0xff0001 == + -0x00ffff, so we wouldn't *need* to bump it there; but we + do for 0xffff = -0x0001. To be safe without bothering to + check every case, bump it regardless. */ + if (is_signed && numsignificantbytes < n) + ++numsignificantbytes; + } + + /* How many Python long digits do we need? We have + 8*numsignificantbytes bits, and each Python long digit has + PyLong_SHIFT bits, so it's the ceiling of the quotient. */ + /* catch overflow before it happens */ + if (numsignificantbytes > (PY_SSIZE_T_MAX - PyLong_SHIFT) / 8) { + PyErr_SetString(PyExc_OverflowError, + "byte array too long to convert to int"); + return NULL; + } + ndigits = (numsignificantbytes * 8 + PyLong_SHIFT - 1) / PyLong_SHIFT; + v = _PyLong_New(ndigits); + if (v == NULL) + return NULL; + + /* Copy the bits over. The tricky parts are computing 2's-comp on + the fly for signed numbers, and dealing with the mismatch between + 8-bit bytes and (probably) 15-bit Python digits.*/ + { + size_t i; + twodigits carry = 1; /* for 2's-comp calculation */ + twodigits accum = 0; /* sliding register */ + unsigned int accumbits = 0; /* number of bits in accum */ + const unsigned char* p = pstartbyte; + + for (i = 0; i < numsignificantbytes; ++i, p += incr) { + twodigits thisbyte = *p; + /* Compute correction for 2's comp, if needed. */ + if (is_signed) { + thisbyte = (0xff ^ thisbyte) + carry; + carry = thisbyte >> 8; + thisbyte &= 0xff; + } + /* Because we're going LSB to MSB, thisbyte is + more significant than what's already in accum, + so needs to be prepended to accum. */ + accum |= (twodigits)thisbyte << accumbits; + accumbits += 8; + if (accumbits >= PyLong_SHIFT) { + /* There's enough to fill a Python digit. */ + assert(idigit < ndigits); + v->ob_digit[idigit] = (digit)(accum & PyLong_MASK); + ++idigit; + accum >>= PyLong_SHIFT; + accumbits -= PyLong_SHIFT; + assert(accumbits < PyLong_SHIFT); + } + } + assert(accumbits < PyLong_SHIFT); + if (accumbits) { + assert(idigit < ndigits); + v->ob_digit[idigit] = (digit)accum; + ++idigit; + } + } + + Py_SIZE(v) = is_signed ? -idigit : idigit; + return (PyObject *)long_normalize(v); +} + +int +_PyLong_AsByteArray(PyLongObject* v, + unsigned char* bytes, size_t n, + int little_endian, int is_signed) +{ + Py_ssize_t i; /* index into v->ob_digit */ + Py_ssize_t ndigits; /* |v->ob_size| */ + twodigits accum; /* sliding register */ + unsigned int accumbits; /* # bits in accum */ + int do_twos_comp; /* store 2's-comp? is_signed and v < 0 */ + digit carry; /* for computing 2's-comp */ + size_t j; /* # bytes filled */ + unsigned char* p; /* pointer to next byte in bytes */ + int pincr; /* direction to move p */ + + assert(v != NULL && PyLong_Check(v)); + + if (Py_SIZE(v) < 0) { + ndigits = -(Py_SIZE(v)); + if (!is_signed) { + PyErr_SetString(PyExc_OverflowError, + "can't convert negative long to unsigned"); + return -1; + } + do_twos_comp = 1; + } + else { + ndigits = Py_SIZE(v); + do_twos_comp = 0; + } + + if (little_endian) { + p = bytes; + pincr = 1; + } + else { + p = bytes + n - 1; + pincr = -1; + } + + /* Copy over all the Python digits. + It's crucial that every Python digit except for the MSD contribute + exactly PyLong_SHIFT bits to the total, so first assert that the long is + normalized. */ + assert(ndigits == 0 || v->ob_digit[ndigits - 1] != 0); + j = 0; + accum = 0; + accumbits = 0; + carry = do_twos_comp ? 1 : 0; + for (i = 0; i < ndigits; ++i) { + digit thisdigit = v->ob_digit[i]; + if (do_twos_comp) { + thisdigit = (thisdigit ^ PyLong_MASK) + carry; + carry = thisdigit >> PyLong_SHIFT; + thisdigit &= PyLong_MASK; + } + /* Because we're going LSB to MSB, thisdigit is more + significant than what's already in accum, so needs to be + prepended to accum. */ + accum |= (twodigits)thisdigit << accumbits; + + /* The most-significant digit may be (probably is) at least + partly empty. */ + if (i == ndigits - 1) { + /* Count # of sign bits -- they needn't be stored, + * although for signed conversion we need later to + * make sure at least one sign bit gets stored. */ + digit s = do_twos_comp ? thisdigit ^ PyLong_MASK : thisdigit; + while (s != 0) { + s >>= 1; + accumbits++; + } + } + else + accumbits += PyLong_SHIFT; + + /* Store as many bytes as possible. */ + while (accumbits >= 8) { + if (j >= n) + goto Overflow; + ++j; + *p = (unsigned char)(accum & 0xff); + p += pincr; + accumbits -= 8; + accum >>= 8; + } + } + + /* Store the straggler (if any). */ + assert(accumbits < 8); + assert(carry == 0); /* else do_twos_comp and *every* digit was 0 */ + if (accumbits > 0) { + if (j >= n) + goto Overflow; + ++j; + if (do_twos_comp) { + /* Fill leading bits of the byte with sign bits + (appropriately pretending that the long had an + infinite supply of sign bits). */ + accum |= (~(twodigits)0) << accumbits; + } + *p = (unsigned char)(accum & 0xff); + p += pincr; + } + else if (j == n && n > 0 && is_signed) { + /* The main loop filled the byte array exactly, so the code + just above didn't get to ensure there's a sign bit, and the + loop below wouldn't add one either. Make sure a sign bit + exists. */ + unsigned char msb = *(p - pincr); + int sign_bit_set = msb >= 0x80; + assert(accumbits == 0); + if (sign_bit_set == do_twos_comp) + return 0; + else + goto Overflow; + } + + /* Fill remaining bytes with copies of the sign bit. */ + { + unsigned char signbyte = do_twos_comp ? 0xffU : 0U; + for ( ; j < n; ++j, p += pincr) + *p = signbyte; + } + + return 0; + + Overflow: + PyErr_SetString(PyExc_OverflowError, "long too big to convert"); + return -1; + +} + +/* Create a new long (or int) object from a C pointer */ + +PyObject * +PyLong_FromVoidPtr(void *p) +{ +#if SIZEOF_VOID_P <= SIZEOF_LONG + if ((long)p < 0) + return PyLong_FromUnsignedLong((unsigned long)p); + return PyInt_FromLong((long)p); +#else + +#ifndef HAVE_LONG_LONG +# error "PyLong_FromVoidPtr: sizeof(void*) > sizeof(long), but no long long" +#endif +#if SIZEOF_LONG_LONG < SIZEOF_VOID_P +# error "PyLong_FromVoidPtr: sizeof(PY_LONG_LONG) < sizeof(void*)" +#endif + /* optimize null pointers */ + if (p == NULL) + return PyInt_FromLong(0); + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)p); + +#endif /* SIZEOF_VOID_P <= SIZEOF_LONG */ +} + +/* Get a C pointer from a long object (or an int object in some cases) */ + +void * +PyLong_AsVoidPtr(PyObject *vv) +{ + /* This function will allow int or long objects. If vv is neither, + then the PyLong_AsLong*() functions will raise the exception: + PyExc_SystemError, "bad argument to internal function" + */ +#if SIZEOF_VOID_P <= SIZEOF_LONG + long x; + + if (PyInt_Check(vv)) + x = PyInt_AS_LONG(vv); + else if (PyLong_Check(vv) && _PyLong_Sign(vv) < 0) + x = PyLong_AsLong(vv); + else + x = PyLong_AsUnsignedLong(vv); +#else + +#ifndef HAVE_LONG_LONG +# error "PyLong_AsVoidPtr: sizeof(void*) > sizeof(long), but no long long" +#endif +#if SIZEOF_LONG_LONG < SIZEOF_VOID_P +# error "PyLong_AsVoidPtr: sizeof(PY_LONG_LONG) < sizeof(void*)" +#endif + PY_LONG_LONG x; + + if (PyInt_Check(vv)) + x = PyInt_AS_LONG(vv); + else if (PyLong_Check(vv) && _PyLong_Sign(vv) < 0) + x = PyLong_AsLongLong(vv); + else + x = PyLong_AsUnsignedLongLong(vv); + +#endif /* SIZEOF_VOID_P <= SIZEOF_LONG */ + + if (x == -1 && PyErr_Occurred()) + return NULL; + return (void *)x; +} + +#ifdef HAVE_LONG_LONG + +/* Initial PY_LONG_LONG support by Chris Herborth (chrish@qnx.com), later + * rewritten to use the newer PyLong_{As,From}ByteArray API. + */ + +#define IS_LITTLE_ENDIAN (int)*(unsigned char*)&one +#define PY_ABS_LLONG_MIN (0-(unsigned PY_LONG_LONG)PY_LLONG_MIN) + +/* Create a new long int object from a C PY_LONG_LONG int. */ + +PyObject * +PyLong_FromLongLong(PY_LONG_LONG ival) +{ + PyLongObject *v; + unsigned PY_LONG_LONG abs_ival; + unsigned PY_LONG_LONG t; /* unsigned so >> doesn't propagate sign bit */ + int ndigits = 0; + int negative = 0; + + if (ival < 0) { + /* avoid signed overflow on negation; see comments + in PyLong_FromLong above. */ + abs_ival = (unsigned PY_LONG_LONG)(-1-ival) + 1; + negative = 1; + } + else { + abs_ival = (unsigned PY_LONG_LONG)ival; + } + + /* Count the number of Python digits. + We used to pick 5 ("big enough for anything"), but that's a + waste of time and space given that 5*15 = 75 bits are rarely + needed. */ + t = abs_ival; + while (t) { + ++ndigits; + t >>= PyLong_SHIFT; + } + v = _PyLong_New(ndigits); + if (v != NULL) { + digit *p = v->ob_digit; + Py_SIZE(v) = negative ? -ndigits : ndigits; + t = abs_ival; + while (t) { + *p++ = (digit)(t & PyLong_MASK); + t >>= PyLong_SHIFT; + } + } + return (PyObject *)v; +} + +/* Create a new long int object from a C unsigned PY_LONG_LONG int. */ + +PyObject * +PyLong_FromUnsignedLongLong(unsigned PY_LONG_LONG ival) +{ + PyLongObject *v; + unsigned PY_LONG_LONG t; + int ndigits = 0; + + /* Count the number of Python digits. */ + t = (unsigned PY_LONG_LONG)ival; + while (t) { + ++ndigits; + t >>= PyLong_SHIFT; + } + v = _PyLong_New(ndigits); + if (v != NULL) { + digit *p = v->ob_digit; + Py_SIZE(v) = ndigits; + while (ival) { + *p++ = (digit)(ival & PyLong_MASK); + ival >>= PyLong_SHIFT; + } + } + return (PyObject *)v; +} + +/* Create a new long int object from a C Py_ssize_t. */ + +PyObject * +PyLong_FromSsize_t(Py_ssize_t ival) +{ + Py_ssize_t bytes = ival; + int one = 1; + return _PyLong_FromByteArray((unsigned char *)&bytes, + SIZEOF_SIZE_T, IS_LITTLE_ENDIAN, 1); +} + +/* Create a new long int object from a C size_t. */ + +PyObject * +PyLong_FromSize_t(size_t ival) +{ + size_t bytes = ival; + int one = 1; + return _PyLong_FromByteArray((unsigned char *)&bytes, + SIZEOF_SIZE_T, IS_LITTLE_ENDIAN, 0); +} + +/* Get a C PY_LONG_LONG int from a long int object. + Return -1 and set an error if overflow occurs. */ + +PY_LONG_LONG +PyLong_AsLongLong(PyObject *vv) +{ + PY_LONG_LONG bytes; + int one = 1; + int res; + + if (vv == NULL) { + PyErr_BadInternalCall(); + return -1; + } + if (!PyLong_Check(vv)) { + PyNumberMethods *nb; + PyObject *io; + if (PyInt_Check(vv)) + return (PY_LONG_LONG)PyInt_AsLong(vv); + if ((nb = vv->ob_type->tp_as_number) == NULL || + nb->nb_int == NULL) { + PyErr_SetString(PyExc_TypeError, "an integer is required"); + return -1; + } + io = (*nb->nb_int) (vv); + if (io == NULL) + return -1; + if (PyInt_Check(io)) { + bytes = PyInt_AsLong(io); + Py_DECREF(io); + return bytes; + } + if (PyLong_Check(io)) { + bytes = PyLong_AsLongLong(io); + Py_DECREF(io); + return bytes; + } + Py_DECREF(io); + PyErr_SetString(PyExc_TypeError, "integer conversion failed"); + return -1; + } + + res = _PyLong_AsByteArray((PyLongObject *)vv, (unsigned char *)&bytes, + SIZEOF_LONG_LONG, IS_LITTLE_ENDIAN, 1); + + /* Plan 9 can't handle PY_LONG_LONG in ? : expressions */ + if (res < 0) + return (PY_LONG_LONG)-1; + else + return bytes; +} + +/* Get a C unsigned PY_LONG_LONG int from a long int object. + Return -1 and set an error if overflow occurs. */ + +unsigned PY_LONG_LONG +PyLong_AsUnsignedLongLong(PyObject *vv) +{ + unsigned PY_LONG_LONG bytes; + int one = 1; + int res; + + if (vv == NULL || !PyLong_Check(vv)) { + PyErr_BadInternalCall(); + return (unsigned PY_LONG_LONG)-1; + } + + res = _PyLong_AsByteArray((PyLongObject *)vv, (unsigned char *)&bytes, + SIZEOF_LONG_LONG, IS_LITTLE_ENDIAN, 0); + + /* Plan 9 can't handle PY_LONG_LONG in ? : expressions */ + if (res < 0) + return (unsigned PY_LONG_LONG)res; + else + return bytes; +} + +/* Get a C unsigned long int from a long int object, ignoring the high bits. + Returns -1 and sets an error condition if an error occurs. */ + +unsigned PY_LONG_LONG +PyLong_AsUnsignedLongLongMask(PyObject *vv) +{ + register PyLongObject *v; + unsigned PY_LONG_LONG x; + Py_ssize_t i; + int sign; + + if (vv == NULL || !PyLong_Check(vv)) { + PyErr_BadInternalCall(); + return (unsigned long) -1; + } + v = (PyLongObject *)vv; + i = v->ob_size; + sign = 1; + x = 0; + if (i < 0) { + sign = -1; + i = -i; + } + while (--i >= 0) { + x = (x << PyLong_SHIFT) | v->ob_digit[i]; + } + return x * sign; +} + +/* Get a C long long int from a Python long or Python int object. + On overflow, returns -1 and sets *overflow to 1 or -1 depending + on the sign of the result. Otherwise *overflow is 0. + + For other errors (e.g., type error), returns -1 and sets an error + condition. +*/ + +PY_LONG_LONG +PyLong_AsLongLongAndOverflow(PyObject *vv, int *overflow) +{ + /* This version by Tim Peters */ + register PyLongObject *v; + unsigned PY_LONG_LONG x, prev; + PY_LONG_LONG res; + Py_ssize_t i; + int sign; + int do_decref = 0; /* if nb_int was called */ + + *overflow = 0; + if (vv == NULL) { + PyErr_BadInternalCall(); + return -1; + } + + if (PyInt_Check(vv)) + return PyInt_AsLong(vv); + + if (!PyLong_Check(vv)) { + PyNumberMethods *nb; + nb = vv->ob_type->tp_as_number; + if (nb == NULL || nb->nb_int == NULL) { + PyErr_SetString(PyExc_TypeError, + "an integer is required"); + return -1; + } + vv = (*nb->nb_int) (vv); + if (vv == NULL) + return -1; + do_decref = 1; + if(PyInt_Check(vv)) { + res = PyInt_AsLong(vv); + goto exit; + } + if (!PyLong_Check(vv)) { + Py_DECREF(vv); + PyErr_SetString(PyExc_TypeError, + "nb_int should return int object"); + return -1; + } + } + + res = -1; + v = (PyLongObject *)vv; + i = Py_SIZE(v); + + switch (i) { + case -1: + res = -(sdigit)v->ob_digit[0]; + break; + case 0: + res = 0; + break; + case 1: + res = v->ob_digit[0]; + break; + default: + sign = 1; + x = 0; + if (i < 0) { + sign = -1; + i = -(i); + } + while (--i >= 0) { + prev = x; + x = (x << PyLong_SHIFT) + v->ob_digit[i]; + if ((x >> PyLong_SHIFT) != prev) { + *overflow = sign; + goto exit; + } + } + /* Haven't lost any bits, but casting to long requires extra + * care (see comment above). + */ + if (x <= (unsigned PY_LONG_LONG)PY_LLONG_MAX) { + res = (PY_LONG_LONG)x * sign; + } + else if (sign < 0 && x == PY_ABS_LLONG_MIN) { + res = PY_LLONG_MIN; + } + else { + *overflow = sign; + /* res is already set to -1 */ + } + } + exit: + if (do_decref) { + Py_DECREF(vv); + } + return res; +} + +#undef IS_LITTLE_ENDIAN + +#endif /* HAVE_LONG_LONG */ + + +static int +convert_binop(PyObject *v, PyObject *w, PyLongObject **a, PyLongObject **b) { + if (PyLong_Check(v)) { + *a = (PyLongObject *) v; + Py_INCREF(v); + } + else if (PyInt_Check(v)) { + *a = (PyLongObject *) PyLong_FromLong(PyInt_AS_LONG(v)); + } + else { + return 0; + } + if (PyLong_Check(w)) { + *b = (PyLongObject *) w; + Py_INCREF(w); + } + else if (PyInt_Check(w)) { + *b = (PyLongObject *) PyLong_FromLong(PyInt_AS_LONG(w)); + } + else { + Py_DECREF(*a); + return 0; + } + return 1; +} + +#define CONVERT_BINOP(v, w, a, b) \ + do { \ + if (!convert_binop(v, w, a, b)) { \ + Py_INCREF(Py_NotImplemented); \ + return Py_NotImplemented; \ + } \ + } while(0) \ + +/* bits_in_digit(d) returns the unique integer k such that 2**(k-1) <= d < + 2**k if d is nonzero, else 0. */ + +static const unsigned char BitLengthTable[32] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 +}; + +static int +bits_in_digit(digit d) +{ + int d_bits = 0; + while (d >= 32) { + d_bits += 6; + d >>= 6; + } + d_bits += (int)BitLengthTable[d]; + return d_bits; +} + +/* x[0:m] and y[0:n] are digit vectors, LSD first, m >= n required. x[0:n] + * is modified in place, by adding y to it. Carries are propagated as far as + * x[m-1], and the remaining carry (0 or 1) is returned. + */ +static digit +v_iadd(digit *x, Py_ssize_t m, digit *y, Py_ssize_t n) +{ + Py_ssize_t i; + digit carry = 0; + + assert(m >= n); + for (i = 0; i < n; ++i) { + carry += x[i] + y[i]; + x[i] = carry & PyLong_MASK; + carry >>= PyLong_SHIFT; + assert((carry & 1) == carry); + } + for (; carry && i < m; ++i) { + carry += x[i]; + x[i] = carry & PyLong_MASK; + carry >>= PyLong_SHIFT; + assert((carry & 1) == carry); + } + return carry; +} + +/* x[0:m] and y[0:n] are digit vectors, LSD first, m >= n required. x[0:n] + * is modified in place, by subtracting y from it. Borrows are propagated as + * far as x[m-1], and the remaining borrow (0 or 1) is returned. + */ +static digit +v_isub(digit *x, Py_ssize_t m, digit *y, Py_ssize_t n) +{ + Py_ssize_t i; + digit borrow = 0; + + assert(m >= n); + for (i = 0; i < n; ++i) { + borrow = x[i] - y[i] - borrow; + x[i] = borrow & PyLong_MASK; + borrow >>= PyLong_SHIFT; + borrow &= 1; /* keep only 1 sign bit */ + } + for (; borrow && i < m; ++i) { + borrow = x[i] - borrow; + x[i] = borrow & PyLong_MASK; + borrow >>= PyLong_SHIFT; + borrow &= 1; + } + return borrow; +} + +/* Shift digit vector a[0:m] d bits left, with 0 <= d < PyLong_SHIFT. Put + * result in z[0:m], and return the d bits shifted out of the top. + */ +static digit +v_lshift(digit *z, digit *a, Py_ssize_t m, int d) +{ + Py_ssize_t i; + digit carry = 0; + + assert(0 <= d && d < PyLong_SHIFT); + for (i=0; i < m; i++) { + twodigits acc = (twodigits)a[i] << d | carry; + z[i] = (digit)acc & PyLong_MASK; + carry = (digit)(acc >> PyLong_SHIFT); + } + return carry; +} + +/* Shift digit vector a[0:m] d bits right, with 0 <= d < PyLong_SHIFT. Put + * result in z[0:m], and return the d bits shifted out of the bottom. + */ +static digit +v_rshift(digit *z, digit *a, Py_ssize_t m, int d) +{ + Py_ssize_t i; + digit carry = 0; + digit mask = ((digit)1 << d) - 1U; + + assert(0 <= d && d < PyLong_SHIFT); + for (i=m; i-- > 0;) { + twodigits acc = (twodigits)carry << PyLong_SHIFT | a[i]; + carry = (digit)acc & mask; + z[i] = (digit)(acc >> d); + } + return carry; +} + +/* Divide long pin, w/ size digits, by non-zero digit n, storing quotient + in pout, and returning the remainder. pin and pout point at the LSD. + It's OK for pin == pout on entry, which saves oodles of mallocs/frees in + _PyLong_Format, but that should be done with great care since longs are + immutable. */ + +static digit +inplace_divrem1(digit *pout, digit *pin, Py_ssize_t size, digit n) +{ + twodigits rem = 0; + + assert(n > 0 && n <= PyLong_MASK); + pin += size; + pout += size; + while (--size >= 0) { + digit hi; + rem = (rem << PyLong_SHIFT) | *--pin; + *--pout = hi = (digit)(rem / n); + rem -= (twodigits)hi * n; + } + return (digit)rem; +} + +/* Divide a long integer by a digit, returning both the quotient + (as function result) and the remainder (through *prem). + The sign of a is ignored; n should not be zero. */ + +static PyLongObject * +divrem1(PyLongObject *a, digit n, digit *prem) +{ + const Py_ssize_t size = ABS(Py_SIZE(a)); + PyLongObject *z; + + assert(n > 0 && n <= PyLong_MASK); + z = _PyLong_New(size); + if (z == NULL) + return NULL; + *prem = inplace_divrem1(z->ob_digit, a->ob_digit, size, n); + return long_normalize(z); +} + +/* Convert a long integer to a base 10 string. Returns a new non-shared + string. (Return value is non-shared so that callers can modify the + returned value if necessary.) */ + +static PyObject * +long_to_decimal_string(PyObject *aa, int addL) +{ + PyLongObject *scratch, *a; + PyObject *str; + Py_ssize_t size, strlen, size_a, i, j; + digit *pout, *pin, rem, tenpow; + char *p; + int negative; + + a = (PyLongObject *)aa; + if (a == NULL || !PyLong_Check(a)) { + PyErr_BadInternalCall(); + return NULL; + } + size_a = ABS(Py_SIZE(a)); + negative = Py_SIZE(a) < 0; + + /* quick and dirty upper bound for the number of digits + required to express a in base _PyLong_DECIMAL_BASE: + + #digits = 1 + floor(log2(a) / log2(_PyLong_DECIMAL_BASE)) + + But log2(a) < size_a * PyLong_SHIFT, and + log2(_PyLong_DECIMAL_BASE) = log2(10) * _PyLong_DECIMAL_SHIFT + > 3 * _PyLong_DECIMAL_SHIFT + */ + if (size_a > PY_SSIZE_T_MAX / PyLong_SHIFT) { + PyErr_SetString(PyExc_OverflowError, + "long is too large to format"); + return NULL; + } + /* the expression size_a * PyLong_SHIFT is now safe from overflow */ + size = 1 + size_a * PyLong_SHIFT / (3 * _PyLong_DECIMAL_SHIFT); + scratch = _PyLong_New(size); + if (scratch == NULL) + return NULL; + + /* convert array of base _PyLong_BASE digits in pin to an array of + base _PyLong_DECIMAL_BASE digits in pout, following Knuth (TAOCP, + Volume 2 (3rd edn), section 4.4, Method 1b). */ + pin = a->ob_digit; + pout = scratch->ob_digit; + size = 0; + for (i = size_a; --i >= 0; ) { + digit hi = pin[i]; + for (j = 0; j < size; j++) { + twodigits z = (twodigits)pout[j] << PyLong_SHIFT | hi; + hi = (digit)(z / _PyLong_DECIMAL_BASE); + pout[j] = (digit)(z - (twodigits)hi * + _PyLong_DECIMAL_BASE); + } + while (hi) { + pout[size++] = hi % _PyLong_DECIMAL_BASE; + hi /= _PyLong_DECIMAL_BASE; + } + /* check for keyboard interrupt */ + SIGCHECK({ + Py_DECREF(scratch); + return NULL; + }); + } + /* pout should have at least one digit, so that the case when a = 0 + works correctly */ + if (size == 0) + pout[size++] = 0; + + /* calculate exact length of output string, and allocate */ + strlen = (addL != 0) + negative + + 1 + (size - 1) * _PyLong_DECIMAL_SHIFT; + tenpow = 10; + rem = pout[size-1]; + while (rem >= tenpow) { + tenpow *= 10; + strlen++; + } + str = PyString_FromStringAndSize(NULL, strlen); + if (str == NULL) { + Py_DECREF(scratch); + return NULL; + } + + /* fill the string right-to-left */ + p = PyString_AS_STRING(str) + strlen; + *p = '\0'; + if (addL) + *--p = 'L'; + /* pout[0] through pout[size-2] contribute exactly + _PyLong_DECIMAL_SHIFT digits each */ + for (i=0; i < size - 1; i++) { + rem = pout[i]; + for (j = 0; j < _PyLong_DECIMAL_SHIFT; j++) { + *--p = '0' + rem % 10; + rem /= 10; + } + } + /* pout[size-1]: always produce at least one decimal digit */ + rem = pout[i]; + do { + *--p = '0' + rem % 10; + rem /= 10; + } while (rem != 0); + + /* and sign */ + if (negative) + *--p = '-'; + + /* check we've counted correctly */ + assert(p == PyString_AS_STRING(str)); + Py_DECREF(scratch); + return (PyObject *)str; +} + +/* Convert the long to a string object with given base, + appending a base prefix of 0[box] if base is 2, 8 or 16. + Add a trailing "L" if addL is non-zero. + If newstyle is zero, then use the pre-2.6 behavior of octal having + a leading "0", instead of the prefix "0o" */ +PyAPI_FUNC(PyObject *) +_PyLong_Format(PyObject *aa, int base, int addL, int newstyle) +{ + register PyLongObject *a = (PyLongObject *)aa; + PyStringObject *str; + Py_ssize_t i, sz; + Py_ssize_t size_a; + char *p; + int bits; + char sign = '\0'; + + if (base == 10) + return long_to_decimal_string((PyObject *)a, addL); + + if (a == NULL || !PyLong_Check(a)) { + PyErr_BadInternalCall(); + return NULL; + } + assert(base >= 2 && base <= 36); + size_a = ABS(Py_SIZE(a)); + + /* Compute a rough upper bound for the length of the string */ + i = base; + bits = 0; + while (i > 1) { + ++bits; + i >>= 1; + } + i = 5 + (addL ? 1 : 0); + /* ensure we don't get signed overflow in sz calculation */ + if (size_a > (PY_SSIZE_T_MAX - i) / PyLong_SHIFT) { + PyErr_SetString(PyExc_OverflowError, + "long is too large to format"); + return NULL; + } + sz = i + 1 + (size_a * PyLong_SHIFT - 1) / bits; + assert(sz >= 0); + str = (PyStringObject *) PyString_FromStringAndSize((char *)0, sz); + if (str == NULL) + return NULL; + p = PyString_AS_STRING(str) + sz; + *p = '\0'; + if (addL) + *--p = 'L'; + if (a->ob_size < 0) + sign = '-'; + + if (a->ob_size == 0) { + *--p = '0'; + } + else if ((base & (base - 1)) == 0) { + /* JRH: special case for power-of-2 bases */ + twodigits accum = 0; + int accumbits = 0; /* # of bits in accum */ + int basebits = 1; /* # of bits in base-1 */ + i = base; + while ((i >>= 1) > 1) + ++basebits; + + for (i = 0; i < size_a; ++i) { + accum |= (twodigits)a->ob_digit[i] << accumbits; + accumbits += PyLong_SHIFT; + assert(accumbits >= basebits); + do { + char cdigit = (char)(accum & (base - 1)); + cdigit += (cdigit < 10) ? '0' : 'a'-10; + assert(p > PyString_AS_STRING(str)); + *--p = cdigit; + accumbits -= basebits; + accum >>= basebits; + } while (i < size_a-1 ? accumbits >= basebits : accum > 0); + } + } + else { + /* Not 0, and base not a power of 2. Divide repeatedly by + base, but for speed use the highest power of base that + fits in a digit. */ + Py_ssize_t size = size_a; + digit *pin = a->ob_digit; + PyLongObject *scratch; + /* powbasw <- largest power of base that fits in a digit. */ + digit powbase = base; /* powbase == base ** power */ + int power = 1; + for (;;) { + twodigits newpow = powbase * (twodigits)base; + if (newpow >> PyLong_SHIFT) + /* doesn't fit in a digit */ + break; + powbase = (digit)newpow; + ++power; + } + + /* Get a scratch area for repeated division. */ + scratch = _PyLong_New(size); + if (scratch == NULL) { + Py_DECREF(str); + return NULL; + } + + /* Repeatedly divide by powbase. */ + do { + int ntostore = power; + digit rem = inplace_divrem1(scratch->ob_digit, + pin, size, powbase); + pin = scratch->ob_digit; /* no need to use a again */ + if (pin[size - 1] == 0) + --size; + SIGCHECK({ + Py_DECREF(scratch); + Py_DECREF(str); + return NULL; + }); + + /* Break rem into digits. */ + assert(ntostore > 0); + do { + digit nextrem = (digit)(rem / base); + char c = (char)(rem - nextrem * base); + assert(p > PyString_AS_STRING(str)); + c += (c < 10) ? '0' : 'a'-10; + *--p = c; + rem = nextrem; + --ntostore; + /* Termination is a bit delicate: must not + store leading zeroes, so must get out if + remaining quotient and rem are both 0. */ + } while (ntostore && (size || rem)); + } while (size != 0); + Py_DECREF(scratch); + } + + if (base == 2) { + *--p = 'b'; + *--p = '0'; + } + else if (base == 8) { + if (newstyle) { + *--p = 'o'; + *--p = '0'; + } + else + if (size_a != 0) + *--p = '0'; + } + else if (base == 16) { + *--p = 'x'; + *--p = '0'; + } + else if (base != 10) { + *--p = '#'; + *--p = '0' + base%10; + if (base > 10) + *--p = '0' + base/10; + } + if (sign) + *--p = sign; + if (p != PyString_AS_STRING(str)) { + char *q = PyString_AS_STRING(str); + assert(p > q); + do { + } while ((*q++ = *p++) != '\0'); + q--; + _PyString_Resize((PyObject **)&str, + (Py_ssize_t) (q - PyString_AS_STRING(str))); + } + return (PyObject *)str; +} + +/* Table of digit values for 8-bit string -> integer conversion. + * '0' maps to 0, ..., '9' maps to 9. + * 'a' and 'A' map to 10, ..., 'z' and 'Z' map to 35. + * All other indices map to 37. + * Note that when converting a base B string, a char c is a legitimate + * base B digit iff _PyLong_DigitValue[Py_CHARMASK(c)] < B. + */ +int _PyLong_DigitValue[256] = { + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 37, 37, 37, 37, 37, 37, + 37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37, + 37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, +}; + +/* *str points to the first digit in a string of base `base` digits. base + * is a power of 2 (2, 4, 8, 16, or 32). *str is set to point to the first + * non-digit (which may be *str!). A normalized long is returned. + * The point to this routine is that it takes time linear in the number of + * string characters. + */ +static PyLongObject * +long_from_binary_base(char **str, int base) +{ + char *p = *str; + char *start = p; + int bits_per_char; + Py_ssize_t n; + PyLongObject *z; + twodigits accum; + int bits_in_accum; + digit *pdigit; + + assert(base >= 2 && base <= 32 && (base & (base - 1)) == 0); + n = base; + for (bits_per_char = -1; n; ++bits_per_char) + n >>= 1; + /* n <- total # of bits needed, while setting p to end-of-string */ + while (_PyLong_DigitValue[Py_CHARMASK(*p)] < base) + ++p; + *str = p; + /* n <- # of Python digits needed, = ceiling(n/PyLong_SHIFT). */ + n = (p - start) * bits_per_char + PyLong_SHIFT - 1; + if (n / bits_per_char < p - start) { + PyErr_SetString(PyExc_ValueError, + "long string too large to convert"); + return NULL; + } + n = n / PyLong_SHIFT; + z = _PyLong_New(n); + if (z == NULL) + return NULL; + /* Read string from right, and fill in long from left; i.e., + * from least to most significant in both. + */ + accum = 0; + bits_in_accum = 0; + pdigit = z->ob_digit; + while (--p >= start) { + int k = _PyLong_DigitValue[Py_CHARMASK(*p)]; + assert(k >= 0 && k < base); + accum |= (twodigits)k << bits_in_accum; + bits_in_accum += bits_per_char; + if (bits_in_accum >= PyLong_SHIFT) { + *pdigit++ = (digit)(accum & PyLong_MASK); + assert(pdigit - z->ob_digit <= n); + accum >>= PyLong_SHIFT; + bits_in_accum -= PyLong_SHIFT; + assert(bits_in_accum < PyLong_SHIFT); + } + } + if (bits_in_accum) { + assert(bits_in_accum <= PyLong_SHIFT); + *pdigit++ = (digit)accum; + assert(pdigit - z->ob_digit <= n); + } + while (pdigit - z->ob_digit < n) + *pdigit++ = 0; + return long_normalize(z); +} + +PyObject * +PyLong_FromString(char *str, char **pend, int base) +{ + int sign = 1; + char *start, *orig_str = str; + PyLongObject *z; + PyObject *strobj, *strrepr; + Py_ssize_t slen; + + if ((base != 0 && base < 2) || base > 36) { + PyErr_SetString(PyExc_ValueError, + "long() arg 2 must be >= 2 and <= 36"); + return NULL; + } + while (*str != '\0' && isspace(Py_CHARMASK(*str))) + str++; + if (*str == '+') + ++str; + else if (*str == '-') { + ++str; + sign = -1; + } + while (*str != '\0' && isspace(Py_CHARMASK(*str))) + str++; + if (base == 0) { + /* No base given. Deduce the base from the contents + of the string */ + if (str[0] != '0') + base = 10; + else if (str[1] == 'x' || str[1] == 'X') + base = 16; + else if (str[1] == 'o' || str[1] == 'O') + base = 8; + else if (str[1] == 'b' || str[1] == 'B') + base = 2; + else + /* "old" (C-style) octal literal, still valid in + 2.x, although illegal in 3.x */ + base = 8; + } + /* Whether or not we were deducing the base, skip leading chars + as needed */ + if (str[0] == '0' && + ((base == 16 && (str[1] == 'x' || str[1] == 'X')) || + (base == 8 && (str[1] == 'o' || str[1] == 'O')) || + (base == 2 && (str[1] == 'b' || str[1] == 'B')))) + str += 2; + + start = str; + if ((base & (base - 1)) == 0) + z = long_from_binary_base(&str, base); + else { +/*** +Binary bases can be converted in time linear in the number of digits, because +Python's representation base is binary. Other bases (including decimal!) use +the simple quadratic-time algorithm below, complicated by some speed tricks. + +First some math: the largest integer that can be expressed in N base-B digits +is B**N-1. Consequently, if we have an N-digit input in base B, the worst- +case number of Python digits needed to hold it is the smallest integer n s.t. + + PyLong_BASE**n-1 >= B**N-1 [or, adding 1 to both sides] + PyLong_BASE**n >= B**N [taking logs to base PyLong_BASE] + n >= log(B**N)/log(PyLong_BASE) = N * log(B)/log(PyLong_BASE) + +The static array log_base_PyLong_BASE[base] == log(base)/log(PyLong_BASE) so +we can compute this quickly. A Python long with that much space is reserved +near the start, and the result is computed into it. + +The input string is actually treated as being in base base**i (i.e., i digits +are processed at a time), where two more static arrays hold: + + convwidth_base[base] = the largest integer i such that + base**i <= PyLong_BASE + convmultmax_base[base] = base ** convwidth_base[base] + +The first of these is the largest i such that i consecutive input digits +must fit in a single Python digit. The second is effectively the input +base we're really using. + +Viewing the input as a sequence of digits in base +convmultmax_base[base], the result is "simply" + + (((c0*B + c1)*B + c2)*B + c3)*B + ... ))) + c_n-1 + +where B = convmultmax_base[base]. + +Error analysis: as above, the number of Python digits `n` needed is worst- +case + + n >= N * log(B)/log(PyLong_BASE) + +where `N` is the number of input digits in base `B`. This is computed via + + size_z = (Py_ssize_t)((scan - str) * log_base_PyLong_BASE[base]) + 1; + +below. Two numeric concerns are how much space this can waste, and whether +the computed result can be too small. To be concrete, assume PyLong_BASE = +2**15, which is the default (and it's unlikely anyone changes that). + +Waste isn't a problem: provided the first input digit isn't 0, the difference +between the worst-case input with N digits and the smallest input with N +digits is about a factor of B, but B is small compared to PyLong_BASE so at +most one allocated Python digit can remain unused on that count. If +N*log(B)/log(PyLong_BASE) is mathematically an exact integer, then truncating +that and adding 1 returns a result 1 larger than necessary. However, that +can't happen: whenever B is a power of 2, long_from_binary_base() is called +instead, and it's impossible for B**i to be an integer power of 2**15 when B +is not a power of 2 (i.e., it's impossible for N*log(B)/log(PyLong_BASE) to be +an exact integer when B is not a power of 2, since B**i has a prime factor +other than 2 in that case, but (2**15)**j's only prime factor is 2). + +The computed result can be too small if the true value of +N*log(B)/log(PyLong_BASE) is a little bit larger than an exact integer, but +due to roundoff errors (in computing log(B), log(PyLong_BASE), their quotient, +and/or multiplying that by N) yields a numeric result a little less than that +integer. Unfortunately, "how close can a transcendental function get to an +integer over some range?" questions are generally theoretically intractable. +Computer analysis via continued fractions is practical: expand +log(B)/log(PyLong_BASE) via continued fractions, giving a sequence i/j of "the +best" rational approximations. Then j*log(B)/log(PyLong_BASE) is +approximately equal to (the integer) i. This shows that we can get very close +to being in trouble, but very rarely. For example, 76573 is a denominator in +one of the continued-fraction approximations to log(10)/log(2**15), and +indeed: + + >>> log(10)/log(2**15)*76573 + 16958.000000654003 + +is very close to an integer. If we were working with IEEE single-precision, +rounding errors could kill us. Finding worst cases in IEEE double-precision +requires better-than-double-precision log() functions, and Tim didn't bother. +Instead the code checks to see whether the allocated space is enough as each +new Python digit is added, and copies the whole thing to a larger long if not. +This should happen extremely rarely, and in fact I don't have a test case +that triggers it(!). Instead the code was tested by artificially allocating +just 1 digit at the start, so that the copying code was exercised for every +digit beyond the first. +***/ + register twodigits c; /* current input character */ + Py_ssize_t size_z; + int i; + int convwidth; + twodigits convmultmax, convmult; + digit *pz, *pzstop; + char* scan; + + static double log_base_PyLong_BASE[37] = {0.0e0,}; + static int convwidth_base[37] = {0,}; + static twodigits convmultmax_base[37] = {0,}; + + if (log_base_PyLong_BASE[base] == 0.0) { + twodigits convmax = base; + int i = 1; + + log_base_PyLong_BASE[base] = (log((double)base) / + log((double)PyLong_BASE)); + for (;;) { + twodigits next = convmax * base; + if (next > PyLong_BASE) + break; + convmax = next; + ++i; + } + convmultmax_base[base] = convmax; + assert(i > 0); + convwidth_base[base] = i; + } + + /* Find length of the string of numeric characters. */ + scan = str; + while (_PyLong_DigitValue[Py_CHARMASK(*scan)] < base) + ++scan; + + /* Create a long object that can contain the largest possible + * integer with this base and length. Note that there's no + * need to initialize z->ob_digit -- no slot is read up before + * being stored into. + */ + size_z = (Py_ssize_t)((scan - str) * log_base_PyLong_BASE[base]) + 1; + /* Uncomment next line to test exceedingly rare copy code */ + /* size_z = 1; */ + assert(size_z > 0); + z = _PyLong_New(size_z); + if (z == NULL) + return NULL; + Py_SIZE(z) = 0; + + /* `convwidth` consecutive input digits are treated as a single + * digit in base `convmultmax`. + */ + convwidth = convwidth_base[base]; + convmultmax = convmultmax_base[base]; + + /* Work ;-) */ + while (str < scan) { + /* grab up to convwidth digits from the input string */ + c = (digit)_PyLong_DigitValue[Py_CHARMASK(*str++)]; + for (i = 1; i < convwidth && str != scan; ++i, ++str) { + c = (twodigits)(c * base + + _PyLong_DigitValue[Py_CHARMASK(*str)]); + assert(c < PyLong_BASE); + } + + convmult = convmultmax; + /* Calculate the shift only if we couldn't get + * convwidth digits. + */ + if (i != convwidth) { + convmult = base; + for ( ; i > 1; --i) + convmult *= base; + } + + /* Multiply z by convmult, and add c. */ + pz = z->ob_digit; + pzstop = pz + Py_SIZE(z); + for (; pz < pzstop; ++pz) { + c += (twodigits)*pz * convmult; + *pz = (digit)(c & PyLong_MASK); + c >>= PyLong_SHIFT; + } + /* carry off the current end? */ + if (c) { + assert(c < PyLong_BASE); + if (Py_SIZE(z) < size_z) { + *pz = (digit)c; + ++Py_SIZE(z); + } + else { + PyLongObject *tmp; + /* Extremely rare. Get more space. */ + assert(Py_SIZE(z) == size_z); + tmp = _PyLong_New(size_z + 1); + if (tmp == NULL) { + Py_DECREF(z); + return NULL; + } + memcpy(tmp->ob_digit, + z->ob_digit, + sizeof(digit) * size_z); + Py_DECREF(z); + z = tmp; + z->ob_digit[size_z] = (digit)c; + ++size_z; + } + } + } + } + if (z == NULL) + return NULL; + if (str == start) + goto onError; + if (sign < 0) + Py_SIZE(z) = -(Py_SIZE(z)); + if (*str == 'L' || *str == 'l') + str++; + while (*str && isspace(Py_CHARMASK(*str))) + str++; + if (*str != '\0') + goto onError; + if (pend) + *pend = str; + return (PyObject *) z; + + onError: + Py_XDECREF(z); + slen = strlen(orig_str) < 200 ? strlen(orig_str) : 200; + strobj = PyString_FromStringAndSize(orig_str, slen); + if (strobj == NULL) + return NULL; + strrepr = PyObject_Repr(strobj); + Py_DECREF(strobj); + if (strrepr == NULL) + return NULL; + PyErr_Format(PyExc_ValueError, + "invalid literal for long() with base %d: %s", + base, PyString_AS_STRING(strrepr)); + Py_DECREF(strrepr); + return NULL; +} + +#ifdef Py_USING_UNICODE +PyObject * +PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base) +{ + PyObject *result; + char *buffer = (char *)PyMem_MALLOC(length+1); + + if (buffer == NULL) + return NULL; + + if (PyUnicode_EncodeDecimal(u, length, buffer, NULL)) { + PyMem_FREE(buffer); + return NULL; + } + result = PyLong_FromString(buffer, NULL, base); + PyMem_FREE(buffer); + return result; +} +#endif + +/* forward */ +static PyLongObject *x_divrem + (PyLongObject *, PyLongObject *, PyLongObject **); +static PyObject *long_long(PyObject *v); + +/* Long division with remainder, top-level routine */ + +static int +long_divrem(PyLongObject *a, PyLongObject *b, + PyLongObject **pdiv, PyLongObject **prem) +{ + Py_ssize_t size_a = ABS(Py_SIZE(a)), size_b = ABS(Py_SIZE(b)); + PyLongObject *z; + + if (size_b == 0) { + PyErr_SetString(PyExc_ZeroDivisionError, + "long division or modulo by zero"); + return -1; + } + if (size_a < size_b || + (size_a == size_b && + a->ob_digit[size_a-1] < b->ob_digit[size_b-1])) { + /* |a| < |b|. */ + *pdiv = _PyLong_New(0); + if (*pdiv == NULL) + return -1; + Py_INCREF(a); + *prem = (PyLongObject *) a; + return 0; + } + if (size_b == 1) { + digit rem = 0; + z = divrem1(a, b->ob_digit[0], &rem); + if (z == NULL) + return -1; + *prem = (PyLongObject *) PyLong_FromLong((long)rem); + if (*prem == NULL) { + Py_DECREF(z); + return -1; + } + } + else { + z = x_divrem(a, b, prem); + if (z == NULL) + return -1; + } + /* Set the signs. + The quotient z has the sign of a*b; + the remainder r has the sign of a, + so a = b*z + r. */ + if ((a->ob_size < 0) != (b->ob_size < 0)) + z->ob_size = -(z->ob_size); + if (a->ob_size < 0 && (*prem)->ob_size != 0) + (*prem)->ob_size = -((*prem)->ob_size); + *pdiv = z; + return 0; +} + +/* Unsigned long division with remainder -- the algorithm. The arguments v1 + and w1 should satisfy 2 <= ABS(Py_SIZE(w1)) <= ABS(Py_SIZE(v1)). */ + +static PyLongObject * +x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem) +{ + PyLongObject *v, *w, *a; + Py_ssize_t i, k, size_v, size_w; + int d; + digit wm1, wm2, carry, q, r, vtop, *v0, *vk, *w0, *ak; + twodigits vv; + sdigit zhi; + stwodigits z; + + /* We follow Knuth [The Art of Computer Programming, Vol. 2 (3rd + edn.), section 4.3.1, Algorithm D], except that we don't explicitly + handle the special case when the initial estimate q for a quotient + digit is >= PyLong_BASE: the max value for q is PyLong_BASE+1, and + that won't overflow a digit. */ + + /* allocate space; w will also be used to hold the final remainder */ + size_v = ABS(Py_SIZE(v1)); + size_w = ABS(Py_SIZE(w1)); + assert(size_v >= size_w && size_w >= 2); /* Assert checks by div() */ + v = _PyLong_New(size_v+1); + if (v == NULL) { + *prem = NULL; + return NULL; + } + w = _PyLong_New(size_w); + if (w == NULL) { + Py_DECREF(v); + *prem = NULL; + return NULL; + } + + /* normalize: shift w1 left so that its top digit is >= PyLong_BASE/2. + shift v1 left by the same amount. Results go into w and v. */ + d = PyLong_SHIFT - bits_in_digit(w1->ob_digit[size_w-1]); + carry = v_lshift(w->ob_digit, w1->ob_digit, size_w, d); + assert(carry == 0); + carry = v_lshift(v->ob_digit, v1->ob_digit, size_v, d); + if (carry != 0 || v->ob_digit[size_v-1] >= w->ob_digit[size_w-1]) { + v->ob_digit[size_v] = carry; + size_v++; + } + + /* Now v->ob_digit[size_v-1] < w->ob_digit[size_w-1], so quotient has + at most (and usually exactly) k = size_v - size_w digits. */ + k = size_v - size_w; + assert(k >= 0); + a = _PyLong_New(k); + if (a == NULL) { + Py_DECREF(w); + Py_DECREF(v); + *prem = NULL; + return NULL; + } + v0 = v->ob_digit; + w0 = w->ob_digit; + wm1 = w0[size_w-1]; + wm2 = w0[size_w-2]; + for (vk = v0+k, ak = a->ob_digit + k; vk-- > v0;) { + /* inner loop: divide vk[0:size_w+1] by w0[0:size_w], giving + single-digit quotient q, remainder in vk[0:size_w]. */ + + SIGCHECK({ + Py_DECREF(a); + Py_DECREF(w); + Py_DECREF(v); + *prem = NULL; + return NULL; + }); + + /* estimate quotient digit q; may overestimate by 1 (rare) */ + vtop = vk[size_w]; + assert(vtop <= wm1); + vv = ((twodigits)vtop << PyLong_SHIFT) | vk[size_w-1]; + q = (digit)(vv / wm1); + r = (digit)(vv - (twodigits)wm1 * q); /* r = vv % wm1 */ + while ((twodigits)wm2 * q > (((twodigits)r << PyLong_SHIFT) + | vk[size_w-2])) { + --q; + r += wm1; + if (r >= PyLong_BASE) + break; + } + assert(q <= PyLong_BASE); + + /* subtract q*w0[0:size_w] from vk[0:size_w+1] */ + zhi = 0; + for (i = 0; i < size_w; ++i) { + /* invariants: -PyLong_BASE <= -q <= zhi <= 0; + -PyLong_BASE * q <= z < PyLong_BASE */ + z = (sdigit)vk[i] + zhi - + (stwodigits)q * (stwodigits)w0[i]; + vk[i] = (digit)z & PyLong_MASK; + zhi = (sdigit)Py_ARITHMETIC_RIGHT_SHIFT(stwodigits, + z, PyLong_SHIFT); + } + + /* add w back if q was too large (this branch taken rarely) */ + assert((sdigit)vtop + zhi == -1 || (sdigit)vtop + zhi == 0); + if ((sdigit)vtop + zhi < 0) { + carry = 0; + for (i = 0; i < size_w; ++i) { + carry += vk[i] + w0[i]; + vk[i] = carry & PyLong_MASK; + carry >>= PyLong_SHIFT; + } + --q; + } + + /* store quotient digit */ + assert(q < PyLong_BASE); + *--ak = q; + } + + /* unshift remainder; we reuse w to store the result */ + carry = v_rshift(w0, v0, size_w, d); + assert(carry==0); + Py_DECREF(v); + + *prem = long_normalize(w); + return long_normalize(a); +} + +/* For a nonzero PyLong a, express a in the form x * 2**e, with 0.5 <= + abs(x) < 1.0 and e >= 0; return x and put e in *e. Here x is + rounded to DBL_MANT_DIG significant bits using round-half-to-even. + If a == 0, return 0.0 and set *e = 0. If the resulting exponent + e is larger than PY_SSIZE_T_MAX, raise OverflowError and return + -1.0. */ + +/* attempt to define 2.0**DBL_MANT_DIG as a compile-time constant */ +#if DBL_MANT_DIG == 53 +#define EXP2_DBL_MANT_DIG 9007199254740992.0 +#else +#define EXP2_DBL_MANT_DIG (ldexp(1.0, DBL_MANT_DIG)) +#endif + +double +_PyLong_Frexp(PyLongObject *a, Py_ssize_t *e) +{ + Py_ssize_t a_size, a_bits, shift_digits, shift_bits, x_size; + /* See below for why x_digits is always large enough. */ + digit rem, x_digits[2 + (DBL_MANT_DIG + 1) / PyLong_SHIFT]; + double dx; + /* Correction term for round-half-to-even rounding. For a digit x, + "x + half_even_correction[x & 7]" gives x rounded to the nearest + multiple of 4, rounding ties to a multiple of 8. */ + static const int half_even_correction[8] = {0, -1, -2, 1, 0, -1, 2, 1}; + + a_size = ABS(Py_SIZE(a)); + if (a_size == 0) { + /* Special case for 0: significand 0.0, exponent 0. */ + *e = 0; + return 0.0; + } + a_bits = bits_in_digit(a->ob_digit[a_size-1]); + /* The following is an overflow-free version of the check + "if ((a_size - 1) * PyLong_SHIFT + a_bits > PY_SSIZE_T_MAX) ..." */ + if (a_size >= (PY_SSIZE_T_MAX - 1) / PyLong_SHIFT + 1 && + (a_size > (PY_SSIZE_T_MAX - 1) / PyLong_SHIFT + 1 || + a_bits > (PY_SSIZE_T_MAX - 1) % PyLong_SHIFT + 1)) + goto overflow; + a_bits = (a_size - 1) * PyLong_SHIFT + a_bits; + + /* Shift the first DBL_MANT_DIG + 2 bits of a into x_digits[0:x_size] + (shifting left if a_bits <= DBL_MANT_DIG + 2). + + Number of digits needed for result: write // for floor division. + Then if shifting left, we end up using + + 1 + a_size + (DBL_MANT_DIG + 2 - a_bits) // PyLong_SHIFT + + digits. If shifting right, we use + + a_size - (a_bits - DBL_MANT_DIG - 2) // PyLong_SHIFT + + digits. Using a_size = 1 + (a_bits - 1) // PyLong_SHIFT along with + the inequalities + + m // PyLong_SHIFT + n // PyLong_SHIFT <= (m + n) // PyLong_SHIFT + m // PyLong_SHIFT - n // PyLong_SHIFT <= + 1 + (m - n - 1) // PyLong_SHIFT, + + valid for any integers m and n, we find that x_size satisfies + + x_size <= 2 + (DBL_MANT_DIG + 1) // PyLong_SHIFT + + in both cases. + */ + if (a_bits <= DBL_MANT_DIG + 2) { + shift_digits = (DBL_MANT_DIG + 2 - a_bits) / PyLong_SHIFT; + shift_bits = (DBL_MANT_DIG + 2 - a_bits) % PyLong_SHIFT; + x_size = 0; + while (x_size < shift_digits) + x_digits[x_size++] = 0; + rem = v_lshift(x_digits + x_size, a->ob_digit, a_size, + (int)shift_bits); + x_size += a_size; + x_digits[x_size++] = rem; + } + else { + shift_digits = (a_bits - DBL_MANT_DIG - 2) / PyLong_SHIFT; + shift_bits = (a_bits - DBL_MANT_DIG - 2) % PyLong_SHIFT; + rem = v_rshift(x_digits, a->ob_digit + shift_digits, + a_size - shift_digits, (int)shift_bits); + x_size = a_size - shift_digits; + /* For correct rounding below, we need the least significant + bit of x to be 'sticky' for this shift: if any of the bits + shifted out was nonzero, we set the least significant bit + of x. */ + if (rem) + x_digits[0] |= 1; + else + while (shift_digits > 0) + if (a->ob_digit[--shift_digits]) { + x_digits[0] |= 1; + break; + } + } + assert(1 <= x_size && + x_size <= (Py_ssize_t)(sizeof(x_digits)/sizeof(digit))); + + /* Round, and convert to double. */ + x_digits[0] += half_even_correction[x_digits[0] & 7]; + dx = x_digits[--x_size]; + while (x_size > 0) + dx = dx * PyLong_BASE + x_digits[--x_size]; + + /* Rescale; make correction if result is 1.0. */ + dx /= 4.0 * EXP2_DBL_MANT_DIG; + if (dx == 1.0) { + if (a_bits == PY_SSIZE_T_MAX) + goto overflow; + dx = 0.5; + a_bits += 1; + } + + *e = a_bits; + return Py_SIZE(a) < 0 ? -dx : dx; + + overflow: + /* exponent > PY_SSIZE_T_MAX */ + PyErr_SetString(PyExc_OverflowError, + "huge integer: number of bits overflows a Py_ssize_t"); + *e = 0; + return -1.0; +} + +/* Get a C double from a long int object. Rounds to the nearest double, + using the round-half-to-even rule in the case of a tie. */ + +double +PyLong_AsDouble(PyObject *v) +{ + Py_ssize_t exponent; + double x; + + if (v == NULL || !PyLong_Check(v)) { + PyErr_BadInternalCall(); + return -1.0; + } + x = _PyLong_Frexp((PyLongObject *)v, &exponent); + if ((x == -1.0 && PyErr_Occurred()) || exponent > DBL_MAX_EXP) { + PyErr_SetString(PyExc_OverflowError, + "long int too large to convert to float"); + return -1.0; + } + return ldexp(x, (int)exponent); +} + +/* Methods */ + +static void +long_dealloc(PyObject *v) +{ + Py_TYPE(v)->tp_free(v); +} + +static PyObject * +long_repr(PyObject *v) +{ + return _PyLong_Format(v, 10, 1, 0); +} + +static PyObject * +long_str(PyObject *v) +{ + return _PyLong_Format(v, 10, 0, 0); +} + +static int +long_compare(PyLongObject *a, PyLongObject *b) +{ + Py_ssize_t sign; + + if (Py_SIZE(a) != Py_SIZE(b)) { + sign = Py_SIZE(a) - Py_SIZE(b); + } + else { + Py_ssize_t i = ABS(Py_SIZE(a)); + while (--i >= 0 && a->ob_digit[i] == b->ob_digit[i]) + ; + if (i < 0) + sign = 0; + else { + sign = (sdigit)a->ob_digit[i] - (sdigit)b->ob_digit[i]; + if (Py_SIZE(a) < 0) + sign = -sign; + } + } + return sign < 0 ? -1 : sign > 0 ? 1 : 0; +} + +static long +long_hash(PyLongObject *v) +{ + unsigned long x; + Py_ssize_t i; + int sign; + + /* This is designed so that Python ints and longs with the + same value hash to the same value, otherwise comparisons + of mapping keys will turn out weird */ + i = v->ob_size; + sign = 1; + x = 0; + if (i < 0) { + sign = -1; + i = -(i); + } + /* The following loop produces a C unsigned long x such that x is + congruent to the absolute value of v modulo ULONG_MAX. The + resulting x is nonzero if and only if v is. */ + while (--i >= 0) { + /* Force a native long #-bits (32 or 64) circular shift */ + x = (x >> (8*SIZEOF_LONG-PyLong_SHIFT)) | (x << PyLong_SHIFT); + x += v->ob_digit[i]; + /* If the addition above overflowed we compensate by + incrementing. This preserves the value modulo + ULONG_MAX. */ + if (x < v->ob_digit[i]) + x++; + } + x = x * sign; + if (x == (unsigned long)-1) + x = (unsigned long)-2; + return (long)x; +} + + +/* Add the absolute values of two long integers. */ + +static PyLongObject * +x_add(PyLongObject *a, PyLongObject *b) +{ + Py_ssize_t size_a = ABS(Py_SIZE(a)), size_b = ABS(Py_SIZE(b)); + PyLongObject *z; + Py_ssize_t i; + digit carry = 0; + + /* Ensure a is the larger of the two: */ + if (size_a < size_b) { + { PyLongObject *temp = a; a = b; b = temp; } + { Py_ssize_t size_temp = size_a; + size_a = size_b; + size_b = size_temp; } + } + z = _PyLong_New(size_a+1); + if (z == NULL) + return NULL; + for (i = 0; i < size_b; ++i) { + carry += a->ob_digit[i] + b->ob_digit[i]; + z->ob_digit[i] = carry & PyLong_MASK; + carry >>= PyLong_SHIFT; + } + for (; i < size_a; ++i) { + carry += a->ob_digit[i]; + z->ob_digit[i] = carry & PyLong_MASK; + carry >>= PyLong_SHIFT; + } + z->ob_digit[i] = carry; + return long_normalize(z); +} + +/* Subtract the absolute values of two integers. */ + +static PyLongObject * +x_sub(PyLongObject *a, PyLongObject *b) +{ + Py_ssize_t size_a = ABS(Py_SIZE(a)), size_b = ABS(Py_SIZE(b)); + PyLongObject *z; + Py_ssize_t i; + int sign = 1; + digit borrow = 0; + + /* Ensure a is the larger of the two: */ + if (size_a < size_b) { + sign = -1; + { PyLongObject *temp = a; a = b; b = temp; } + { Py_ssize_t size_temp = size_a; + size_a = size_b; + size_b = size_temp; } + } + else if (size_a == size_b) { + /* Find highest digit where a and b differ: */ + i = size_a; + while (--i >= 0 && a->ob_digit[i] == b->ob_digit[i]) + ; + if (i < 0) + return _PyLong_New(0); + if (a->ob_digit[i] < b->ob_digit[i]) { + sign = -1; + { PyLongObject *temp = a; a = b; b = temp; } + } + size_a = size_b = i+1; + } + z = _PyLong_New(size_a); + if (z == NULL) + return NULL; + for (i = 0; i < size_b; ++i) { + /* The following assumes unsigned arithmetic + works module 2**N for some N>PyLong_SHIFT. */ + borrow = a->ob_digit[i] - b->ob_digit[i] - borrow; + z->ob_digit[i] = borrow & PyLong_MASK; + borrow >>= PyLong_SHIFT; + borrow &= 1; /* Keep only one sign bit */ + } + for (; i < size_a; ++i) { + borrow = a->ob_digit[i] - borrow; + z->ob_digit[i] = borrow & PyLong_MASK; + borrow >>= PyLong_SHIFT; + borrow &= 1; /* Keep only one sign bit */ + } + assert(borrow == 0); + if (sign < 0) + z->ob_size = -(z->ob_size); + return long_normalize(z); +} + +static PyObject * +long_add(PyLongObject *v, PyLongObject *w) +{ + PyLongObject *a, *b, *z; + + CONVERT_BINOP((PyObject *)v, (PyObject *)w, &a, &b); + + if (a->ob_size < 0) { + if (b->ob_size < 0) { + z = x_add(a, b); + if (z != NULL && z->ob_size != 0) + z->ob_size = -(z->ob_size); + } + else + z = x_sub(b, a); + } + else { + if (b->ob_size < 0) + z = x_sub(a, b); + else + z = x_add(a, b); + } + Py_DECREF(a); + Py_DECREF(b); + return (PyObject *)z; +} + +static PyObject * +long_sub(PyLongObject *v, PyLongObject *w) +{ + PyLongObject *a, *b, *z; + + CONVERT_BINOP((PyObject *)v, (PyObject *)w, &a, &b); + + if (a->ob_size < 0) { + if (b->ob_size < 0) + z = x_sub(a, b); + else + z = x_add(a, b); + if (z != NULL && z->ob_size != 0) + z->ob_size = -(z->ob_size); + } + else { + if (b->ob_size < 0) + z = x_add(a, b); + else + z = x_sub(a, b); + } + Py_DECREF(a); + Py_DECREF(b); + return (PyObject *)z; +} + +/* Grade school multiplication, ignoring the signs. + * Returns the absolute value of the product, or NULL if error. + */ +static PyLongObject * +x_mul(PyLongObject *a, PyLongObject *b) +{ + PyLongObject *z; + Py_ssize_t size_a = ABS(Py_SIZE(a)); + Py_ssize_t size_b = ABS(Py_SIZE(b)); + Py_ssize_t i; + + z = _PyLong_New(size_a + size_b); + if (z == NULL) + return NULL; + + memset(z->ob_digit, 0, Py_SIZE(z) * sizeof(digit)); + if (a == b) { + /* Efficient squaring per HAC, Algorithm 14.16: + * http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf + * Gives slightly less than a 2x speedup when a == b, + * via exploiting that each entry in the multiplication + * pyramid appears twice (except for the size_a squares). + */ + for (i = 0; i < size_a; ++i) { + twodigits carry; + twodigits f = a->ob_digit[i]; + digit *pz = z->ob_digit + (i << 1); + digit *pa = a->ob_digit + i + 1; + digit *paend = a->ob_digit + size_a; + + SIGCHECK({ + Py_DECREF(z); + return NULL; + }); + + carry = *pz + f * f; + *pz++ = (digit)(carry & PyLong_MASK); + carry >>= PyLong_SHIFT; + assert(carry <= PyLong_MASK); + + /* Now f is added in twice in each column of the + * pyramid it appears. Same as adding f<<1 once. + */ + f <<= 1; + while (pa < paend) { + carry += *pz + *pa++ * f; + *pz++ = (digit)(carry & PyLong_MASK); + carry >>= PyLong_SHIFT; + assert(carry <= (PyLong_MASK << 1)); + } + if (carry) { + carry += *pz; + *pz++ = (digit)(carry & PyLong_MASK); + carry >>= PyLong_SHIFT; + } + if (carry) + *pz += (digit)(carry & PyLong_MASK); + assert((carry >> PyLong_SHIFT) == 0); + } + } + else { /* a is not the same as b -- gradeschool long mult */ + for (i = 0; i < size_a; ++i) { + twodigits carry = 0; + twodigits f = a->ob_digit[i]; + digit *pz = z->ob_digit + i; + digit *pb = b->ob_digit; + digit *pbend = b->ob_digit + size_b; + + SIGCHECK({ + Py_DECREF(z); + return NULL; + }); + + while (pb < pbend) { + carry += *pz + *pb++ * f; + *pz++ = (digit)(carry & PyLong_MASK); + carry >>= PyLong_SHIFT; + assert(carry <= PyLong_MASK); + } + if (carry) + *pz += (digit)(carry & PyLong_MASK); + assert((carry >> PyLong_SHIFT) == 0); + } + } + return long_normalize(z); +} + +/* A helper for Karatsuba multiplication (k_mul). + Takes a long "n" and an integer "size" representing the place to + split, and sets low and high such that abs(n) == (high << size) + low, + viewing the shift as being by digits. The sign bit is ignored, and + the return values are >= 0. + Returns 0 on success, -1 on failure. +*/ +static int +kmul_split(PyLongObject *n, + Py_ssize_t size, + PyLongObject **high, + PyLongObject **low) +{ + PyLongObject *hi, *lo; + Py_ssize_t size_lo, size_hi; + const Py_ssize_t size_n = ABS(Py_SIZE(n)); + + size_lo = MIN(size_n, size); + size_hi = size_n - size_lo; + + if ((hi = _PyLong_New(size_hi)) == NULL) + return -1; + if ((lo = _PyLong_New(size_lo)) == NULL) { + Py_DECREF(hi); + return -1; + } + + memcpy(lo->ob_digit, n->ob_digit, size_lo * sizeof(digit)); + memcpy(hi->ob_digit, n->ob_digit + size_lo, size_hi * sizeof(digit)); + + *high = long_normalize(hi); + *low = long_normalize(lo); + return 0; +} + +static PyLongObject *k_lopsided_mul(PyLongObject *a, PyLongObject *b); + +/* Karatsuba multiplication. Ignores the input signs, and returns the + * absolute value of the product (or NULL if error). + * See Knuth Vol. 2 Chapter 4.3.3 (Pp. 294-295). + */ +static PyLongObject * +k_mul(PyLongObject *a, PyLongObject *b) +{ + Py_ssize_t asize = ABS(Py_SIZE(a)); + Py_ssize_t bsize = ABS(Py_SIZE(b)); + PyLongObject *ah = NULL; + PyLongObject *al = NULL; + PyLongObject *bh = NULL; + PyLongObject *bl = NULL; + PyLongObject *ret = NULL; + PyLongObject *t1, *t2, *t3; + Py_ssize_t shift; /* the number of digits we split off */ + Py_ssize_t i; + + /* (ah*X+al)(bh*X+bl) = ah*bh*X*X + (ah*bl + al*bh)*X + al*bl + * Let k = (ah+al)*(bh+bl) = ah*bl + al*bh + ah*bh + al*bl + * Then the original product is + * ah*bh*X*X + (k - ah*bh - al*bl)*X + al*bl + * By picking X to be a power of 2, "*X" is just shifting, and it's + * been reduced to 3 multiplies on numbers half the size. + */ + + /* We want to split based on the larger number; fiddle so that b + * is largest. + */ + if (asize > bsize) { + t1 = a; + a = b; + b = t1; + + i = asize; + asize = bsize; + bsize = i; + } + + /* Use gradeschool math when either number is too small. */ + i = a == b ? KARATSUBA_SQUARE_CUTOFF : KARATSUBA_CUTOFF; + if (asize <= i) { + if (asize == 0) + return _PyLong_New(0); + else + return x_mul(a, b); + } + + /* If a is small compared to b, splitting on b gives a degenerate + * case with ah==0, and Karatsuba may be (even much) less efficient + * than "grade school" then. However, we can still win, by viewing + * b as a string of "big digits", each of width a->ob_size. That + * leads to a sequence of balanced calls to k_mul. + */ + if (2 * asize <= bsize) + return k_lopsided_mul(a, b); + + /* Split a & b into hi & lo pieces. */ + shift = bsize >> 1; + if (kmul_split(a, shift, &ah, &al) < 0) goto fail; + assert(Py_SIZE(ah) > 0); /* the split isn't degenerate */ + + if (a == b) { + bh = ah; + bl = al; + Py_INCREF(bh); + Py_INCREF(bl); + } + else if (kmul_split(b, shift, &bh, &bl) < 0) goto fail; + + /* The plan: + * 1. Allocate result space (asize + bsize digits: that's always + * enough). + * 2. Compute ah*bh, and copy into result at 2*shift. + * 3. Compute al*bl, and copy into result at 0. Note that this + * can't overlap with #2. + * 4. Subtract al*bl from the result, starting at shift. This may + * underflow (borrow out of the high digit), but we don't care: + * we're effectively doing unsigned arithmetic mod + * PyLong_BASE**(sizea + sizeb), and so long as the *final* result fits, + * borrows and carries out of the high digit can be ignored. + * 5. Subtract ah*bh from the result, starting at shift. + * 6. Compute (ah+al)*(bh+bl), and add it into the result starting + * at shift. + */ + + /* 1. Allocate result space. */ + ret = _PyLong_New(asize + bsize); + if (ret == NULL) goto fail; +#ifdef Py_DEBUG + /* Fill with trash, to catch reference to uninitialized digits. */ + memset(ret->ob_digit, 0xDF, Py_SIZE(ret) * sizeof(digit)); +#endif + + /* 2. t1 <- ah*bh, and copy into high digits of result. */ + if ((t1 = k_mul(ah, bh)) == NULL) goto fail; + assert(Py_SIZE(t1) >= 0); + assert(2*shift + Py_SIZE(t1) <= Py_SIZE(ret)); + memcpy(ret->ob_digit + 2*shift, t1->ob_digit, + Py_SIZE(t1) * sizeof(digit)); + + /* Zero-out the digits higher than the ah*bh copy. */ + i = Py_SIZE(ret) - 2*shift - Py_SIZE(t1); + if (i) + memset(ret->ob_digit + 2*shift + Py_SIZE(t1), 0, + i * sizeof(digit)); + + /* 3. t2 <- al*bl, and copy into the low digits. */ + if ((t2 = k_mul(al, bl)) == NULL) { + Py_DECREF(t1); + goto fail; + } + assert(Py_SIZE(t2) >= 0); + assert(Py_SIZE(t2) <= 2*shift); /* no overlap with high digits */ + memcpy(ret->ob_digit, t2->ob_digit, Py_SIZE(t2) * sizeof(digit)); + + /* Zero out remaining digits. */ + i = 2*shift - Py_SIZE(t2); /* number of uninitialized digits */ + if (i) + memset(ret->ob_digit + Py_SIZE(t2), 0, i * sizeof(digit)); + + /* 4 & 5. Subtract ah*bh (t1) and al*bl (t2). We do al*bl first + * because it's fresher in cache. + */ + i = Py_SIZE(ret) - shift; /* # digits after shift */ + (void)v_isub(ret->ob_digit + shift, i, t2->ob_digit, Py_SIZE(t2)); + Py_DECREF(t2); + + (void)v_isub(ret->ob_digit + shift, i, t1->ob_digit, Py_SIZE(t1)); + Py_DECREF(t1); + + /* 6. t3 <- (ah+al)(bh+bl), and add into result. */ + if ((t1 = x_add(ah, al)) == NULL) goto fail; + Py_DECREF(ah); + Py_DECREF(al); + ah = al = NULL; + + if (a == b) { + t2 = t1; + Py_INCREF(t2); + } + else if ((t2 = x_add(bh, bl)) == NULL) { + Py_DECREF(t1); + goto fail; + } + Py_DECREF(bh); + Py_DECREF(bl); + bh = bl = NULL; + + t3 = k_mul(t1, t2); + Py_DECREF(t1); + Py_DECREF(t2); + if (t3 == NULL) goto fail; + assert(Py_SIZE(t3) >= 0); + + /* Add t3. It's not obvious why we can't run out of room here. + * See the (*) comment after this function. + */ + (void)v_iadd(ret->ob_digit + shift, i, t3->ob_digit, Py_SIZE(t3)); + Py_DECREF(t3); + + return long_normalize(ret); + + fail: + Py_XDECREF(ret); + Py_XDECREF(ah); + Py_XDECREF(al); + Py_XDECREF(bh); + Py_XDECREF(bl); + return NULL; +} + +/* (*) Why adding t3 can't "run out of room" above. + +Let f(x) mean the floor of x and c(x) mean the ceiling of x. Some facts +to start with: + +1. For any integer i, i = c(i/2) + f(i/2). In particular, + bsize = c(bsize/2) + f(bsize/2). +2. shift = f(bsize/2) +3. asize <= bsize +4. Since we call k_lopsided_mul if asize*2 <= bsize, asize*2 > bsize in this + routine, so asize > bsize/2 >= f(bsize/2) in this routine. + +We allocated asize + bsize result digits, and add t3 into them at an offset +of shift. This leaves asize+bsize-shift allocated digit positions for t3 +to fit into, = (by #1 and #2) asize + f(bsize/2) + c(bsize/2) - f(bsize/2) = +asize + c(bsize/2) available digit positions. + +bh has c(bsize/2) digits, and bl at most f(size/2) digits. So bh+hl has +at most c(bsize/2) digits + 1 bit. + +If asize == bsize, ah has c(bsize/2) digits, else ah has at most f(bsize/2) +digits, and al has at most f(bsize/2) digits in any case. So ah+al has at +most (asize == bsize ? c(bsize/2) : f(bsize/2)) digits + 1 bit. + +The product (ah+al)*(bh+bl) therefore has at most + + c(bsize/2) + (asize == bsize ? c(bsize/2) : f(bsize/2)) digits + 2 bits + +and we have asize + c(bsize/2) available digit positions. We need to show +this is always enough. An instance of c(bsize/2) cancels out in both, so +the question reduces to whether asize digits is enough to hold +(asize == bsize ? c(bsize/2) : f(bsize/2)) digits + 2 bits. If asize < bsize, +then we're asking whether asize digits >= f(bsize/2) digits + 2 bits. By #4, +asize is at least f(bsize/2)+1 digits, so this in turn reduces to whether 1 +digit is enough to hold 2 bits. This is so since PyLong_SHIFT=15 >= 2. If +asize == bsize, then we're asking whether bsize digits is enough to hold +c(bsize/2) digits + 2 bits, or equivalently (by #1) whether f(bsize/2) digits +is enough to hold 2 bits. This is so if bsize >= 2, which holds because +bsize >= KARATSUBA_CUTOFF >= 2. + +Note that since there's always enough room for (ah+al)*(bh+bl), and that's +clearly >= each of ah*bh and al*bl, there's always enough room to subtract +ah*bh and al*bl too. +*/ + +/* b has at least twice the digits of a, and a is big enough that Karatsuba + * would pay off *if* the inputs had balanced sizes. View b as a sequence + * of slices, each with a->ob_size digits, and multiply the slices by a, + * one at a time. This gives k_mul balanced inputs to work with, and is + * also cache-friendly (we compute one double-width slice of the result + * at a time, then move on, never backtracking except for the helpful + * single-width slice overlap between successive partial sums). + */ +static PyLongObject * +k_lopsided_mul(PyLongObject *a, PyLongObject *b) +{ + const Py_ssize_t asize = ABS(Py_SIZE(a)); + Py_ssize_t bsize = ABS(Py_SIZE(b)); + Py_ssize_t nbdone; /* # of b digits already multiplied */ + PyLongObject *ret; + PyLongObject *bslice = NULL; + + assert(asize > KARATSUBA_CUTOFF); + assert(2 * asize <= bsize); + + /* Allocate result space, and zero it out. */ + ret = _PyLong_New(asize + bsize); + if (ret == NULL) + return NULL; + memset(ret->ob_digit, 0, Py_SIZE(ret) * sizeof(digit)); + + /* Successive slices of b are copied into bslice. */ + bslice = _PyLong_New(asize); + if (bslice == NULL) + goto fail; + + nbdone = 0; + while (bsize > 0) { + PyLongObject *product; + const Py_ssize_t nbtouse = MIN(bsize, asize); + + /* Multiply the next slice of b by a. */ + memcpy(bslice->ob_digit, b->ob_digit + nbdone, + nbtouse * sizeof(digit)); + Py_SIZE(bslice) = nbtouse; + product = k_mul(a, bslice); + if (product == NULL) + goto fail; + + /* Add into result. */ + (void)v_iadd(ret->ob_digit + nbdone, Py_SIZE(ret) - nbdone, + product->ob_digit, Py_SIZE(product)); + Py_DECREF(product); + + bsize -= nbtouse; + nbdone += nbtouse; + } + + Py_DECREF(bslice); + return long_normalize(ret); + + fail: + Py_DECREF(ret); + Py_XDECREF(bslice); + return NULL; +} + +static PyObject * +long_mul(PyLongObject *v, PyLongObject *w) +{ + PyLongObject *a, *b, *z; + + if (!convert_binop((PyObject *)v, (PyObject *)w, &a, &b)) { + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + + z = k_mul(a, b); + /* Negate if exactly one of the inputs is negative. */ + if (((a->ob_size ^ b->ob_size) < 0) && z) + z->ob_size = -(z->ob_size); + Py_DECREF(a); + Py_DECREF(b); + return (PyObject *)z; +} + +/* The / and % operators are now defined in terms of divmod(). + The expression a mod b has the value a - b*floor(a/b). + The long_divrem function gives the remainder after division of + |a| by |b|, with the sign of a. This is also expressed + as a - b*trunc(a/b), if trunc truncates towards zero. + Some examples: + a b a rem b a mod b + 13 10 3 3 + -13 10 -3 7 + 13 -10 3 -7 + -13 -10 -3 -3 + So, to get from rem to mod, we have to add b if a and b + have different signs. We then subtract one from the 'div' + part of the outcome to keep the invariant intact. */ + +/* Compute + * *pdiv, *pmod = divmod(v, w) + * NULL can be passed for pdiv or pmod, in which case that part of + * the result is simply thrown away. The caller owns a reference to + * each of these it requests (does not pass NULL for). + */ +static int +l_divmod(PyLongObject *v, PyLongObject *w, + PyLongObject **pdiv, PyLongObject **pmod) +{ + PyLongObject *div, *mod; + + if (long_divrem(v, w, &div, &mod) < 0) + return -1; + if ((Py_SIZE(mod) < 0 && Py_SIZE(w) > 0) || + (Py_SIZE(mod) > 0 && Py_SIZE(w) < 0)) { + PyLongObject *temp; + PyLongObject *one; + temp = (PyLongObject *) long_add(mod, w); + Py_DECREF(mod); + mod = temp; + if (mod == NULL) { + Py_DECREF(div); + return -1; + } + one = (PyLongObject *) PyLong_FromLong(1L); + if (one == NULL || + (temp = (PyLongObject *) long_sub(div, one)) == NULL) { + Py_DECREF(mod); + Py_DECREF(div); + Py_XDECREF(one); + return -1; + } + Py_DECREF(one); + Py_DECREF(div); + div = temp; + } + if (pdiv != NULL) + *pdiv = div; + else + Py_DECREF(div); + + if (pmod != NULL) + *pmod = mod; + else + Py_DECREF(mod); + + return 0; +} + +static PyObject * +long_div(PyObject *v, PyObject *w) +{ + PyLongObject *a, *b, *div; + + CONVERT_BINOP(v, w, &a, &b); + if (l_divmod(a, b, &div, NULL) < 0) + div = NULL; + Py_DECREF(a); + Py_DECREF(b); + return (PyObject *)div; +} + +static PyObject * +long_classic_div(PyObject *v, PyObject *w) +{ + PyLongObject *a, *b, *div; + + CONVERT_BINOP(v, w, &a, &b); + if (Py_DivisionWarningFlag && + PyErr_Warn(PyExc_DeprecationWarning, "classic long division") < 0) + div = NULL; + else if (l_divmod(a, b, &div, NULL) < 0) + div = NULL; + Py_DECREF(a); + Py_DECREF(b); + return (PyObject *)div; +} + +/* PyLong/PyLong -> float, with correctly rounded result. */ + +#define MANT_DIG_DIGITS (DBL_MANT_DIG / PyLong_SHIFT) +#define MANT_DIG_BITS (DBL_MANT_DIG % PyLong_SHIFT) + +static PyObject * +long_true_divide(PyObject *v, PyObject *w) +{ + PyLongObject *a, *b, *x; + Py_ssize_t a_size, b_size, shift, extra_bits, diff, x_size, x_bits; + digit mask, low; + int inexact, negate, a_is_small, b_is_small; + double dx, result; + + CONVERT_BINOP(v, w, &a, &b); + + /* + Method in a nutshell: + + 0. reduce to case a, b > 0; filter out obvious underflow/overflow + 1. choose a suitable integer 'shift' + 2. use integer arithmetic to compute x = floor(2**-shift*a/b) + 3. adjust x for correct rounding + 4. convert x to a double dx with the same value + 5. return ldexp(dx, shift). + + In more detail: + + 0. For any a, a/0 raises ZeroDivisionError; for nonzero b, 0/b + returns either 0.0 or -0.0, depending on the sign of b. For a and + b both nonzero, ignore signs of a and b, and add the sign back in + at the end. Now write a_bits and b_bits for the bit lengths of a + and b respectively (that is, a_bits = 1 + floor(log_2(a)); likewise + for b). Then + + 2**(a_bits - b_bits - 1) < a/b < 2**(a_bits - b_bits + 1). + + So if a_bits - b_bits > DBL_MAX_EXP then a/b > 2**DBL_MAX_EXP and + so overflows. Similarly, if a_bits - b_bits < DBL_MIN_EXP - + DBL_MANT_DIG - 1 then a/b underflows to 0. With these cases out of + the way, we can assume that + + DBL_MIN_EXP - DBL_MANT_DIG - 1 <= a_bits - b_bits <= DBL_MAX_EXP. + + 1. The integer 'shift' is chosen so that x has the right number of + bits for a double, plus two or three extra bits that will be used + in the rounding decisions. Writing a_bits and b_bits for the + number of significant bits in a and b respectively, a + straightforward formula for shift is: + + shift = a_bits - b_bits - DBL_MANT_DIG - 2 + + This is fine in the usual case, but if a/b is smaller than the + smallest normal float then it can lead to double rounding on an + IEEE 754 platform, giving incorrectly rounded results. So we + adjust the formula slightly. The actual formula used is: + + shift = MAX(a_bits - b_bits, DBL_MIN_EXP) - DBL_MANT_DIG - 2 + + 2. The quantity x is computed by first shifting a (left -shift bits + if shift <= 0, right shift bits if shift > 0) and then dividing by + b. For both the shift and the division, we keep track of whether + the result is inexact, in a flag 'inexact'; this information is + needed at the rounding stage. + + With the choice of shift above, together with our assumption that + a_bits - b_bits >= DBL_MIN_EXP - DBL_MANT_DIG - 1, it follows + that x >= 1. + + 3. Now x * 2**shift <= a/b < (x+1) * 2**shift. We want to replace + this with an exactly representable float of the form + + round(x/2**extra_bits) * 2**(extra_bits+shift). + + For float representability, we need x/2**extra_bits < + 2**DBL_MANT_DIG and extra_bits + shift >= DBL_MIN_EXP - + DBL_MANT_DIG. This translates to the condition: + + extra_bits >= MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG + + To round, we just modify the bottom digit of x in-place; this can + end up giving a digit with value > PyLONG_MASK, but that's not a + problem since digits can hold values up to 2*PyLONG_MASK+1. + + With the original choices for shift above, extra_bits will always + be 2 or 3. Then rounding under the round-half-to-even rule, we + round up iff the most significant of the extra bits is 1, and + either: (a) the computation of x in step 2 had an inexact result, + or (b) at least one other of the extra bits is 1, or (c) the least + significant bit of x (above those to be rounded) is 1. + + 4. Conversion to a double is straightforward; all floating-point + operations involved in the conversion are exact, so there's no + danger of rounding errors. + + 5. Use ldexp(x, shift) to compute x*2**shift, the final result. + The result will always be exactly representable as a double, except + in the case that it overflows. To avoid dependence on the exact + behaviour of ldexp on overflow, we check for overflow before + applying ldexp. The result of ldexp is adjusted for sign before + returning. + */ + + /* Reduce to case where a and b are both positive. */ + a_size = ABS(Py_SIZE(a)); + b_size = ABS(Py_SIZE(b)); + negate = (Py_SIZE(a) < 0) ^ (Py_SIZE(b) < 0); + if (b_size == 0) { + PyErr_SetString(PyExc_ZeroDivisionError, + "division by zero"); + goto error; + } + if (a_size == 0) + goto underflow_or_zero; + + /* Fast path for a and b small (exactly representable in a double). + Relies on floating-point division being correctly rounded; results + may be subject to double rounding on x86 machines that operate with + the x87 FPU set to 64-bit precision. */ + a_is_small = a_size <= MANT_DIG_DIGITS || + (a_size == MANT_DIG_DIGITS+1 && + a->ob_digit[MANT_DIG_DIGITS] >> MANT_DIG_BITS == 0); + b_is_small = b_size <= MANT_DIG_DIGITS || + (b_size == MANT_DIG_DIGITS+1 && + b->ob_digit[MANT_DIG_DIGITS] >> MANT_DIG_BITS == 0); + if (a_is_small && b_is_small) { + double da, db; + da = a->ob_digit[--a_size]; + while (a_size > 0) + da = da * PyLong_BASE + a->ob_digit[--a_size]; + db = b->ob_digit[--b_size]; + while (b_size > 0) + db = db * PyLong_BASE + b->ob_digit[--b_size]; + result = da / db; + goto success; + } + + /* Catch obvious cases of underflow and overflow */ + diff = a_size - b_size; + if (diff > PY_SSIZE_T_MAX/PyLong_SHIFT - 1) + /* Extreme overflow */ + goto overflow; + else if (diff < 1 - PY_SSIZE_T_MAX/PyLong_SHIFT) + /* Extreme underflow */ + goto underflow_or_zero; + /* Next line is now safe from overflowing a Py_ssize_t */ + diff = diff * PyLong_SHIFT + bits_in_digit(a->ob_digit[a_size - 1]) - + bits_in_digit(b->ob_digit[b_size - 1]); + /* Now diff = a_bits - b_bits. */ + if (diff > DBL_MAX_EXP) + goto overflow; + else if (diff < DBL_MIN_EXP - DBL_MANT_DIG - 1) + goto underflow_or_zero; + + /* Choose value for shift; see comments for step 1 above. */ + shift = MAX(diff, DBL_MIN_EXP) - DBL_MANT_DIG - 2; + + inexact = 0; + + /* x = abs(a * 2**-shift) */ + if (shift <= 0) { + Py_ssize_t i, shift_digits = -shift / PyLong_SHIFT; + digit rem; + /* x = a << -shift */ + if (a_size >= PY_SSIZE_T_MAX - 1 - shift_digits) { + /* In practice, it's probably impossible to end up + here. Both a and b would have to be enormous, + using close to SIZE_T_MAX bytes of memory each. */ + PyErr_SetString(PyExc_OverflowError, + "intermediate overflow during division"); + goto error; + } + x = _PyLong_New(a_size + shift_digits + 1); + if (x == NULL) + goto error; + for (i = 0; i < shift_digits; i++) + x->ob_digit[i] = 0; + rem = v_lshift(x->ob_digit + shift_digits, a->ob_digit, + a_size, -shift % PyLong_SHIFT); + x->ob_digit[a_size + shift_digits] = rem; + } + else { + Py_ssize_t shift_digits = shift / PyLong_SHIFT; + digit rem; + /* x = a >> shift */ + assert(a_size >= shift_digits); + x = _PyLong_New(a_size - shift_digits); + if (x == NULL) + goto error; + rem = v_rshift(x->ob_digit, a->ob_digit + shift_digits, + a_size - shift_digits, shift % PyLong_SHIFT); + /* set inexact if any of the bits shifted out is nonzero */ + if (rem) + inexact = 1; + while (!inexact && shift_digits > 0) + if (a->ob_digit[--shift_digits]) + inexact = 1; + } + long_normalize(x); + x_size = Py_SIZE(x); + + /* x //= b. If the remainder is nonzero, set inexact. We own the only + reference to x, so it's safe to modify it in-place. */ + if (b_size == 1) { + digit rem = inplace_divrem1(x->ob_digit, x->ob_digit, x_size, + b->ob_digit[0]); + long_normalize(x); + if (rem) + inexact = 1; + } + else { + PyLongObject *div, *rem; + div = x_divrem(x, b, &rem); + Py_DECREF(x); + x = div; + if (x == NULL) + goto error; + if (Py_SIZE(rem)) + inexact = 1; + Py_DECREF(rem); + } + x_size = ABS(Py_SIZE(x)); + assert(x_size > 0); /* result of division is never zero */ + x_bits = (x_size-1)*PyLong_SHIFT+bits_in_digit(x->ob_digit[x_size-1]); + + /* The number of extra bits that have to be rounded away. */ + extra_bits = MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG; + assert(extra_bits == 2 || extra_bits == 3); + + /* Round by directly modifying the low digit of x. */ + mask = (digit)1 << (extra_bits - 1); + low = x->ob_digit[0] | inexact; + if (low & mask && low & (3*mask-1)) + low += mask; + x->ob_digit[0] = low & ~(mask-1U); + + /* Convert x to a double dx; the conversion is exact. */ + dx = x->ob_digit[--x_size]; + while (x_size > 0) + dx = dx * PyLong_BASE + x->ob_digit[--x_size]; + Py_DECREF(x); + + /* Check whether ldexp result will overflow a double. */ + if (shift + x_bits >= DBL_MAX_EXP && + (shift + x_bits > DBL_MAX_EXP || dx == ldexp(1.0, (int)x_bits))) + goto overflow; + result = ldexp(dx, (int)shift); + + success: + Py_DECREF(a); + Py_DECREF(b); + return PyFloat_FromDouble(negate ? -result : result); + + underflow_or_zero: + Py_DECREF(a); + Py_DECREF(b); + return PyFloat_FromDouble(negate ? -0.0 : 0.0); + + overflow: + PyErr_SetString(PyExc_OverflowError, + "integer division result too large for a float"); + error: + Py_DECREF(a); + Py_DECREF(b); + return NULL; +} + +static PyObject * +long_mod(PyObject *v, PyObject *w) +{ + PyLongObject *a, *b, *mod; + + CONVERT_BINOP(v, w, &a, &b); + + if (l_divmod(a, b, NULL, &mod) < 0) + mod = NULL; + Py_DECREF(a); + Py_DECREF(b); + return (PyObject *)mod; +} + +static PyObject * +long_divmod(PyObject *v, PyObject *w) +{ + PyLongObject *a, *b, *div, *mod; + PyObject *z; + + CONVERT_BINOP(v, w, &a, &b); + + if (l_divmod(a, b, &div, &mod) < 0) { + Py_DECREF(a); + Py_DECREF(b); + return NULL; + } + z = PyTuple_New(2); + if (z != NULL) { + PyTuple_SetItem(z, 0, (PyObject *) div); + PyTuple_SetItem(z, 1, (PyObject *) mod); + } + else { + Py_DECREF(div); + Py_DECREF(mod); + } + Py_DECREF(a); + Py_DECREF(b); + return z; +} + +/* pow(v, w, x) */ +static PyObject * +long_pow(PyObject *v, PyObject *w, PyObject *x) +{ + PyLongObject *a, *b, *c; /* a,b,c = v,w,x */ + int negativeOutput = 0; /* if x<0 return negative output */ + + PyLongObject *z = NULL; /* accumulated result */ + Py_ssize_t i, j, k; /* counters */ + PyLongObject *temp = NULL; + + /* 5-ary values. If the exponent is large enough, table is + * precomputed so that table[i] == a**i % c for i in range(32). + */ + PyLongObject *table[32] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + + /* a, b, c = v, w, x */ + CONVERT_BINOP(v, w, &a, &b); + if (PyLong_Check(x)) { + c = (PyLongObject *)x; + Py_INCREF(x); + } + else if (PyInt_Check(x)) { + c = (PyLongObject *)PyLong_FromLong(PyInt_AS_LONG(x)); + if (c == NULL) + goto Error; + } + else if (x == Py_None) + c = NULL; + else { + Py_DECREF(a); + Py_DECREF(b); + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + + if (Py_SIZE(b) < 0) { /* if exponent is negative */ + if (c) { + PyErr_SetString(PyExc_TypeError, "pow() 2nd argument " + "cannot be negative when 3rd argument specified"); + goto Error; + } + else { + /* else return a float. This works because we know + that this calls float_pow() which converts its + arguments to double. */ + Py_DECREF(a); + Py_DECREF(b); + return PyFloat_Type.tp_as_number->nb_power(v, w, x); + } + } + + if (c) { + /* if modulus == 0: + raise ValueError() */ + if (Py_SIZE(c) == 0) { + PyErr_SetString(PyExc_ValueError, + "pow() 3rd argument cannot be 0"); + goto Error; + } + + /* if modulus < 0: + negativeOutput = True + modulus = -modulus */ + if (Py_SIZE(c) < 0) { + negativeOutput = 1; + temp = (PyLongObject *)_PyLong_Copy(c); + if (temp == NULL) + goto Error; + Py_DECREF(c); + c = temp; + temp = NULL; + c->ob_size = - c->ob_size; + } + + /* if modulus == 1: + return 0 */ + if ((Py_SIZE(c) == 1) && (c->ob_digit[0] == 1)) { + z = (PyLongObject *)PyLong_FromLong(0L); + goto Done; + } + + /* if base < 0: + base = base % modulus + Having the base positive just makes things easier. */ + if (Py_SIZE(a) < 0) { + if (l_divmod(a, c, NULL, &temp) < 0) + goto Error; + Py_DECREF(a); + a = temp; + temp = NULL; + } + } + + /* At this point a, b, and c are guaranteed non-negative UNLESS + c is NULL, in which case a may be negative. */ + + z = (PyLongObject *)PyLong_FromLong(1L); + if (z == NULL) + goto Error; + + /* Perform a modular reduction, X = X % c, but leave X alone if c + * is NULL. + */ +#define REDUCE(X) \ + do { \ + if (c != NULL) { \ + if (l_divmod(X, c, NULL, &temp) < 0) \ + goto Error; \ + Py_XDECREF(X); \ + X = temp; \ + temp = NULL; \ + } \ + } while(0) + + /* Multiply two values, then reduce the result: + result = X*Y % c. If c is NULL, skip the mod. */ +#define MULT(X, Y, result) \ + do { \ + temp = (PyLongObject *)long_mul(X, Y); \ + if (temp == NULL) \ + goto Error; \ + Py_XDECREF(result); \ + result = temp; \ + temp = NULL; \ + REDUCE(result); \ + } while(0) + + if (Py_SIZE(b) <= FIVEARY_CUTOFF) { + /* Left-to-right binary exponentiation (HAC Algorithm 14.79) */ + /* http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf */ + for (i = Py_SIZE(b) - 1; i >= 0; --i) { + digit bi = b->ob_digit[i]; + + for (j = (digit)1 << (PyLong_SHIFT-1); j != 0; j >>= 1) { + MULT(z, z, z); + if (bi & j) + MULT(z, a, z); + } + } + } + else { + /* Left-to-right 5-ary exponentiation (HAC Algorithm 14.82) */ + Py_INCREF(z); /* still holds 1L */ + table[0] = z; + for (i = 1; i < 32; ++i) + MULT(table[i-1], a, table[i]); + + for (i = Py_SIZE(b) - 1; i >= 0; --i) { + const digit bi = b->ob_digit[i]; + + for (j = PyLong_SHIFT - 5; j >= 0; j -= 5) { + const int index = (bi >> j) & 0x1f; + for (k = 0; k < 5; ++k) + MULT(z, z, z); + if (index) + MULT(z, table[index], z); + } + } + } + + if (negativeOutput && (Py_SIZE(z) != 0)) { + temp = (PyLongObject *)long_sub(z, c); + if (temp == NULL) + goto Error; + Py_DECREF(z); + z = temp; + temp = NULL; + } + goto Done; + + Error: + if (z != NULL) { + Py_DECREF(z); + z = NULL; + } + /* fall through */ + Done: + if (Py_SIZE(b) > FIVEARY_CUTOFF) { + for (i = 0; i < 32; ++i) + Py_XDECREF(table[i]); + } + Py_DECREF(a); + Py_DECREF(b); + Py_XDECREF(c); + Py_XDECREF(temp); + return (PyObject *)z; +} + +static PyObject * +long_invert(PyLongObject *v) +{ + /* Implement ~x as -(x+1) */ + PyLongObject *x; + PyLongObject *w; + w = (PyLongObject *)PyLong_FromLong(1L); + if (w == NULL) + return NULL; + x = (PyLongObject *) long_add(v, w); + Py_DECREF(w); + if (x == NULL) + return NULL; + Py_SIZE(x) = -(Py_SIZE(x)); + return (PyObject *)x; +} + +static PyObject * +long_neg(PyLongObject *v) +{ + PyLongObject *z; + if (v->ob_size == 0 && PyLong_CheckExact(v)) { + /* -0 == 0 */ + Py_INCREF(v); + return (PyObject *) v; + } + z = (PyLongObject *)_PyLong_Copy(v); + if (z != NULL) + z->ob_size = -(v->ob_size); + return (PyObject *)z; +} + +static PyObject * +long_abs(PyLongObject *v) +{ + if (v->ob_size < 0) + return long_neg(v); + else + return long_long((PyObject *)v); +} + +static int +long_nonzero(PyLongObject *v) +{ + return Py_SIZE(v) != 0; +} + +static PyObject * +long_rshift(PyLongObject *v, PyLongObject *w) +{ + PyLongObject *a, *b; + PyLongObject *z = NULL; + Py_ssize_t shiftby, newsize, wordshift, loshift, hishift, i, j; + digit lomask, himask; + + CONVERT_BINOP((PyObject *)v, (PyObject *)w, &a, &b); + + if (Py_SIZE(a) < 0) { + /* Right shifting negative numbers is harder */ + PyLongObject *a1, *a2; + a1 = (PyLongObject *) long_invert(a); + if (a1 == NULL) + goto rshift_error; + a2 = (PyLongObject *) long_rshift(a1, b); + Py_DECREF(a1); + if (a2 == NULL) + goto rshift_error; + z = (PyLongObject *) long_invert(a2); + Py_DECREF(a2); + } + else { + shiftby = PyLong_AsSsize_t((PyObject *)b); + if (shiftby == -1L && PyErr_Occurred()) + goto rshift_error; + if (shiftby < 0) { + PyErr_SetString(PyExc_ValueError, + "negative shift count"); + goto rshift_error; + } + wordshift = shiftby / PyLong_SHIFT; + newsize = ABS(Py_SIZE(a)) - wordshift; + if (newsize <= 0) { + z = _PyLong_New(0); + Py_DECREF(a); + Py_DECREF(b); + return (PyObject *)z; + } + loshift = shiftby % PyLong_SHIFT; + hishift = PyLong_SHIFT - loshift; + lomask = ((digit)1 << hishift) - 1; + himask = PyLong_MASK ^ lomask; + z = _PyLong_New(newsize); + if (z == NULL) + goto rshift_error; + if (Py_SIZE(a) < 0) + Py_SIZE(z) = -(Py_SIZE(z)); + for (i = 0, j = wordshift; i < newsize; i++, j++) { + z->ob_digit[i] = (a->ob_digit[j] >> loshift) & lomask; + if (i+1 < newsize) + z->ob_digit[i] |= (a->ob_digit[j+1] << hishift) & himask; + } + z = long_normalize(z); + } + rshift_error: + Py_DECREF(a); + Py_DECREF(b); + return (PyObject *) z; + +} + +static PyObject * +long_lshift(PyObject *v, PyObject *w) +{ + /* This version due to Tim Peters */ + PyLongObject *a, *b; + PyLongObject *z = NULL; + Py_ssize_t shiftby, oldsize, newsize, wordshift, remshift, i, j; + twodigits accum; + + CONVERT_BINOP(v, w, &a, &b); + + shiftby = PyLong_AsSsize_t((PyObject *)b); + if (shiftby == -1L && PyErr_Occurred()) + goto lshift_error; + if (shiftby < 0) { + PyErr_SetString(PyExc_ValueError, "negative shift count"); + goto lshift_error; + } + /* wordshift, remshift = divmod(shiftby, PyLong_SHIFT) */ + wordshift = shiftby / PyLong_SHIFT; + remshift = shiftby - wordshift * PyLong_SHIFT; + + oldsize = ABS(a->ob_size); + newsize = oldsize + wordshift; + if (remshift) + ++newsize; + z = _PyLong_New(newsize); + if (z == NULL) + goto lshift_error; + if (a->ob_size < 0) + z->ob_size = -(z->ob_size); + for (i = 0; i < wordshift; i++) + z->ob_digit[i] = 0; + accum = 0; + for (i = wordshift, j = 0; j < oldsize; i++, j++) { + accum |= (twodigits)a->ob_digit[j] << remshift; + z->ob_digit[i] = (digit)(accum & PyLong_MASK); + accum >>= PyLong_SHIFT; + } + if (remshift) + z->ob_digit[newsize-1] = (digit)accum; + else + assert(!accum); + z = long_normalize(z); + lshift_error: + Py_DECREF(a); + Py_DECREF(b); + return (PyObject *) z; +} + +/* Compute two's complement of digit vector a[0:m], writing result to + z[0:m]. The digit vector a need not be normalized, but should not + be entirely zero. a and z may point to the same digit vector. */ + +static void +v_complement(digit *z, digit *a, Py_ssize_t m) +{ + Py_ssize_t i; + digit carry = 1; + for (i = 0; i < m; ++i) { + carry += a[i] ^ PyLong_MASK; + z[i] = carry & PyLong_MASK; + carry >>= PyLong_SHIFT; + } + assert(carry == 0); +} + +/* Bitwise and/xor/or operations */ + +static PyObject * +long_bitwise(PyLongObject *a, + int op, /* '&', '|', '^' */ + PyLongObject *b) +{ + int nega, negb, negz; + Py_ssize_t size_a, size_b, size_z, i; + PyLongObject *z; + + /* Bitwise operations for negative numbers operate as though + on a two's complement representation. So convert arguments + from sign-magnitude to two's complement, and convert the + result back to sign-magnitude at the end. */ + + /* If a is negative, replace it by its two's complement. */ + size_a = ABS(Py_SIZE(a)); + nega = Py_SIZE(a) < 0; + if (nega) { + z = _PyLong_New(size_a); + if (z == NULL) + return NULL; + v_complement(z->ob_digit, a->ob_digit, size_a); + a = z; + } + else + /* Keep reference count consistent. */ + Py_INCREF(a); + + /* Same for b. */ + size_b = ABS(Py_SIZE(b)); + negb = Py_SIZE(b) < 0; + if (negb) { + z = _PyLong_New(size_b); + if (z == NULL) { + Py_DECREF(a); + return NULL; + } + v_complement(z->ob_digit, b->ob_digit, size_b); + b = z; + } + else + Py_INCREF(b); + + /* Swap a and b if necessary to ensure size_a >= size_b. */ + if (size_a < size_b) { + z = a; a = b; b = z; + size_z = size_a; size_a = size_b; size_b = size_z; + negz = nega; nega = negb; negb = negz; + } + + /* JRH: The original logic here was to allocate the result value (z) + as the longer of the two operands. However, there are some cases + where the result is guaranteed to be shorter than that: AND of two + positives, OR of two negatives: use the shorter number. AND with + mixed signs: use the positive number. OR with mixed signs: use the + negative number. + */ + switch (op) { + case '^': + negz = nega ^ negb; + size_z = size_a; + break; + case '&': + negz = nega & negb; + size_z = negb ? size_a : size_b; + break; + case '|': + negz = nega | negb; + size_z = negb ? size_b : size_a; + break; + default: + PyErr_BadArgument(); + return NULL; + } + + /* We allow an extra digit if z is negative, to make sure that + the final two's complement of z doesn't overflow. */ + z = _PyLong_New(size_z + negz); + if (z == NULL) { + Py_DECREF(a); + Py_DECREF(b); + return NULL; + } + + /* Compute digits for overlap of a and b. */ + switch(op) { + case '&': + for (i = 0; i < size_b; ++i) + z->ob_digit[i] = a->ob_digit[i] & b->ob_digit[i]; + break; + case '|': + for (i = 0; i < size_b; ++i) + z->ob_digit[i] = a->ob_digit[i] | b->ob_digit[i]; + break; + case '^': + for (i = 0; i < size_b; ++i) + z->ob_digit[i] = a->ob_digit[i] ^ b->ob_digit[i]; + break; + default: + PyErr_BadArgument(); + return NULL; + } + + /* Copy any remaining digits of a, inverting if necessary. */ + if (op == '^' && negb) + for (; i < size_z; ++i) + z->ob_digit[i] = a->ob_digit[i] ^ PyLong_MASK; + else if (i < size_z) + memcpy(&z->ob_digit[i], &a->ob_digit[i], + (size_z-i)*sizeof(digit)); + + /* Complement result if negative. */ + if (negz) { + Py_SIZE(z) = -(Py_SIZE(z)); + z->ob_digit[size_z] = PyLong_MASK; + v_complement(z->ob_digit, z->ob_digit, size_z+1); + } + + Py_DECREF(a); + Py_DECREF(b); + return (PyObject *)long_normalize(z); +} + +static PyObject * +long_and(PyObject *v, PyObject *w) +{ + PyLongObject *a, *b; + PyObject *c; + CONVERT_BINOP(v, w, &a, &b); + c = long_bitwise(a, '&', b); + Py_DECREF(a); + Py_DECREF(b); + return c; +} + +static PyObject * +long_xor(PyObject *v, PyObject *w) +{ + PyLongObject *a, *b; + PyObject *c; + CONVERT_BINOP(v, w, &a, &b); + c = long_bitwise(a, '^', b); + Py_DECREF(a); + Py_DECREF(b); + return c; +} + +static PyObject * +long_or(PyObject *v, PyObject *w) +{ + PyLongObject *a, *b; + PyObject *c; + CONVERT_BINOP(v, w, &a, &b); + c = long_bitwise(a, '|', b); + Py_DECREF(a); + Py_DECREF(b); + return c; +} + +static int +long_coerce(PyObject **pv, PyObject **pw) +{ + if (PyInt_Check(*pw)) { + *pw = PyLong_FromLong(PyInt_AS_LONG(*pw)); + if (*pw == NULL) + return -1; + Py_INCREF(*pv); + return 0; + } + else if (PyLong_Check(*pw)) { + Py_INCREF(*pv); + Py_INCREF(*pw); + return 0; + } + return 1; /* Can't do it */ +} + +static PyObject * +long_long(PyObject *v) +{ + if (PyLong_CheckExact(v)) + Py_INCREF(v); + else + v = _PyLong_Copy((PyLongObject *)v); + return v; +} + +static PyObject * +long_int(PyObject *v) +{ + long x; + x = PyLong_AsLong(v); + if (PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_Clear(); + if (PyLong_CheckExact(v)) { + Py_INCREF(v); + return v; + } + else + return _PyLong_Copy((PyLongObject *)v); + } + else + return NULL; + } + return PyInt_FromLong(x); +} + +static PyObject * +long_float(PyObject *v) +{ + double result; + result = PyLong_AsDouble(v); + if (result == -1.0 && PyErr_Occurred()) + return NULL; + return PyFloat_FromDouble(result); +} + +static PyObject * +long_oct(PyObject *v) +{ + return _PyLong_Format(v, 8, 1, 0); +} + +static PyObject * +long_hex(PyObject *v) +{ + return _PyLong_Format(v, 16, 1, 0); +} + +static PyObject * +long_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); + +static PyObject * +long_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyObject *x = NULL; + int base = -909; /* unlikely! */ + static char *kwlist[] = {"x", "base", 0}; + + if (type != &PyLong_Type) + return long_subtype_new(type, args, kwds); /* Wimp out */ + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oi:long", kwlist, + &x, &base)) + return NULL; + if (x == NULL) + return PyLong_FromLong(0L); + if (base == -909) + return PyNumber_Long(x); + else if (PyString_Check(x)) { + /* Since PyLong_FromString doesn't have a length parameter, + * check here for possible NULs in the string. */ + char *string = PyString_AS_STRING(x); + if (strlen(string) != (size_t)PyString_Size(x)) { + /* create a repr() of the input string, + * just like PyLong_FromString does. */ + PyObject *srepr; + srepr = PyObject_Repr(x); + if (srepr == NULL) + return NULL; + PyErr_Format(PyExc_ValueError, + "invalid literal for long() with base %d: %s", + base, PyString_AS_STRING(srepr)); + Py_DECREF(srepr); + return NULL; + } + return PyLong_FromString(PyString_AS_STRING(x), NULL, base); + } +#ifdef Py_USING_UNICODE + else if (PyUnicode_Check(x)) + return PyLong_FromUnicode(PyUnicode_AS_UNICODE(x), + PyUnicode_GET_SIZE(x), + base); +#endif + else { + PyErr_SetString(PyExc_TypeError, + "long() can't convert non-string with explicit base"); + return NULL; + } +} + +/* Wimpy, slow approach to tp_new calls for subtypes of long: + first create a regular long from whatever arguments we got, + then allocate a subtype instance and initialize it from + the regular long. The regular long is then thrown away. +*/ +static PyObject * +long_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyLongObject *tmp, *newobj; + Py_ssize_t i, n; + + assert(PyType_IsSubtype(type, &PyLong_Type)); + tmp = (PyLongObject *)long_new(&PyLong_Type, args, kwds); + if (tmp == NULL) + return NULL; + assert(PyLong_CheckExact(tmp)); + n = Py_SIZE(tmp); + if (n < 0) + n = -n; + newobj = (PyLongObject *)type->tp_alloc(type, n); + if (newobj == NULL) { + Py_DECREF(tmp); + return NULL; + } + assert(PyLong_Check(newobj)); + Py_SIZE(newobj) = Py_SIZE(tmp); + for (i = 0; i < n; i++) + newobj->ob_digit[i] = tmp->ob_digit[i]; + Py_DECREF(tmp); + return (PyObject *)newobj; +} + +static PyObject * +long_getnewargs(PyLongObject *v) +{ + return Py_BuildValue("(N)", _PyLong_Copy(v)); +} + +static PyObject * +long_get0(PyLongObject *v, void *context) { + return PyLong_FromLong(0L); +} + +static PyObject * +long_get1(PyLongObject *v, void *context) { + return PyLong_FromLong(1L); +} + +static PyObject * +long__format__(PyObject *self, PyObject *args) +{ + PyObject *format_spec; + + if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) + return NULL; + if (PyBytes_Check(format_spec)) + return _PyLong_FormatAdvanced(self, + PyBytes_AS_STRING(format_spec), + PyBytes_GET_SIZE(format_spec)); + if (PyUnicode_Check(format_spec)) { + /* Convert format_spec to a str */ + PyObject *result; + PyObject *str_spec = PyObject_Str(format_spec); + + if (str_spec == NULL) + return NULL; + + result = _PyLong_FormatAdvanced(self, + PyBytes_AS_STRING(str_spec), + PyBytes_GET_SIZE(str_spec)); + + Py_DECREF(str_spec); + return result; + } + PyErr_SetString(PyExc_TypeError, "__format__ requires str or unicode"); + return NULL; +} + +static PyObject * +long_sizeof(PyLongObject *v) +{ + Py_ssize_t res; + + res = v->ob_type->tp_basicsize + ABS(Py_SIZE(v))*sizeof(digit); + return PyInt_FromSsize_t(res); +} + +static PyObject * +long_bit_length(PyLongObject *v) +{ + PyLongObject *result, *x, *y; + Py_ssize_t ndigits, msd_bits = 0; + digit msd; + + assert(v != NULL); + assert(PyLong_Check(v)); + + ndigits = ABS(Py_SIZE(v)); + if (ndigits == 0) + return PyInt_FromLong(0); + + msd = v->ob_digit[ndigits-1]; + while (msd >= 32) { + msd_bits += 6; + msd >>= 6; + } + msd_bits += (long)(BitLengthTable[msd]); + + if (ndigits <= PY_SSIZE_T_MAX/PyLong_SHIFT) + return PyInt_FromSsize_t((ndigits-1)*PyLong_SHIFT + msd_bits); + + /* expression above may overflow; use Python integers instead */ + result = (PyLongObject *)PyLong_FromSsize_t(ndigits - 1); + if (result == NULL) + return NULL; + x = (PyLongObject *)PyLong_FromLong(PyLong_SHIFT); + if (x == NULL) + goto error; + y = (PyLongObject *)long_mul(result, x); + Py_DECREF(x); + if (y == NULL) + goto error; + Py_DECREF(result); + result = y; + + x = (PyLongObject *)PyLong_FromLong((long)msd_bits); + if (x == NULL) + goto error; + y = (PyLongObject *)long_add(result, x); + Py_DECREF(x); + if (y == NULL) + goto error; + Py_DECREF(result); + result = y; + + return (PyObject *)result; + + error: + Py_DECREF(result); + return NULL; +} + +PyDoc_STRVAR(long_bit_length_doc, +"long.bit_length() -> int or long\n\ +\n\ +Number of bits necessary to represent self in binary.\n\ +>>> bin(37L)\n\ +'0b100101'\n\ +>>> (37L).bit_length()\n\ +6"); + +#if 0 +static PyObject * +long_is_finite(PyObject *v) +{ + Py_RETURN_TRUE; +} +#endif + +static PyMethodDef long_methods[] = { + {"conjugate", (PyCFunction)long_long, METH_NOARGS, + "Returns self, the complex conjugate of any long."}, + {"bit_length", (PyCFunction)long_bit_length, METH_NOARGS, + long_bit_length_doc}, +#if 0 + {"is_finite", (PyCFunction)long_is_finite, METH_NOARGS, + "Returns always True."}, +#endif + {"__trunc__", (PyCFunction)long_long, METH_NOARGS, + "Truncating an Integral returns itself."}, + {"__getnewargs__", (PyCFunction)long_getnewargs, METH_NOARGS}, + {"__format__", (PyCFunction)long__format__, METH_VARARGS}, + {"__sizeof__", (PyCFunction)long_sizeof, METH_NOARGS, + "Returns size in memory, in bytes"}, + {NULL, NULL} /* sentinel */ +}; + +static PyGetSetDef long_getset[] = { + {"real", + (getter)long_long, (setter)NULL, + "the real part of a complex number", + NULL}, + {"imag", + (getter)long_get0, (setter)NULL, + "the imaginary part of a complex number", + NULL}, + {"numerator", + (getter)long_long, (setter)NULL, + "the numerator of a rational number in lowest terms", + NULL}, + {"denominator", + (getter)long_get1, (setter)NULL, + "the denominator of a rational number in lowest terms", + NULL}, + {NULL} /* Sentinel */ +}; + +PyDoc_STRVAR(long_doc, +"long(x[, base]) -> integer\n\ +\n\ +Convert a string or number to a long integer, if possible. A floating\n\ +point argument will be truncated towards zero (this does not include a\n\ +string representation of a floating point number!) When converting a\n\ +string, use the optional base. It is an error to supply a base when\n\ +converting a non-string."); + +static PyNumberMethods long_as_number = { + (binaryfunc)long_add, /*nb_add*/ + (binaryfunc)long_sub, /*nb_subtract*/ + (binaryfunc)long_mul, /*nb_multiply*/ + long_classic_div, /*nb_divide*/ + long_mod, /*nb_remainder*/ + long_divmod, /*nb_divmod*/ + long_pow, /*nb_power*/ + (unaryfunc)long_neg, /*nb_negative*/ + (unaryfunc)long_long, /*tp_positive*/ + (unaryfunc)long_abs, /*tp_absolute*/ + (inquiry)long_nonzero, /*tp_nonzero*/ + (unaryfunc)long_invert, /*nb_invert*/ + long_lshift, /*nb_lshift*/ + (binaryfunc)long_rshift, /*nb_rshift*/ + long_and, /*nb_and*/ + long_xor, /*nb_xor*/ + long_or, /*nb_or*/ + long_coerce, /*nb_coerce*/ + long_int, /*nb_int*/ + long_long, /*nb_long*/ + long_float, /*nb_float*/ + long_oct, /*nb_oct*/ + long_hex, /*nb_hex*/ + 0, /* nb_inplace_add */ + 0, /* nb_inplace_subtract */ + 0, /* nb_inplace_multiply */ + 0, /* nb_inplace_divide */ + 0, /* nb_inplace_remainder */ + 0, /* nb_inplace_power */ + 0, /* nb_inplace_lshift */ + 0, /* nb_inplace_rshift */ + 0, /* nb_inplace_and */ + 0, /* nb_inplace_xor */ + 0, /* nb_inplace_or */ + long_div, /* nb_floor_divide */ + long_true_divide, /* nb_true_divide */ + 0, /* nb_inplace_floor_divide */ + 0, /* nb_inplace_true_divide */ + long_long, /* nb_index */ +}; + +PyTypeObject PyLong_Type = { + PyObject_HEAD_INIT(&PyType_Type) + 0, /* ob_size */ + "long", /* tp_name */ + offsetof(PyLongObject, ob_digit), /* tp_basicsize */ + sizeof(digit), /* tp_itemsize */ + long_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + (cmpfunc)long_compare, /* tp_compare */ + long_repr, /* tp_repr */ + &long_as_number, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + (hashfunc)long_hash, /* tp_hash */ + 0, /* tp_call */ + long_str, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES | + Py_TPFLAGS_BASETYPE | Py_TPFLAGS_LONG_SUBCLASS, /* tp_flags */ + long_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + long_methods, /* tp_methods */ + 0, /* tp_members */ + long_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + long_new, /* tp_new */ + PyObject_Del, /* tp_free */ +}; + +static PyTypeObject Long_InfoType; + +PyDoc_STRVAR(long_info__doc__, +"sys.long_info\n\ +\n\ +A struct sequence that holds information about Python's\n\ +internal representation of integers. The attributes are read only."); + +static PyStructSequence_Field long_info_fields[] = { + {"bits_per_digit", "size of a digit in bits"}, + {"sizeof_digit", "size in bytes of the C type used to represent a digit"}, + {NULL, NULL} +}; + +static PyStructSequence_Desc long_info_desc = { + "sys.long_info", /* name */ + long_info__doc__, /* doc */ + long_info_fields, /* fields */ + 2 /* number of fields */ +}; + +PyObject * +PyLong_GetInfo(void) +{ + PyObject* long_info; + int field = 0; + long_info = PyStructSequence_New(&Long_InfoType); + if (long_info == NULL) + return NULL; + PyStructSequence_SET_ITEM(long_info, field++, + PyInt_FromLong(PyLong_SHIFT)); + PyStructSequence_SET_ITEM(long_info, field++, + PyInt_FromLong(sizeof(digit))); + if (PyErr_Occurred()) { + Py_CLEAR(long_info); + return NULL; + } + return long_info; +} + +int +_PyLong_Init(void) +{ + /* initialize long_info */ + if (Long_InfoType.tp_name == 0) + PyStructSequence_InitType(&Long_InfoType, &long_info_desc); + return 1; +} diff --git a/AppPkg/Applications/Python/PyMod-2.7.2/Objects/stringlib/localeutil.h b/AppPkg/Applications/Python/PyMod-2.7.2/Objects/stringlib/localeutil.h new file mode 100644 index 0000000000..25b7f6fbd7 --- /dev/null +++ b/AppPkg/Applications/Python/PyMod-2.7.2/Objects/stringlib/localeutil.h @@ -0,0 +1,216 @@ +/* stringlib: locale related helpers implementation */ + +#ifndef STRINGLIB_LOCALEUTIL_H +#define STRINGLIB_LOCALEUTIL_H + +#include + +// Prevent conflicts with EFI +#undef MAX +#undef MIN + +#define MAX(x, y) ((x) < (y) ? (y) : (x)) +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + +typedef struct { + const char *grouping; + char previous; + Py_ssize_t i; /* Where we're currently pointing in grouping. */ +} GroupGenerator; + +static void +_GroupGenerator_init(GroupGenerator *self, const char *grouping) +{ + self->grouping = grouping; + self->i = 0; + self->previous = 0; +} + +/* Returns the next grouping, or 0 to signify end. */ +static Py_ssize_t +_GroupGenerator_next(GroupGenerator *self) +{ + /* Note that we don't really do much error checking here. If a + grouping string contains just CHAR_MAX, for example, then just + terminate the generator. That shouldn't happen, but at least we + fail gracefully. */ + switch (self->grouping[self->i]) { + case 0: + return self->previous; + case CHAR_MAX: + /* Stop the generator. */ + return 0; + default: { + char ch = self->grouping[self->i]; + self->previous = ch; + self->i++; + return (Py_ssize_t)ch; + } + } +} + +/* Fill in some digits, leading zeros, and thousands separator. All + are optional, depending on when we're called. */ +static void +fill(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end, + Py_ssize_t n_chars, Py_ssize_t n_zeros, const char* thousands_sep, + Py_ssize_t thousands_sep_len) +{ +#if STRINGLIB_IS_UNICODE + Py_ssize_t i; +#endif + + if (thousands_sep) { + *buffer_end -= thousands_sep_len; + + /* Copy the thousands_sep chars into the buffer. */ +#if STRINGLIB_IS_UNICODE + /* Convert from the char's of the thousands_sep from + the locale into unicode. */ + for (i = 0; i < thousands_sep_len; ++i) + (*buffer_end)[i] = thousands_sep[i]; +#else + /* No conversion, just memcpy the thousands_sep. */ + memcpy(*buffer_end, thousands_sep, thousands_sep_len); +#endif + } + + *buffer_end -= n_chars; + *digits_end -= n_chars; + memcpy(*buffer_end, *digits_end, n_chars * sizeof(STRINGLIB_CHAR)); + + *buffer_end -= n_zeros; + STRINGLIB_FILL(*buffer_end, '0', n_zeros); +} + +/** + * _Py_InsertThousandsGrouping: + * @buffer: A pointer to the start of a string. + * @n_buffer: Number of characters in @buffer. + * @digits: A pointer to the digits we're reading from. If count + * is non-NULL, this is unused. + * @n_digits: The number of digits in the string, in which we want + * to put the grouping chars. + * @min_width: The minimum width of the digits in the output string. + * Output will be zero-padded on the left to fill. + * @grouping: see definition in localeconv(). + * @thousands_sep: see definition in localeconv(). + * + * There are 2 modes: counting and filling. If @buffer is NULL, + * we are in counting mode, else filling mode. + * If counting, the required buffer size is returned. + * If filling, we know the buffer will be large enough, so we don't + * need to pass in the buffer size. + * Inserts thousand grouping characters (as defined by grouping and + * thousands_sep) into the string between buffer and buffer+n_digits. + * + * Return value: 0 on error, else 1. Note that no error can occur if + * count is non-NULL. + * + * This name won't be used, the includer of this file should define + * it to be the actual function name, based on unicode or string. + * + * As closely as possible, this code mimics the logic in decimal.py's + _insert_thousands_sep(). + **/ +Py_ssize_t +_Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer, + Py_ssize_t n_buffer, + STRINGLIB_CHAR *digits, + Py_ssize_t n_digits, + Py_ssize_t min_width, + const char *grouping, + const char *thousands_sep) +{ + Py_ssize_t count = 0; + Py_ssize_t n_zeros; + int loop_broken = 0; + int use_separator = 0; /* First time through, don't append the + separator. They only go between + groups. */ + STRINGLIB_CHAR *buffer_end = NULL; + STRINGLIB_CHAR *digits_end = NULL; + Py_ssize_t l; + Py_ssize_t n_chars; + Py_ssize_t thousands_sep_len = strlen(thousands_sep); + Py_ssize_t remaining = n_digits; /* Number of chars remaining to + be looked at */ + /* A generator that returns all of the grouping widths, until it + returns 0. */ + GroupGenerator groupgen; + _GroupGenerator_init(&groupgen, grouping); + + if (buffer) { + buffer_end = buffer + n_buffer; + digits_end = digits + n_digits; + } + + while ((l = _GroupGenerator_next(&groupgen)) > 0) { + l = MIN(l, MAX(MAX(remaining, min_width), 1)); + n_zeros = MAX(0, l - remaining); + n_chars = MAX(0, MIN(remaining, l)); + + /* Use n_zero zero's and n_chars chars */ + + /* Count only, don't do anything. */ + count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars; + + if (buffer) { + /* Copy into the output buffer. */ + fill(&digits_end, &buffer_end, n_chars, n_zeros, + use_separator ? thousands_sep : NULL, thousands_sep_len); + } + + /* Use a separator next time. */ + use_separator = 1; + + remaining -= n_chars; + min_width -= l; + + if (remaining <= 0 && min_width <= 0) { + loop_broken = 1; + break; + } + min_width -= thousands_sep_len; + } + if (!loop_broken) { + /* We left the loop without using a break statement. */ + + l = MAX(MAX(remaining, min_width), 1); + n_zeros = MAX(0, l - remaining); + n_chars = MAX(0, MIN(remaining, l)); + + /* Use n_zero zero's and n_chars chars */ + count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars; + if (buffer) { + /* Copy into the output buffer. */ + fill(&digits_end, &buffer_end, n_chars, n_zeros, + use_separator ? thousands_sep : NULL, thousands_sep_len); + } + } + return count; +} + +/** + * _Py_InsertThousandsGroupingLocale: + * @buffer: A pointer to the start of a string. + * @n_digits: The number of digits in the string, in which we want + * to put the grouping chars. + * + * Reads thee current locale and calls _Py_InsertThousandsGrouping(). + **/ +Py_ssize_t +_Py_InsertThousandsGroupingLocale(STRINGLIB_CHAR *buffer, + Py_ssize_t n_buffer, + STRINGLIB_CHAR *digits, + Py_ssize_t n_digits, + Py_ssize_t min_width) +{ + struct lconv *locale_data = localeconv(); + const char *grouping = locale_data->grouping; + const char *thousands_sep = locale_data->thousands_sep; + + return _Py_InsertThousandsGrouping(buffer, n_buffer, digits, n_digits, + min_width, grouping, thousands_sep); +} +#endif /* STRINGLIB_LOCALEUTIL_H */ diff --git a/AppPkg/Applications/Python/PyMod-2.7.2/Python/marshal.c b/AppPkg/Applications/Python/PyMod-2.7.2/Python/marshal.c new file mode 100644 index 0000000000..785f4389ba --- /dev/null +++ b/AppPkg/Applications/Python/PyMod-2.7.2/Python/marshal.c @@ -0,0 +1,1412 @@ + +/* Write Python objects to files and read them back. + This is intended for writing and reading compiled Python code only; + a true persistent storage facility would be much harder, since + it would have to take circular links and sharing into account. */ + +#define PY_SSIZE_T_CLEAN + +#include "Python.h" +#include "longintrepr.h" +#include "code.h" +#include "marshal.h" + +#ifndef ABS + #define ABS(x) ((x) < 0 ? -(x) : (x)) +#endif + +/* High water mark to determine when the marshalled object is dangerously deep + * and risks coring the interpreter. When the object stack gets this deep, + * raise an exception instead of continuing. + */ +#define MAX_MARSHAL_STACK_DEPTH 2000 + +#define TYPE_NULL '0' +#define TYPE_NONE 'N' +#define TYPE_FALSE 'F' +#define TYPE_TRUE 'T' +#define TYPE_STOPITER 'S' +#define TYPE_ELLIPSIS '.' +#define TYPE_INT 'i' +#define TYPE_INT64 'I' +#define TYPE_FLOAT 'f' +#define TYPE_BINARY_FLOAT 'g' +#define TYPE_COMPLEX 'x' +#define TYPE_BINARY_COMPLEX 'y' +#define TYPE_LONG 'l' +#define TYPE_STRING 's' +#define TYPE_INTERNED 't' +#define TYPE_STRINGREF 'R' +#define TYPE_TUPLE '(' +#define TYPE_LIST '[' +#define TYPE_DICT '{' +#define TYPE_CODE 'c' +#define TYPE_UNICODE 'u' +#define TYPE_UNKNOWN '?' +#define TYPE_SET '<' +#define TYPE_FROZENSET '>' + +#define WFERR_OK 0 +#define WFERR_UNMARSHALLABLE 1 +#define WFERR_NESTEDTOODEEP 2 +#define WFERR_NOMEMORY 3 + +typedef struct { + FILE *fp; + int error; /* see WFERR_* values */ + int depth; + /* If fp == NULL, the following are valid: */ + PyObject *str; + char *ptr; + char *end; + PyObject *strings; /* dict on marshal, list on unmarshal */ + int version; +} WFILE; + +#define w_byte(c, p) if (((p)->fp)) putc((c), (p)->fp); \ + else if ((p)->ptr != (p)->end) *(p)->ptr++ = (c); \ + else w_more(c, p) + +static void +w_more(int c, WFILE *p) +{ + Py_ssize_t size, newsize; + if (p->str == NULL) + return; /* An error already occurred */ + size = PyString_Size(p->str); + newsize = size + size + 1024; + if (newsize > 32*1024*1024) { + newsize = size + (size >> 3); /* 12.5% overallocation */ + } + if (_PyString_Resize(&p->str, newsize) != 0) { + p->ptr = p->end = NULL; + } + else { + p->ptr = PyString_AS_STRING((PyStringObject *)p->str) + size; + p->end = + PyString_AS_STRING((PyStringObject *)p->str) + newsize; + *p->ptr++ = Py_SAFE_DOWNCAST(c, int, char); + } +} + +static void +w_string(char *s, int n, WFILE *p) +{ + if (p->fp != NULL) { + fwrite(s, 1, n, p->fp); + } + else { + while (--n >= 0) { + w_byte(*s, p); + s++; + } + } +} + +static void +w_short(int x, WFILE *p) +{ + w_byte((char)( x & 0xff), p); + w_byte((char)((x>> 8) & 0xff), p); +} + +static void +w_long(long x, WFILE *p) +{ + w_byte((char)( x & 0xff), p); + w_byte((char)((x>> 8) & 0xff), p); + w_byte((char)((x>>16) & 0xff), p); + w_byte((char)((x>>24) & 0xff), p); +} + +#if SIZEOF_LONG > 4 +static void +w_long64(long x, WFILE *p) +{ + w_long(x, p); + w_long(x>>32, p); +} +#endif + +/* We assume that Python longs are stored internally in base some power of + 2**15; for the sake of portability we'll always read and write them in base + exactly 2**15. */ + +#define PyLong_MARSHAL_SHIFT 15 +#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT) +#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1) +#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0 +#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT" +#endif +#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT) + +static void +w_PyLong(const PyLongObject *ob, WFILE *p) +{ + Py_ssize_t i, j, n, l; + digit d; + + w_byte(TYPE_LONG, p); + if (Py_SIZE(ob) == 0) { + w_long((long)0, p); + return; + } + + /* set l to number of base PyLong_MARSHAL_BASE digits */ + n = ABS(Py_SIZE(ob)); + l = (n-1) * PyLong_MARSHAL_RATIO; + d = ob->ob_digit[n-1]; + assert(d != 0); /* a PyLong is always normalized */ + do { + d >>= PyLong_MARSHAL_SHIFT; + l++; + } while (d != 0); + w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p); + + for (i=0; i < n-1; i++) { + d = ob->ob_digit[i]; + for (j=0; j < PyLong_MARSHAL_RATIO; j++) { + w_short(d & PyLong_MARSHAL_MASK, p); + d >>= PyLong_MARSHAL_SHIFT; + } + assert (d == 0); + } + d = ob->ob_digit[n-1]; + do { + w_short(d & PyLong_MARSHAL_MASK, p); + d >>= PyLong_MARSHAL_SHIFT; + } while (d != 0); +} + +static void +w_object(PyObject *v, WFILE *p) +{ + Py_ssize_t i, n; + + p->depth++; + + if (p->depth > MAX_MARSHAL_STACK_DEPTH) { + p->error = WFERR_NESTEDTOODEEP; + } + else if (v == NULL) { + w_byte(TYPE_NULL, p); + } + else if (v == Py_None) { + w_byte(TYPE_NONE, p); + } + else if (v == PyExc_StopIteration) { + w_byte(TYPE_STOPITER, p); + } + else if (v == Py_Ellipsis) { + w_byte(TYPE_ELLIPSIS, p); + } + else if (v == Py_False) { + w_byte(TYPE_FALSE, p); + } + else if (v == Py_True) { + w_byte(TYPE_TRUE, p); + } + else if (PyInt_CheckExact(v)) { + long x = PyInt_AS_LONG((PyIntObject *)v); +#if SIZEOF_LONG > 4 + long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31); + if (y && y != -1) { + w_byte(TYPE_INT64, p); + w_long64(x, p); + } + else +#endif + { + w_byte(TYPE_INT, p); + w_long(x, p); + } + } + else if (PyLong_CheckExact(v)) { + PyLongObject *ob = (PyLongObject *)v; + w_PyLong(ob, p); + } + else if (PyFloat_CheckExact(v)) { + if (p->version > 1) { + unsigned char buf[8]; + if (_PyFloat_Pack8(PyFloat_AsDouble(v), + buf, 1) < 0) { + p->error = WFERR_UNMARSHALLABLE; + return; + } + w_byte(TYPE_BINARY_FLOAT, p); + w_string((char*)buf, 8, p); + } + else { + char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v), + 'g', 17, 0, NULL); + if (!buf) { + p->error = WFERR_NOMEMORY; + return; + } + n = strlen(buf); + w_byte(TYPE_FLOAT, p); + w_byte((int)n, p); + w_string(buf, (int)n, p); + PyMem_Free(buf); + } + } +#ifndef WITHOUT_COMPLEX + else if (PyComplex_CheckExact(v)) { + if (p->version > 1) { + unsigned char buf[8]; + if (_PyFloat_Pack8(PyComplex_RealAsDouble(v), + buf, 1) < 0) { + p->error = WFERR_UNMARSHALLABLE; + return; + } + w_byte(TYPE_BINARY_COMPLEX, p); + w_string((char*)buf, 8, p); + if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v), + buf, 1) < 0) { + p->error = WFERR_UNMARSHALLABLE; + return; + } + w_string((char*)buf, 8, p); + } + else { + char *buf; + w_byte(TYPE_COMPLEX, p); + buf = PyOS_double_to_string(PyComplex_RealAsDouble(v), + 'g', 17, 0, NULL); + if (!buf) { + p->error = WFERR_NOMEMORY; + return; + } + n = strlen(buf); + w_byte((int)n, p); + w_string(buf, (int)n, p); + PyMem_Free(buf); + buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v), + 'g', 17, 0, NULL); + if (!buf) { + p->error = WFERR_NOMEMORY; + return; + } + n = strlen(buf); + w_byte((int)n, p); + w_string(buf, (int)n, p); + PyMem_Free(buf); + } + } +#endif + else if (PyString_CheckExact(v)) { + if (p->strings && PyString_CHECK_INTERNED(v)) { + PyObject *o = PyDict_GetItem(p->strings, v); + if (o) { + long w = PyInt_AsLong(o); + w_byte(TYPE_STRINGREF, p); + w_long(w, p); + goto exit; + } + else { + int ok; + o = PyInt_FromSsize_t(PyDict_Size(p->strings)); + ok = o && + PyDict_SetItem(p->strings, v, o) >= 0; + Py_XDECREF(o); + if (!ok) { + p->depth--; + p->error = WFERR_UNMARSHALLABLE; + return; + } + w_byte(TYPE_INTERNED, p); + } + } + else { + w_byte(TYPE_STRING, p); + } + n = PyString_GET_SIZE(v); + if (n > INT_MAX) { + /* huge strings are not supported */ + p->depth--; + p->error = WFERR_UNMARSHALLABLE; + return; + } + w_long((long)n, p); + w_string(PyString_AS_STRING(v), (int)n, p); + } +#ifdef Py_USING_UNICODE + else if (PyUnicode_CheckExact(v)) { + PyObject *utf8; + utf8 = PyUnicode_AsUTF8String(v); + if (utf8 == NULL) { + p->depth--; + p->error = WFERR_UNMARSHALLABLE; + return; + } + w_byte(TYPE_UNICODE, p); + n = PyString_GET_SIZE(utf8); + if (n > INT_MAX) { + p->depth--; + p->error = WFERR_UNMARSHALLABLE; + return; + } + w_long((long)n, p); + w_string(PyString_AS_STRING(utf8), (int)n, p); + Py_DECREF(utf8); + } +#endif + else if (PyTuple_CheckExact(v)) { + w_byte(TYPE_TUPLE, p); + n = PyTuple_Size(v); + w_long((long)n, p); + for (i = 0; i < n; i++) { + w_object(PyTuple_GET_ITEM(v, i), p); + } + } + else if (PyList_CheckExact(v)) { + w_byte(TYPE_LIST, p); + n = PyList_GET_SIZE(v); + w_long((long)n, p); + for (i = 0; i < n; i++) { + w_object(PyList_GET_ITEM(v, i), p); + } + } + else if (PyDict_CheckExact(v)) { + Py_ssize_t pos; + PyObject *key, *value; + w_byte(TYPE_DICT, p); + /* This one is NULL object terminated! */ + pos = 0; + while (PyDict_Next(v, &pos, &key, &value)) { + w_object(key, p); + w_object(value, p); + } + w_object((PyObject *)NULL, p); + } + else if (PyAnySet_CheckExact(v)) { + PyObject *value, *it; + + if (PyObject_TypeCheck(v, &PySet_Type)) + w_byte(TYPE_SET, p); + else + w_byte(TYPE_FROZENSET, p); + n = PyObject_Size(v); + if (n == -1) { + p->depth--; + p->error = WFERR_UNMARSHALLABLE; + return; + } + w_long((long)n, p); + it = PyObject_GetIter(v); + if (it == NULL) { + p->depth--; + p->error = WFERR_UNMARSHALLABLE; + return; + } + while ((value = PyIter_Next(it)) != NULL) { + w_object(value, p); + Py_DECREF(value); + } + Py_DECREF(it); + if (PyErr_Occurred()) { + p->depth--; + p->error = WFERR_UNMARSHALLABLE; + return; + } + } + else if (PyCode_Check(v)) { + PyCodeObject *co = (PyCodeObject *)v; + w_byte(TYPE_CODE, p); + w_long(co->co_argcount, p); + w_long(co->co_nlocals, p); + w_long(co->co_stacksize, p); + w_long(co->co_flags, p); + w_object(co->co_code, p); + w_object(co->co_consts, p); + w_object(co->co_names, p); + w_object(co->co_varnames, p); + w_object(co->co_freevars, p); + w_object(co->co_cellvars, p); + w_object(co->co_filename, p); + w_object(co->co_name, p); + w_long(co->co_firstlineno, p); + w_object(co->co_lnotab, p); + } + else if (PyObject_CheckReadBuffer(v)) { + /* Write unknown buffer-style objects as a string */ + char *s; + PyBufferProcs *pb = v->ob_type->tp_as_buffer; + w_byte(TYPE_STRING, p); + n = (*pb->bf_getreadbuffer)(v, 0, (void **)&s); + if (n > INT_MAX) { + p->depth--; + p->error = WFERR_UNMARSHALLABLE; + return; + } + w_long((long)n, p); + w_string(s, (int)n, p); + } + else { + w_byte(TYPE_UNKNOWN, p); + p->error = WFERR_UNMARSHALLABLE; + } + exit: + p->depth--; +} + +/* version currently has no effect for writing longs. */ +void +PyMarshal_WriteLongToFile(long x, FILE *fp, int version) +{ + WFILE wf; + wf.fp = fp; + wf.error = WFERR_OK; + wf.depth = 0; + wf.strings = NULL; + wf.version = version; + w_long(x, &wf); +} + +void +PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version) +{ + WFILE wf; + wf.fp = fp; + wf.error = WFERR_OK; + wf.depth = 0; + wf.strings = (version > 0) ? PyDict_New() : NULL; + wf.version = version; + w_object(x, &wf); + Py_XDECREF(wf.strings); +} + +typedef WFILE RFILE; /* Same struct with different invariants */ + +#define rs_byte(p) (((p)->ptr < (p)->end) ? (unsigned char)*(p)->ptr++ : EOF) + +#define r_byte(p) ((p)->fp ? getc((p)->fp) : rs_byte(p)) + +static int +r_string(char *s, int n, RFILE *p) +{ + if (p->fp != NULL) + /* The result fits into int because it must be <=n. */ + return (int)fread(s, 1, n, p->fp); + if (p->end - p->ptr < n) + n = (int)(p->end - p->ptr); + memcpy(s, p->ptr, n); + p->ptr += n; + return n; +} + +static int +r_short(RFILE *p) +{ + register short x; + x = r_byte(p); + x |= r_byte(p) << 8; + /* Sign-extension, in case short greater than 16 bits */ + x |= -(x & 0x8000); + return x; +} + +static long +r_long(RFILE *p) +{ + register long x; + register FILE *fp = p->fp; + if (fp) { + x = getc(fp); + x |= (long)getc(fp) << 8; + x |= (long)getc(fp) << 16; + x |= (long)getc(fp) << 24; + } + else { + x = rs_byte(p); + x |= (long)rs_byte(p) << 8; + x |= (long)rs_byte(p) << 16; + x |= (long)rs_byte(p) << 24; + } +#if SIZEOF_LONG > 4 + /* Sign extension for 64-bit machines */ + x |= -(x & 0x80000000L); +#endif + return x; +} + +/* r_long64 deals with the TYPE_INT64 code. On a machine with + sizeof(long) > 4, it returns a Python int object, else a Python long + object. Note that w_long64 writes out TYPE_INT if 32 bits is enough, + so there's no inefficiency here in returning a PyLong on 32-bit boxes + for everything written via TYPE_INT64 (i.e., if an int is written via + TYPE_INT64, it *needs* more than 32 bits). +*/ +static PyObject * +r_long64(RFILE *p) +{ + long lo4 = r_long(p); + long hi4 = r_long(p); +#if SIZEOF_LONG > 4 + long x = (hi4 << 32) | (lo4 & 0xFFFFFFFFL); + return PyInt_FromLong(x); +#else + unsigned char buf[8]; + int one = 1; + int is_little_endian = (int)*(char*)&one; + if (is_little_endian) { + memcpy(buf, &lo4, 4); + memcpy(buf+4, &hi4, 4); + } + else { + memcpy(buf, &hi4, 4); + memcpy(buf+4, &lo4, 4); + } + return _PyLong_FromByteArray(buf, 8, is_little_endian, 1); +#endif +} + +static PyObject * +r_PyLong(RFILE *p) +{ + PyLongObject *ob; + int size, i, j, md, shorts_in_top_digit; + long n; + digit d; + + n = r_long(p); + if (n == 0) + return (PyObject *)_PyLong_New(0); + if (n < -INT_MAX || n > INT_MAX) { + PyErr_SetString(PyExc_ValueError, + "bad marshal data (long size out of range)"); + return NULL; + } + + size = 1 + (ABS(n) - 1) / PyLong_MARSHAL_RATIO; + shorts_in_top_digit = 1 + (ABS(n) - 1) % PyLong_MARSHAL_RATIO; + ob = _PyLong_New(size); + if (ob == NULL) + return NULL; + Py_SIZE(ob) = n > 0 ? size : -size; + + for (i = 0; i < size-1; i++) { + d = 0; + for (j=0; j < PyLong_MARSHAL_RATIO; j++) { + md = r_short(p); + if (md < 0 || md > PyLong_MARSHAL_BASE) + goto bad_digit; + d += (digit)md << j*PyLong_MARSHAL_SHIFT; + } + ob->ob_digit[i] = d; + } + d = 0; + for (j=0; j < shorts_in_top_digit; j++) { + md = r_short(p); + if (md < 0 || md > PyLong_MARSHAL_BASE) + goto bad_digit; + /* topmost marshal digit should be nonzero */ + if (md == 0 && j == shorts_in_top_digit - 1) { + Py_DECREF(ob); + PyErr_SetString(PyExc_ValueError, + "bad marshal data (unnormalized long data)"); + return NULL; + } + d += (digit)md << j*PyLong_MARSHAL_SHIFT; + } + /* top digit should be nonzero, else the resulting PyLong won't be + normalized */ + ob->ob_digit[size-1] = d; + return (PyObject *)ob; + bad_digit: + Py_DECREF(ob); + PyErr_SetString(PyExc_ValueError, + "bad marshal data (digit out of range in long)"); + return NULL; +} + + +static PyObject * +r_object(RFILE *p) +{ + /* NULL is a valid return value, it does not necessarily means that + an exception is set. */ + PyObject *v, *v2; + long i, n; + int type = r_byte(p); + PyObject *retval; + + p->depth++; + + if (p->depth > MAX_MARSHAL_STACK_DEPTH) { + p->depth--; + PyErr_SetString(PyExc_ValueError, "recursion limit exceeded"); + return NULL; + } + + switch (type) { + + case EOF: + PyErr_SetString(PyExc_EOFError, + "EOF read where object expected"); + retval = NULL; + break; + + case TYPE_NULL: + retval = NULL; + break; + + case TYPE_NONE: + Py_INCREF(Py_None); + retval = Py_None; + break; + + case TYPE_STOPITER: + Py_INCREF(PyExc_StopIteration); + retval = PyExc_StopIteration; + break; + + case TYPE_ELLIPSIS: + Py_INCREF(Py_Ellipsis); + retval = Py_Ellipsis; + break; + + case TYPE_FALSE: + Py_INCREF(Py_False); + retval = Py_False; + break; + + case TYPE_TRUE: + Py_INCREF(Py_True); + retval = Py_True; + break; + + case TYPE_INT: + retval = PyInt_FromLong(r_long(p)); + break; + + case TYPE_INT64: + retval = r_long64(p); + break; + + case TYPE_LONG: + retval = r_PyLong(p); + break; + + case TYPE_FLOAT: + { + char buf[256]; + double dx; + n = r_byte(p); + if (n == EOF || r_string(buf, (int)n, p) != n) { + PyErr_SetString(PyExc_EOFError, + "EOF read where object expected"); + retval = NULL; + break; + } + buf[n] = '\0'; + dx = PyOS_string_to_double(buf, NULL, NULL); + if (dx == -1.0 && PyErr_Occurred()) { + retval = NULL; + break; + } + retval = PyFloat_FromDouble(dx); + break; + } + + case TYPE_BINARY_FLOAT: + { + unsigned char buf[8]; + double x; + if (r_string((char*)buf, 8, p) != 8) { + PyErr_SetString(PyExc_EOFError, + "EOF read where object expected"); + retval = NULL; + break; + } + x = _PyFloat_Unpack8(buf, 1); + if (x == -1.0 && PyErr_Occurred()) { + retval = NULL; + break; + } + retval = PyFloat_FromDouble(x); + break; + } + +#ifndef WITHOUT_COMPLEX + case TYPE_COMPLEX: + { + char buf[256]; + Py_complex c; + n = r_byte(p); + if (n == EOF || r_string(buf, (int)n, p) != n) { + PyErr_SetString(PyExc_EOFError, + "EOF read where object expected"); + retval = NULL; + break; + } + buf[n] = '\0'; + c.real = PyOS_string_to_double(buf, NULL, NULL); + if (c.real == -1.0 && PyErr_Occurred()) { + retval = NULL; + break; + } + n = r_byte(p); + if (n == EOF || r_string(buf, (int)n, p) != n) { + PyErr_SetString(PyExc_EOFError, + "EOF read where object expected"); + retval = NULL; + break; + } + buf[n] = '\0'; + c.imag = PyOS_string_to_double(buf, NULL, NULL); + if (c.imag == -1.0 && PyErr_Occurred()) { + retval = NULL; + break; + } + retval = PyComplex_FromCComplex(c); + break; + } + + case TYPE_BINARY_COMPLEX: + { + unsigned char buf[8]; + Py_complex c; + if (r_string((char*)buf, 8, p) != 8) { + PyErr_SetString(PyExc_EOFError, + "EOF read where object expected"); + retval = NULL; + break; + } + c.real = _PyFloat_Unpack8(buf, 1); + if (c.real == -1.0 && PyErr_Occurred()) { + retval = NULL; + break; + } + if (r_string((char*)buf, 8, p) != 8) { + PyErr_SetString(PyExc_EOFError, + "EOF read where object expected"); + retval = NULL; + break; + } + c.imag = _PyFloat_Unpack8(buf, 1); + if (c.imag == -1.0 && PyErr_Occurred()) { + retval = NULL; + break; + } + retval = PyComplex_FromCComplex(c); + break; + } +#endif + + case TYPE_INTERNED: + case TYPE_STRING: + n = r_long(p); + if (n < 0 || n > INT_MAX) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); + retval = NULL; + break; + } + v = PyString_FromStringAndSize((char *)NULL, n); + if (v == NULL) { + retval = NULL; + break; + } + if (r_string(PyString_AS_STRING(v), (int)n, p) != n) { + Py_DECREF(v); + PyErr_SetString(PyExc_EOFError, + "EOF read where object expected"); + retval = NULL; + break; + } + if (type == TYPE_INTERNED) { + PyString_InternInPlace(&v); + if (PyList_Append(p->strings, v) < 0) { + retval = NULL; + break; + } + } + retval = v; + break; + + case TYPE_STRINGREF: + n = r_long(p); + if (n < 0 || n >= PyList_GET_SIZE(p->strings)) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (string ref out of range)"); + retval = NULL; + break; + } + v = PyList_GET_ITEM(p->strings, n); + Py_INCREF(v); + retval = v; + break; + +#ifdef Py_USING_UNICODE + case TYPE_UNICODE: + { + char *buffer; + + n = r_long(p); + if (n < 0 || n > INT_MAX) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)"); + retval = NULL; + break; + } + buffer = PyMem_NEW(char, n); + if (buffer == NULL) { + retval = PyErr_NoMemory(); + break; + } + if (r_string(buffer, (int)n, p) != n) { + PyMem_DEL(buffer); + PyErr_SetString(PyExc_EOFError, + "EOF read where object expected"); + retval = NULL; + break; + } + v = PyUnicode_DecodeUTF8(buffer, n, NULL); + PyMem_DEL(buffer); + retval = v; + break; + } +#endif + + case TYPE_TUPLE: + n = r_long(p); + if (n < 0 || n > INT_MAX) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)"); + retval = NULL; + break; + } + v = PyTuple_New((int)n); + if (v == NULL) { + retval = NULL; + break; + } + for (i = 0; i < n; i++) { + v2 = r_object(p); + if ( v2 == NULL ) { + if (!PyErr_Occurred()) + PyErr_SetString(PyExc_TypeError, + "NULL object in marshal data for tuple"); + Py_DECREF(v); + v = NULL; + break; + } + PyTuple_SET_ITEM(v, (int)i, v2); + } + retval = v; + break; + + case TYPE_LIST: + n = r_long(p); + if (n < 0 || n > INT_MAX) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)"); + retval = NULL; + break; + } + v = PyList_New((int)n); + if (v == NULL) { + retval = NULL; + break; + } + for (i = 0; i < n; i++) { + v2 = r_object(p); + if ( v2 == NULL ) { + if (!PyErr_Occurred()) + PyErr_SetString(PyExc_TypeError, + "NULL object in marshal data for list"); + Py_DECREF(v); + v = NULL; + break; + } + PyList_SET_ITEM(v, (int)i, v2); + } + retval = v; + break; + + case TYPE_DICT: + v = PyDict_New(); + if (v == NULL) { + retval = NULL; + break; + } + for (;;) { + PyObject *key, *val; + key = r_object(p); + if (key == NULL) + break; + val = r_object(p); + if (val != NULL) + PyDict_SetItem(v, key, val); + Py_DECREF(key); + Py_XDECREF(val); + } + if (PyErr_Occurred()) { + Py_DECREF(v); + v = NULL; + } + retval = v; + break; + + case TYPE_SET: + case TYPE_FROZENSET: + n = r_long(p); + if (n < 0 || n > INT_MAX) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)"); + retval = NULL; + break; + } + v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL); + if (v == NULL) { + retval = NULL; + break; + } + for (i = 0; i < n; i++) { + v2 = r_object(p); + if ( v2 == NULL ) { + if (!PyErr_Occurred()) + PyErr_SetString(PyExc_TypeError, + "NULL object in marshal data for set"); + Py_DECREF(v); + v = NULL; + break; + } + if (PySet_Add(v, v2) == -1) { + Py_DECREF(v); + Py_DECREF(v2); + v = NULL; + break; + } + Py_DECREF(v2); + } + retval = v; + break; + + case TYPE_CODE: + if (PyEval_GetRestricted()) { + PyErr_SetString(PyExc_RuntimeError, + "cannot unmarshal code objects in " + "restricted execution mode"); + retval = NULL; + break; + } + else { + int argcount; + int nlocals; + int stacksize; + int flags; + PyObject *code = NULL; + PyObject *consts = NULL; + PyObject *names = NULL; + PyObject *varnames = NULL; + PyObject *freevars = NULL; + PyObject *cellvars = NULL; + PyObject *filename = NULL; + PyObject *name = NULL; + int firstlineno; + PyObject *lnotab = NULL; + + v = NULL; + + /* XXX ignore long->int overflows for now */ + argcount = (int)r_long(p); + nlocals = (int)r_long(p); + stacksize = (int)r_long(p); + flags = (int)r_long(p); + code = r_object(p); + if (code == NULL) + goto code_error; + consts = r_object(p); + if (consts == NULL) + goto code_error; + names = r_object(p); + if (names == NULL) + goto code_error; + varnames = r_object(p); + if (varnames == NULL) + goto code_error; + freevars = r_object(p); + if (freevars == NULL) + goto code_error; + cellvars = r_object(p); + if (cellvars == NULL) + goto code_error; + filename = r_object(p); + if (filename == NULL) + goto code_error; + name = r_object(p); + if (name == NULL) + goto code_error; + firstlineno = (int)r_long(p); + lnotab = r_object(p); + if (lnotab == NULL) + goto code_error; + + v = (PyObject *) PyCode_New( + argcount, nlocals, stacksize, flags, + code, consts, names, varnames, + freevars, cellvars, filename, name, + firstlineno, lnotab); + + code_error: + Py_XDECREF(code); + Py_XDECREF(consts); + Py_XDECREF(names); + Py_XDECREF(varnames); + Py_XDECREF(freevars); + Py_XDECREF(cellvars); + Py_XDECREF(filename); + Py_XDECREF(name); + Py_XDECREF(lnotab); + + } + retval = v; + break; + + default: + /* Bogus data got written, which isn't ideal. + This will let you keep working and recover. */ + PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)"); + retval = NULL; + break; + + } + p->depth--; + return retval; +} + +static PyObject * +read_object(RFILE *p) +{ + PyObject *v; + if (PyErr_Occurred()) { + fprintf(stderr, "XXX readobject called with exception set\n"); + return NULL; + } + v = r_object(p); + if (v == NULL && !PyErr_Occurred()) + PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object"); + return v; +} + +int +PyMarshal_ReadShortFromFile(FILE *fp) +{ + RFILE rf; + assert(fp); + rf.fp = fp; + rf.strings = NULL; + rf.end = rf.ptr = NULL; + return r_short(&rf); +} + +long +PyMarshal_ReadLongFromFile(FILE *fp) +{ + RFILE rf; + rf.fp = fp; + rf.strings = NULL; + rf.ptr = rf.end = NULL; + return r_long(&rf); +} + +#ifdef HAVE_FSTAT +/* Return size of file in bytes; < 0 if unknown. */ +static off_t +getfilesize(FILE *fp) +{ + struct stat st; + if (fstat(fileno(fp), &st) != 0) + return -1; + else + return st.st_size; +} +#endif + +/* If we can get the size of the file up-front, and it's reasonably small, + * read it in one gulp and delegate to ...FromString() instead. Much quicker + * than reading a byte at a time from file; speeds .pyc imports. + * CAUTION: since this may read the entire remainder of the file, don't + * call it unless you know you're done with the file. + */ +PyObject * +PyMarshal_ReadLastObjectFromFile(FILE *fp) +{ +/* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */ +#define REASONABLE_FILE_LIMIT (1L << 18) +#ifdef HAVE_FSTAT + off_t filesize; + filesize = getfilesize(fp); + if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) { + char* pBuf = (char *)PyMem_MALLOC(filesize); + if (pBuf != NULL) { + PyObject* v; + size_t n; + /* filesize must fit into an int, because it + is smaller than REASONABLE_FILE_LIMIT */ + n = fread(pBuf, 1, (int)filesize, fp); + v = PyMarshal_ReadObjectFromString(pBuf, n); + PyMem_FREE(pBuf); + return v; + } + + } +#endif + /* We don't have fstat, or we do but the file is larger than + * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time. + */ + return PyMarshal_ReadObjectFromFile(fp); + +#undef REASONABLE_FILE_LIMIT +} + +PyObject * +PyMarshal_ReadObjectFromFile(FILE *fp) +{ + RFILE rf; + PyObject *result; + rf.fp = fp; + rf.strings = PyList_New(0); + rf.depth = 0; + rf.ptr = rf.end = NULL; + result = r_object(&rf); + Py_DECREF(rf.strings); + return result; +} + +PyObject * +PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len) +{ + RFILE rf; + PyObject *result; + rf.fp = NULL; + rf.ptr = str; + rf.end = str + len; + rf.strings = PyList_New(0); + rf.depth = 0; + result = r_object(&rf); + Py_DECREF(rf.strings); + return result; +} + +static void +set_error(int error) +{ + switch (error) { + case WFERR_NOMEMORY: + PyErr_NoMemory(); + break; + case WFERR_UNMARSHALLABLE: + PyErr_SetString(PyExc_ValueError, "unmarshallable object"); + break; + case WFERR_NESTEDTOODEEP: + default: + PyErr_SetString(PyExc_ValueError, + "object too deeply nested to marshal"); + break; + } +} + +PyObject * +PyMarshal_WriteObjectToString(PyObject *x, int version) +{ + WFILE wf; + wf.fp = NULL; + wf.str = PyString_FromStringAndSize((char *)NULL, 50); + if (wf.str == NULL) + return NULL; + wf.ptr = PyString_AS_STRING((PyStringObject *)wf.str); + wf.end = wf.ptr + PyString_Size(wf.str); + wf.error = WFERR_OK; + wf.depth = 0; + wf.version = version; + wf.strings = (version > 0) ? PyDict_New() : NULL; + w_object(x, &wf); + Py_XDECREF(wf.strings); + if (wf.str != NULL) { + char *base = PyString_AS_STRING((PyStringObject *)wf.str); + if (wf.ptr - base > PY_SSIZE_T_MAX) { + Py_DECREF(wf.str); + PyErr_SetString(PyExc_OverflowError, + "too much marshall data for a string"); + return NULL; + } + if (_PyString_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base))) + return NULL; + } + if (wf.error != WFERR_OK) { + Py_XDECREF(wf.str); + set_error(wf.error); + return NULL; + } + return wf.str; +} + +/* And an interface for Python programs... */ + +static PyObject * +marshal_dump(PyObject *self, PyObject *args) +{ + WFILE wf; + PyObject *x; + PyObject *f; + int version = Py_MARSHAL_VERSION; + if (!PyArg_ParseTuple(args, "OO|i:dump", &x, &f, &version)) + return NULL; + if (!PyFile_Check(f)) { + PyErr_SetString(PyExc_TypeError, + "marshal.dump() 2nd arg must be file"); + return NULL; + } + wf.fp = PyFile_AsFile(f); + wf.str = NULL; + wf.ptr = wf.end = NULL; + wf.error = WFERR_OK; + wf.depth = 0; + wf.strings = (version > 0) ? PyDict_New() : 0; + wf.version = version; + w_object(x, &wf); + Py_XDECREF(wf.strings); + if (wf.error != WFERR_OK) { + set_error(wf.error); + return NULL; + } + Py_INCREF(Py_None); + return Py_None; +} + +PyDoc_STRVAR(dump_doc, +"dump(value, file[, version])\n\ +\n\ +Write the value on the open file. The value must be a supported type.\n\ +The file must be an open file object such as sys.stdout or returned by\n\ +open() or os.popen(). It must be opened in binary mode ('wb' or 'w+b').\n\ +\n\ +If the value has (or contains an object that has) an unsupported type, a\n\ +ValueError exception is raised — but garbage data will also be written\n\ +to the file. The object will not be properly read back by load()\n\ +\n\ +New in version 2.4: The version argument indicates the data format that\n\ +dump should use."); + +static PyObject * +marshal_load(PyObject *self, PyObject *f) +{ + RFILE rf; + PyObject *result; + if (!PyFile_Check(f)) { + PyErr_SetString(PyExc_TypeError, + "marshal.load() arg must be file"); + return NULL; + } + rf.fp = PyFile_AsFile(f); + rf.strings = PyList_New(0); + rf.depth = 0; + result = read_object(&rf); + Py_DECREF(rf.strings); + return result; +} + +PyDoc_STRVAR(load_doc, +"load(file)\n\ +\n\ +Read one value from the open file and return it. If no valid value is\n\ +read (e.g. because the data has a different Python version’s\n\ +incompatible marshal format), raise EOFError, ValueError or TypeError.\n\ +The file must be an open file object opened in binary mode ('rb' or\n\ +'r+b').\n\ +\n\ +Note: If an object containing an unsupported type was marshalled with\n\ +dump(), load() will substitute None for the unmarshallable type."); + + +static PyObject * +marshal_dumps(PyObject *self, PyObject *args) +{ + PyObject *x; + int version = Py_MARSHAL_VERSION; + if (!PyArg_ParseTuple(args, "O|i:dumps", &x, &version)) + return NULL; + return PyMarshal_WriteObjectToString(x, version); +} + +PyDoc_STRVAR(dumps_doc, +"dumps(value[, version])\n\ +\n\ +Return the string that would be written to a file by dump(value, file).\n\ +The value must be a supported type. Raise a ValueError exception if\n\ +value has (or contains an object that has) an unsupported type.\n\ +\n\ +New in version 2.4: The version argument indicates the data format that\n\ +dumps should use."); + + +static PyObject * +marshal_loads(PyObject *self, PyObject *args) +{ + RFILE rf; + char *s; + Py_ssize_t n; + PyObject* result; + if (!PyArg_ParseTuple(args, "s#:loads", &s, &n)) + return NULL; + rf.fp = NULL; + rf.ptr = s; + rf.end = s + n; + rf.strings = PyList_New(0); + rf.depth = 0; + result = read_object(&rf); + Py_DECREF(rf.strings); + return result; +} + +PyDoc_STRVAR(loads_doc, +"loads(string)\n\ +\n\ +Convert the string to a value. If no valid value is found, raise\n\ +EOFError, ValueError or TypeError. Extra characters in the string are\n\ +ignored."); + +static PyMethodDef marshal_methods[] = { + {"dump", marshal_dump, METH_VARARGS, dump_doc}, + {"load", marshal_load, METH_O, load_doc}, + {"dumps", marshal_dumps, METH_VARARGS, dumps_doc}, + {"loads", marshal_loads, METH_VARARGS, loads_doc}, + {NULL, NULL} /* sentinel */ +}; + +PyDoc_STRVAR(marshal_doc, +"This module contains functions that can read and write Python values in\n\ +a binary format. The format is specific to Python, but independent of\n\ +machine architecture issues.\n\ +\n\ +Not all Python object types are supported; in general, only objects\n\ +whose value is independent from a particular invocation of Python can be\n\ +written and read by this module. The following types are supported:\n\ +None, integers, long integers, floating point numbers, strings, Unicode\n\ +objects, tuples, lists, sets, dictionaries, and code objects, where it\n\ +should be understood that tuples, lists and dictionaries are only\n\ +supported as long as the values contained therein are themselves\n\ +supported; and recursive lists and dictionaries should not be written\n\ +(they will cause infinite loops).\n\ +\n\ +Variables:\n\ +\n\ +version -- indicates the format that the module uses. Version 0 is the\n\ + historical format, version 1 (added in Python 2.4) shares interned\n\ + strings and version 2 (added in Python 2.5) uses a binary format for\n\ + floating point numbers. (New in version 2.4)\n\ +\n\ +Functions:\n\ +\n\ +dump() -- write value to a file\n\ +load() -- read value from a file\n\ +dumps() -- write value to a string\n\ +loads() -- read value from a string"); + + +PyMODINIT_FUNC +PyMarshal_Init(void) +{ + PyObject *mod = Py_InitModule3("marshal", marshal_methods, + marshal_doc); + if (mod == NULL) + return; + PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION); +} -- 2.39.2