+++ /dev/null
-/*\r
- Unicode character type helpers.\r
-\r
- Written by Marc-Andre Lemburg (mal@lemburg.com).\r
- Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)\r
-\r
- Copyright (c) Corporation for National Research Initiatives.\r
-\r
-*/\r
-\r
-#include "Python.h"\r
-#include "unicodeobject.h"\r
-\r
-#define ALPHA_MASK 0x01\r
-#define DECIMAL_MASK 0x02\r
-#define DIGIT_MASK 0x04\r
-#define LOWER_MASK 0x08\r
-#define LINEBREAK_MASK 0x10\r
-#define SPACE_MASK 0x20\r
-#define TITLE_MASK 0x40\r
-#define UPPER_MASK 0x80\r
-#define NODELTA_MASK 0x100\r
-#define NUMERIC_MASK 0x200\r
-\r
-typedef struct {\r
- const Py_UNICODE upper;\r
- const Py_UNICODE lower;\r
- const Py_UNICODE title;\r
- const unsigned char decimal;\r
- const unsigned char digit;\r
- const unsigned short flags;\r
-} _PyUnicode_TypeRecord;\r
-\r
-#include "unicodetype_db.h"\r
-\r
-static const _PyUnicode_TypeRecord *\r
-gettyperecord(Py_UNICODE code)\r
-{\r
- int index;\r
-\r
-#ifdef Py_UNICODE_WIDE\r
- if (code >= 0x110000)\r
- index = 0;\r
- else\r
-#endif\r
- {\r
- index = index1[(code>>SHIFT)];\r
- index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];\r
- }\r
-\r
- return &_PyUnicode_TypeRecords[index];\r
-}\r
-\r
-/* Returns the titlecase Unicode characters corresponding to ch or just\r
- ch if no titlecase mapping is known. */\r
-\r
-Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)\r
-{\r
- const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);\r
- int delta = ctype->title;\r
-\r
- if (ctype->flags & NODELTA_MASK)\r
- return delta;\r
-\r
- if (delta >= 32768)\r
- delta -= 65536;\r
-\r
- return ch + delta;\r
-}\r
-\r
-/* Returns 1 for Unicode characters having the category 'Lt', 0\r
- otherwise. */\r
-\r
-int _PyUnicode_IsTitlecase(Py_UNICODE ch)\r
-{\r
- const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);\r
-\r
- return (ctype->flags & TITLE_MASK) != 0;\r
-}\r
-\r
-/* Returns the integer decimal (0-9) for Unicode characters having\r
- this property, -1 otherwise. */\r
-\r
-int _PyUnicode_ToDecimalDigit(Py_UNICODE ch)\r
-{\r
- const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);\r
-\r
- return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1;\r
-}\r
-\r
-int _PyUnicode_IsDecimalDigit(Py_UNICODE ch)\r
-{\r
- if (_PyUnicode_ToDecimalDigit(ch) < 0)\r
- return 0;\r
- return 1;\r
-}\r
-\r
-/* Returns the integer digit (0-9) for Unicode characters having\r
- this property, -1 otherwise. */\r
-\r
-int _PyUnicode_ToDigit(Py_UNICODE ch)\r
-{\r
- const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);\r
-\r
- return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1;\r
-}\r
-\r
-int _PyUnicode_IsDigit(Py_UNICODE ch)\r
-{\r
- if (_PyUnicode_ToDigit(ch) < 0)\r
- return 0;\r
- return 1;\r
-}\r
-\r
-/* Returns the numeric value as double for Unicode characters having\r
- this property, -1.0 otherwise. */\r
-\r
-int _PyUnicode_IsNumeric(Py_UNICODE ch)\r
-{\r
- const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);\r
-\r
- return (ctype->flags & NUMERIC_MASK) != 0;\r
-}\r
-\r
-#ifndef WANT_WCTYPE_FUNCTIONS\r
-\r
-/* Returns 1 for Unicode characters having the category 'Ll', 0\r
- otherwise. */\r
-\r
-int _PyUnicode_IsLowercase(Py_UNICODE ch)\r
-{\r
- const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);\r
-\r
- return (ctype->flags & LOWER_MASK) != 0;\r
-}\r
-\r
-/* Returns 1 for Unicode characters having the category 'Lu', 0\r
- otherwise. */\r
-\r
-int _PyUnicode_IsUppercase(Py_UNICODE ch)\r
-{\r
- const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);\r
-\r
- return (ctype->flags & UPPER_MASK) != 0;\r
-}\r
-\r
-/* Returns the uppercase Unicode characters corresponding to ch or just\r
- ch if no uppercase mapping is known. */\r
-\r
-Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)\r
-{\r
- const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);\r
- int delta = ctype->upper;\r
- if (ctype->flags & NODELTA_MASK)\r
- return delta;\r
- if (delta >= 32768)\r
- delta -= 65536;\r
- return ch + delta;\r
-}\r
-\r
-/* Returns the lowercase Unicode characters corresponding to ch or just\r
- ch if no lowercase mapping is known. */\r
-\r
-Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)\r
-{\r
- const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);\r
- int delta = ctype->lower;\r
- if (ctype->flags & NODELTA_MASK)\r
- return delta;\r
- if (delta >= 32768)\r
- delta -= 65536;\r
- return ch + delta;\r
-}\r
-\r
-/* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',\r
- 'Lo' or 'Lm', 0 otherwise. */\r
-\r
-int _PyUnicode_IsAlpha(Py_UNICODE ch)\r
-{\r
- const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);\r
-\r
- return (ctype->flags & ALPHA_MASK) != 0;\r
-}\r
-\r
-#else\r
-\r
-/* Export the interfaces using the wchar_t type for portability\r
- reasons: */\r
-\r
-int _PyUnicode_IsLowercase(Py_UNICODE ch)\r
-{\r
- return iswlower(ch);\r
-}\r
-\r
-int _PyUnicode_IsUppercase(Py_UNICODE ch)\r
-{\r
- return iswupper(ch);\r
-}\r
-\r
-Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)\r
-{\r
- return towlower(ch);\r
-}\r
-\r
-Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)\r
-{\r
- return towupper(ch);\r
-}\r
-\r
-int _PyUnicode_IsAlpha(Py_UNICODE ch)\r
-{\r
- return iswalpha(ch);\r
-}\r
-\r
-#endif\r