--- /dev/null
+/*\r
+ Unicode character type helpers.\r
+\r
+ Written by Marc-Andre Lemburg (mal@lemburg.com).\r
+ Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)\r
+\r
+ Copyright (c) Corporation for National Research Initiatives.\r
+\r
+*/\r
+\r
+#include "Python.h"\r
+#include "unicodeobject.h"\r
+\r
+#define ALPHA_MASK 0x01 /* Bit flags tested against _PyUnicode_TypeRecord.flags; this one is checked by _PyUnicode_IsAlpha */\r
+#define DECIMAL_MASK 0x02 /* record's decimal field is returned only when this bit is set */\r
+#define DIGIT_MASK 0x04 /* record's digit field is returned only when this bit is set */\r
+#define LOWER_MASK 0x08 /* checked by the table-driven _PyUnicode_IsLowercase */\r
+#define LINEBREAK_MASK 0x10 /* not referenced by the code visible in this file */\r
+#define SPACE_MASK 0x20 /* not referenced by the code visible in this file */\r
+#define TITLE_MASK 0x40 /* checked by _PyUnicode_IsTitlecase */\r
+#define UPPER_MASK 0x80 /* checked by the table-driven _PyUnicode_IsUppercase */\r
+#define NODELTA_MASK 0x100 /* upper/lower/title hold the mapped character itself rather than an offset */\r
+#define NUMERIC_MASK 0x200 /* checked by _PyUnicode_IsNumeric */\r
+\r
+typedef struct { /* One character-type record; gettyperecord() returns a pointer to one of these */\r
+ const Py_UNICODE upper; /* uppercase mapping: 16-bit wrapped offset added to ch, or the result itself when NODELTA_MASK is set */\r
+ const Py_UNICODE lower; /* lowercase mapping, same encoding as upper */\r
+ const Py_UNICODE title; /* titlecase mapping, same encoding as upper */\r
+ const unsigned char decimal; /* decimal value; only returned when DECIMAL_MASK is set in flags */\r
+ const unsigned char digit; /* digit value; only returned when DIGIT_MASK is set in flags */\r
+ const unsigned short flags; /* bit set tested against the *_MASK constants */\r
+} _PyUnicode_TypeRecord;\r
+\r
+#include "unicodetype_db.h"\r
+\r
+/* Look up the type record for a code point through the two-level\r
+   index1/index2 tables.  On wide builds, values of 0x110000 and above\r
+   resolve to record 0. */\r
+static const _PyUnicode_TypeRecord *\r
+gettyperecord(Py_UNICODE code)\r
+{\r
+    int slot = 0;\r
+\r
+#ifdef Py_UNICODE_WIDE\r
+    if (code < 0x110000)\r
+#endif\r
+    {\r
+        /* index1 is addressed by the high bits of code; its entry,\r
+           combined with the low SHIFT bits, addresses index2. */\r
+        const int page = index1[code >> SHIFT];\r
+\r
+        slot = index2[(page << SHIFT) + (code & ((1 << SHIFT) - 1))];\r
+    }\r
+\r
+    return _PyUnicode_TypeRecords + slot;\r
+}\r
+\r
+/* Maps ch to its titlecase counterpart.  The type record stores either\r
+   the mapped character itself (NODELTA_MASK set) or an offset that is\r
+   added to ch; a zero offset leaves ch unchanged. */\r
+\r
+Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)\r
+{\r
+    const _PyUnicode_TypeRecord *rec = gettyperecord(ch);\r
+    int mapping = rec->title;\r
+\r
+    if (rec->flags & NODELTA_MASK)\r
+        return mapping;\r
+\r
+    /* Offsets of 32768 and above encode negative 16-bit values. */\r
+    return ch + (mapping >= 32768 ? mapping - 65536 : mapping);\r
+}\r
+\r
+/* Titlecase predicate (category 'Lt'): yields 1 when the type record\r
+   of ch has TITLE_MASK set, 0 when it does not. */\r
+\r
+int _PyUnicode_IsTitlecase(Py_UNICODE ch)\r
+{\r
+    return (gettyperecord(ch)->flags & TITLE_MASK) ? 1 : 0;\r
+}\r
+\r
+/* Gives the decimal value stored in the type record of ch when that\r
+   record carries DECIMAL_MASK; every other character yields -1. */\r
+\r
+int _PyUnicode_ToDecimalDigit(Py_UNICODE ch)\r
+{\r
+    const _PyUnicode_TypeRecord *rec = gettyperecord(ch);\r
+\r
+    if (!(rec->flags & DECIMAL_MASK))\r
+        return -1;\r
+    return rec->decimal;\r
+}\r
+\r
+/* 1 when _PyUnicode_ToDecimalDigit() finds a decimal value for ch\r
+   (i.e. its result is not negative), 0 otherwise. */\r
+int _PyUnicode_IsDecimalDigit(Py_UNICODE ch)\r
+{\r
+    return _PyUnicode_ToDecimalDigit(ch) >= 0;\r
+}\r
+\r
+/* Gives the digit value stored in the type record of ch when that\r
+   record carries DIGIT_MASK; every other character yields -1. */\r
+\r
+int _PyUnicode_ToDigit(Py_UNICODE ch)\r
+{\r
+    const _PyUnicode_TypeRecord *rec = gettyperecord(ch);\r
+\r
+    if (!(rec->flags & DIGIT_MASK))\r
+        return -1;\r
+    return rec->digit;\r
+}\r
+\r
+/* 1 when _PyUnicode_ToDigit() finds a digit value for ch (i.e. its\r
+   result is not negative), 0 otherwise. */\r
+int _PyUnicode_IsDigit(Py_UNICODE ch)\r
+{\r
+    return _PyUnicode_ToDigit(ch) >= 0;\r
+}\r
+\r
+/* Returns 1 for Unicode characters whose type record has NUMERIC_MASK\r
+   set, 0 otherwise.  This is a predicate only: it does not compute or\r
+   return the character's numeric value. */\r
+\r
+int _PyUnicode_IsNumeric(Py_UNICODE ch)\r
+{\r
+    const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);\r
+\r
+    return (ctype->flags & NUMERIC_MASK) != 0;\r
+}\r
+\r
+#ifndef WANT_WCTYPE_FUNCTIONS\r
+\r
+/* Lowercase predicate (category 'Ll'): yields 1 when the type record\r
+   of ch has LOWER_MASK set, 0 when it does not. */\r
+\r
+int _PyUnicode_IsLowercase(Py_UNICODE ch)\r
+{\r
+    return (gettyperecord(ch)->flags & LOWER_MASK) ? 1 : 0;\r
+}\r
+\r
+/* Uppercase predicate (category 'Lu'): yields 1 when the type record\r
+   of ch has UPPER_MASK set, 0 when it does not. */\r
+\r
+int _PyUnicode_IsUppercase(Py_UNICODE ch)\r
+{\r
+    return (gettyperecord(ch)->flags & UPPER_MASK) ? 1 : 0;\r
+}\r
+\r
+/* Maps ch to its uppercase counterpart.  The type record stores either\r
+   the mapped character itself (NODELTA_MASK set) or an offset that is\r
+   added to ch; a zero offset leaves ch unchanged. */\r
+\r
+Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)\r
+{\r
+    const _PyUnicode_TypeRecord *rec = gettyperecord(ch);\r
+    int mapping = rec->upper;\r
+\r
+    if (rec->flags & NODELTA_MASK)\r
+        return mapping;\r
+\r
+    /* Offsets of 32768 and above encode negative 16-bit values. */\r
+    return ch + (mapping >= 32768 ? mapping - 65536 : mapping);\r
+}\r
+\r
+/* Maps ch to its lowercase counterpart.  The type record stores either\r
+   the mapped character itself (NODELTA_MASK set) or an offset that is\r
+   added to ch; a zero offset leaves ch unchanged. */\r
+\r
+Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)\r
+{\r
+    const _PyUnicode_TypeRecord *rec = gettyperecord(ch);\r
+    int mapping = rec->lower;\r
+\r
+    if (rec->flags & NODELTA_MASK)\r
+        return mapping;\r
+\r
+    /* Offsets of 32768 and above encode negative 16-bit values. */\r
+    return ch + (mapping >= 32768 ? mapping - 65536 : mapping);\r
+}\r
+\r
+/* Alphabetic predicate (categories 'Ll', 'Lu', 'Lt', 'Lo' or 'Lm'):\r
+   yields 1 when the type record of ch has ALPHA_MASK set, 0 when it\r
+   does not. */\r
+\r
+int _PyUnicode_IsAlpha(Py_UNICODE ch)\r
+{\r
+    return (gettyperecord(ch)->flags & ALPHA_MASK) ? 1 : 0;\r
+}\r
+\r
+#else\r
+\r
+/* Export the interfaces using the wchar_t type for portability\r
+ reasons: */\r
+\r
+/* Returns 1 if iswlower() classifies ch as lowercase, 0 otherwise.\r
+   iswlower() only promises a nonzero value for a match, so the result\r
+   is normalized to match the 1/0 contract of the table-driven build. */\r
+int _PyUnicode_IsLowercase(Py_UNICODE ch)\r
+{\r
+    return iswlower(ch) != 0;\r
+}\r
+\r
+/* Returns 1 if iswupper() classifies ch as uppercase, 0 otherwise.\r
+   iswupper() only promises a nonzero value for a match, so the result\r
+   is normalized to match the 1/0 contract of the table-driven build. */\r
+int _PyUnicode_IsUppercase(Py_UNICODE ch)\r
+{\r
+    return iswupper(ch) != 0;\r
+}\r
+\r
+/* Lowercase mapping delegated to the C library's towlower(); its\r
+   result is converted to Py_UNICODE. */\r
+Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)\r
+{\r
+    const Py_UNICODE mapped = towlower(ch);\r
+\r
+    return mapped;\r
+}\r
+\r
+/* Uppercase mapping delegated to the C library's towupper(); its\r
+   result is converted to Py_UNICODE. */\r
+Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)\r
+{\r
+    const Py_UNICODE mapped = towupper(ch);\r
+\r
+    return mapped;\r
+}\r
+\r
+/* Returns 1 if iswalpha() classifies ch as alphabetic, 0 otherwise.\r
+   iswalpha() only promises a nonzero value for a match, so the result\r
+   is normalized to match the 1/0 contract of the table-driven build. */\r
+int _PyUnicode_IsAlpha(Py_UNICODE ch)\r
+{\r
+    return iswalpha(ch) != 0;\r
+}\r
+\r
+#endif\r