From d8062f1573f7aaab450dc1735c2b6c9d07f52e5e Mon Sep 17 00:00:00 2001 From: Eric Dong Date: Tue, 8 Sep 2015 07:04:28 +0000 Subject: [PATCH] Rollback the patch which has error changes. Contributed-under: TianoCore Contribution Agreement 1.0 Signed-off-by: Eric Dong git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@18412 6f19259b-4bc3-4df7-8a09-765794883524 --- MdeModulePkg/MdeModulePkg.dsc | 1 - .../RegularExpressionDxe/Oniguruma/AUTHORS | 1 - .../RegularExpressionDxe/Oniguruma/COPYING | 28 - .../Oniguruma/OnigurumaIntrinsics.c | 53 - .../Oniguruma/OnigurumaUefiPort.c | 32 - .../Oniguruma/OnigurumaUefiPort.h | 66 - .../RegularExpressionDxe/Oniguruma/README | 189 - .../Oniguruma/enc/ascii.c | 58 - .../Oniguruma/enc/unicode.c | 11374 ---------------- .../Oniguruma/enc/utf16_le.c | 226 - .../RegularExpressionDxe/Oniguruma/oniggnu.h | 85 - .../Oniguruma/onigposix.h | 169 - .../Oniguruma/oniguruma.h | 829 -- .../RegularExpressionDxe/Oniguruma/regcomp.c | 6291 --------- .../RegularExpressionDxe/Oniguruma/regenc.c | 904 -- .../RegularExpressionDxe/Oniguruma/regenc.h | 189 - .../RegularExpressionDxe/Oniguruma/regerror.c | 394 - .../RegularExpressionDxe/Oniguruma/regexec.c | 3810 ------ .../RegularExpressionDxe/Oniguruma/reggnu.c | 169 - .../RegularExpressionDxe/Oniguruma/regint.h | 820 -- .../RegularExpressionDxe/Oniguruma/regparse.c | 5556 -------- .../RegularExpressionDxe/Oniguruma/regparse.h | 353 - .../Oniguruma/regposerr.c | 102 - .../RegularExpressionDxe/Oniguruma/regposix.c | 305 - .../Oniguruma/regsyntax.c | 315 - .../RegularExpressionDxe/Oniguruma/regtrav.c | 76 - .../Oniguruma/regversion.c | 56 - .../RegularExpressionDxe/Oniguruma/st.c | 579 - .../RegularExpressionDxe/Oniguruma/st.h | 68 - .../RegularExpressionDxe.c | 321 - .../RegularExpressionDxe.h | 130 - .../RegularExpressionDxe.inf | 98 - 32 files changed, 33647 deletions(-) delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/AUTHORS delete mode 100644 
MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/COPYING delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/OnigurumaIntrinsics.c delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/OnigurumaUefiPort.c delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/OnigurumaUefiPort.h delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/README delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/enc/ascii.c delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/enc/unicode.c delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/enc/utf16_le.c delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/oniggnu.h delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/onigposix.h delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/oniguruma.h delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regcomp.c delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regenc.c delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regenc.h delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regerror.c delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regexec.c delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/reggnu.c delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regint.h delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.c delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.h delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regposerr.c delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regposix.c delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regsyntax.c delete mode 100644 
MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regtrav.c delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regversion.c delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/st.c delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/st.h delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/RegularExpressionDxe.c delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/RegularExpressionDxe.h delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/RegularExpressionDxe.inf diff --git a/MdeModulePkg/MdeModulePkg.dsc b/MdeModulePkg/MdeModulePkg.dsc index 8a76a4c7a3..3e04477318 100644 --- a/MdeModulePkg/MdeModulePkg.dsc +++ b/MdeModulePkg/MdeModulePkg.dsc @@ -310,7 +310,6 @@ MdeModulePkg/Universal/MonotonicCounterRuntimeDxe/MonotonicCounterRuntimeDxe.inf MdeModulePkg/Universal/ResetSystemRuntimeDxe/ResetSystemRuntimeDxe.inf MdeModulePkg/Universal/SmbiosDxe/SmbiosDxe.inf - MdeModulePkg/Universal/RegularExpressionDxe/RegularExpressionDxe.inf MdeModulePkg/Universal/Network/ArpDxe/ArpDxe.inf MdeModulePkg/Universal/Network/Dhcp4Dxe/Dhcp4Dxe.inf diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/AUTHORS b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/AUTHORS deleted file mode 100644 index 3b3287e7d4..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/AUTHORS +++ /dev/null @@ -1 +0,0 @@ -sndgk393 AT ybb DOT ne DOT jp (K.Kosako) diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/COPYING b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/COPYING deleted file mode 100644 index f3d17ae519..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/COPYING +++ /dev/null @@ -1,28 +0,0 @@ -Oniguruma LICENSE ------------------ - -/*- - * Copyright (c) 2002-2007 K.Kosako - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/OnigurumaIntrinsics.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/OnigurumaIntrinsics.c deleted file mode 100644 index a60b647c36..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/OnigurumaIntrinsics.c +++ /dev/null @@ -1,53 +0,0 @@ -/** @file - - Provide intrinsics within Oniguruma - - Copyright (c) 2015, Hewlett-Packard Development Company, L.P.
- - This program and the accompanying materials are licensed and made available - under the terms and conditions of the BSD License that accompanies this - distribution. The full text of the license may be found at - http://opensource.org/licenses/bsd-license.php. - - THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, WITHOUT - WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. -**/ - -#include - -// -// From CryptoPkg/IntrinsicLib -// - -/* Copies bytes between buffers */ -#pragma function(memcpy) -void * memcpy (void *dest, const void *src, unsigned int count) -{ - return CopyMem (dest, src, (UINTN)count); -} - -/* Sets buffers to a specified character */ -#pragma function(memset) -void * memset (void *dest, char ch, unsigned int count) -{ - // - // NOTE: Here we use one base implementation for memset, instead of the direct - // optimized SetMem() wrapper. Because the IntrinsicLib has to be built - // without whole program optimization option, and there will be some - // potential register usage errors when calling other optimized codes. - // - - // - // Declare the local variables that actually move the data elements as - // volatile to prevent the optimizer from replacing this function with - // the intrinsic memset() - // - volatile UINT8 *Pointer; - - Pointer = (UINT8 *)dest; - while (count-- != 0) { - *(Pointer++) = ch; - } - - return dest; -} diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/OnigurumaUefiPort.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/OnigurumaUefiPort.c deleted file mode 100644 index 98822f3648..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/OnigurumaUefiPort.c +++ /dev/null @@ -1,32 +0,0 @@ -/** @file - - Module to rewrite stdlib references within Oniguruma - - Copyright (c) 2014-2015, Hewlett-Packard Development Company, L.P.
- - This program and the accompanying materials are licensed and made available - under the terms and conditions of the BSD License that accompanies this - distribution. The full text of the license may be found at - http://opensource.org/licenses/bsd-license.php. - - THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, WITHOUT - WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. -**/ -#include "OnigurumaUefiPort.h" - -int sprintf(char *str, char const *fmt, ...) -{ - VA_LIST Marker; - int NumberOfPrinted; - - VA_START (Marker, fmt); - NumberOfPrinted = (int)AsciiVSPrint (str, 1000000, fmt, Marker); - VA_END (Marker); - - return NumberOfPrinted; -} - -int OnigStrCmp (char* Str1, char* Str2) -{ - return (int)AsciiStrCmp (Str1, Str2); -} diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/OnigurumaUefiPort.h b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/OnigurumaUefiPort.h deleted file mode 100644 index 504c9fb4f7..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/OnigurumaUefiPort.h +++ /dev/null @@ -1,66 +0,0 @@ -/** @file - - Module to rewrite stdlib references within Oniguruma - - Copyright (c) 2014-2015, Hewlett-Packard Development Company, L.P.
- - This program and the accompanying materials are licensed and made available - under the terms and conditions of the BSD License that accompanies this - distribution. The full text of the license may be found at - http://opensource.org/licenses/bsd-license.php. - - THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, WITHOUT - WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. -**/ -#ifndef ONIGURUMA_UEFI_PORT_H -#define ONIGURUMA_UEFI_PORT_H - -#include -#include -#include -#include -#include - -#undef _WIN32 -#define P_(args) args - -#define SIZEOF_LONG sizeof(long) -#define SIZEOF_INT sizeof(int) -typedef UINTN size_t; - -#define malloc(n) AllocatePool(n) -#define calloc(n,s) AllocateZeroPool((n)*(s)) -#define free(p) FreePool(p) -#define realloc(OldPtr,NewSize,OldSize) ReallocatePool(OldSize,NewSize,OldPtr) -#define xmemmove(Dest,Src,Length) CopyMem(Dest,Src,Length) -#define xmemcpy(Dest,Src,Length) CopyMem(Dest,Src,Length) -#define xmemset(Buffer,Value,Length) SetMem(Buffer,Length,Value) - -#define va_init_list(a,b) VA_START(a,b) -#define va_list VA_LIST -#define va_arg(a,b) VA_ARG(a,b) -#define va_end(a) VA_END(a) - -#define FILE VOID -#define stdout NULL -#define fprintf(...) 
-#define fputs(a,b) -#define vsnprintf (int)AsciiVSPrint -#define _vsnprintf vsnprintf - -#define setlocale(a,b) -#define LC_ALL 0 - -//#define MAX_STRING_SIZE 0x1000 -#define strlen_s(String,MaxSize) AsciiStrnLenS (String, MaxSize) -#define strcat_s(Dest,MaxSize,Src) AsciiStrCatS (Dest, MaxSize, Src) -#define strncpy_s(Dest,MaxSize,Src,Length) AsciiStrnCpyS (Dest, MaxSize, Src, Length) -#define strcmp OnigStrCmp - -int OnigStrCmp (char* Str1, char* Str2); - -int sprintf (char *str, char const *fmt, ...); - -#define exit(n) ASSERT(FALSE); - -#endif // !ONIGURUMA_UEFI_PORT_H diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/README b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/README deleted file mode 100644 index 18952eed00..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/README +++ /dev/null @@ -1,189 +0,0 @@ -README 2007/05/31 - -Oniguruma ---- (C) K.Kosako - -http://www.geocities.jp/kosako3/oniguruma/ - -Oniguruma is a regular expressions library. -The characteristics of this library is that different character encoding -for every regular expression object can be specified. - -Supported character encodings: - - ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE, - EUC-JP, EUC-TW, EUC-KR, EUC-CN, - Shift_JIS, Big5, GB18030, KOI8-R, CP1251, - ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, - ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10, - ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16 - -* GB18030: contributed by KUBO Takehiro -* CP1251: contributed by Byte ------------------------------------------------------------- - -License - - BSD license. - - -Install - - Case 1: Unix and Cygwin platform - - 1. ./configure - 2. make - 3. 
make install - - * uninstall - - make uninstall - - * test (ASCII/EUC-JP) - - make atest - - * configuration check - - onig-config --cflags - onig-config --libs - onig-config --prefix - onig-config --exec-prefix - - - - Case 2: Win32 platform (VC++) - - 1. copy win32\Makefile Makefile - 2. copy win32\config.h config.h - 3. nmake - - onig_s.lib: static link library - onig.dll: dynamic link library - - * test (ASCII/Shift_JIS) - 4. copy win32\testc.c testc.c - 5. nmake ctest - - - -Regular Expressions - - See doc/RE (or doc/RE.ja for Japanese). - - -Usage - - Include oniguruma.h in your program. (Oniguruma API) - See doc/API for Oniguruma API. - - If you want to disable UChar type (== unsigned char) definition - in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then - include oniguruma.h. - - If you want to disable regex_t type definition in oniguruma.h, - define ONIG_ESCAPE_REGEX_T_COLLISION and then include oniguruma.h. - - Example of the compiling/linking command line in Unix or Cygwin, - (prefix == /usr/local case) - - cc sample.c -L/usr/local/lib -lonig - - - If you want to use static link library(onig_s.lib) in Win32, - add option -DONIG_EXTERN=extern to C compiler. - - - -Sample Programs - - sample/simple.c example of the minimum (Oniguruma API) - sample/names.c example of the named group callback. - sample/encode.c example of some encodings. - sample/listcap.c example of the capture history. - sample/posix.c POSIX API sample. - sample/sql.c example of the variable meta characters. - (SQL-like pattern matching) - -Test Programs - sample/syntax.c Perl, Java and ASIS syntax test. - sample/crnl.c --enable-crnl-as-line-terminator test - - -Source Files - - oniguruma.h Oniguruma API header file. (public) - onig-config.in configuration check program template. - - regenc.h character encodings framework header file. 
- regint.h internal definitions - regparse.h internal definitions for regparse.c and regcomp.c - regcomp.c compiling and optimization functions - regenc.c character encodings framework. - regerror.c error message function - regext.c extended API functions. (deluxe version API) - regexec.c search and match functions - regparse.c parsing functions. - regsyntax.c pattern syntax functions and built-in syntax definitions. - regtrav.c capture history tree data traverse functions. - regversion.c version info function. - st.h hash table functions header file - st.c hash table functions - - oniggnu.h GNU regex API header file. (public) - reggnu.c GNU regex API functions - - onigposix.h POSIX API header file. (public) - regposerr.c POSIX error message function. - regposix.c POSIX API functions. - - enc/mktable.c character type table generator. - enc/ascii.c ASCII encoding. - enc/euc_jp.c EUC-JP encoding. - enc/euc_tw.c EUC-TW encoding. - enc/euc_kr.c EUC-KR, EUC-CN encoding. - enc/sjis.c Shift_JIS encoding. - enc/big5.c Big5 encoding. - enc/gb18030.c GB18030 encoding. - enc/koi8.c KOI8 encoding. - enc/koi8_r.c KOI8-R encoding. - enc/cp1251.c CP1251 encoding. - enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1) - enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2) - enc/iso8859_3.c ISO-8859-3 encoding. (Latin-3) - enc/iso8859_4.c ISO-8859-4 encoding. (Latin-4) - enc/iso8859_5.c ISO-8859-5 encoding. (Cyrillic) - enc/iso8859_6.c ISO-8859-6 encoding. (Arabic) - enc/iso8859_7.c ISO-8859-7 encoding. (Greek) - enc/iso8859_8.c ISO-8859-8 encoding. (Hebrew) - enc/iso8859_9.c ISO-8859-9 encoding. (Latin-5 or Turkish) - enc/iso8859_10.c ISO-8859-10 encoding. (Latin-6 or Nordic) - enc/iso8859_11.c ISO-8859-11 encoding. (Thai) - enc/iso8859_13.c ISO-8859-13 encoding. (Latin-7 or Baltic Rim) - enc/iso8859_14.c ISO-8859-14 encoding. (Latin-8 or Celtic) - enc/iso8859_15.c ISO-8859-15 encoding. (Latin-9 or West European with Euro) - enc/iso8859_16.c ISO-8859-16 encoding. 
- (Latin-10 or South-Eastern European with Euro) - enc/utf8.c UTF-8 encoding. - enc/utf16_be.c UTF-16BE encoding. - enc/utf16_le.c UTF-16LE encoding. - enc/utf32_be.c UTF-32BE encoding. - enc/utf32_le.c UTF-32LE encoding. - enc/unicode.c Unicode information data. - - win32/Makefile Makefile for Win32 (VC++) - win32/config.h config.h for Win32 - - - -ToDo - - ? case fold flag: Katakana <-> Hiragana. - ? add ONIG_OPTION_NOTBOS/NOTEOS. (\A, \z, \Z) - ?? \X (== \PM\pM*) - ?? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS. - ?? transmission stopper. (return ONIG_STOP from match_at()) - -and I'm thankful to Akinori MUSHA. - - -Mail Address: K.Kosako diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/enc/ascii.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/enc/ascii.c deleted file mode 100644 index 25afc980be..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/enc/ascii.c +++ /dev/null @@ -1,58 +0,0 @@ -/********************************************************************** - ascii.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2006 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -static int -ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else - return FALSE; -} - -OnigEncodingType OnigEncodingASCII = { - onigenc_single_byte_mbc_enc_len, - "US-ASCII", /* name */ - 1, /* max byte length */ - 1, /* min byte length */ - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - onigenc_ascii_mbc_case_fold, - onigenc_ascii_apply_all_case_fold, - onigenc_ascii_get_case_fold_codes_by_str, - onigenc_minimum_property_name_to_ctype, - ascii_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/enc/unicode.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/enc/unicode.c deleted file mode 100644 index 70a1a5207a..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/enc/unicode.c +++ /dev/null @@ -1,11374 +0,0 @@ -/********************************************************************** - unicode.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2013 K.Kosako - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "regint.h" - -#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \ - ((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) -#if 0 -#define ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(code,cbit) \ - ((EncUNICODE_ISO_8859_1_CtypeTable[code] & (cbit)) != 0) -#endif - -static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = { - 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, - 0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008, - 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, - 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, - 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, - 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, - 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, - 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, - 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, - 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, - 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, - 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, - 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, - 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, - 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, - 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, - 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 
0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 -}; - -/* 'NEWLINE' */ -static const OnigCodePoint CR_NEWLINE[] = { - 1, - 0x000a, 0x000a -}; /* CR_NEWLINE */ - -/* 'Alpha': [[:Alpha:]] */ -static const OnigCodePoint CR_Alpha[] = { - 418, - 0x0041, 0x005a, - 0x0061, 0x007a, - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, - 0x00f8, 0x0241, - 0x0250, 0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x0300, 0x036f, - 0x037a, 0x037a, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03f5, - 0x03f7, 0x0481, - 0x0483, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x0559, - 0x0561, 0x0587, - 0x0591, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c5, - 0x05c7, 0x05c7, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0610, 0x0615, - 0x0621, 0x063a, - 0x0640, 0x065e, - 0x066e, 0x06d3, - 0x06d5, 0x06dc, - 0x06de, 0x06e8, - 0x06ea, 0x06ef, - 0x06fa, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x074a, - 0x074d, 0x076d, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0963, - 0x097d, 0x097d, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09ce, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09f0, 0x09f1, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 
0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a70, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b71, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, - 0x0e01, 0x0e3a, - 0x0e40, 0x0e4e, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0edc, 
0x0edd, - 0x0f00, 0x0f00, - 0x0f18, 0x0f19, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f3e, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f84, - 0x0f86, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1050, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10fa, - 0x10fc, 0x10fc, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x135f, 0x135f, - 0x1380, 0x138f, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1734, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17b3, - 0x17b6, 0x17d3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dd, - 0x180b, 0x180d, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1950, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19a9, - 0x19b0, 0x19c9, - 0x1a00, 0x1a1b, - 0x1d00, 0x1dc3, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x2090, 0x2094, - 0x20d0, 0x20eb, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2131, - 0x2133, 0x2139, - 0x213c, 0x213f, - 0x2145, 0x2149, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e, - 0x2c80, 
0x2ce4, - 0x2d00, 0x2d25, - 0x2d30, 0x2d65, - 0x2d6f, 0x2d6f, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x3005, 0x3006, - 0x302a, 0x302f, - 0x3031, 0x3035, - 0x303b, 0x303c, - 0x3041, 0x3096, - 0x3099, 0x309a, - 0x309d, 0x309f, - 0x30a1, 0x30fa, - 0x30fc, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3400, 0x4db5, - 0x4e00, 0x9fbb, - 0xa000, 0xa48c, - 0xa800, 0xa827, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfa70, 0xfad9, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff21, 0xff3a, - 0xff41, 0xff5a, - 0xff66, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10300, 0x1031e, - 0x10330, 0x10349, - 0x10380, 0x1039d, - 0x103a0, 0x103c3, - 0x103c8, 0x103cf, - 0x10400, 0x1049d, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x10a00, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a3f, - 0x1d165, 0x1d169, - 0x1d16d, 0x1d172, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d242, 0x1d244, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 
0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0100, 0xe01ef -}; /* CR_Alpha */ - -/* 'Blank': [[:Blank:]] */ -static const OnigCodePoint CR_Blank[] = { - 9, - 0x0009, 0x0009, - 0x0020, 0x0020, - 0x00a0, 0x00a0, - 0x1680, 0x1680, - 0x180e, 0x180e, - 0x2000, 0x200a, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000 -}; /* CR_Blank */ - -/* 'Cntrl': [[:Cntrl:]] */ -static const OnigCodePoint CR_Cntrl[] = { - 19, - 0x0000, 0x001f, - 0x007f, 0x009f, - 0x00ad, 0x00ad, - 0x0600, 0x0603, - 0x06dd, 0x06dd, - 0x070f, 0x070f, - 0x17b4, 0x17b5, - 0x200b, 0x200f, - 0x202a, 0x202e, - 0x2060, 0x2063, - 0x206a, 0x206f, - 0xd800, 0xf8ff, - 0xfeff, 0xfeff, - 0xfff9, 0xfffb, - 0x1d173, 0x1d17a, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -}; /* CR_Cntrl */ - -/* 'Digit': [[:Digit:]] */ -static const OnigCodePoint CR_Digit[] = { - 23, - 0x0030, 0x0039, - 0x0660, 0x0669, - 0x06f0, 0x06f9, - 0x0966, 0x096f, - 0x09e6, 0x09ef, - 0x0a66, 0x0a6f, - 0x0ae6, 0x0aef, - 0x0b66, 0x0b6f, - 0x0be6, 0x0bef, - 0x0c66, 0x0c6f, - 0x0ce6, 0x0cef, - 0x0d66, 0x0d6f, - 0x0e50, 0x0e59, - 0x0ed0, 0x0ed9, - 0x0f20, 0x0f29, - 0x1040, 0x1049, - 0x17e0, 0x17e9, - 0x1810, 0x1819, - 0x1946, 0x194f, - 0x19d0, 0x19d9, - 0xff10, 0xff19, - 0x104a0, 0x104a9, - 0x1d7ce, 0x1d7ff -}; /* CR_Digit */ - -/* 'Graph': [[:Graph:]] */ -static const OnigCodePoint CR_Graph[] = { - 424, - 0x0021, 0x007e, - 0x00a1, 0x0241, - 0x0250, 0x036f, - 0x0374, 0x0375, - 0x037a, 0x037a, - 0x037e, 0x037e, - 0x0384, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x055f, - 0x0561, 0x0587, - 0x0589, 0x058a, - 0x0591, 
0x05b9, - 0x05bb, 0x05c7, - 0x05d0, 0x05ea, - 0x05f0, 0x05f4, - 0x0600, 0x0603, - 0x060b, 0x0615, - 0x061b, 0x061b, - 0x061e, 0x061f, - 0x0621, 0x063a, - 0x0640, 0x065e, - 0x0660, 0x070d, - 0x070f, 0x074a, - 0x074d, 0x076d, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0970, - 0x097d, 0x097d, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09ce, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09fa, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0af1, 0x0af1, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be6, 0x0bfa, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 
0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df4, - 0x0e01, 0x0e3a, - 0x0e3f, 0x0e5b, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fbe, 0x0fcc, - 0x0fcf, 0x0fd1, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10fc, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x135f, 0x137c, - 0x1380, 0x1399, - 0x13a0, 0x13f4, - 0x1401, 0x1676, - 0x1681, 0x169c, - 0x16a0, 0x16f0, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1736, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x1800, 0x180d, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1940, 0x1940, - 0x1944, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19a9, - 0x19b0, 0x19c9, - 0x19d0, 0x19d9, - 0x19de, 0x1a1b, - 0x1a1e, 
0x1a1f, - 0x1d00, 0x1dc3, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fc4, - 0x1fc6, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fdd, 0x1fef, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffe, - 0x200b, 0x2027, - 0x202a, 0x202e, - 0x2030, 0x205e, - 0x2060, 0x2063, - 0x206a, 0x2071, - 0x2074, 0x208e, - 0x2090, 0x2094, - 0x20a0, 0x20b5, - 0x20d0, 0x20eb, - 0x2100, 0x214c, - 0x2153, 0x2183, - 0x2190, 0x23db, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x2460, 0x269c, - 0x26a0, 0x26b1, - 0x2701, 0x2704, - 0x2706, 0x2709, - 0x270c, 0x2727, - 0x2729, 0x274b, - 0x274d, 0x274d, - 0x274f, 0x2752, - 0x2756, 0x2756, - 0x2758, 0x275e, - 0x2761, 0x2794, - 0x2798, 0x27af, - 0x27b1, 0x27be, - 0x27c0, 0x27c6, - 0x27d0, 0x27eb, - 0x27f0, 0x2b13, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e, - 0x2c80, 0x2cea, - 0x2cf9, 0x2d25, - 0x2d30, 0x2d65, - 0x2d6f, 0x2d6f, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x2e00, 0x2e17, - 0x2e1c, 0x2e1d, - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x2ff0, 0x2ffb, - 0x3001, 0x303f, - 0x3041, 0x3096, - 0x3099, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x3190, 0x31b7, - 0x31c0, 0x31cf, - 0x31f0, 0x321e, - 0x3220, 0x3243, - 0x3250, 0x32fe, - 0x3300, 0x4db5, - 0x4dc0, 0x9fbb, - 0xa000, 0xa48c, - 0xa490, 0xa4c6, - 0xa700, 0xa716, - 0xa800, 0xa82b, - 0xac00, 0xd7a3, - 0xe000, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfa70, 0xfad9, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3f, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfd, - 0xfe00, 0xfe19, - 0xfe20, 0xfe23, - 0xfe30, 0xfe52, - 0xfe54, 0xfe66, - 0xfe68, 0xfe6b, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xfeff, 0xfeff, - 0xff01, 
0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0xffe0, 0xffe6, - 0xffe8, 0xffee, - 0xfff9, 0xfffd, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10100, 0x10102, - 0x10107, 0x10133, - 0x10137, 0x1018a, - 0x10300, 0x1031e, - 0x10320, 0x10323, - 0x10330, 0x1034a, - 0x10380, 0x1039d, - 0x1039f, 0x103c3, - 0x103c8, 0x103d5, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x10a00, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a47, - 0x10a50, 0x10a58, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d12a, 0x1d1dd, - 0x1d200, 0x1d245, - 0x1d300, 0x1d356, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xe0100, 0xe01ef, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -}; /* CR_Graph */ - -/* 'Lower': [[:Lower:]] */ -static const OnigCodePoint CR_Lower[] = { - 480, - 0x0061, 0x007a, - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00df, 0x00f6, - 0x00f8, 0x00ff, - 0x0101, 0x0101, - 0x0103, 0x0103, - 0x0105, 0x0105, - 0x0107, 0x0107, - 0x0109, 0x0109, - 0x010b, 0x010b, - 0x010d, 0x010d, - 0x010f, 0x010f, - 0x0111, 0x0111, - 0x0113, 0x0113, - 0x0115, 0x0115, - 0x0117, 0x0117, - 0x0119, 0x0119, - 0x011b, 0x011b, - 0x011d, 0x011d, - 0x011f, 0x011f, - 0x0121, 0x0121, - 0x0123, 0x0123, - 0x0125, 0x0125, - 0x0127, 
0x0127, - 0x0129, 0x0129, - 0x012b, 0x012b, - 0x012d, 0x012d, - 0x012f, 0x012f, - 0x0131, 0x0131, - 0x0133, 0x0133, - 0x0135, 0x0135, - 0x0137, 0x0138, - 0x013a, 0x013a, - 0x013c, 0x013c, - 0x013e, 0x013e, - 0x0140, 0x0140, - 0x0142, 0x0142, - 0x0144, 0x0144, - 0x0146, 0x0146, - 0x0148, 0x0149, - 0x014b, 0x014b, - 0x014d, 0x014d, - 0x014f, 0x014f, - 0x0151, 0x0151, - 0x0153, 0x0153, - 0x0155, 0x0155, - 0x0157, 0x0157, - 0x0159, 0x0159, - 0x015b, 0x015b, - 0x015d, 0x015d, - 0x015f, 0x015f, - 0x0161, 0x0161, - 0x0163, 0x0163, - 0x0165, 0x0165, - 0x0167, 0x0167, - 0x0169, 0x0169, - 0x016b, 0x016b, - 0x016d, 0x016d, - 0x016f, 0x016f, - 0x0171, 0x0171, - 0x0173, 0x0173, - 0x0175, 0x0175, - 0x0177, 0x0177, - 0x017a, 0x017a, - 0x017c, 0x017c, - 0x017e, 0x0180, - 0x0183, 0x0183, - 0x0185, 0x0185, - 0x0188, 0x0188, - 0x018c, 0x018d, - 0x0192, 0x0192, - 0x0195, 0x0195, - 0x0199, 0x019b, - 0x019e, 0x019e, - 0x01a1, 0x01a1, - 0x01a3, 0x01a3, - 0x01a5, 0x01a5, - 0x01a8, 0x01a8, - 0x01aa, 0x01ab, - 0x01ad, 0x01ad, - 0x01b0, 0x01b0, - 0x01b4, 0x01b4, - 0x01b6, 0x01b6, - 0x01b9, 0x01ba, - 0x01bd, 0x01bf, - 0x01c6, 0x01c6, - 0x01c9, 0x01c9, - 0x01cc, 0x01cc, - 0x01ce, 0x01ce, - 0x01d0, 0x01d0, - 0x01d2, 0x01d2, - 0x01d4, 0x01d4, - 0x01d6, 0x01d6, - 0x01d8, 0x01d8, - 0x01da, 0x01da, - 0x01dc, 0x01dd, - 0x01df, 0x01df, - 0x01e1, 0x01e1, - 0x01e3, 0x01e3, - 0x01e5, 0x01e5, - 0x01e7, 0x01e7, - 0x01e9, 0x01e9, - 0x01eb, 0x01eb, - 0x01ed, 0x01ed, - 0x01ef, 0x01f0, - 0x01f3, 0x01f3, - 0x01f5, 0x01f5, - 0x01f9, 0x01f9, - 0x01fb, 0x01fb, - 0x01fd, 0x01fd, - 0x01ff, 0x01ff, - 0x0201, 0x0201, - 0x0203, 0x0203, - 0x0205, 0x0205, - 0x0207, 0x0207, - 0x0209, 0x0209, - 0x020b, 0x020b, - 0x020d, 0x020d, - 0x020f, 0x020f, - 0x0211, 0x0211, - 0x0213, 0x0213, - 0x0215, 0x0215, - 0x0217, 0x0217, - 0x0219, 0x0219, - 0x021b, 0x021b, - 0x021d, 0x021d, - 0x021f, 0x021f, - 0x0221, 0x0221, - 0x0223, 0x0223, - 0x0225, 0x0225, - 0x0227, 0x0227, - 0x0229, 0x0229, - 0x022b, 0x022b, - 0x022d, 0x022d, - 0x022f, 
0x022f, - 0x0231, 0x0231, - 0x0233, 0x0239, - 0x023c, 0x023c, - 0x023f, 0x0240, - 0x0250, 0x02af, - 0x0390, 0x0390, - 0x03ac, 0x03ce, - 0x03d0, 0x03d1, - 0x03d5, 0x03d7, - 0x03d9, 0x03d9, - 0x03db, 0x03db, - 0x03dd, 0x03dd, - 0x03df, 0x03df, - 0x03e1, 0x03e1, - 0x03e3, 0x03e3, - 0x03e5, 0x03e5, - 0x03e7, 0x03e7, - 0x03e9, 0x03e9, - 0x03eb, 0x03eb, - 0x03ed, 0x03ed, - 0x03ef, 0x03f3, - 0x03f5, 0x03f5, - 0x03f8, 0x03f8, - 0x03fb, 0x03fc, - 0x0430, 0x045f, - 0x0461, 0x0461, - 0x0463, 0x0463, - 0x0465, 0x0465, - 0x0467, 0x0467, - 0x0469, 0x0469, - 0x046b, 0x046b, - 0x046d, 0x046d, - 0x046f, 0x046f, - 0x0471, 0x0471, - 0x0473, 0x0473, - 0x0475, 0x0475, - 0x0477, 0x0477, - 0x0479, 0x0479, - 0x047b, 0x047b, - 0x047d, 0x047d, - 0x047f, 0x047f, - 0x0481, 0x0481, - 0x048b, 0x048b, - 0x048d, 0x048d, - 0x048f, 0x048f, - 0x0491, 0x0491, - 0x0493, 0x0493, - 0x0495, 0x0495, - 0x0497, 0x0497, - 0x0499, 0x0499, - 0x049b, 0x049b, - 0x049d, 0x049d, - 0x049f, 0x049f, - 0x04a1, 0x04a1, - 0x04a3, 0x04a3, - 0x04a5, 0x04a5, - 0x04a7, 0x04a7, - 0x04a9, 0x04a9, - 0x04ab, 0x04ab, - 0x04ad, 0x04ad, - 0x04af, 0x04af, - 0x04b1, 0x04b1, - 0x04b3, 0x04b3, - 0x04b5, 0x04b5, - 0x04b7, 0x04b7, - 0x04b9, 0x04b9, - 0x04bb, 0x04bb, - 0x04bd, 0x04bd, - 0x04bf, 0x04bf, - 0x04c2, 0x04c2, - 0x04c4, 0x04c4, - 0x04c6, 0x04c6, - 0x04c8, 0x04c8, - 0x04ca, 0x04ca, - 0x04cc, 0x04cc, - 0x04ce, 0x04ce, - 0x04d1, 0x04d1, - 0x04d3, 0x04d3, - 0x04d5, 0x04d5, - 0x04d7, 0x04d7, - 0x04d9, 0x04d9, - 0x04db, 0x04db, - 0x04dd, 0x04dd, - 0x04df, 0x04df, - 0x04e1, 0x04e1, - 0x04e3, 0x04e3, - 0x04e5, 0x04e5, - 0x04e7, 0x04e7, - 0x04e9, 0x04e9, - 0x04eb, 0x04eb, - 0x04ed, 0x04ed, - 0x04ef, 0x04ef, - 0x04f1, 0x04f1, - 0x04f3, 0x04f3, - 0x04f5, 0x04f5, - 0x04f7, 0x04f7, - 0x04f9, 0x04f9, - 0x0501, 0x0501, - 0x0503, 0x0503, - 0x0505, 0x0505, - 0x0507, 0x0507, - 0x0509, 0x0509, - 0x050b, 0x050b, - 0x050d, 0x050d, - 0x050f, 0x050f, - 0x0561, 0x0587, - 0x1d00, 0x1d2b, - 0x1d62, 0x1d77, - 0x1d79, 0x1d9a, - 0x1e01, 0x1e01, - 0x1e03, 
0x1e03, - 0x1e05, 0x1e05, - 0x1e07, 0x1e07, - 0x1e09, 0x1e09, - 0x1e0b, 0x1e0b, - 0x1e0d, 0x1e0d, - 0x1e0f, 0x1e0f, - 0x1e11, 0x1e11, - 0x1e13, 0x1e13, - 0x1e15, 0x1e15, - 0x1e17, 0x1e17, - 0x1e19, 0x1e19, - 0x1e1b, 0x1e1b, - 0x1e1d, 0x1e1d, - 0x1e1f, 0x1e1f, - 0x1e21, 0x1e21, - 0x1e23, 0x1e23, - 0x1e25, 0x1e25, - 0x1e27, 0x1e27, - 0x1e29, 0x1e29, - 0x1e2b, 0x1e2b, - 0x1e2d, 0x1e2d, - 0x1e2f, 0x1e2f, - 0x1e31, 0x1e31, - 0x1e33, 0x1e33, - 0x1e35, 0x1e35, - 0x1e37, 0x1e37, - 0x1e39, 0x1e39, - 0x1e3b, 0x1e3b, - 0x1e3d, 0x1e3d, - 0x1e3f, 0x1e3f, - 0x1e41, 0x1e41, - 0x1e43, 0x1e43, - 0x1e45, 0x1e45, - 0x1e47, 0x1e47, - 0x1e49, 0x1e49, - 0x1e4b, 0x1e4b, - 0x1e4d, 0x1e4d, - 0x1e4f, 0x1e4f, - 0x1e51, 0x1e51, - 0x1e53, 0x1e53, - 0x1e55, 0x1e55, - 0x1e57, 0x1e57, - 0x1e59, 0x1e59, - 0x1e5b, 0x1e5b, - 0x1e5d, 0x1e5d, - 0x1e5f, 0x1e5f, - 0x1e61, 0x1e61, - 0x1e63, 0x1e63, - 0x1e65, 0x1e65, - 0x1e67, 0x1e67, - 0x1e69, 0x1e69, - 0x1e6b, 0x1e6b, - 0x1e6d, 0x1e6d, - 0x1e6f, 0x1e6f, - 0x1e71, 0x1e71, - 0x1e73, 0x1e73, - 0x1e75, 0x1e75, - 0x1e77, 0x1e77, - 0x1e79, 0x1e79, - 0x1e7b, 0x1e7b, - 0x1e7d, 0x1e7d, - 0x1e7f, 0x1e7f, - 0x1e81, 0x1e81, - 0x1e83, 0x1e83, - 0x1e85, 0x1e85, - 0x1e87, 0x1e87, - 0x1e89, 0x1e89, - 0x1e8b, 0x1e8b, - 0x1e8d, 0x1e8d, - 0x1e8f, 0x1e8f, - 0x1e91, 0x1e91, - 0x1e93, 0x1e93, - 0x1e95, 0x1e9b, - 0x1ea1, 0x1ea1, - 0x1ea3, 0x1ea3, - 0x1ea5, 0x1ea5, - 0x1ea7, 0x1ea7, - 0x1ea9, 0x1ea9, - 0x1eab, 0x1eab, - 0x1ead, 0x1ead, - 0x1eaf, 0x1eaf, - 0x1eb1, 0x1eb1, - 0x1eb3, 0x1eb3, - 0x1eb5, 0x1eb5, - 0x1eb7, 0x1eb7, - 0x1eb9, 0x1eb9, - 0x1ebb, 0x1ebb, - 0x1ebd, 0x1ebd, - 0x1ebf, 0x1ebf, - 0x1ec1, 0x1ec1, - 0x1ec3, 0x1ec3, - 0x1ec5, 0x1ec5, - 0x1ec7, 0x1ec7, - 0x1ec9, 0x1ec9, - 0x1ecb, 0x1ecb, - 0x1ecd, 0x1ecd, - 0x1ecf, 0x1ecf, - 0x1ed1, 0x1ed1, - 0x1ed3, 0x1ed3, - 0x1ed5, 0x1ed5, - 0x1ed7, 0x1ed7, - 0x1ed9, 0x1ed9, - 0x1edb, 0x1edb, - 0x1edd, 0x1edd, - 0x1edf, 0x1edf, - 0x1ee1, 0x1ee1, - 0x1ee3, 0x1ee3, - 0x1ee5, 0x1ee5, - 0x1ee7, 0x1ee7, - 0x1ee9, 0x1ee9, - 0x1eeb, 
0x1eeb, - 0x1eed, 0x1eed, - 0x1eef, 0x1eef, - 0x1ef1, 0x1ef1, - 0x1ef3, 0x1ef3, - 0x1ef5, 0x1ef5, - 0x1ef7, 0x1ef7, - 0x1ef9, 0x1ef9, - 0x1f00, 0x1f07, - 0x1f10, 0x1f15, - 0x1f20, 0x1f27, - 0x1f30, 0x1f37, - 0x1f40, 0x1f45, - 0x1f50, 0x1f57, - 0x1f60, 0x1f67, - 0x1f70, 0x1f7d, - 0x1f80, 0x1f87, - 0x1f90, 0x1f97, - 0x1fa0, 0x1fa7, - 0x1fb0, 0x1fb4, - 0x1fb6, 0x1fb7, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fc7, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fd7, - 0x1fe0, 0x1fe7, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ff7, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x210a, 0x210a, - 0x210e, 0x210f, - 0x2113, 0x2113, - 0x212f, 0x212f, - 0x2134, 0x2134, - 0x2139, 0x2139, - 0x213c, 0x213d, - 0x2146, 0x2149, - 0x2c30, 0x2c5e, - 0x2c81, 0x2c81, - 0x2c83, 0x2c83, - 0x2c85, 0x2c85, - 0x2c87, 0x2c87, - 0x2c89, 0x2c89, - 0x2c8b, 0x2c8b, - 0x2c8d, 0x2c8d, - 0x2c8f, 0x2c8f, - 0x2c91, 0x2c91, - 0x2c93, 0x2c93, - 0x2c95, 0x2c95, - 0x2c97, 0x2c97, - 0x2c99, 0x2c99, - 0x2c9b, 0x2c9b, - 0x2c9d, 0x2c9d, - 0x2c9f, 0x2c9f, - 0x2ca1, 0x2ca1, - 0x2ca3, 0x2ca3, - 0x2ca5, 0x2ca5, - 0x2ca7, 0x2ca7, - 0x2ca9, 0x2ca9, - 0x2cab, 0x2cab, - 0x2cad, 0x2cad, - 0x2caf, 0x2caf, - 0x2cb1, 0x2cb1, - 0x2cb3, 0x2cb3, - 0x2cb5, 0x2cb5, - 0x2cb7, 0x2cb7, - 0x2cb9, 0x2cb9, - 0x2cbb, 0x2cbb, - 0x2cbd, 0x2cbd, - 0x2cbf, 0x2cbf, - 0x2cc1, 0x2cc1, - 0x2cc3, 0x2cc3, - 0x2cc5, 0x2cc5, - 0x2cc7, 0x2cc7, - 0x2cc9, 0x2cc9, - 0x2ccb, 0x2ccb, - 0x2ccd, 0x2ccd, - 0x2ccf, 0x2ccf, - 0x2cd1, 0x2cd1, - 0x2cd3, 0x2cd3, - 0x2cd5, 0x2cd5, - 0x2cd7, 0x2cd7, - 0x2cd9, 0x2cd9, - 0x2cdb, 0x2cdb, - 0x2cdd, 0x2cdd, - 0x2cdf, 0x2cdf, - 0x2ce1, 0x2ce1, - 0x2ce3, 0x2ce4, - 0x2d00, 0x2d25, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xff41, 0xff5a, - 0x10428, 0x1044f, - 0x1d41a, 0x1d433, - 0x1d44e, 0x1d454, - 0x1d456, 0x1d467, - 0x1d482, 0x1d49b, - 0x1d4b6, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d4cf, - 0x1d4ea, 0x1d503, - 0x1d51e, 0x1d537, - 0x1d552, 0x1d56b, - 0x1d586, 0x1d59f, - 0x1d5ba, 0x1d5d3, - 0x1d5ee, 0x1d607, - 0x1d622, 
0x1d63b, - 0x1d656, 0x1d66f, - 0x1d68a, 0x1d6a5, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6e1, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d71b, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d755, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d78f, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9 -}; /* CR_Lower */ - -/* 'Print': [[:Print:]] */ -static const OnigCodePoint CR_Print[] = { - 423, - 0x0009, 0x000d, - 0x0020, 0x007e, - 0x0085, 0x0085, - 0x00a0, 0x0241, - 0x0250, 0x036f, - 0x0374, 0x0375, - 0x037a, 0x037a, - 0x037e, 0x037e, - 0x0384, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x055f, - 0x0561, 0x0587, - 0x0589, 0x058a, - 0x0591, 0x05b9, - 0x05bb, 0x05c7, - 0x05d0, 0x05ea, - 0x05f0, 0x05f4, - 0x0600, 0x0603, - 0x060b, 0x0615, - 0x061b, 0x061b, - 0x061e, 0x061f, - 0x0621, 0x063a, - 0x0640, 0x065e, - 0x0660, 0x070d, - 0x070f, 0x074a, - 0x074d, 0x076d, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0970, - 0x097d, 0x097d, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09ce, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09fa, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0af1, 0x0af1, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 
0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be6, 0x0bfa, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df4, - 0x0e01, 0x0e3a, - 0x0e3f, 0x0e5b, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fbe, 0x0fcc, - 0x0fcf, 0x0fd1, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10fc, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 
0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x135f, 0x137c, - 0x1380, 0x1399, - 0x13a0, 0x13f4, - 0x1401, 0x1676, - 0x1680, 0x169c, - 0x16a0, 0x16f0, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1736, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x1800, 0x180e, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1940, 0x1940, - 0x1944, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19a9, - 0x19b0, 0x19c9, - 0x19d0, 0x19d9, - 0x19de, 0x1a1b, - 0x1a1e, 0x1a1f, - 0x1d00, 0x1dc3, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fc4, - 0x1fc6, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fdd, 0x1fef, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffe, - 0x2000, 0x2063, - 0x206a, 0x2071, - 0x2074, 0x208e, - 0x2090, 0x2094, - 0x20a0, 0x20b5, - 0x20d0, 0x20eb, - 0x2100, 0x214c, - 0x2153, 0x2183, - 0x2190, 0x23db, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x2460, 0x269c, - 0x26a0, 0x26b1, - 0x2701, 0x2704, - 0x2706, 0x2709, - 0x270c, 0x2727, - 0x2729, 0x274b, - 0x274d, 0x274d, - 0x274f, 0x2752, - 0x2756, 0x2756, - 0x2758, 0x275e, - 0x2761, 0x2794, - 0x2798, 0x27af, - 0x27b1, 0x27be, - 0x27c0, 0x27c6, - 0x27d0, 0x27eb, - 0x27f0, 0x2b13, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e, - 0x2c80, 0x2cea, - 0x2cf9, 0x2d25, - 0x2d30, 0x2d65, - 0x2d6f, 0x2d6f, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x2e00, 0x2e17, - 0x2e1c, 0x2e1d, - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x2ff0, 0x2ffb, - 0x3000, 0x303f, - 0x3041, 0x3096, - 0x3099, 0x30ff, - 0x3105, 
0x312c, - 0x3131, 0x318e, - 0x3190, 0x31b7, - 0x31c0, 0x31cf, - 0x31f0, 0x321e, - 0x3220, 0x3243, - 0x3250, 0x32fe, - 0x3300, 0x4db5, - 0x4dc0, 0x9fbb, - 0xa000, 0xa48c, - 0xa490, 0xa4c6, - 0xa700, 0xa716, - 0xa800, 0xa82b, - 0xac00, 0xd7a3, - 0xe000, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfa70, 0xfad9, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3f, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfd, - 0xfe00, 0xfe19, - 0xfe20, 0xfe23, - 0xfe30, 0xfe52, - 0xfe54, 0xfe66, - 0xfe68, 0xfe6b, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xfeff, 0xfeff, - 0xff01, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0xffe0, 0xffe6, - 0xffe8, 0xffee, - 0xfff9, 0xfffd, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10100, 0x10102, - 0x10107, 0x10133, - 0x10137, 0x1018a, - 0x10300, 0x1031e, - 0x10320, 0x10323, - 0x10330, 0x1034a, - 0x10380, 0x1039d, - 0x1039f, 0x103c3, - 0x103c8, 0x103d5, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x10a00, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a47, - 0x10a50, 0x10a58, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d12a, 0x1d1dd, - 0x1d200, 0x1d245, - 0x1d300, 0x1d356, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, 
- 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xe0100, 0xe01ef, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -}; /* CR_Print */ - -/* 'Punct': [[:Punct:]] */ -static const OnigCodePoint CR_Punct[] = { - 96, - 0x0021, 0x0023, - 0x0025, 0x002a, - 0x002c, 0x002f, - 0x003a, 0x003b, - 0x003f, 0x0040, - 0x005b, 0x005d, - 0x005f, 0x005f, - 0x007b, 0x007b, - 0x007d, 0x007d, - 0x00a1, 0x00a1, - 0x00ab, 0x00ab, - 0x00b7, 0x00b7, - 0x00bb, 0x00bb, - 0x00bf, 0x00bf, - 0x037e, 0x037e, - 0x0387, 0x0387, - 0x055a, 0x055f, - 0x0589, 0x058a, - 0x05be, 0x05be, - 0x05c0, 0x05c0, - 0x05c3, 0x05c3, - 0x05c6, 0x05c6, - 0x05f3, 0x05f4, - 0x060c, 0x060d, - 0x061b, 0x061b, - 0x061e, 0x061f, - 0x066a, 0x066d, - 0x06d4, 0x06d4, - 0x0700, 0x070d, - 0x0964, 0x0965, - 0x0970, 0x0970, - 0x0df4, 0x0df4, - 0x0e4f, 0x0e4f, - 0x0e5a, 0x0e5b, - 0x0f04, 0x0f12, - 0x0f3a, 0x0f3d, - 0x0f85, 0x0f85, - 0x0fd0, 0x0fd1, - 0x104a, 0x104f, - 0x10fb, 0x10fb, - 0x1361, 0x1368, - 0x166d, 0x166e, - 0x169b, 0x169c, - 0x16eb, 0x16ed, - 0x1735, 0x1736, - 0x17d4, 0x17d6, - 0x17d8, 0x17da, - 0x1800, 0x180a, - 0x1944, 0x1945, - 0x19de, 0x19df, - 0x1a1e, 0x1a1f, - 0x2010, 0x2027, - 0x2030, 0x2043, - 0x2045, 0x2051, - 0x2053, 0x205e, - 0x207d, 0x207e, - 0x208d, 0x208e, - 0x2329, 0x232a, - 0x23b4, 0x23b6, - 0x2768, 0x2775, - 0x27c5, 0x27c6, - 0x27e6, 0x27eb, - 0x2983, 0x2998, - 0x29d8, 0x29db, - 0x29fc, 0x29fd, - 0x2cf9, 0x2cfc, - 0x2cfe, 0x2cff, - 0x2e00, 0x2e17, - 0x2e1c, 0x2e1d, - 0x3001, 0x3003, - 0x3008, 0x3011, - 0x3014, 0x301f, - 0x3030, 0x3030, - 0x303d, 0x303d, - 0x30a0, 0x30a0, - 0x30fb, 0x30fb, - 0xfd3e, 0xfd3f, - 0xfe10, 0xfe19, - 0xfe30, 0xfe52, - 0xfe54, 0xfe61, - 0xfe63, 0xfe63, - 0xfe68, 0xfe68, - 0xfe6a, 0xfe6b, - 0xff01, 0xff03, - 0xff05, 0xff0a, - 0xff0c, 0xff0f, - 0xff1a, 0xff1b, - 0xff1f, 0xff20, - 0xff3b, 0xff3d, - 0xff3f, 0xff3f, - 0xff5b, 0xff5b, - 0xff5d, 0xff5d, - 0xff5f, 0xff65, - 0x10100, 0x10101, - 0x1039f, 0x1039f, - 0x10a50, 0x10a58 -}; /* CR_Punct */ - -/* 'Space': [[:Space:]] */ -static const 
OnigCodePoint CR_Space[] = { - 11, - 0x0009, 0x000d, - 0x0020, 0x0020, - 0x0085, 0x0085, - 0x00a0, 0x00a0, - 0x1680, 0x1680, - 0x180e, 0x180e, - 0x2000, 0x200a, - 0x2028, 0x2029, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000 -}; /* CR_Space */ - -/* 'Upper': [[:Upper:]] */ -static const OnigCodePoint CR_Upper[] = { - 476, - 0x0041, 0x005a, - 0x00c0, 0x00d6, - 0x00d8, 0x00de, - 0x0100, 0x0100, - 0x0102, 0x0102, - 0x0104, 0x0104, - 0x0106, 0x0106, - 0x0108, 0x0108, - 0x010a, 0x010a, - 0x010c, 0x010c, - 0x010e, 0x010e, - 0x0110, 0x0110, - 0x0112, 0x0112, - 0x0114, 0x0114, - 0x0116, 0x0116, - 0x0118, 0x0118, - 0x011a, 0x011a, - 0x011c, 0x011c, - 0x011e, 0x011e, - 0x0120, 0x0120, - 0x0122, 0x0122, - 0x0124, 0x0124, - 0x0126, 0x0126, - 0x0128, 0x0128, - 0x012a, 0x012a, - 0x012c, 0x012c, - 0x012e, 0x012e, - 0x0130, 0x0130, - 0x0132, 0x0132, - 0x0134, 0x0134, - 0x0136, 0x0136, - 0x0139, 0x0139, - 0x013b, 0x013b, - 0x013d, 0x013d, - 0x013f, 0x013f, - 0x0141, 0x0141, - 0x0143, 0x0143, - 0x0145, 0x0145, - 0x0147, 0x0147, - 0x014a, 0x014a, - 0x014c, 0x014c, - 0x014e, 0x014e, - 0x0150, 0x0150, - 0x0152, 0x0152, - 0x0154, 0x0154, - 0x0156, 0x0156, - 0x0158, 0x0158, - 0x015a, 0x015a, - 0x015c, 0x015c, - 0x015e, 0x015e, - 0x0160, 0x0160, - 0x0162, 0x0162, - 0x0164, 0x0164, - 0x0166, 0x0166, - 0x0168, 0x0168, - 0x016a, 0x016a, - 0x016c, 0x016c, - 0x016e, 0x016e, - 0x0170, 0x0170, - 0x0172, 0x0172, - 0x0174, 0x0174, - 0x0176, 0x0176, - 0x0178, 0x0179, - 0x017b, 0x017b, - 0x017d, 0x017d, - 0x0181, 0x0182, - 0x0184, 0x0184, - 0x0186, 0x0187, - 0x0189, 0x018b, - 0x018e, 0x0191, - 0x0193, 0x0194, - 0x0196, 0x0198, - 0x019c, 0x019d, - 0x019f, 0x01a0, - 0x01a2, 0x01a2, - 0x01a4, 0x01a4, - 0x01a6, 0x01a7, - 0x01a9, 0x01a9, - 0x01ac, 0x01ac, - 0x01ae, 0x01af, - 0x01b1, 0x01b3, - 0x01b5, 0x01b5, - 0x01b7, 0x01b8, - 0x01bc, 0x01bc, - 0x01c4, 0x01c4, - 0x01c7, 0x01c7, - 0x01ca, 0x01ca, - 0x01cd, 0x01cd, - 0x01cf, 0x01cf, - 0x01d1, 0x01d1, - 0x01d3, 0x01d3, - 0x01d5, 0x01d5, - 0x01d7, 
0x01d7, - 0x01d9, 0x01d9, - 0x01db, 0x01db, - 0x01de, 0x01de, - 0x01e0, 0x01e0, - 0x01e2, 0x01e2, - 0x01e4, 0x01e4, - 0x01e6, 0x01e6, - 0x01e8, 0x01e8, - 0x01ea, 0x01ea, - 0x01ec, 0x01ec, - 0x01ee, 0x01ee, - 0x01f1, 0x01f1, - 0x01f4, 0x01f4, - 0x01f6, 0x01f8, - 0x01fa, 0x01fa, - 0x01fc, 0x01fc, - 0x01fe, 0x01fe, - 0x0200, 0x0200, - 0x0202, 0x0202, - 0x0204, 0x0204, - 0x0206, 0x0206, - 0x0208, 0x0208, - 0x020a, 0x020a, - 0x020c, 0x020c, - 0x020e, 0x020e, - 0x0210, 0x0210, - 0x0212, 0x0212, - 0x0214, 0x0214, - 0x0216, 0x0216, - 0x0218, 0x0218, - 0x021a, 0x021a, - 0x021c, 0x021c, - 0x021e, 0x021e, - 0x0220, 0x0220, - 0x0222, 0x0222, - 0x0224, 0x0224, - 0x0226, 0x0226, - 0x0228, 0x0228, - 0x022a, 0x022a, - 0x022c, 0x022c, - 0x022e, 0x022e, - 0x0230, 0x0230, - 0x0232, 0x0232, - 0x023a, 0x023b, - 0x023d, 0x023e, - 0x0241, 0x0241, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x038f, - 0x0391, 0x03a1, - 0x03a3, 0x03ab, - 0x03d2, 0x03d4, - 0x03d8, 0x03d8, - 0x03da, 0x03da, - 0x03dc, 0x03dc, - 0x03de, 0x03de, - 0x03e0, 0x03e0, - 0x03e2, 0x03e2, - 0x03e4, 0x03e4, - 0x03e6, 0x03e6, - 0x03e8, 0x03e8, - 0x03ea, 0x03ea, - 0x03ec, 0x03ec, - 0x03ee, 0x03ee, - 0x03f4, 0x03f4, - 0x03f7, 0x03f7, - 0x03f9, 0x03fa, - 0x03fd, 0x042f, - 0x0460, 0x0460, - 0x0462, 0x0462, - 0x0464, 0x0464, - 0x0466, 0x0466, - 0x0468, 0x0468, - 0x046a, 0x046a, - 0x046c, 0x046c, - 0x046e, 0x046e, - 0x0470, 0x0470, - 0x0472, 0x0472, - 0x0474, 0x0474, - 0x0476, 0x0476, - 0x0478, 0x0478, - 0x047a, 0x047a, - 0x047c, 0x047c, - 0x047e, 0x047e, - 0x0480, 0x0480, - 0x048a, 0x048a, - 0x048c, 0x048c, - 0x048e, 0x048e, - 0x0490, 0x0490, - 0x0492, 0x0492, - 0x0494, 0x0494, - 0x0496, 0x0496, - 0x0498, 0x0498, - 0x049a, 0x049a, - 0x049c, 0x049c, - 0x049e, 0x049e, - 0x04a0, 0x04a0, - 0x04a2, 0x04a2, - 0x04a4, 0x04a4, - 0x04a6, 0x04a6, - 0x04a8, 0x04a8, - 0x04aa, 0x04aa, - 0x04ac, 0x04ac, - 0x04ae, 0x04ae, - 0x04b0, 0x04b0, - 0x04b2, 0x04b2, - 0x04b4, 0x04b4, - 0x04b6, 0x04b6, - 0x04b8, 0x04b8, - 0x04ba, 
0x04ba, - 0x04bc, 0x04bc, - 0x04be, 0x04be, - 0x04c0, 0x04c1, - 0x04c3, 0x04c3, - 0x04c5, 0x04c5, - 0x04c7, 0x04c7, - 0x04c9, 0x04c9, - 0x04cb, 0x04cb, - 0x04cd, 0x04cd, - 0x04d0, 0x04d0, - 0x04d2, 0x04d2, - 0x04d4, 0x04d4, - 0x04d6, 0x04d6, - 0x04d8, 0x04d8, - 0x04da, 0x04da, - 0x04dc, 0x04dc, - 0x04de, 0x04de, - 0x04e0, 0x04e0, - 0x04e2, 0x04e2, - 0x04e4, 0x04e4, - 0x04e6, 0x04e6, - 0x04e8, 0x04e8, - 0x04ea, 0x04ea, - 0x04ec, 0x04ec, - 0x04ee, 0x04ee, - 0x04f0, 0x04f0, - 0x04f2, 0x04f2, - 0x04f4, 0x04f4, - 0x04f6, 0x04f6, - 0x04f8, 0x04f8, - 0x0500, 0x0500, - 0x0502, 0x0502, - 0x0504, 0x0504, - 0x0506, 0x0506, - 0x0508, 0x0508, - 0x050a, 0x050a, - 0x050c, 0x050c, - 0x050e, 0x050e, - 0x0531, 0x0556, - 0x10a0, 0x10c5, - 0x1e00, 0x1e00, - 0x1e02, 0x1e02, - 0x1e04, 0x1e04, - 0x1e06, 0x1e06, - 0x1e08, 0x1e08, - 0x1e0a, 0x1e0a, - 0x1e0c, 0x1e0c, - 0x1e0e, 0x1e0e, - 0x1e10, 0x1e10, - 0x1e12, 0x1e12, - 0x1e14, 0x1e14, - 0x1e16, 0x1e16, - 0x1e18, 0x1e18, - 0x1e1a, 0x1e1a, - 0x1e1c, 0x1e1c, - 0x1e1e, 0x1e1e, - 0x1e20, 0x1e20, - 0x1e22, 0x1e22, - 0x1e24, 0x1e24, - 0x1e26, 0x1e26, - 0x1e28, 0x1e28, - 0x1e2a, 0x1e2a, - 0x1e2c, 0x1e2c, - 0x1e2e, 0x1e2e, - 0x1e30, 0x1e30, - 0x1e32, 0x1e32, - 0x1e34, 0x1e34, - 0x1e36, 0x1e36, - 0x1e38, 0x1e38, - 0x1e3a, 0x1e3a, - 0x1e3c, 0x1e3c, - 0x1e3e, 0x1e3e, - 0x1e40, 0x1e40, - 0x1e42, 0x1e42, - 0x1e44, 0x1e44, - 0x1e46, 0x1e46, - 0x1e48, 0x1e48, - 0x1e4a, 0x1e4a, - 0x1e4c, 0x1e4c, - 0x1e4e, 0x1e4e, - 0x1e50, 0x1e50, - 0x1e52, 0x1e52, - 0x1e54, 0x1e54, - 0x1e56, 0x1e56, - 0x1e58, 0x1e58, - 0x1e5a, 0x1e5a, - 0x1e5c, 0x1e5c, - 0x1e5e, 0x1e5e, - 0x1e60, 0x1e60, - 0x1e62, 0x1e62, - 0x1e64, 0x1e64, - 0x1e66, 0x1e66, - 0x1e68, 0x1e68, - 0x1e6a, 0x1e6a, - 0x1e6c, 0x1e6c, - 0x1e6e, 0x1e6e, - 0x1e70, 0x1e70, - 0x1e72, 0x1e72, - 0x1e74, 0x1e74, - 0x1e76, 0x1e76, - 0x1e78, 0x1e78, - 0x1e7a, 0x1e7a, - 0x1e7c, 0x1e7c, - 0x1e7e, 0x1e7e, - 0x1e80, 0x1e80, - 0x1e82, 0x1e82, - 0x1e84, 0x1e84, - 0x1e86, 0x1e86, - 0x1e88, 0x1e88, - 0x1e8a, 0x1e8a, - 0x1e8c, 
0x1e8c, - 0x1e8e, 0x1e8e, - 0x1e90, 0x1e90, - 0x1e92, 0x1e92, - 0x1e94, 0x1e94, - 0x1ea0, 0x1ea0, - 0x1ea2, 0x1ea2, - 0x1ea4, 0x1ea4, - 0x1ea6, 0x1ea6, - 0x1ea8, 0x1ea8, - 0x1eaa, 0x1eaa, - 0x1eac, 0x1eac, - 0x1eae, 0x1eae, - 0x1eb0, 0x1eb0, - 0x1eb2, 0x1eb2, - 0x1eb4, 0x1eb4, - 0x1eb6, 0x1eb6, - 0x1eb8, 0x1eb8, - 0x1eba, 0x1eba, - 0x1ebc, 0x1ebc, - 0x1ebe, 0x1ebe, - 0x1ec0, 0x1ec0, - 0x1ec2, 0x1ec2, - 0x1ec4, 0x1ec4, - 0x1ec6, 0x1ec6, - 0x1ec8, 0x1ec8, - 0x1eca, 0x1eca, - 0x1ecc, 0x1ecc, - 0x1ece, 0x1ece, - 0x1ed0, 0x1ed0, - 0x1ed2, 0x1ed2, - 0x1ed4, 0x1ed4, - 0x1ed6, 0x1ed6, - 0x1ed8, 0x1ed8, - 0x1eda, 0x1eda, - 0x1edc, 0x1edc, - 0x1ede, 0x1ede, - 0x1ee0, 0x1ee0, - 0x1ee2, 0x1ee2, - 0x1ee4, 0x1ee4, - 0x1ee6, 0x1ee6, - 0x1ee8, 0x1ee8, - 0x1eea, 0x1eea, - 0x1eec, 0x1eec, - 0x1eee, 0x1eee, - 0x1ef0, 0x1ef0, - 0x1ef2, 0x1ef2, - 0x1ef4, 0x1ef4, - 0x1ef6, 0x1ef6, - 0x1ef8, 0x1ef8, - 0x1f08, 0x1f0f, - 0x1f18, 0x1f1d, - 0x1f28, 0x1f2f, - 0x1f38, 0x1f3f, - 0x1f48, 0x1f4d, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f5f, - 0x1f68, 0x1f6f, - 0x1fb8, 0x1fbb, - 0x1fc8, 0x1fcb, - 0x1fd8, 0x1fdb, - 0x1fe8, 0x1fec, - 0x1ff8, 0x1ffb, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210b, 0x210d, - 0x2110, 0x2112, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x2130, 0x2131, - 0x2133, 0x2133, - 0x213e, 0x213f, - 0x2145, 0x2145, - 0x2c00, 0x2c2e, - 0x2c80, 0x2c80, - 0x2c82, 0x2c82, - 0x2c84, 0x2c84, - 0x2c86, 0x2c86, - 0x2c88, 0x2c88, - 0x2c8a, 0x2c8a, - 0x2c8c, 0x2c8c, - 0x2c8e, 0x2c8e, - 0x2c90, 0x2c90, - 0x2c92, 0x2c92, - 0x2c94, 0x2c94, - 0x2c96, 0x2c96, - 0x2c98, 0x2c98, - 0x2c9a, 0x2c9a, - 0x2c9c, 0x2c9c, - 0x2c9e, 0x2c9e, - 0x2ca0, 0x2ca0, - 0x2ca2, 0x2ca2, - 0x2ca4, 0x2ca4, - 0x2ca6, 0x2ca6, - 0x2ca8, 0x2ca8, - 0x2caa, 0x2caa, - 0x2cac, 0x2cac, - 0x2cae, 0x2cae, - 0x2cb0, 0x2cb0, - 0x2cb2, 0x2cb2, - 0x2cb4, 0x2cb4, - 0x2cb6, 0x2cb6, - 0x2cb8, 0x2cb8, - 0x2cba, 0x2cba, - 0x2cbc, 0x2cbc, - 0x2cbe, 
0x2cbe, - 0x2cc0, 0x2cc0, - 0x2cc2, 0x2cc2, - 0x2cc4, 0x2cc4, - 0x2cc6, 0x2cc6, - 0x2cc8, 0x2cc8, - 0x2cca, 0x2cca, - 0x2ccc, 0x2ccc, - 0x2cce, 0x2cce, - 0x2cd0, 0x2cd0, - 0x2cd2, 0x2cd2, - 0x2cd4, 0x2cd4, - 0x2cd6, 0x2cd6, - 0x2cd8, 0x2cd8, - 0x2cda, 0x2cda, - 0x2cdc, 0x2cdc, - 0x2cde, 0x2cde, - 0x2ce0, 0x2ce0, - 0x2ce2, 0x2ce2, - 0xff21, 0xff3a, - 0x10400, 0x10427, - 0x1d400, 0x1d419, - 0x1d434, 0x1d44d, - 0x1d468, 0x1d481, - 0x1d49c, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b5, - 0x1d4d0, 0x1d4e9, - 0x1d504, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d538, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d56c, 0x1d585, - 0x1d5a0, 0x1d5b9, - 0x1d5d4, 0x1d5ed, - 0x1d608, 0x1d621, - 0x1d63c, 0x1d655, - 0x1d670, 0x1d689, - 0x1d6a8, 0x1d6c0, - 0x1d6e2, 0x1d6fa, - 0x1d71c, 0x1d734, - 0x1d756, 0x1d76e, - 0x1d790, 0x1d7a8 -}; /* CR_Upper */ - -/* 'XDigit': [[:XDigit:]] */ -static const OnigCodePoint CR_XDigit[] = { - 3, - 0x0030, 0x0039, - 0x0041, 0x0046, - 0x0061, 0x0066 -}; /* CR_XDigit */ - -/* 'Word': [[:Word:]] */ -static const OnigCodePoint CR_Word[] = { - 464, - 0x0030, 0x0039, - 0x0041, 0x005a, - 0x005f, 0x005f, - 0x0061, 0x007a, - 0x00aa, 0x00aa, - 0x00b2, 0x00b3, - 0x00b5, 0x00b5, - 0x00b9, 0x00ba, - 0x00bc, 0x00be, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, - 0x00f8, 0x0241, - 0x0250, 0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x0300, 0x036f, - 0x037a, 0x037a, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03f5, - 0x03f7, 0x0481, - 0x0483, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x0559, - 0x0561, 0x0587, - 0x0591, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c5, - 0x05c7, 0x05c7, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0610, 0x0615, - 0x0621, 0x063a, - 0x0640, 0x065e, - 
0x0660, 0x0669, - 0x066e, 0x06d3, - 0x06d5, 0x06dc, - 0x06de, 0x06e8, - 0x06ea, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x074a, - 0x074d, 0x076d, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0963, - 0x0966, 0x096f, - 0x097d, 0x097d, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09ce, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09f1, - 0x09f4, 0x09f9, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b6f, - 0x0b71, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be6, 0x0bf2, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 
0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, - 0x0e01, 0x0e3a, - 0x0e40, 0x0e4e, - 0x0e50, 0x0e59, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f00, - 0x0f18, 0x0f19, - 0x0f20, 0x0f33, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f3e, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f84, - 0x0f86, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1049, - 0x1050, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10fa, - 0x10fc, 0x10fc, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x135f, 0x135f, - 0x1369, 0x137c, - 0x1380, 0x138f, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x16ee, 0x16f0, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1734, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17b3, - 0x17b6, 0x17d3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x180b, 0x180d, - 0x1810, 0x1819, - 
0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1946, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19a9, - 0x19b0, 0x19c9, - 0x19d0, 0x19d9, - 0x1a00, 0x1a1b, - 0x1d00, 0x1dc3, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x203f, 0x2040, - 0x2054, 0x2054, - 0x2070, 0x2071, - 0x2074, 0x2079, - 0x207f, 0x2089, - 0x2090, 0x2094, - 0x20d0, 0x20eb, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2131, - 0x2133, 0x2139, - 0x213c, 0x213f, - 0x2145, 0x2149, - 0x2153, 0x2183, - 0x2460, 0x249b, - 0x24ea, 0x24ff, - 0x2776, 0x2793, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e, - 0x2c80, 0x2ce4, - 0x2cfd, 0x2cfd, - 0x2d00, 0x2d25, - 0x2d30, 0x2d65, - 0x2d6f, 0x2d6f, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x3005, 0x3007, - 0x3021, 0x302f, - 0x3031, 0x3035, - 0x3038, 0x303c, - 0x3041, 0x3096, - 0x3099, 0x309a, - 0x309d, 0x309f, - 0x30a1, 0x30fa, - 0x30fc, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x3192, 0x3195, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3220, 0x3229, - 0x3251, 0x325f, - 0x3280, 0x3289, - 0x32b1, 0x32bf, - 0x3400, 0x4db5, - 0x4e00, 0x9fbb, - 0xa000, 0xa48c, - 0xa800, 0xa827, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfa70, 0xfad9, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 
0xfdf0, 0xfdfb, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe33, 0xfe34, - 0xfe4d, 0xfe4f, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff10, 0xff19, - 0xff21, 0xff3a, - 0xff3f, 0xff3f, - 0xff41, 0xff5a, - 0xff66, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10107, 0x10133, - 0x10140, 0x10178, - 0x1018a, 0x1018a, - 0x10300, 0x1031e, - 0x10320, 0x10323, - 0x10330, 0x1034a, - 0x10380, 0x1039d, - 0x103a0, 0x103c3, - 0x103c8, 0x103cf, - 0x103d1, 0x103d5, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x10a00, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a47, - 0x1d165, 0x1d169, - 0x1d16d, 0x1d172, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d242, 0x1d244, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0100, 0xe01ef -}; /* CR_Word */ - -/* 'Alnum': [[:Alnum:]] */ -static const OnigCodePoint CR_Alnum[] = { - 436, - 0x0030, 0x0039, - 0x0041, 0x005a, - 0x0061, 0x007a, - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, - 0x00f8, 0x0241, - 0x0250, 
0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x0300, 0x036f, - 0x037a, 0x037a, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03f5, - 0x03f7, 0x0481, - 0x0483, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x0559, - 0x0561, 0x0587, - 0x0591, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c5, - 0x05c7, 0x05c7, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0610, 0x0615, - 0x0621, 0x063a, - 0x0640, 0x065e, - 0x0660, 0x0669, - 0x066e, 0x06d3, - 0x06d5, 0x06dc, - 0x06de, 0x06e8, - 0x06ea, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x074a, - 0x074d, 0x076d, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0963, - 0x0966, 0x096f, - 0x097d, 0x097d, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09ce, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09f1, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b6f, - 0x0b71, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 
0x0baa, - 0x0bae, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be6, 0x0bef, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, - 0x0e01, 0x0e3a, - 0x0e40, 0x0e4e, - 0x0e50, 0x0e59, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f00, - 0x0f18, 0x0f19, - 0x0f20, 0x0f29, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f3e, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f84, - 0x0f86, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1049, - 0x1050, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10fa, - 0x10fc, 0x10fc, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 
0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x135f, 0x135f, - 0x1380, 0x138f, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1734, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17b3, - 0x17b6, 0x17d3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dd, - 0x17e0, 0x17e9, - 0x180b, 0x180d, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1946, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19a9, - 0x19b0, 0x19c9, - 0x19d0, 0x19d9, - 0x1a00, 0x1a1b, - 0x1d00, 0x1dc3, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x2090, 0x2094, - 0x20d0, 0x20eb, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2131, - 0x2133, 0x2139, - 0x213c, 0x213f, - 0x2145, 0x2149, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e, - 0x2c80, 0x2ce4, - 0x2d00, 0x2d25, - 0x2d30, 0x2d65, - 0x2d6f, 0x2d6f, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x3005, 0x3006, - 0x302a, 0x302f, - 0x3031, 0x3035, - 0x303b, 0x303c, - 0x3041, 0x3096, - 0x3099, 0x309a, - 0x309d, 0x309f, - 0x30a1, 0x30fa, - 0x30fc, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3400, 0x4db5, - 0x4e00, 0x9fbb, - 0xa000, 0xa48c, - 0xa800, 0xa827, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, - 0xfa30, 
0xfa6a, - 0xfa70, 0xfad9, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff10, 0xff19, - 0xff21, 0xff3a, - 0xff41, 0xff5a, - 0xff66, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10300, 0x1031e, - 0x10330, 0x10349, - 0x10380, 0x1039d, - 0x103a0, 0x103c3, - 0x103c8, 0x103cf, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x10a00, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a3f, - 0x1d165, 0x1d169, - 0x1d16d, 0x1d172, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d242, 0x1d244, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0100, 0xe01ef -}; /* CR_Alnum */ - -/* 'ASCII': [[:ASCII:]] */ -static const OnigCodePoint CR_ASCII[] = { - 1, - 0x0000, 0x007f -}; /* CR_ASCII */ - -#ifdef USE_UNICODE_PROPERTIES - -/* 'Any': - */ 
-static const OnigCodePoint CR_Any[] = { - 1, - 0x0000, 0x10ffff -}; /* CR_Any */ - -/* 'Assigned': - */ -static const OnigCodePoint CR_Assigned[] = { - 420, - 0x0000, 0x0241, - 0x0250, 0x036f, - 0x0374, 0x0375, - 0x037a, 0x037a, - 0x037e, 0x037e, - 0x0384, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x055f, - 0x0561, 0x0587, - 0x0589, 0x058a, - 0x0591, 0x05b9, - 0x05bb, 0x05c7, - 0x05d0, 0x05ea, - 0x05f0, 0x05f4, - 0x0600, 0x0603, - 0x060b, 0x0615, - 0x061b, 0x061b, - 0x061e, 0x061f, - 0x0621, 0x063a, - 0x0640, 0x065e, - 0x0660, 0x070d, - 0x070f, 0x074a, - 0x074d, 0x076d, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0970, - 0x097d, 0x097d, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09ce, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09fa, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0af1, 0x0af1, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 
0x0bae, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be6, 0x0bfa, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df4, - 0x0e01, 0x0e3a, - 0x0e3f, 0x0e5b, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fbe, 0x0fcc, - 0x0fcf, 0x0fd1, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10fc, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x135f, 0x137c, - 0x1380, 0x1399, - 0x13a0, 0x13f4, - 0x1401, 0x1676, - 
0x1680, 0x169c, - 0x16a0, 0x16f0, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1736, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x1800, 0x180e, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1940, 0x1940, - 0x1944, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19a9, - 0x19b0, 0x19c9, - 0x19d0, 0x19d9, - 0x19de, 0x1a1b, - 0x1a1e, 0x1a1f, - 0x1d00, 0x1dc3, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fc4, - 0x1fc6, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fdd, 0x1fef, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffe, - 0x2000, 0x2063, - 0x206a, 0x2071, - 0x2074, 0x208e, - 0x2090, 0x2094, - 0x20a0, 0x20b5, - 0x20d0, 0x20eb, - 0x2100, 0x214c, - 0x2153, 0x2183, - 0x2190, 0x23db, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x2460, 0x269c, - 0x26a0, 0x26b1, - 0x2701, 0x2704, - 0x2706, 0x2709, - 0x270c, 0x2727, - 0x2729, 0x274b, - 0x274d, 0x274d, - 0x274f, 0x2752, - 0x2756, 0x2756, - 0x2758, 0x275e, - 0x2761, 0x2794, - 0x2798, 0x27af, - 0x27b1, 0x27be, - 0x27c0, 0x27c6, - 0x27d0, 0x27eb, - 0x27f0, 0x2b13, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e, - 0x2c80, 0x2cea, - 0x2cf9, 0x2d25, - 0x2d30, 0x2d65, - 0x2d6f, 0x2d6f, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x2e00, 0x2e17, - 0x2e1c, 0x2e1d, - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x2ff0, 0x2ffb, - 0x3000, 0x303f, - 0x3041, 0x3096, - 0x3099, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x3190, 0x31b7, - 0x31c0, 0x31cf, - 0x31f0, 0x321e, - 0x3220, 0x3243, - 0x3250, 0x32fe, - 0x3300, 0x4db5, - 0x4dc0, 0x9fbb, - 0xa000, 0xa48c, - 0xa490, 0xa4c6, - 0xa700, 0xa716, - 0xa800, 0xa82b, - 0xac00, 0xd7a3, - 
0xd800, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfa70, 0xfad9, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3f, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfd, - 0xfe00, 0xfe19, - 0xfe20, 0xfe23, - 0xfe30, 0xfe52, - 0xfe54, 0xfe66, - 0xfe68, 0xfe6b, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xfeff, 0xfeff, - 0xff01, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0xffe0, 0xffe6, - 0xffe8, 0xffee, - 0xfff9, 0xfffd, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10100, 0x10102, - 0x10107, 0x10133, - 0x10137, 0x1018a, - 0x10300, 0x1031e, - 0x10320, 0x10323, - 0x10330, 0x1034a, - 0x10380, 0x1039d, - 0x1039f, 0x103c3, - 0x103c8, 0x103d5, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x10a00, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a47, - 0x10a50, 0x10a58, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d12a, 0x1d1dd, - 0x1d200, 0x1d245, - 0x1d300, 0x1d356, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xe0100, 0xe01ef, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -}; /* CR_Assigned */ - -/* 'C': Major Category */ -static const OnigCodePoint CR_C[] = { - 422, - 0x0000, 0x001f, - 0x007f, 0x009f, - 0x00ad, 
0x00ad, - 0x0242, 0x024f, - 0x0370, 0x0373, - 0x0376, 0x0379, - 0x037b, 0x037d, - 0x037f, 0x0383, - 0x038b, 0x038b, - 0x038d, 0x038d, - 0x03a2, 0x03a2, - 0x03cf, 0x03cf, - 0x0487, 0x0487, - 0x04cf, 0x04cf, - 0x04fa, 0x04ff, - 0x0510, 0x0530, - 0x0557, 0x0558, - 0x0560, 0x0560, - 0x0588, 0x0588, - 0x058b, 0x0590, - 0x05ba, 0x05ba, - 0x05c8, 0x05cf, - 0x05eb, 0x05ef, - 0x05f5, 0x060a, - 0x0616, 0x061a, - 0x061c, 0x061d, - 0x0620, 0x0620, - 0x063b, 0x063f, - 0x065f, 0x065f, - 0x06dd, 0x06dd, - 0x070e, 0x070f, - 0x074b, 0x074c, - 0x076e, 0x077f, - 0x07b2, 0x0900, - 0x093a, 0x093b, - 0x094e, 0x094f, - 0x0955, 0x0957, - 0x0971, 0x097c, - 0x097e, 0x0980, - 0x0984, 0x0984, - 0x098d, 0x098e, - 0x0991, 0x0992, - 0x09a9, 0x09a9, - 0x09b1, 0x09b1, - 0x09b3, 0x09b5, - 0x09ba, 0x09bb, - 0x09c5, 0x09c6, - 0x09c9, 0x09ca, - 0x09cf, 0x09d6, - 0x09d8, 0x09db, - 0x09de, 0x09de, - 0x09e4, 0x09e5, - 0x09fb, 0x0a00, - 0x0a04, 0x0a04, - 0x0a0b, 0x0a0e, - 0x0a11, 0x0a12, - 0x0a29, 0x0a29, - 0x0a31, 0x0a31, - 0x0a34, 0x0a34, - 0x0a37, 0x0a37, - 0x0a3a, 0x0a3b, - 0x0a3d, 0x0a3d, - 0x0a43, 0x0a46, - 0x0a49, 0x0a4a, - 0x0a4e, 0x0a58, - 0x0a5d, 0x0a5d, - 0x0a5f, 0x0a65, - 0x0a75, 0x0a80, - 0x0a84, 0x0a84, - 0x0a8e, 0x0a8e, - 0x0a92, 0x0a92, - 0x0aa9, 0x0aa9, - 0x0ab1, 0x0ab1, - 0x0ab4, 0x0ab4, - 0x0aba, 0x0abb, - 0x0ac6, 0x0ac6, - 0x0aca, 0x0aca, - 0x0ace, 0x0acf, - 0x0ad1, 0x0adf, - 0x0ae4, 0x0ae5, - 0x0af0, 0x0af0, - 0x0af2, 0x0b00, - 0x0b04, 0x0b04, - 0x0b0d, 0x0b0e, - 0x0b11, 0x0b12, - 0x0b29, 0x0b29, - 0x0b31, 0x0b31, - 0x0b34, 0x0b34, - 0x0b3a, 0x0b3b, - 0x0b44, 0x0b46, - 0x0b49, 0x0b4a, - 0x0b4e, 0x0b55, - 0x0b58, 0x0b5b, - 0x0b5e, 0x0b5e, - 0x0b62, 0x0b65, - 0x0b72, 0x0b81, - 0x0b84, 0x0b84, - 0x0b8b, 0x0b8d, - 0x0b91, 0x0b91, - 0x0b96, 0x0b98, - 0x0b9b, 0x0b9b, - 0x0b9d, 0x0b9d, - 0x0ba0, 0x0ba2, - 0x0ba5, 0x0ba7, - 0x0bab, 0x0bad, - 0x0bba, 0x0bbd, - 0x0bc3, 0x0bc5, - 0x0bc9, 0x0bc9, - 0x0bce, 0x0bd6, - 0x0bd8, 0x0be5, - 0x0bfb, 0x0c00, - 0x0c04, 0x0c04, - 0x0c0d, 0x0c0d, - 0x0c11, 
0x0c11, - 0x0c29, 0x0c29, - 0x0c34, 0x0c34, - 0x0c3a, 0x0c3d, - 0x0c45, 0x0c45, - 0x0c49, 0x0c49, - 0x0c4e, 0x0c54, - 0x0c57, 0x0c5f, - 0x0c62, 0x0c65, - 0x0c70, 0x0c81, - 0x0c84, 0x0c84, - 0x0c8d, 0x0c8d, - 0x0c91, 0x0c91, - 0x0ca9, 0x0ca9, - 0x0cb4, 0x0cb4, - 0x0cba, 0x0cbb, - 0x0cc5, 0x0cc5, - 0x0cc9, 0x0cc9, - 0x0cce, 0x0cd4, - 0x0cd7, 0x0cdd, - 0x0cdf, 0x0cdf, - 0x0ce2, 0x0ce5, - 0x0cf0, 0x0d01, - 0x0d04, 0x0d04, - 0x0d0d, 0x0d0d, - 0x0d11, 0x0d11, - 0x0d29, 0x0d29, - 0x0d3a, 0x0d3d, - 0x0d44, 0x0d45, - 0x0d49, 0x0d49, - 0x0d4e, 0x0d56, - 0x0d58, 0x0d5f, - 0x0d62, 0x0d65, - 0x0d70, 0x0d81, - 0x0d84, 0x0d84, - 0x0d97, 0x0d99, - 0x0db2, 0x0db2, - 0x0dbc, 0x0dbc, - 0x0dbe, 0x0dbf, - 0x0dc7, 0x0dc9, - 0x0dcb, 0x0dce, - 0x0dd5, 0x0dd5, - 0x0dd7, 0x0dd7, - 0x0de0, 0x0df1, - 0x0df5, 0x0e00, - 0x0e3b, 0x0e3e, - 0x0e5c, 0x0e80, - 0x0e83, 0x0e83, - 0x0e85, 0x0e86, - 0x0e89, 0x0e89, - 0x0e8b, 0x0e8c, - 0x0e8e, 0x0e93, - 0x0e98, 0x0e98, - 0x0ea0, 0x0ea0, - 0x0ea4, 0x0ea4, - 0x0ea6, 0x0ea6, - 0x0ea8, 0x0ea9, - 0x0eac, 0x0eac, - 0x0eba, 0x0eba, - 0x0ebe, 0x0ebf, - 0x0ec5, 0x0ec5, - 0x0ec7, 0x0ec7, - 0x0ece, 0x0ecf, - 0x0eda, 0x0edb, - 0x0ede, 0x0eff, - 0x0f48, 0x0f48, - 0x0f6b, 0x0f70, - 0x0f8c, 0x0f8f, - 0x0f98, 0x0f98, - 0x0fbd, 0x0fbd, - 0x0fcd, 0x0fce, - 0x0fd2, 0x0fff, - 0x1022, 0x1022, - 0x1028, 0x1028, - 0x102b, 0x102b, - 0x1033, 0x1035, - 0x103a, 0x103f, - 0x105a, 0x109f, - 0x10c6, 0x10cf, - 0x10fd, 0x10ff, - 0x115a, 0x115e, - 0x11a3, 0x11a7, - 0x11fa, 0x11ff, - 0x1249, 0x1249, - 0x124e, 0x124f, - 0x1257, 0x1257, - 0x1259, 0x1259, - 0x125e, 0x125f, - 0x1289, 0x1289, - 0x128e, 0x128f, - 0x12b1, 0x12b1, - 0x12b6, 0x12b7, - 0x12bf, 0x12bf, - 0x12c1, 0x12c1, - 0x12c6, 0x12c7, - 0x12d7, 0x12d7, - 0x1311, 0x1311, - 0x1316, 0x1317, - 0x135b, 0x135e, - 0x137d, 0x137f, - 0x139a, 0x139f, - 0x13f5, 0x1400, - 0x1677, 0x167f, - 0x169d, 0x169f, - 0x16f1, 0x16ff, - 0x170d, 0x170d, - 0x1715, 0x171f, - 0x1737, 0x173f, - 0x1754, 0x175f, - 0x176d, 0x176d, - 0x1771, 0x1771, - 0x1774, 
0x177f, - 0x17b4, 0x17b5, - 0x17de, 0x17df, - 0x17ea, 0x17ef, - 0x17fa, 0x17ff, - 0x180f, 0x180f, - 0x181a, 0x181f, - 0x1878, 0x187f, - 0x18aa, 0x18ff, - 0x191d, 0x191f, - 0x192c, 0x192f, - 0x193c, 0x193f, - 0x1941, 0x1943, - 0x196e, 0x196f, - 0x1975, 0x197f, - 0x19aa, 0x19af, - 0x19ca, 0x19cf, - 0x19da, 0x19dd, - 0x1a1c, 0x1a1d, - 0x1a20, 0x1cff, - 0x1dc4, 0x1dff, - 0x1e9c, 0x1e9f, - 0x1efa, 0x1eff, - 0x1f16, 0x1f17, - 0x1f1e, 0x1f1f, - 0x1f46, 0x1f47, - 0x1f4e, 0x1f4f, - 0x1f58, 0x1f58, - 0x1f5a, 0x1f5a, - 0x1f5c, 0x1f5c, - 0x1f5e, 0x1f5e, - 0x1f7e, 0x1f7f, - 0x1fb5, 0x1fb5, - 0x1fc5, 0x1fc5, - 0x1fd4, 0x1fd5, - 0x1fdc, 0x1fdc, - 0x1ff0, 0x1ff1, - 0x1ff5, 0x1ff5, - 0x1fff, 0x1fff, - 0x200b, 0x200f, - 0x202a, 0x202e, - 0x2060, 0x206f, - 0x2072, 0x2073, - 0x208f, 0x208f, - 0x2095, 0x209f, - 0x20b6, 0x20cf, - 0x20ec, 0x20ff, - 0x214d, 0x2152, - 0x2184, 0x218f, - 0x23dc, 0x23ff, - 0x2427, 0x243f, - 0x244b, 0x245f, - 0x269d, 0x269f, - 0x26b2, 0x2700, - 0x2705, 0x2705, - 0x270a, 0x270b, - 0x2728, 0x2728, - 0x274c, 0x274c, - 0x274e, 0x274e, - 0x2753, 0x2755, - 0x2757, 0x2757, - 0x275f, 0x2760, - 0x2795, 0x2797, - 0x27b0, 0x27b0, - 0x27bf, 0x27bf, - 0x27c7, 0x27cf, - 0x27ec, 0x27ef, - 0x2b14, 0x2bff, - 0x2c2f, 0x2c2f, - 0x2c5f, 0x2c7f, - 0x2ceb, 0x2cf8, - 0x2d26, 0x2d2f, - 0x2d66, 0x2d6e, - 0x2d70, 0x2d7f, - 0x2d97, 0x2d9f, - 0x2da7, 0x2da7, - 0x2daf, 0x2daf, - 0x2db7, 0x2db7, - 0x2dbf, 0x2dbf, - 0x2dc7, 0x2dc7, - 0x2dcf, 0x2dcf, - 0x2dd7, 0x2dd7, - 0x2ddf, 0x2dff, - 0x2e18, 0x2e1b, - 0x2e1e, 0x2e7f, - 0x2e9a, 0x2e9a, - 0x2ef4, 0x2eff, - 0x2fd6, 0x2fef, - 0x2ffc, 0x2fff, - 0x3040, 0x3040, - 0x3097, 0x3098, - 0x3100, 0x3104, - 0x312d, 0x3130, - 0x318f, 0x318f, - 0x31b8, 0x31bf, - 0x31d0, 0x31ef, - 0x321f, 0x321f, - 0x3244, 0x324f, - 0x32ff, 0x32ff, - 0x4db6, 0x4dbf, - 0x9fbc, 0x9fff, - 0xa48d, 0xa48f, - 0xa4c7, 0xa6ff, - 0xa717, 0xa7ff, - 0xa82c, 0xabff, - 0xd7a4, 0xf8ff, - 0xfa2e, 0xfa2f, - 0xfa6b, 0xfa6f, - 0xfada, 0xfaff, - 0xfb07, 0xfb12, - 0xfb18, 0xfb1c, - 0xfb37, 
0xfb37, - 0xfb3d, 0xfb3d, - 0xfb3f, 0xfb3f, - 0xfb42, 0xfb42, - 0xfb45, 0xfb45, - 0xfbb2, 0xfbd2, - 0xfd40, 0xfd4f, - 0xfd90, 0xfd91, - 0xfdc8, 0xfdef, - 0xfdfe, 0xfdff, - 0xfe1a, 0xfe1f, - 0xfe24, 0xfe2f, - 0xfe53, 0xfe53, - 0xfe67, 0xfe67, - 0xfe6c, 0xfe6f, - 0xfe75, 0xfe75, - 0xfefd, 0xff00, - 0xffbf, 0xffc1, - 0xffc8, 0xffc9, - 0xffd0, 0xffd1, - 0xffd8, 0xffd9, - 0xffdd, 0xffdf, - 0xffe7, 0xffe7, - 0xffef, 0xfffb, - 0xfffe, 0xffff, - 0x1000c, 0x1000c, - 0x10027, 0x10027, - 0x1003b, 0x1003b, - 0x1003e, 0x1003e, - 0x1004e, 0x1004f, - 0x1005e, 0x1007f, - 0x100fb, 0x100ff, - 0x10103, 0x10106, - 0x10134, 0x10136, - 0x1018b, 0x102ff, - 0x1031f, 0x1031f, - 0x10324, 0x1032f, - 0x1034b, 0x1037f, - 0x1039e, 0x1039e, - 0x103c4, 0x103c7, - 0x103d6, 0x103ff, - 0x1049e, 0x1049f, - 0x104aa, 0x107ff, - 0x10806, 0x10807, - 0x10809, 0x10809, - 0x10836, 0x10836, - 0x10839, 0x1083b, - 0x1083d, 0x1083e, - 0x10840, 0x109ff, - 0x10a04, 0x10a04, - 0x10a07, 0x10a0b, - 0x10a14, 0x10a14, - 0x10a18, 0x10a18, - 0x10a34, 0x10a37, - 0x10a3b, 0x10a3e, - 0x10a48, 0x10a4f, - 0x10a59, 0x1cfff, - 0x1d0f6, 0x1d0ff, - 0x1d127, 0x1d129, - 0x1d173, 0x1d17a, - 0x1d1de, 0x1d1ff, - 0x1d246, 0x1d2ff, - 0x1d357, 0x1d3ff, - 0x1d455, 0x1d455, - 0x1d49d, 0x1d49d, - 0x1d4a0, 0x1d4a1, - 0x1d4a3, 0x1d4a4, - 0x1d4a7, 0x1d4a8, - 0x1d4ad, 0x1d4ad, - 0x1d4ba, 0x1d4ba, - 0x1d4bc, 0x1d4bc, - 0x1d4c4, 0x1d4c4, - 0x1d506, 0x1d506, - 0x1d50b, 0x1d50c, - 0x1d515, 0x1d515, - 0x1d51d, 0x1d51d, - 0x1d53a, 0x1d53a, - 0x1d53f, 0x1d53f, - 0x1d545, 0x1d545, - 0x1d547, 0x1d549, - 0x1d551, 0x1d551, - 0x1d6a6, 0x1d6a7, - 0x1d7ca, 0x1d7cd, - 0x1d800, 0x1ffff, - 0x2a6d7, 0x2f7ff, - 0x2fa1e, 0xe00ff, - 0xe01f0, 0x10ffff -}; /* CR_C */ - -/* 'Cc': General Category */ -static const OnigCodePoint CR_Cc[] = { - 2, - 0x0000, 0x001f, - 0x007f, 0x009f -}; /* CR_Cc */ - -/* 'Cf': General Category */ -static const OnigCodePoint CR_Cf[] = { - 14, - 0x00ad, 0x00ad, - 0x0600, 0x0603, - 0x06dd, 0x06dd, - 0x070f, 0x070f, - 0x17b4, 0x17b5, - 
0x200b, 0x200f, - 0x202a, 0x202e, - 0x2060, 0x2063, - 0x206a, 0x206f, - 0xfeff, 0xfeff, - 0xfff9, 0xfffb, - 0x1d173, 0x1d17a, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f -}; /* CR_Cf */ - -/* 'Cn': General Category */ -static const OnigCodePoint CR_Cn[] = { - 420, - 0x0242, 0x024f, - 0x0370, 0x0373, - 0x0376, 0x0379, - 0x037b, 0x037d, - 0x037f, 0x0383, - 0x038b, 0x038b, - 0x038d, 0x038d, - 0x03a2, 0x03a2, - 0x03cf, 0x03cf, - 0x0487, 0x0487, - 0x04cf, 0x04cf, - 0x04fa, 0x04ff, - 0x0510, 0x0530, - 0x0557, 0x0558, - 0x0560, 0x0560, - 0x0588, 0x0588, - 0x058b, 0x0590, - 0x05ba, 0x05ba, - 0x05c8, 0x05cf, - 0x05eb, 0x05ef, - 0x05f5, 0x05ff, - 0x0604, 0x060a, - 0x0616, 0x061a, - 0x061c, 0x061d, - 0x0620, 0x0620, - 0x063b, 0x063f, - 0x065f, 0x065f, - 0x070e, 0x070e, - 0x074b, 0x074c, - 0x076e, 0x077f, - 0x07b2, 0x0900, - 0x093a, 0x093b, - 0x094e, 0x094f, - 0x0955, 0x0957, - 0x0971, 0x097c, - 0x097e, 0x0980, - 0x0984, 0x0984, - 0x098d, 0x098e, - 0x0991, 0x0992, - 0x09a9, 0x09a9, - 0x09b1, 0x09b1, - 0x09b3, 0x09b5, - 0x09ba, 0x09bb, - 0x09c5, 0x09c6, - 0x09c9, 0x09ca, - 0x09cf, 0x09d6, - 0x09d8, 0x09db, - 0x09de, 0x09de, - 0x09e4, 0x09e5, - 0x09fb, 0x0a00, - 0x0a04, 0x0a04, - 0x0a0b, 0x0a0e, - 0x0a11, 0x0a12, - 0x0a29, 0x0a29, - 0x0a31, 0x0a31, - 0x0a34, 0x0a34, - 0x0a37, 0x0a37, - 0x0a3a, 0x0a3b, - 0x0a3d, 0x0a3d, - 0x0a43, 0x0a46, - 0x0a49, 0x0a4a, - 0x0a4e, 0x0a58, - 0x0a5d, 0x0a5d, - 0x0a5f, 0x0a65, - 0x0a75, 0x0a80, - 0x0a84, 0x0a84, - 0x0a8e, 0x0a8e, - 0x0a92, 0x0a92, - 0x0aa9, 0x0aa9, - 0x0ab1, 0x0ab1, - 0x0ab4, 0x0ab4, - 0x0aba, 0x0abb, - 0x0ac6, 0x0ac6, - 0x0aca, 0x0aca, - 0x0ace, 0x0acf, - 0x0ad1, 0x0adf, - 0x0ae4, 0x0ae5, - 0x0af0, 0x0af0, - 0x0af2, 0x0b00, - 0x0b04, 0x0b04, - 0x0b0d, 0x0b0e, - 0x0b11, 0x0b12, - 0x0b29, 0x0b29, - 0x0b31, 0x0b31, - 0x0b34, 0x0b34, - 0x0b3a, 0x0b3b, - 0x0b44, 0x0b46, - 0x0b49, 0x0b4a, - 0x0b4e, 0x0b55, - 0x0b58, 0x0b5b, - 0x0b5e, 0x0b5e, - 0x0b62, 0x0b65, - 0x0b72, 0x0b81, - 0x0b84, 0x0b84, - 0x0b8b, 0x0b8d, - 0x0b91, 0x0b91, - 0x0b96, 
0x0b98, - 0x0b9b, 0x0b9b, - 0x0b9d, 0x0b9d, - 0x0ba0, 0x0ba2, - 0x0ba5, 0x0ba7, - 0x0bab, 0x0bad, - 0x0bba, 0x0bbd, - 0x0bc3, 0x0bc5, - 0x0bc9, 0x0bc9, - 0x0bce, 0x0bd6, - 0x0bd8, 0x0be5, - 0x0bfb, 0x0c00, - 0x0c04, 0x0c04, - 0x0c0d, 0x0c0d, - 0x0c11, 0x0c11, - 0x0c29, 0x0c29, - 0x0c34, 0x0c34, - 0x0c3a, 0x0c3d, - 0x0c45, 0x0c45, - 0x0c49, 0x0c49, - 0x0c4e, 0x0c54, - 0x0c57, 0x0c5f, - 0x0c62, 0x0c65, - 0x0c70, 0x0c81, - 0x0c84, 0x0c84, - 0x0c8d, 0x0c8d, - 0x0c91, 0x0c91, - 0x0ca9, 0x0ca9, - 0x0cb4, 0x0cb4, - 0x0cba, 0x0cbb, - 0x0cc5, 0x0cc5, - 0x0cc9, 0x0cc9, - 0x0cce, 0x0cd4, - 0x0cd7, 0x0cdd, - 0x0cdf, 0x0cdf, - 0x0ce2, 0x0ce5, - 0x0cf0, 0x0d01, - 0x0d04, 0x0d04, - 0x0d0d, 0x0d0d, - 0x0d11, 0x0d11, - 0x0d29, 0x0d29, - 0x0d3a, 0x0d3d, - 0x0d44, 0x0d45, - 0x0d49, 0x0d49, - 0x0d4e, 0x0d56, - 0x0d58, 0x0d5f, - 0x0d62, 0x0d65, - 0x0d70, 0x0d81, - 0x0d84, 0x0d84, - 0x0d97, 0x0d99, - 0x0db2, 0x0db2, - 0x0dbc, 0x0dbc, - 0x0dbe, 0x0dbf, - 0x0dc7, 0x0dc9, - 0x0dcb, 0x0dce, - 0x0dd5, 0x0dd5, - 0x0dd7, 0x0dd7, - 0x0de0, 0x0df1, - 0x0df5, 0x0e00, - 0x0e3b, 0x0e3e, - 0x0e5c, 0x0e80, - 0x0e83, 0x0e83, - 0x0e85, 0x0e86, - 0x0e89, 0x0e89, - 0x0e8b, 0x0e8c, - 0x0e8e, 0x0e93, - 0x0e98, 0x0e98, - 0x0ea0, 0x0ea0, - 0x0ea4, 0x0ea4, - 0x0ea6, 0x0ea6, - 0x0ea8, 0x0ea9, - 0x0eac, 0x0eac, - 0x0eba, 0x0eba, - 0x0ebe, 0x0ebf, - 0x0ec5, 0x0ec5, - 0x0ec7, 0x0ec7, - 0x0ece, 0x0ecf, - 0x0eda, 0x0edb, - 0x0ede, 0x0eff, - 0x0f48, 0x0f48, - 0x0f6b, 0x0f70, - 0x0f8c, 0x0f8f, - 0x0f98, 0x0f98, - 0x0fbd, 0x0fbd, - 0x0fcd, 0x0fce, - 0x0fd2, 0x0fff, - 0x1022, 0x1022, - 0x1028, 0x1028, - 0x102b, 0x102b, - 0x1033, 0x1035, - 0x103a, 0x103f, - 0x105a, 0x109f, - 0x10c6, 0x10cf, - 0x10fd, 0x10ff, - 0x115a, 0x115e, - 0x11a3, 0x11a7, - 0x11fa, 0x11ff, - 0x1249, 0x1249, - 0x124e, 0x124f, - 0x1257, 0x1257, - 0x1259, 0x1259, - 0x125e, 0x125f, - 0x1289, 0x1289, - 0x128e, 0x128f, - 0x12b1, 0x12b1, - 0x12b6, 0x12b7, - 0x12bf, 0x12bf, - 0x12c1, 0x12c1, - 0x12c6, 0x12c7, - 0x12d7, 0x12d7, - 0x1311, 0x1311, - 0x1316, 
0x1317, - 0x135b, 0x135e, - 0x137d, 0x137f, - 0x139a, 0x139f, - 0x13f5, 0x1400, - 0x1677, 0x167f, - 0x169d, 0x169f, - 0x16f1, 0x16ff, - 0x170d, 0x170d, - 0x1715, 0x171f, - 0x1737, 0x173f, - 0x1754, 0x175f, - 0x176d, 0x176d, - 0x1771, 0x1771, - 0x1774, 0x177f, - 0x17de, 0x17df, - 0x17ea, 0x17ef, - 0x17fa, 0x17ff, - 0x180f, 0x180f, - 0x181a, 0x181f, - 0x1878, 0x187f, - 0x18aa, 0x18ff, - 0x191d, 0x191f, - 0x192c, 0x192f, - 0x193c, 0x193f, - 0x1941, 0x1943, - 0x196e, 0x196f, - 0x1975, 0x197f, - 0x19aa, 0x19af, - 0x19ca, 0x19cf, - 0x19da, 0x19dd, - 0x1a1c, 0x1a1d, - 0x1a20, 0x1cff, - 0x1dc4, 0x1dff, - 0x1e9c, 0x1e9f, - 0x1efa, 0x1eff, - 0x1f16, 0x1f17, - 0x1f1e, 0x1f1f, - 0x1f46, 0x1f47, - 0x1f4e, 0x1f4f, - 0x1f58, 0x1f58, - 0x1f5a, 0x1f5a, - 0x1f5c, 0x1f5c, - 0x1f5e, 0x1f5e, - 0x1f7e, 0x1f7f, - 0x1fb5, 0x1fb5, - 0x1fc5, 0x1fc5, - 0x1fd4, 0x1fd5, - 0x1fdc, 0x1fdc, - 0x1ff0, 0x1ff1, - 0x1ff5, 0x1ff5, - 0x1fff, 0x1fff, - 0x2064, 0x2069, - 0x2072, 0x2073, - 0x208f, 0x208f, - 0x2095, 0x209f, - 0x20b6, 0x20cf, - 0x20ec, 0x20ff, - 0x214d, 0x2152, - 0x2184, 0x218f, - 0x23dc, 0x23ff, - 0x2427, 0x243f, - 0x244b, 0x245f, - 0x269d, 0x269f, - 0x26b2, 0x2700, - 0x2705, 0x2705, - 0x270a, 0x270b, - 0x2728, 0x2728, - 0x274c, 0x274c, - 0x274e, 0x274e, - 0x2753, 0x2755, - 0x2757, 0x2757, - 0x275f, 0x2760, - 0x2795, 0x2797, - 0x27b0, 0x27b0, - 0x27bf, 0x27bf, - 0x27c7, 0x27cf, - 0x27ec, 0x27ef, - 0x2b14, 0x2bff, - 0x2c2f, 0x2c2f, - 0x2c5f, 0x2c7f, - 0x2ceb, 0x2cf8, - 0x2d26, 0x2d2f, - 0x2d66, 0x2d6e, - 0x2d70, 0x2d7f, - 0x2d97, 0x2d9f, - 0x2da7, 0x2da7, - 0x2daf, 0x2daf, - 0x2db7, 0x2db7, - 0x2dbf, 0x2dbf, - 0x2dc7, 0x2dc7, - 0x2dcf, 0x2dcf, - 0x2dd7, 0x2dd7, - 0x2ddf, 0x2dff, - 0x2e18, 0x2e1b, - 0x2e1e, 0x2e7f, - 0x2e9a, 0x2e9a, - 0x2ef4, 0x2eff, - 0x2fd6, 0x2fef, - 0x2ffc, 0x2fff, - 0x3040, 0x3040, - 0x3097, 0x3098, - 0x3100, 0x3104, - 0x312d, 0x3130, - 0x318f, 0x318f, - 0x31b8, 0x31bf, - 0x31d0, 0x31ef, - 0x321f, 0x321f, - 0x3244, 0x324f, - 0x32ff, 0x32ff, - 0x4db6, 0x4dbf, - 0x9fbc, 
0x9fff, - 0xa48d, 0xa48f, - 0xa4c7, 0xa6ff, - 0xa717, 0xa7ff, - 0xa82c, 0xabff, - 0xd7a4, 0xd7ff, - 0xfa2e, 0xfa2f, - 0xfa6b, 0xfa6f, - 0xfada, 0xfaff, - 0xfb07, 0xfb12, - 0xfb18, 0xfb1c, - 0xfb37, 0xfb37, - 0xfb3d, 0xfb3d, - 0xfb3f, 0xfb3f, - 0xfb42, 0xfb42, - 0xfb45, 0xfb45, - 0xfbb2, 0xfbd2, - 0xfd40, 0xfd4f, - 0xfd90, 0xfd91, - 0xfdc8, 0xfdef, - 0xfdfe, 0xfdff, - 0xfe1a, 0xfe1f, - 0xfe24, 0xfe2f, - 0xfe53, 0xfe53, - 0xfe67, 0xfe67, - 0xfe6c, 0xfe6f, - 0xfe75, 0xfe75, - 0xfefd, 0xfefe, - 0xff00, 0xff00, - 0xffbf, 0xffc1, - 0xffc8, 0xffc9, - 0xffd0, 0xffd1, - 0xffd8, 0xffd9, - 0xffdd, 0xffdf, - 0xffe7, 0xffe7, - 0xffef, 0xfff8, - 0xfffe, 0xffff, - 0x1000c, 0x1000c, - 0x10027, 0x10027, - 0x1003b, 0x1003b, - 0x1003e, 0x1003e, - 0x1004e, 0x1004f, - 0x1005e, 0x1007f, - 0x100fb, 0x100ff, - 0x10103, 0x10106, - 0x10134, 0x10136, - 0x1018b, 0x102ff, - 0x1031f, 0x1031f, - 0x10324, 0x1032f, - 0x1034b, 0x1037f, - 0x1039e, 0x1039e, - 0x103c4, 0x103c7, - 0x103d6, 0x103ff, - 0x1049e, 0x1049f, - 0x104aa, 0x107ff, - 0x10806, 0x10807, - 0x10809, 0x10809, - 0x10836, 0x10836, - 0x10839, 0x1083b, - 0x1083d, 0x1083e, - 0x10840, 0x109ff, - 0x10a04, 0x10a04, - 0x10a07, 0x10a0b, - 0x10a14, 0x10a14, - 0x10a18, 0x10a18, - 0x10a34, 0x10a37, - 0x10a3b, 0x10a3e, - 0x10a48, 0x10a4f, - 0x10a59, 0x1cfff, - 0x1d0f6, 0x1d0ff, - 0x1d127, 0x1d129, - 0x1d1de, 0x1d1ff, - 0x1d246, 0x1d2ff, - 0x1d357, 0x1d3ff, - 0x1d455, 0x1d455, - 0x1d49d, 0x1d49d, - 0x1d4a0, 0x1d4a1, - 0x1d4a3, 0x1d4a4, - 0x1d4a7, 0x1d4a8, - 0x1d4ad, 0x1d4ad, - 0x1d4ba, 0x1d4ba, - 0x1d4bc, 0x1d4bc, - 0x1d4c4, 0x1d4c4, - 0x1d506, 0x1d506, - 0x1d50b, 0x1d50c, - 0x1d515, 0x1d515, - 0x1d51d, 0x1d51d, - 0x1d53a, 0x1d53a, - 0x1d53f, 0x1d53f, - 0x1d545, 0x1d545, - 0x1d547, 0x1d549, - 0x1d551, 0x1d551, - 0x1d6a6, 0x1d6a7, - 0x1d7ca, 0x1d7cd, - 0x1d800, 0x1ffff, - 0x2a6d7, 0x2f7ff, - 0x2fa1e, 0xe0000, - 0xe0002, 0xe001f, - 0xe0080, 0xe00ff, - 0xe01f0, 0xeffff, - 0xffffe, 0xfffff, - 0x10fffe, 0x10ffff -}; /* CR_Cn */ - -/* 'Co': General 
Category */ -static const OnigCodePoint CR_Co[] = { - 3, - 0xe000, 0xf8ff, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -}; /* CR_Co */ - -/* 'Cs': General Category */ -static const OnigCodePoint CR_Cs[] = { - 1, - 0xd800, 0xdfff -}; /* CR_Cs */ - -/* 'L': Major Category */ -static const OnigCodePoint CR_L[] = { - 347, - 0x0041, 0x005a, - 0x0061, 0x007a, - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, - 0x00f8, 0x0241, - 0x0250, 0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x037a, 0x037a, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03f5, - 0x03f7, 0x0481, - 0x048a, 0x04ce, - 0x04d0, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x0559, - 0x0561, 0x0587, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0621, 0x063a, - 0x0640, 0x064a, - 0x066e, 0x066f, - 0x0671, 0x06d3, - 0x06d5, 0x06d5, - 0x06e5, 0x06e6, - 0x06ee, 0x06ef, - 0x06fa, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x0710, - 0x0712, 0x072f, - 0x074d, 0x076d, - 0x0780, 0x07a5, - 0x07b1, 0x07b1, - 0x0904, 0x0939, - 0x093d, 0x093d, - 0x0950, 0x0950, - 0x0958, 0x0961, - 0x097d, 0x097d, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bd, 0x09bd, - 0x09ce, 0x09ce, - 0x09dc, 0x09dd, - 0x09df, 0x09e1, - 0x09f0, 0x09f1, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a72, 0x0a74, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abd, 0x0abd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae1, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3d, 0x0b3d, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b71, 0x0b71, - 0x0b83, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 
0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb9, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c60, 0x0c61, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbd, 0x0cbd, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d60, 0x0d61, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0e01, 0x0e30, - 0x0e32, 0x0e33, - 0x0e40, 0x0e46, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb0, - 0x0eb2, 0x0eb3, - 0x0ebd, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0edc, 0x0edd, - 0x0f00, 0x0f00, - 0x0f40, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f88, 0x0f8b, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x1050, 0x1055, - 0x10a0, 0x10c5, - 0x10d0, 0x10fa, - 0x10fc, 0x10fc, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x1380, 0x138f, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x1700, 0x170c, - 0x170e, 0x1711, - 0x1720, 0x1731, - 0x1740, 0x1751, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1780, 0x17b3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dc, - 0x1820, 0x1877, - 0x1880, 0x18a8, - 0x1900, 0x191c, - 0x1950, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19a9, - 0x19c1, 0x19c7, - 0x1a00, 0x1a16, - 0x1d00, 0x1dbf, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 
0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x2090, 0x2094, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2131, - 0x2133, 0x2139, - 0x213c, 0x213f, - 0x2145, 0x2149, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e, - 0x2c80, 0x2ce4, - 0x2d00, 0x2d25, - 0x2d30, 0x2d65, - 0x2d6f, 0x2d6f, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x3005, 0x3006, - 0x3031, 0x3035, - 0x303b, 0x303c, - 0x3041, 0x3096, - 0x309d, 0x309f, - 0x30a1, 0x30fa, - 0x30fc, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3400, 0x4db5, - 0x4e00, 0x9fbb, - 0xa000, 0xa48c, - 0xa800, 0xa801, - 0xa803, 0xa805, - 0xa807, 0xa80a, - 0xa80c, 0xa822, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfa70, 0xfad9, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb1d, - 0xfb1f, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff21, 0xff3a, - 0xff41, 0xff5a, - 0xff66, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10300, 0x1031e, - 0x10330, 0x10349, - 0x10380, 0x1039d, - 0x103a0, 0x103c3, - 0x103c8, 0x103cf, - 0x10400, 0x1049d, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x10a00, 0x10a00, - 0x10a10, 
0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d -}; /* CR_L */ - -/* 'Ll': General Category */ -static const OnigCodePoint CR_Ll[] = { - 480, - 0x0061, 0x007a, - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00df, 0x00f6, - 0x00f8, 0x00ff, - 0x0101, 0x0101, - 0x0103, 0x0103, - 0x0105, 0x0105, - 0x0107, 0x0107, - 0x0109, 0x0109, - 0x010b, 0x010b, - 0x010d, 0x010d, - 0x010f, 0x010f, - 0x0111, 0x0111, - 0x0113, 0x0113, - 0x0115, 0x0115, - 0x0117, 0x0117, - 0x0119, 0x0119, - 0x011b, 0x011b, - 0x011d, 0x011d, - 0x011f, 0x011f, - 0x0121, 0x0121, - 0x0123, 0x0123, - 0x0125, 0x0125, - 0x0127, 0x0127, - 0x0129, 0x0129, - 0x012b, 0x012b, - 0x012d, 0x012d, - 0x012f, 0x012f, - 0x0131, 0x0131, - 0x0133, 0x0133, - 0x0135, 0x0135, - 0x0137, 0x0138, - 0x013a, 0x013a, - 0x013c, 0x013c, - 0x013e, 0x013e, - 0x0140, 0x0140, - 0x0142, 0x0142, - 0x0144, 0x0144, - 0x0146, 0x0146, - 0x0148, 0x0149, - 0x014b, 0x014b, - 0x014d, 0x014d, - 0x014f, 0x014f, - 0x0151, 0x0151, - 0x0153, 0x0153, - 0x0155, 0x0155, - 0x0157, 0x0157, - 0x0159, 0x0159, - 0x015b, 0x015b, - 0x015d, 0x015d, - 0x015f, 0x015f, - 0x0161, 0x0161, - 0x0163, 0x0163, - 0x0165, 0x0165, - 0x0167, 0x0167, - 0x0169, 0x0169, - 0x016b, 0x016b, - 0x016d, 0x016d, - 0x016f, 0x016f, - 0x0171, 0x0171, - 0x0173, 0x0173, - 0x0175, 0x0175, - 0x0177, 0x0177, - 0x017a, 0x017a, - 0x017c, 0x017c, - 0x017e, 
0x0180, - 0x0183, 0x0183, - 0x0185, 0x0185, - 0x0188, 0x0188, - 0x018c, 0x018d, - 0x0192, 0x0192, - 0x0195, 0x0195, - 0x0199, 0x019b, - 0x019e, 0x019e, - 0x01a1, 0x01a1, - 0x01a3, 0x01a3, - 0x01a5, 0x01a5, - 0x01a8, 0x01a8, - 0x01aa, 0x01ab, - 0x01ad, 0x01ad, - 0x01b0, 0x01b0, - 0x01b4, 0x01b4, - 0x01b6, 0x01b6, - 0x01b9, 0x01ba, - 0x01bd, 0x01bf, - 0x01c6, 0x01c6, - 0x01c9, 0x01c9, - 0x01cc, 0x01cc, - 0x01ce, 0x01ce, - 0x01d0, 0x01d0, - 0x01d2, 0x01d2, - 0x01d4, 0x01d4, - 0x01d6, 0x01d6, - 0x01d8, 0x01d8, - 0x01da, 0x01da, - 0x01dc, 0x01dd, - 0x01df, 0x01df, - 0x01e1, 0x01e1, - 0x01e3, 0x01e3, - 0x01e5, 0x01e5, - 0x01e7, 0x01e7, - 0x01e9, 0x01e9, - 0x01eb, 0x01eb, - 0x01ed, 0x01ed, - 0x01ef, 0x01f0, - 0x01f3, 0x01f3, - 0x01f5, 0x01f5, - 0x01f9, 0x01f9, - 0x01fb, 0x01fb, - 0x01fd, 0x01fd, - 0x01ff, 0x01ff, - 0x0201, 0x0201, - 0x0203, 0x0203, - 0x0205, 0x0205, - 0x0207, 0x0207, - 0x0209, 0x0209, - 0x020b, 0x020b, - 0x020d, 0x020d, - 0x020f, 0x020f, - 0x0211, 0x0211, - 0x0213, 0x0213, - 0x0215, 0x0215, - 0x0217, 0x0217, - 0x0219, 0x0219, - 0x021b, 0x021b, - 0x021d, 0x021d, - 0x021f, 0x021f, - 0x0221, 0x0221, - 0x0223, 0x0223, - 0x0225, 0x0225, - 0x0227, 0x0227, - 0x0229, 0x0229, - 0x022b, 0x022b, - 0x022d, 0x022d, - 0x022f, 0x022f, - 0x0231, 0x0231, - 0x0233, 0x0239, - 0x023c, 0x023c, - 0x023f, 0x0240, - 0x0250, 0x02af, - 0x0390, 0x0390, - 0x03ac, 0x03ce, - 0x03d0, 0x03d1, - 0x03d5, 0x03d7, - 0x03d9, 0x03d9, - 0x03db, 0x03db, - 0x03dd, 0x03dd, - 0x03df, 0x03df, - 0x03e1, 0x03e1, - 0x03e3, 0x03e3, - 0x03e5, 0x03e5, - 0x03e7, 0x03e7, - 0x03e9, 0x03e9, - 0x03eb, 0x03eb, - 0x03ed, 0x03ed, - 0x03ef, 0x03f3, - 0x03f5, 0x03f5, - 0x03f8, 0x03f8, - 0x03fb, 0x03fc, - 0x0430, 0x045f, - 0x0461, 0x0461, - 0x0463, 0x0463, - 0x0465, 0x0465, - 0x0467, 0x0467, - 0x0469, 0x0469, - 0x046b, 0x046b, - 0x046d, 0x046d, - 0x046f, 0x046f, - 0x0471, 0x0471, - 0x0473, 0x0473, - 0x0475, 0x0475, - 0x0477, 0x0477, - 0x0479, 0x0479, - 0x047b, 0x047b, - 0x047d, 0x047d, - 0x047f, 0x047f, - 0x0481, 
0x0481, - 0x048b, 0x048b, - 0x048d, 0x048d, - 0x048f, 0x048f, - 0x0491, 0x0491, - 0x0493, 0x0493, - 0x0495, 0x0495, - 0x0497, 0x0497, - 0x0499, 0x0499, - 0x049b, 0x049b, - 0x049d, 0x049d, - 0x049f, 0x049f, - 0x04a1, 0x04a1, - 0x04a3, 0x04a3, - 0x04a5, 0x04a5, - 0x04a7, 0x04a7, - 0x04a9, 0x04a9, - 0x04ab, 0x04ab, - 0x04ad, 0x04ad, - 0x04af, 0x04af, - 0x04b1, 0x04b1, - 0x04b3, 0x04b3, - 0x04b5, 0x04b5, - 0x04b7, 0x04b7, - 0x04b9, 0x04b9, - 0x04bb, 0x04bb, - 0x04bd, 0x04bd, - 0x04bf, 0x04bf, - 0x04c2, 0x04c2, - 0x04c4, 0x04c4, - 0x04c6, 0x04c6, - 0x04c8, 0x04c8, - 0x04ca, 0x04ca, - 0x04cc, 0x04cc, - 0x04ce, 0x04ce, - 0x04d1, 0x04d1, - 0x04d3, 0x04d3, - 0x04d5, 0x04d5, - 0x04d7, 0x04d7, - 0x04d9, 0x04d9, - 0x04db, 0x04db, - 0x04dd, 0x04dd, - 0x04df, 0x04df, - 0x04e1, 0x04e1, - 0x04e3, 0x04e3, - 0x04e5, 0x04e5, - 0x04e7, 0x04e7, - 0x04e9, 0x04e9, - 0x04eb, 0x04eb, - 0x04ed, 0x04ed, - 0x04ef, 0x04ef, - 0x04f1, 0x04f1, - 0x04f3, 0x04f3, - 0x04f5, 0x04f5, - 0x04f7, 0x04f7, - 0x04f9, 0x04f9, - 0x0501, 0x0501, - 0x0503, 0x0503, - 0x0505, 0x0505, - 0x0507, 0x0507, - 0x0509, 0x0509, - 0x050b, 0x050b, - 0x050d, 0x050d, - 0x050f, 0x050f, - 0x0561, 0x0587, - 0x1d00, 0x1d2b, - 0x1d62, 0x1d77, - 0x1d79, 0x1d9a, - 0x1e01, 0x1e01, - 0x1e03, 0x1e03, - 0x1e05, 0x1e05, - 0x1e07, 0x1e07, - 0x1e09, 0x1e09, - 0x1e0b, 0x1e0b, - 0x1e0d, 0x1e0d, - 0x1e0f, 0x1e0f, - 0x1e11, 0x1e11, - 0x1e13, 0x1e13, - 0x1e15, 0x1e15, - 0x1e17, 0x1e17, - 0x1e19, 0x1e19, - 0x1e1b, 0x1e1b, - 0x1e1d, 0x1e1d, - 0x1e1f, 0x1e1f, - 0x1e21, 0x1e21, - 0x1e23, 0x1e23, - 0x1e25, 0x1e25, - 0x1e27, 0x1e27, - 0x1e29, 0x1e29, - 0x1e2b, 0x1e2b, - 0x1e2d, 0x1e2d, - 0x1e2f, 0x1e2f, - 0x1e31, 0x1e31, - 0x1e33, 0x1e33, - 0x1e35, 0x1e35, - 0x1e37, 0x1e37, - 0x1e39, 0x1e39, - 0x1e3b, 0x1e3b, - 0x1e3d, 0x1e3d, - 0x1e3f, 0x1e3f, - 0x1e41, 0x1e41, - 0x1e43, 0x1e43, - 0x1e45, 0x1e45, - 0x1e47, 0x1e47, - 0x1e49, 0x1e49, - 0x1e4b, 0x1e4b, - 0x1e4d, 0x1e4d, - 0x1e4f, 0x1e4f, - 0x1e51, 0x1e51, - 0x1e53, 0x1e53, - 0x1e55, 0x1e55, - 0x1e57, 
0x1e57, - 0x1e59, 0x1e59, - 0x1e5b, 0x1e5b, - 0x1e5d, 0x1e5d, - 0x1e5f, 0x1e5f, - 0x1e61, 0x1e61, - 0x1e63, 0x1e63, - 0x1e65, 0x1e65, - 0x1e67, 0x1e67, - 0x1e69, 0x1e69, - 0x1e6b, 0x1e6b, - 0x1e6d, 0x1e6d, - 0x1e6f, 0x1e6f, - 0x1e71, 0x1e71, - 0x1e73, 0x1e73, - 0x1e75, 0x1e75, - 0x1e77, 0x1e77, - 0x1e79, 0x1e79, - 0x1e7b, 0x1e7b, - 0x1e7d, 0x1e7d, - 0x1e7f, 0x1e7f, - 0x1e81, 0x1e81, - 0x1e83, 0x1e83, - 0x1e85, 0x1e85, - 0x1e87, 0x1e87, - 0x1e89, 0x1e89, - 0x1e8b, 0x1e8b, - 0x1e8d, 0x1e8d, - 0x1e8f, 0x1e8f, - 0x1e91, 0x1e91, - 0x1e93, 0x1e93, - 0x1e95, 0x1e9b, - 0x1ea1, 0x1ea1, - 0x1ea3, 0x1ea3, - 0x1ea5, 0x1ea5, - 0x1ea7, 0x1ea7, - 0x1ea9, 0x1ea9, - 0x1eab, 0x1eab, - 0x1ead, 0x1ead, - 0x1eaf, 0x1eaf, - 0x1eb1, 0x1eb1, - 0x1eb3, 0x1eb3, - 0x1eb5, 0x1eb5, - 0x1eb7, 0x1eb7, - 0x1eb9, 0x1eb9, - 0x1ebb, 0x1ebb, - 0x1ebd, 0x1ebd, - 0x1ebf, 0x1ebf, - 0x1ec1, 0x1ec1, - 0x1ec3, 0x1ec3, - 0x1ec5, 0x1ec5, - 0x1ec7, 0x1ec7, - 0x1ec9, 0x1ec9, - 0x1ecb, 0x1ecb, - 0x1ecd, 0x1ecd, - 0x1ecf, 0x1ecf, - 0x1ed1, 0x1ed1, - 0x1ed3, 0x1ed3, - 0x1ed5, 0x1ed5, - 0x1ed7, 0x1ed7, - 0x1ed9, 0x1ed9, - 0x1edb, 0x1edb, - 0x1edd, 0x1edd, - 0x1edf, 0x1edf, - 0x1ee1, 0x1ee1, - 0x1ee3, 0x1ee3, - 0x1ee5, 0x1ee5, - 0x1ee7, 0x1ee7, - 0x1ee9, 0x1ee9, - 0x1eeb, 0x1eeb, - 0x1eed, 0x1eed, - 0x1eef, 0x1eef, - 0x1ef1, 0x1ef1, - 0x1ef3, 0x1ef3, - 0x1ef5, 0x1ef5, - 0x1ef7, 0x1ef7, - 0x1ef9, 0x1ef9, - 0x1f00, 0x1f07, - 0x1f10, 0x1f15, - 0x1f20, 0x1f27, - 0x1f30, 0x1f37, - 0x1f40, 0x1f45, - 0x1f50, 0x1f57, - 0x1f60, 0x1f67, - 0x1f70, 0x1f7d, - 0x1f80, 0x1f87, - 0x1f90, 0x1f97, - 0x1fa0, 0x1fa7, - 0x1fb0, 0x1fb4, - 0x1fb6, 0x1fb7, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fc7, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fd7, - 0x1fe0, 0x1fe7, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ff7, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x210a, 0x210a, - 0x210e, 0x210f, - 0x2113, 0x2113, - 0x212f, 0x212f, - 0x2134, 0x2134, - 0x2139, 0x2139, - 0x213c, 0x213d, - 0x2146, 0x2149, - 0x2c30, 0x2c5e, - 0x2c81, 0x2c81, - 0x2c83, 0x2c83, - 0x2c85, 
0x2c85, - 0x2c87, 0x2c87, - 0x2c89, 0x2c89, - 0x2c8b, 0x2c8b, - 0x2c8d, 0x2c8d, - 0x2c8f, 0x2c8f, - 0x2c91, 0x2c91, - 0x2c93, 0x2c93, - 0x2c95, 0x2c95, - 0x2c97, 0x2c97, - 0x2c99, 0x2c99, - 0x2c9b, 0x2c9b, - 0x2c9d, 0x2c9d, - 0x2c9f, 0x2c9f, - 0x2ca1, 0x2ca1, - 0x2ca3, 0x2ca3, - 0x2ca5, 0x2ca5, - 0x2ca7, 0x2ca7, - 0x2ca9, 0x2ca9, - 0x2cab, 0x2cab, - 0x2cad, 0x2cad, - 0x2caf, 0x2caf, - 0x2cb1, 0x2cb1, - 0x2cb3, 0x2cb3, - 0x2cb5, 0x2cb5, - 0x2cb7, 0x2cb7, - 0x2cb9, 0x2cb9, - 0x2cbb, 0x2cbb, - 0x2cbd, 0x2cbd, - 0x2cbf, 0x2cbf, - 0x2cc1, 0x2cc1, - 0x2cc3, 0x2cc3, - 0x2cc5, 0x2cc5, - 0x2cc7, 0x2cc7, - 0x2cc9, 0x2cc9, - 0x2ccb, 0x2ccb, - 0x2ccd, 0x2ccd, - 0x2ccf, 0x2ccf, - 0x2cd1, 0x2cd1, - 0x2cd3, 0x2cd3, - 0x2cd5, 0x2cd5, - 0x2cd7, 0x2cd7, - 0x2cd9, 0x2cd9, - 0x2cdb, 0x2cdb, - 0x2cdd, 0x2cdd, - 0x2cdf, 0x2cdf, - 0x2ce1, 0x2ce1, - 0x2ce3, 0x2ce4, - 0x2d00, 0x2d25, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xff41, 0xff5a, - 0x10428, 0x1044f, - 0x1d41a, 0x1d433, - 0x1d44e, 0x1d454, - 0x1d456, 0x1d467, - 0x1d482, 0x1d49b, - 0x1d4b6, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d4cf, - 0x1d4ea, 0x1d503, - 0x1d51e, 0x1d537, - 0x1d552, 0x1d56b, - 0x1d586, 0x1d59f, - 0x1d5ba, 0x1d5d3, - 0x1d5ee, 0x1d607, - 0x1d622, 0x1d63b, - 0x1d656, 0x1d66f, - 0x1d68a, 0x1d6a5, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6e1, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d71b, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d755, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d78f, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9 -}; /* CR_Ll */ - -/* 'Lm': General Category */ -static const OnigCodePoint CR_Lm[] = { - 26, - 0x02b0, 0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x037a, 0x037a, - 0x0559, 0x0559, - 0x0640, 0x0640, - 0x06e5, 0x06e6, - 0x0e46, 0x0e46, - 0x0ec6, 0x0ec6, - 0x10fc, 0x10fc, - 0x17d7, 0x17d7, - 0x1843, 0x1843, - 0x1d2c, 0x1d61, - 0x1d78, 0x1d78, - 0x1d9b, 0x1dbf, - 0x2090, 0x2094, - 0x2d6f, 0x2d6f, - 0x3005, 0x3005, - 0x3031, 0x3035, - 0x303b, 0x303b, - 0x309d, 0x309e, - 0x30fc, 0x30fe, - 
0xa015, 0xa015, - 0xff70, 0xff70, - 0xff9e, 0xff9f -}; /* CR_Lm */ - -/* 'Lo': General Category */ -static const OnigCodePoint CR_Lo[] = { - 245, - 0x01bb, 0x01bb, - 0x01c0, 0x01c3, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0621, 0x063a, - 0x0641, 0x064a, - 0x066e, 0x066f, - 0x0671, 0x06d3, - 0x06d5, 0x06d5, - 0x06ee, 0x06ef, - 0x06fa, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x0710, - 0x0712, 0x072f, - 0x074d, 0x076d, - 0x0780, 0x07a5, - 0x07b1, 0x07b1, - 0x0904, 0x0939, - 0x093d, 0x093d, - 0x0950, 0x0950, - 0x0958, 0x0961, - 0x097d, 0x097d, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bd, 0x09bd, - 0x09ce, 0x09ce, - 0x09dc, 0x09dd, - 0x09df, 0x09e1, - 0x09f0, 0x09f1, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a72, 0x0a74, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abd, 0x0abd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae1, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3d, 0x0b3d, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b71, 0x0b71, - 0x0b83, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb9, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c60, 0x0c61, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbd, 0x0cbd, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d60, 0x0d61, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0e01, 0x0e30, - 0x0e32, 0x0e33, - 0x0e40, 0x0e45, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, 
- 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb0, - 0x0eb2, 0x0eb3, - 0x0ebd, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0edc, 0x0edd, - 0x0f00, 0x0f00, - 0x0f40, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f88, 0x0f8b, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x1050, 0x1055, - 0x10d0, 0x10fa, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x1380, 0x138f, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x1700, 0x170c, - 0x170e, 0x1711, - 0x1720, 0x1731, - 0x1740, 0x1751, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1780, 0x17b3, - 0x17dc, 0x17dc, - 0x1820, 0x1842, - 0x1844, 0x1877, - 0x1880, 0x18a8, - 0x1900, 0x191c, - 0x1950, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19a9, - 0x19c1, 0x19c7, - 0x1a00, 0x1a16, - 0x2135, 0x2138, - 0x2d30, 0x2d65, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x3006, 0x3006, - 0x303c, 0x303c, - 0x3041, 0x3096, - 0x309f, 0x309f, - 0x30a1, 0x30fa, - 0x30ff, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3400, 0x4db5, - 0x4e00, 0x9fbb, - 0xa000, 0xa014, - 0xa016, 0xa48c, - 0xa800, 0xa801, - 0xa803, 0xa805, - 0xa807, 0xa80a, - 0xa80c, 0xa822, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfa70, 0xfad9, - 0xfb1d, 0xfb1d, - 0xfb1f, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff66, 0xff6f, - 
0xff71, 0xff9d, - 0xffa0, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10300, 0x1031e, - 0x10330, 0x10349, - 0x10380, 0x1039d, - 0x103a0, 0x103c3, - 0x103c8, 0x103cf, - 0x10450, 0x1049d, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x10a00, 0x10a00, - 0x10a10, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d -}; /* CR_Lo */ - -/* 'Lt': General Category */ -static const OnigCodePoint CR_Lt[] = { - 10, - 0x01c5, 0x01c5, - 0x01c8, 0x01c8, - 0x01cb, 0x01cb, - 0x01f2, 0x01f2, - 0x1f88, 0x1f8f, - 0x1f98, 0x1f9f, - 0x1fa8, 0x1faf, - 0x1fbc, 0x1fbc, - 0x1fcc, 0x1fcc, - 0x1ffc, 0x1ffc -}; /* CR_Lt */ - -/* 'Lu': General Category */ -static const OnigCodePoint CR_Lu[] = { - 476, - 0x0041, 0x005a, - 0x00c0, 0x00d6, - 0x00d8, 0x00de, - 0x0100, 0x0100, - 0x0102, 0x0102, - 0x0104, 0x0104, - 0x0106, 0x0106, - 0x0108, 0x0108, - 0x010a, 0x010a, - 0x010c, 0x010c, - 0x010e, 0x010e, - 0x0110, 0x0110, - 0x0112, 0x0112, - 0x0114, 0x0114, - 0x0116, 0x0116, - 0x0118, 0x0118, - 0x011a, 0x011a, - 0x011c, 0x011c, - 0x011e, 0x011e, - 0x0120, 0x0120, - 0x0122, 0x0122, - 0x0124, 0x0124, - 0x0126, 0x0126, - 0x0128, 0x0128, - 0x012a, 0x012a, - 0x012c, 0x012c, - 0x012e, 0x012e, - 0x0130, 0x0130, - 0x0132, 0x0132, - 0x0134, 0x0134, - 0x0136, 0x0136, - 0x0139, 0x0139, - 0x013b, 0x013b, - 0x013d, 0x013d, - 0x013f, 0x013f, - 0x0141, 0x0141, - 0x0143, 0x0143, - 0x0145, 0x0145, - 0x0147, 0x0147, - 0x014a, 0x014a, - 0x014c, 0x014c, - 0x014e, 0x014e, - 0x0150, 0x0150, - 0x0152, 0x0152, - 0x0154, 0x0154, - 0x0156, 0x0156, - 0x0158, 0x0158, - 0x015a, 0x015a, - 0x015c, 0x015c, - 0x015e, 0x015e, - 0x0160, 0x0160, - 0x0162, 0x0162, - 0x0164, 0x0164, - 0x0166, 0x0166, - 0x0168, 0x0168, - 0x016a, 0x016a, - 0x016c, 0x016c, 
- 0x016e, 0x016e, - 0x0170, 0x0170, - 0x0172, 0x0172, - 0x0174, 0x0174, - 0x0176, 0x0176, - 0x0178, 0x0179, - 0x017b, 0x017b, - 0x017d, 0x017d, - 0x0181, 0x0182, - 0x0184, 0x0184, - 0x0186, 0x0187, - 0x0189, 0x018b, - 0x018e, 0x0191, - 0x0193, 0x0194, - 0x0196, 0x0198, - 0x019c, 0x019d, - 0x019f, 0x01a0, - 0x01a2, 0x01a2, - 0x01a4, 0x01a4, - 0x01a6, 0x01a7, - 0x01a9, 0x01a9, - 0x01ac, 0x01ac, - 0x01ae, 0x01af, - 0x01b1, 0x01b3, - 0x01b5, 0x01b5, - 0x01b7, 0x01b8, - 0x01bc, 0x01bc, - 0x01c4, 0x01c4, - 0x01c7, 0x01c7, - 0x01ca, 0x01ca, - 0x01cd, 0x01cd, - 0x01cf, 0x01cf, - 0x01d1, 0x01d1, - 0x01d3, 0x01d3, - 0x01d5, 0x01d5, - 0x01d7, 0x01d7, - 0x01d9, 0x01d9, - 0x01db, 0x01db, - 0x01de, 0x01de, - 0x01e0, 0x01e0, - 0x01e2, 0x01e2, - 0x01e4, 0x01e4, - 0x01e6, 0x01e6, - 0x01e8, 0x01e8, - 0x01ea, 0x01ea, - 0x01ec, 0x01ec, - 0x01ee, 0x01ee, - 0x01f1, 0x01f1, - 0x01f4, 0x01f4, - 0x01f6, 0x01f8, - 0x01fa, 0x01fa, - 0x01fc, 0x01fc, - 0x01fe, 0x01fe, - 0x0200, 0x0200, - 0x0202, 0x0202, - 0x0204, 0x0204, - 0x0206, 0x0206, - 0x0208, 0x0208, - 0x020a, 0x020a, - 0x020c, 0x020c, - 0x020e, 0x020e, - 0x0210, 0x0210, - 0x0212, 0x0212, - 0x0214, 0x0214, - 0x0216, 0x0216, - 0x0218, 0x0218, - 0x021a, 0x021a, - 0x021c, 0x021c, - 0x021e, 0x021e, - 0x0220, 0x0220, - 0x0222, 0x0222, - 0x0224, 0x0224, - 0x0226, 0x0226, - 0x0228, 0x0228, - 0x022a, 0x022a, - 0x022c, 0x022c, - 0x022e, 0x022e, - 0x0230, 0x0230, - 0x0232, 0x0232, - 0x023a, 0x023b, - 0x023d, 0x023e, - 0x0241, 0x0241, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x038f, - 0x0391, 0x03a1, - 0x03a3, 0x03ab, - 0x03d2, 0x03d4, - 0x03d8, 0x03d8, - 0x03da, 0x03da, - 0x03dc, 0x03dc, - 0x03de, 0x03de, - 0x03e0, 0x03e0, - 0x03e2, 0x03e2, - 0x03e4, 0x03e4, - 0x03e6, 0x03e6, - 0x03e8, 0x03e8, - 0x03ea, 0x03ea, - 0x03ec, 0x03ec, - 0x03ee, 0x03ee, - 0x03f4, 0x03f4, - 0x03f7, 0x03f7, - 0x03f9, 0x03fa, - 0x03fd, 0x042f, - 0x0460, 0x0460, - 0x0462, 0x0462, - 0x0464, 0x0464, - 0x0466, 0x0466, - 0x0468, 0x0468, - 0x046a, 0x046a, - 
0x046c, 0x046c, - 0x046e, 0x046e, - 0x0470, 0x0470, - 0x0472, 0x0472, - 0x0474, 0x0474, - 0x0476, 0x0476, - 0x0478, 0x0478, - 0x047a, 0x047a, - 0x047c, 0x047c, - 0x047e, 0x047e, - 0x0480, 0x0480, - 0x048a, 0x048a, - 0x048c, 0x048c, - 0x048e, 0x048e, - 0x0490, 0x0490, - 0x0492, 0x0492, - 0x0494, 0x0494, - 0x0496, 0x0496, - 0x0498, 0x0498, - 0x049a, 0x049a, - 0x049c, 0x049c, - 0x049e, 0x049e, - 0x04a0, 0x04a0, - 0x04a2, 0x04a2, - 0x04a4, 0x04a4, - 0x04a6, 0x04a6, - 0x04a8, 0x04a8, - 0x04aa, 0x04aa, - 0x04ac, 0x04ac, - 0x04ae, 0x04ae, - 0x04b0, 0x04b0, - 0x04b2, 0x04b2, - 0x04b4, 0x04b4, - 0x04b6, 0x04b6, - 0x04b8, 0x04b8, - 0x04ba, 0x04ba, - 0x04bc, 0x04bc, - 0x04be, 0x04be, - 0x04c0, 0x04c1, - 0x04c3, 0x04c3, - 0x04c5, 0x04c5, - 0x04c7, 0x04c7, - 0x04c9, 0x04c9, - 0x04cb, 0x04cb, - 0x04cd, 0x04cd, - 0x04d0, 0x04d0, - 0x04d2, 0x04d2, - 0x04d4, 0x04d4, - 0x04d6, 0x04d6, - 0x04d8, 0x04d8, - 0x04da, 0x04da, - 0x04dc, 0x04dc, - 0x04de, 0x04de, - 0x04e0, 0x04e0, - 0x04e2, 0x04e2, - 0x04e4, 0x04e4, - 0x04e6, 0x04e6, - 0x04e8, 0x04e8, - 0x04ea, 0x04ea, - 0x04ec, 0x04ec, - 0x04ee, 0x04ee, - 0x04f0, 0x04f0, - 0x04f2, 0x04f2, - 0x04f4, 0x04f4, - 0x04f6, 0x04f6, - 0x04f8, 0x04f8, - 0x0500, 0x0500, - 0x0502, 0x0502, - 0x0504, 0x0504, - 0x0506, 0x0506, - 0x0508, 0x0508, - 0x050a, 0x050a, - 0x050c, 0x050c, - 0x050e, 0x050e, - 0x0531, 0x0556, - 0x10a0, 0x10c5, - 0x1e00, 0x1e00, - 0x1e02, 0x1e02, - 0x1e04, 0x1e04, - 0x1e06, 0x1e06, - 0x1e08, 0x1e08, - 0x1e0a, 0x1e0a, - 0x1e0c, 0x1e0c, - 0x1e0e, 0x1e0e, - 0x1e10, 0x1e10, - 0x1e12, 0x1e12, - 0x1e14, 0x1e14, - 0x1e16, 0x1e16, - 0x1e18, 0x1e18, - 0x1e1a, 0x1e1a, - 0x1e1c, 0x1e1c, - 0x1e1e, 0x1e1e, - 0x1e20, 0x1e20, - 0x1e22, 0x1e22, - 0x1e24, 0x1e24, - 0x1e26, 0x1e26, - 0x1e28, 0x1e28, - 0x1e2a, 0x1e2a, - 0x1e2c, 0x1e2c, - 0x1e2e, 0x1e2e, - 0x1e30, 0x1e30, - 0x1e32, 0x1e32, - 0x1e34, 0x1e34, - 0x1e36, 0x1e36, - 0x1e38, 0x1e38, - 0x1e3a, 0x1e3a, - 0x1e3c, 0x1e3c, - 0x1e3e, 0x1e3e, - 0x1e40, 0x1e40, - 0x1e42, 0x1e42, - 0x1e44, 0x1e44, - 
0x1e46, 0x1e46, - 0x1e48, 0x1e48, - 0x1e4a, 0x1e4a, - 0x1e4c, 0x1e4c, - 0x1e4e, 0x1e4e, - 0x1e50, 0x1e50, - 0x1e52, 0x1e52, - 0x1e54, 0x1e54, - 0x1e56, 0x1e56, - 0x1e58, 0x1e58, - 0x1e5a, 0x1e5a, - 0x1e5c, 0x1e5c, - 0x1e5e, 0x1e5e, - 0x1e60, 0x1e60, - 0x1e62, 0x1e62, - 0x1e64, 0x1e64, - 0x1e66, 0x1e66, - 0x1e68, 0x1e68, - 0x1e6a, 0x1e6a, - 0x1e6c, 0x1e6c, - 0x1e6e, 0x1e6e, - 0x1e70, 0x1e70, - 0x1e72, 0x1e72, - 0x1e74, 0x1e74, - 0x1e76, 0x1e76, - 0x1e78, 0x1e78, - 0x1e7a, 0x1e7a, - 0x1e7c, 0x1e7c, - 0x1e7e, 0x1e7e, - 0x1e80, 0x1e80, - 0x1e82, 0x1e82, - 0x1e84, 0x1e84, - 0x1e86, 0x1e86, - 0x1e88, 0x1e88, - 0x1e8a, 0x1e8a, - 0x1e8c, 0x1e8c, - 0x1e8e, 0x1e8e, - 0x1e90, 0x1e90, - 0x1e92, 0x1e92, - 0x1e94, 0x1e94, - 0x1ea0, 0x1ea0, - 0x1ea2, 0x1ea2, - 0x1ea4, 0x1ea4, - 0x1ea6, 0x1ea6, - 0x1ea8, 0x1ea8, - 0x1eaa, 0x1eaa, - 0x1eac, 0x1eac, - 0x1eae, 0x1eae, - 0x1eb0, 0x1eb0, - 0x1eb2, 0x1eb2, - 0x1eb4, 0x1eb4, - 0x1eb6, 0x1eb6, - 0x1eb8, 0x1eb8, - 0x1eba, 0x1eba, - 0x1ebc, 0x1ebc, - 0x1ebe, 0x1ebe, - 0x1ec0, 0x1ec0, - 0x1ec2, 0x1ec2, - 0x1ec4, 0x1ec4, - 0x1ec6, 0x1ec6, - 0x1ec8, 0x1ec8, - 0x1eca, 0x1eca, - 0x1ecc, 0x1ecc, - 0x1ece, 0x1ece, - 0x1ed0, 0x1ed0, - 0x1ed2, 0x1ed2, - 0x1ed4, 0x1ed4, - 0x1ed6, 0x1ed6, - 0x1ed8, 0x1ed8, - 0x1eda, 0x1eda, - 0x1edc, 0x1edc, - 0x1ede, 0x1ede, - 0x1ee0, 0x1ee0, - 0x1ee2, 0x1ee2, - 0x1ee4, 0x1ee4, - 0x1ee6, 0x1ee6, - 0x1ee8, 0x1ee8, - 0x1eea, 0x1eea, - 0x1eec, 0x1eec, - 0x1eee, 0x1eee, - 0x1ef0, 0x1ef0, - 0x1ef2, 0x1ef2, - 0x1ef4, 0x1ef4, - 0x1ef6, 0x1ef6, - 0x1ef8, 0x1ef8, - 0x1f08, 0x1f0f, - 0x1f18, 0x1f1d, - 0x1f28, 0x1f2f, - 0x1f38, 0x1f3f, - 0x1f48, 0x1f4d, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f5f, - 0x1f68, 0x1f6f, - 0x1fb8, 0x1fbb, - 0x1fc8, 0x1fcb, - 0x1fd8, 0x1fdb, - 0x1fe8, 0x1fec, - 0x1ff8, 0x1ffb, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210b, 0x210d, - 0x2110, 0x2112, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x2130, 0x2131, - 
0x2133, 0x2133, - 0x213e, 0x213f, - 0x2145, 0x2145, - 0x2c00, 0x2c2e, - 0x2c80, 0x2c80, - 0x2c82, 0x2c82, - 0x2c84, 0x2c84, - 0x2c86, 0x2c86, - 0x2c88, 0x2c88, - 0x2c8a, 0x2c8a, - 0x2c8c, 0x2c8c, - 0x2c8e, 0x2c8e, - 0x2c90, 0x2c90, - 0x2c92, 0x2c92, - 0x2c94, 0x2c94, - 0x2c96, 0x2c96, - 0x2c98, 0x2c98, - 0x2c9a, 0x2c9a, - 0x2c9c, 0x2c9c, - 0x2c9e, 0x2c9e, - 0x2ca0, 0x2ca0, - 0x2ca2, 0x2ca2, - 0x2ca4, 0x2ca4, - 0x2ca6, 0x2ca6, - 0x2ca8, 0x2ca8, - 0x2caa, 0x2caa, - 0x2cac, 0x2cac, - 0x2cae, 0x2cae, - 0x2cb0, 0x2cb0, - 0x2cb2, 0x2cb2, - 0x2cb4, 0x2cb4, - 0x2cb6, 0x2cb6, - 0x2cb8, 0x2cb8, - 0x2cba, 0x2cba, - 0x2cbc, 0x2cbc, - 0x2cbe, 0x2cbe, - 0x2cc0, 0x2cc0, - 0x2cc2, 0x2cc2, - 0x2cc4, 0x2cc4, - 0x2cc6, 0x2cc6, - 0x2cc8, 0x2cc8, - 0x2cca, 0x2cca, - 0x2ccc, 0x2ccc, - 0x2cce, 0x2cce, - 0x2cd0, 0x2cd0, - 0x2cd2, 0x2cd2, - 0x2cd4, 0x2cd4, - 0x2cd6, 0x2cd6, - 0x2cd8, 0x2cd8, - 0x2cda, 0x2cda, - 0x2cdc, 0x2cdc, - 0x2cde, 0x2cde, - 0x2ce0, 0x2ce0, - 0x2ce2, 0x2ce2, - 0xff21, 0xff3a, - 0x10400, 0x10427, - 0x1d400, 0x1d419, - 0x1d434, 0x1d44d, - 0x1d468, 0x1d481, - 0x1d49c, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b5, - 0x1d4d0, 0x1d4e9, - 0x1d504, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d538, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d56c, 0x1d585, - 0x1d5a0, 0x1d5b9, - 0x1d5d4, 0x1d5ed, - 0x1d608, 0x1d621, - 0x1d63c, 0x1d655, - 0x1d670, 0x1d689, - 0x1d6a8, 0x1d6c0, - 0x1d6e2, 0x1d6fa, - 0x1d71c, 0x1d734, - 0x1d756, 0x1d76e, - 0x1d790, 0x1d7a8 -}; /* CR_Lu */ - -/* 'M': Major Category */ -static const OnigCodePoint CR_M[] = { - 133, - 0x0300, 0x036f, - 0x0483, 0x0486, - 0x0488, 0x0489, - 0x0591, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c5, - 0x05c7, 0x05c7, - 0x0610, 0x0615, - 0x064b, 0x065e, - 0x0670, 0x0670, - 0x06d6, 0x06dc, - 0x06de, 0x06e4, - 0x06e7, 0x06e8, - 0x06ea, 0x06ed, - 0x0711, 
0x0711, - 0x0730, 0x074a, - 0x07a6, 0x07b0, - 0x0901, 0x0903, - 0x093c, 0x093c, - 0x093e, 0x094d, - 0x0951, 0x0954, - 0x0962, 0x0963, - 0x0981, 0x0983, - 0x09bc, 0x09bc, - 0x09be, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09cd, - 0x09d7, 0x09d7, - 0x09e2, 0x09e3, - 0x0a01, 0x0a03, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a70, 0x0a71, - 0x0a81, 0x0a83, - 0x0abc, 0x0abc, - 0x0abe, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ae2, 0x0ae3, - 0x0b01, 0x0b03, - 0x0b3c, 0x0b3c, - 0x0b3e, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b82, 0x0b82, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0c01, 0x0c03, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c82, 0x0c83, - 0x0cbc, 0x0cbc, - 0x0cbe, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0d02, 0x0d03, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d82, 0x0d83, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, - 0x0e31, 0x0e31, - 0x0e34, 0x0e3a, - 0x0e47, 0x0e4e, - 0x0eb1, 0x0eb1, - 0x0eb4, 0x0eb9, - 0x0ebb, 0x0ebc, - 0x0ec8, 0x0ecd, - 0x0f18, 0x0f19, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f3e, 0x0f3f, - 0x0f71, 0x0f84, - 0x0f86, 0x0f87, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1056, 0x1059, - 0x135f, 0x135f, - 0x1712, 0x1714, - 0x1732, 0x1734, - 0x1752, 0x1753, - 0x1772, 0x1773, - 0x17b6, 0x17d3, - 0x17dd, 0x17dd, - 0x180b, 0x180d, - 0x18a9, 0x18a9, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x19b0, 0x19c0, - 0x19c8, 0x19c9, - 0x1a17, 0x1a1b, - 0x1dc0, 0x1dc3, - 0x20d0, 0x20eb, - 0x302a, 0x302f, - 0x3099, 0x309a, - 0xa802, 0xa802, - 0xa806, 0xa806, - 0xa80b, 0xa80b, - 0xa823, 0xa827, - 0xfb1e, 0xfb1e, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0x10a01, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a0f, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a3f, - 0x1d165, 0x1d169, 
- 0x1d16d, 0x1d172, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d242, 0x1d244, - 0xe0100, 0xe01ef -}; /* CR_M */ - -/* 'Mc': General Category */ -static const OnigCodePoint CR_Mc[] = { - 63, - 0x0903, 0x0903, - 0x093e, 0x0940, - 0x0949, 0x094c, - 0x0982, 0x0983, - 0x09be, 0x09c0, - 0x09c7, 0x09c8, - 0x09cb, 0x09cc, - 0x09d7, 0x09d7, - 0x0a03, 0x0a03, - 0x0a3e, 0x0a40, - 0x0a83, 0x0a83, - 0x0abe, 0x0ac0, - 0x0ac9, 0x0ac9, - 0x0acb, 0x0acc, - 0x0b02, 0x0b03, - 0x0b3e, 0x0b3e, - 0x0b40, 0x0b40, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4c, - 0x0b57, 0x0b57, - 0x0bbe, 0x0bbf, - 0x0bc1, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcc, - 0x0bd7, 0x0bd7, - 0x0c01, 0x0c03, - 0x0c41, 0x0c44, - 0x0c82, 0x0c83, - 0x0cbe, 0x0cbe, - 0x0cc0, 0x0cc4, - 0x0cc7, 0x0cc8, - 0x0cca, 0x0ccb, - 0x0cd5, 0x0cd6, - 0x0d02, 0x0d03, - 0x0d3e, 0x0d40, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4c, - 0x0d57, 0x0d57, - 0x0d82, 0x0d83, - 0x0dcf, 0x0dd1, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, - 0x0f3e, 0x0f3f, - 0x0f7f, 0x0f7f, - 0x102c, 0x102c, - 0x1031, 0x1031, - 0x1038, 0x1038, - 0x1056, 0x1057, - 0x17b6, 0x17b6, - 0x17be, 0x17c5, - 0x17c7, 0x17c8, - 0x1923, 0x1926, - 0x1929, 0x192b, - 0x1930, 0x1931, - 0x1933, 0x1938, - 0x19b0, 0x19c0, - 0x19c8, 0x19c9, - 0x1a19, 0x1a1b, - 0xa802, 0xa802, - 0xa823, 0xa824, - 0xa827, 0xa827, - 0x1d165, 0x1d166, - 0x1d16d, 0x1d172 -}; /* CR_Mc */ - -/* 'Me': General Category */ -static const OnigCodePoint CR_Me[] = { - 4, - 0x0488, 0x0489, - 0x06de, 0x06de, - 0x20dd, 0x20e0, - 0x20e2, 0x20e4 -}; /* CR_Me */ - -/* 'Mn': General Category */ -static const OnigCodePoint CR_Mn[] = { - 124, - 0x0300, 0x036f, - 0x0483, 0x0486, - 0x0591, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c5, - 0x05c7, 0x05c7, - 0x0610, 0x0615, - 0x064b, 0x065e, - 0x0670, 0x0670, - 0x06d6, 0x06dc, - 0x06df, 0x06e4, - 0x06e7, 0x06e8, - 0x06ea, 0x06ed, - 0x0711, 0x0711, - 0x0730, 0x074a, - 0x07a6, 0x07b0, - 0x0901, 0x0902, - 0x093c, 0x093c, - 0x0941, 0x0948, - 0x094d, 
0x094d, - 0x0951, 0x0954, - 0x0962, 0x0963, - 0x0981, 0x0981, - 0x09bc, 0x09bc, - 0x09c1, 0x09c4, - 0x09cd, 0x09cd, - 0x09e2, 0x09e3, - 0x0a01, 0x0a02, - 0x0a3c, 0x0a3c, - 0x0a41, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a70, 0x0a71, - 0x0a81, 0x0a82, - 0x0abc, 0x0abc, - 0x0ac1, 0x0ac5, - 0x0ac7, 0x0ac8, - 0x0acd, 0x0acd, - 0x0ae2, 0x0ae3, - 0x0b01, 0x0b01, - 0x0b3c, 0x0b3c, - 0x0b3f, 0x0b3f, - 0x0b41, 0x0b43, - 0x0b4d, 0x0b4d, - 0x0b56, 0x0b56, - 0x0b82, 0x0b82, - 0x0bc0, 0x0bc0, - 0x0bcd, 0x0bcd, - 0x0c3e, 0x0c40, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0cbc, 0x0cbc, - 0x0cbf, 0x0cbf, - 0x0cc6, 0x0cc6, - 0x0ccc, 0x0ccd, - 0x0d41, 0x0d43, - 0x0d4d, 0x0d4d, - 0x0dca, 0x0dca, - 0x0dd2, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0e31, 0x0e31, - 0x0e34, 0x0e3a, - 0x0e47, 0x0e4e, - 0x0eb1, 0x0eb1, - 0x0eb4, 0x0eb9, - 0x0ebb, 0x0ebc, - 0x0ec8, 0x0ecd, - 0x0f18, 0x0f19, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f71, 0x0f7e, - 0x0f80, 0x0f84, - 0x0f86, 0x0f87, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, - 0x102d, 0x1030, - 0x1032, 0x1032, - 0x1036, 0x1037, - 0x1039, 0x1039, - 0x1058, 0x1059, - 0x135f, 0x135f, - 0x1712, 0x1714, - 0x1732, 0x1734, - 0x1752, 0x1753, - 0x1772, 0x1773, - 0x17b7, 0x17bd, - 0x17c6, 0x17c6, - 0x17c9, 0x17d3, - 0x17dd, 0x17dd, - 0x180b, 0x180d, - 0x18a9, 0x18a9, - 0x1920, 0x1922, - 0x1927, 0x1928, - 0x1932, 0x1932, - 0x1939, 0x193b, - 0x1a17, 0x1a18, - 0x1dc0, 0x1dc3, - 0x20d0, 0x20dc, - 0x20e1, 0x20e1, - 0x20e5, 0x20eb, - 0x302a, 0x302f, - 0x3099, 0x309a, - 0xa806, 0xa806, - 0xa80b, 0xa80b, - 0xa825, 0xa826, - 0xfb1e, 0xfb1e, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0x10a01, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a0f, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a3f, - 0x1d167, 0x1d169, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d242, 0x1d244, - 0xe0100, 0xe01ef -}; /* CR_Mn */ - -/* 'N': Major Category */ -static const OnigCodePoint CR_N[] = { - 53, - 0x0030, 0x0039, - 0x00b2, 0x00b3, - 
0x00b9, 0x00b9, - 0x00bc, 0x00be, - 0x0660, 0x0669, - 0x06f0, 0x06f9, - 0x0966, 0x096f, - 0x09e6, 0x09ef, - 0x09f4, 0x09f9, - 0x0a66, 0x0a6f, - 0x0ae6, 0x0aef, - 0x0b66, 0x0b6f, - 0x0be6, 0x0bf2, - 0x0c66, 0x0c6f, - 0x0ce6, 0x0cef, - 0x0d66, 0x0d6f, - 0x0e50, 0x0e59, - 0x0ed0, 0x0ed9, - 0x0f20, 0x0f33, - 0x1040, 0x1049, - 0x1369, 0x137c, - 0x16ee, 0x16f0, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x1810, 0x1819, - 0x1946, 0x194f, - 0x19d0, 0x19d9, - 0x2070, 0x2070, - 0x2074, 0x2079, - 0x2080, 0x2089, - 0x2153, 0x2183, - 0x2460, 0x249b, - 0x24ea, 0x24ff, - 0x2776, 0x2793, - 0x2cfd, 0x2cfd, - 0x3007, 0x3007, - 0x3021, 0x3029, - 0x3038, 0x303a, - 0x3192, 0x3195, - 0x3220, 0x3229, - 0x3251, 0x325f, - 0x3280, 0x3289, - 0x32b1, 0x32bf, - 0xff10, 0xff19, - 0x10107, 0x10133, - 0x10140, 0x10178, - 0x1018a, 0x1018a, - 0x10320, 0x10323, - 0x1034a, 0x1034a, - 0x103d1, 0x103d5, - 0x104a0, 0x104a9, - 0x10a40, 0x10a47, - 0x1d7ce, 0x1d7ff -}; /* CR_N */ - -/* 'Nd': General Category */ -static const OnigCodePoint CR_Nd[] = { - 23, - 0x0030, 0x0039, - 0x0660, 0x0669, - 0x06f0, 0x06f9, - 0x0966, 0x096f, - 0x09e6, 0x09ef, - 0x0a66, 0x0a6f, - 0x0ae6, 0x0aef, - 0x0b66, 0x0b6f, - 0x0be6, 0x0bef, - 0x0c66, 0x0c6f, - 0x0ce6, 0x0cef, - 0x0d66, 0x0d6f, - 0x0e50, 0x0e59, - 0x0ed0, 0x0ed9, - 0x0f20, 0x0f29, - 0x1040, 0x1049, - 0x17e0, 0x17e9, - 0x1810, 0x1819, - 0x1946, 0x194f, - 0x19d0, 0x19d9, - 0xff10, 0xff19, - 0x104a0, 0x104a9, - 0x1d7ce, 0x1d7ff -}; /* CR_Nd */ - -/* 'Nl': General Category */ -static const OnigCodePoint CR_Nl[] = { - 8, - 0x16ee, 0x16f0, - 0x2160, 0x2183, - 0x3007, 0x3007, - 0x3021, 0x3029, - 0x3038, 0x303a, - 0x10140, 0x10174, - 0x1034a, 0x1034a, - 0x103d1, 0x103d5 -}; /* CR_Nl */ - -/* 'No': General Category */ -static const OnigCodePoint CR_No[] = { - 26, - 0x00b2, 0x00b3, - 0x00b9, 0x00b9, - 0x00bc, 0x00be, - 0x09f4, 0x09f9, - 0x0bf0, 0x0bf2, - 0x0f2a, 0x0f33, - 0x1369, 0x137c, - 0x17f0, 0x17f9, - 0x2070, 0x2070, - 0x2074, 0x2079, - 0x2080, 0x2089, - 0x2153, 0x215f, - 
0x2460, 0x249b, - 0x24ea, 0x24ff, - 0x2776, 0x2793, - 0x2cfd, 0x2cfd, - 0x3192, 0x3195, - 0x3220, 0x3229, - 0x3251, 0x325f, - 0x3280, 0x3289, - 0x32b1, 0x32bf, - 0x10107, 0x10133, - 0x10175, 0x10178, - 0x1018a, 0x1018a, - 0x10320, 0x10323, - 0x10a40, 0x10a47 -}; /* CR_No */ - -/* 'P': Major Category */ -static const OnigCodePoint CR_P[] = { - 96, - 0x0021, 0x0023, - 0x0025, 0x002a, - 0x002c, 0x002f, - 0x003a, 0x003b, - 0x003f, 0x0040, - 0x005b, 0x005d, - 0x005f, 0x005f, - 0x007b, 0x007b, - 0x007d, 0x007d, - 0x00a1, 0x00a1, - 0x00ab, 0x00ab, - 0x00b7, 0x00b7, - 0x00bb, 0x00bb, - 0x00bf, 0x00bf, - 0x037e, 0x037e, - 0x0387, 0x0387, - 0x055a, 0x055f, - 0x0589, 0x058a, - 0x05be, 0x05be, - 0x05c0, 0x05c0, - 0x05c3, 0x05c3, - 0x05c6, 0x05c6, - 0x05f3, 0x05f4, - 0x060c, 0x060d, - 0x061b, 0x061b, - 0x061e, 0x061f, - 0x066a, 0x066d, - 0x06d4, 0x06d4, - 0x0700, 0x070d, - 0x0964, 0x0965, - 0x0970, 0x0970, - 0x0df4, 0x0df4, - 0x0e4f, 0x0e4f, - 0x0e5a, 0x0e5b, - 0x0f04, 0x0f12, - 0x0f3a, 0x0f3d, - 0x0f85, 0x0f85, - 0x0fd0, 0x0fd1, - 0x104a, 0x104f, - 0x10fb, 0x10fb, - 0x1361, 0x1368, - 0x166d, 0x166e, - 0x169b, 0x169c, - 0x16eb, 0x16ed, - 0x1735, 0x1736, - 0x17d4, 0x17d6, - 0x17d8, 0x17da, - 0x1800, 0x180a, - 0x1944, 0x1945, - 0x19de, 0x19df, - 0x1a1e, 0x1a1f, - 0x2010, 0x2027, - 0x2030, 0x2043, - 0x2045, 0x2051, - 0x2053, 0x205e, - 0x207d, 0x207e, - 0x208d, 0x208e, - 0x2329, 0x232a, - 0x23b4, 0x23b6, - 0x2768, 0x2775, - 0x27c5, 0x27c6, - 0x27e6, 0x27eb, - 0x2983, 0x2998, - 0x29d8, 0x29db, - 0x29fc, 0x29fd, - 0x2cf9, 0x2cfc, - 0x2cfe, 0x2cff, - 0x2e00, 0x2e17, - 0x2e1c, 0x2e1d, - 0x3001, 0x3003, - 0x3008, 0x3011, - 0x3014, 0x301f, - 0x3030, 0x3030, - 0x303d, 0x303d, - 0x30a0, 0x30a0, - 0x30fb, 0x30fb, - 0xfd3e, 0xfd3f, - 0xfe10, 0xfe19, - 0xfe30, 0xfe52, - 0xfe54, 0xfe61, - 0xfe63, 0xfe63, - 0xfe68, 0xfe68, - 0xfe6a, 0xfe6b, - 0xff01, 0xff03, - 0xff05, 0xff0a, - 0xff0c, 0xff0f, - 0xff1a, 0xff1b, - 0xff1f, 0xff20, - 0xff3b, 0xff3d, - 0xff3f, 0xff3f, - 0xff5b, 0xff5b, - 0xff5d, 
0xff5d, - 0xff5f, 0xff65, - 0x10100, 0x10101, - 0x1039f, 0x1039f, - 0x10a50, 0x10a58 -}; /* CR_P */ - -/* 'Pc': General Category */ -static const OnigCodePoint CR_Pc[] = { - 6, - 0x005f, 0x005f, - 0x203f, 0x2040, - 0x2054, 0x2054, - 0xfe33, 0xfe34, - 0xfe4d, 0xfe4f, - 0xff3f, 0xff3f -}; /* CR_Pc */ - -/* 'Pd': General Category */ -static const OnigCodePoint CR_Pd[] = { - 12, - 0x002d, 0x002d, - 0x058a, 0x058a, - 0x1806, 0x1806, - 0x2010, 0x2015, - 0x2e17, 0x2e17, - 0x301c, 0x301c, - 0x3030, 0x3030, - 0x30a0, 0x30a0, - 0xfe31, 0xfe32, - 0xfe58, 0xfe58, - 0xfe63, 0xfe63, - 0xff0d, 0xff0d -}; /* CR_Pd */ - -/* 'Pe': General Category */ -static const OnigCodePoint CR_Pe[] = { - 65, - 0x0029, 0x0029, - 0x005d, 0x005d, - 0x007d, 0x007d, - 0x0f3b, 0x0f3b, - 0x0f3d, 0x0f3d, - 0x169c, 0x169c, - 0x2046, 0x2046, - 0x207e, 0x207e, - 0x208e, 0x208e, - 0x232a, 0x232a, - 0x23b5, 0x23b5, - 0x2769, 0x2769, - 0x276b, 0x276b, - 0x276d, 0x276d, - 0x276f, 0x276f, - 0x2771, 0x2771, - 0x2773, 0x2773, - 0x2775, 0x2775, - 0x27c6, 0x27c6, - 0x27e7, 0x27e7, - 0x27e9, 0x27e9, - 0x27eb, 0x27eb, - 0x2984, 0x2984, - 0x2986, 0x2986, - 0x2988, 0x2988, - 0x298a, 0x298a, - 0x298c, 0x298c, - 0x298e, 0x298e, - 0x2990, 0x2990, - 0x2992, 0x2992, - 0x2994, 0x2994, - 0x2996, 0x2996, - 0x2998, 0x2998, - 0x29d9, 0x29d9, - 0x29db, 0x29db, - 0x29fd, 0x29fd, - 0x3009, 0x3009, - 0x300b, 0x300b, - 0x300d, 0x300d, - 0x300f, 0x300f, - 0x3011, 0x3011, - 0x3015, 0x3015, - 0x3017, 0x3017, - 0x3019, 0x3019, - 0x301b, 0x301b, - 0x301e, 0x301f, - 0xfd3f, 0xfd3f, - 0xfe18, 0xfe18, - 0xfe36, 0xfe36, - 0xfe38, 0xfe38, - 0xfe3a, 0xfe3a, - 0xfe3c, 0xfe3c, - 0xfe3e, 0xfe3e, - 0xfe40, 0xfe40, - 0xfe42, 0xfe42, - 0xfe44, 0xfe44, - 0xfe48, 0xfe48, - 0xfe5a, 0xfe5a, - 0xfe5c, 0xfe5c, - 0xfe5e, 0xfe5e, - 0xff09, 0xff09, - 0xff3d, 0xff3d, - 0xff5d, 0xff5d, - 0xff60, 0xff60, - 0xff63, 0xff63 -}; /* CR_Pe */ - -/* 'Pf': General Category */ -static const OnigCodePoint CR_Pf[] = { - 9, - 0x00bb, 0x00bb, - 0x2019, 0x2019, - 0x201d, 
0x201d, - 0x203a, 0x203a, - 0x2e03, 0x2e03, - 0x2e05, 0x2e05, - 0x2e0a, 0x2e0a, - 0x2e0d, 0x2e0d, - 0x2e1d, 0x2e1d -}; /* CR_Pf */ - -/* 'Pi': General Category */ -static const OnigCodePoint CR_Pi[] = { - 10, - 0x00ab, 0x00ab, - 0x2018, 0x2018, - 0x201b, 0x201c, - 0x201f, 0x201f, - 0x2039, 0x2039, - 0x2e02, 0x2e02, - 0x2e04, 0x2e04, - 0x2e09, 0x2e09, - 0x2e0c, 0x2e0c, - 0x2e1c, 0x2e1c -}; /* CR_Pi */ - -/* 'Po': General Category */ -static const OnigCodePoint CR_Po[] = { - 88, - 0x0021, 0x0023, - 0x0025, 0x0027, - 0x002a, 0x002a, - 0x002c, 0x002c, - 0x002e, 0x002f, - 0x003a, 0x003b, - 0x003f, 0x0040, - 0x005c, 0x005c, - 0x00a1, 0x00a1, - 0x00b7, 0x00b7, - 0x00bf, 0x00bf, - 0x037e, 0x037e, - 0x0387, 0x0387, - 0x055a, 0x055f, - 0x0589, 0x0589, - 0x05be, 0x05be, - 0x05c0, 0x05c0, - 0x05c3, 0x05c3, - 0x05c6, 0x05c6, - 0x05f3, 0x05f4, - 0x060c, 0x060d, - 0x061b, 0x061b, - 0x061e, 0x061f, - 0x066a, 0x066d, - 0x06d4, 0x06d4, - 0x0700, 0x070d, - 0x0964, 0x0965, - 0x0970, 0x0970, - 0x0df4, 0x0df4, - 0x0e4f, 0x0e4f, - 0x0e5a, 0x0e5b, - 0x0f04, 0x0f12, - 0x0f85, 0x0f85, - 0x0fd0, 0x0fd1, - 0x104a, 0x104f, - 0x10fb, 0x10fb, - 0x1361, 0x1368, - 0x166d, 0x166e, - 0x16eb, 0x16ed, - 0x1735, 0x1736, - 0x17d4, 0x17d6, - 0x17d8, 0x17da, - 0x1800, 0x1805, - 0x1807, 0x180a, - 0x1944, 0x1945, - 0x19de, 0x19df, - 0x1a1e, 0x1a1f, - 0x2016, 0x2017, - 0x2020, 0x2027, - 0x2030, 0x2038, - 0x203b, 0x203e, - 0x2041, 0x2043, - 0x2047, 0x2051, - 0x2053, 0x2053, - 0x2055, 0x205e, - 0x23b6, 0x23b6, - 0x2cf9, 0x2cfc, - 0x2cfe, 0x2cff, - 0x2e00, 0x2e01, - 0x2e06, 0x2e08, - 0x2e0b, 0x2e0b, - 0x2e0e, 0x2e16, - 0x3001, 0x3003, - 0x303d, 0x303d, - 0x30fb, 0x30fb, - 0xfe10, 0xfe16, - 0xfe19, 0xfe19, - 0xfe30, 0xfe30, - 0xfe45, 0xfe46, - 0xfe49, 0xfe4c, - 0xfe50, 0xfe52, - 0xfe54, 0xfe57, - 0xfe5f, 0xfe61, - 0xfe68, 0xfe68, - 0xfe6a, 0xfe6b, - 0xff01, 0xff03, - 0xff05, 0xff07, - 0xff0a, 0xff0a, - 0xff0c, 0xff0c, - 0xff0e, 0xff0f, - 0xff1a, 0xff1b, - 0xff1f, 0xff20, - 0xff3c, 0xff3c, - 0xff61, 0xff61, - 
0xff64, 0xff65, - 0x10100, 0x10101, - 0x1039f, 0x1039f, - 0x10a50, 0x10a58 -}; /* CR_Po */ - -/* 'Ps': General Category */ -static const OnigCodePoint CR_Ps[] = { - 67, - 0x0028, 0x0028, - 0x005b, 0x005b, - 0x007b, 0x007b, - 0x0f3a, 0x0f3a, - 0x0f3c, 0x0f3c, - 0x169b, 0x169b, - 0x201a, 0x201a, - 0x201e, 0x201e, - 0x2045, 0x2045, - 0x207d, 0x207d, - 0x208d, 0x208d, - 0x2329, 0x2329, - 0x23b4, 0x23b4, - 0x2768, 0x2768, - 0x276a, 0x276a, - 0x276c, 0x276c, - 0x276e, 0x276e, - 0x2770, 0x2770, - 0x2772, 0x2772, - 0x2774, 0x2774, - 0x27c5, 0x27c5, - 0x27e6, 0x27e6, - 0x27e8, 0x27e8, - 0x27ea, 0x27ea, - 0x2983, 0x2983, - 0x2985, 0x2985, - 0x2987, 0x2987, - 0x2989, 0x2989, - 0x298b, 0x298b, - 0x298d, 0x298d, - 0x298f, 0x298f, - 0x2991, 0x2991, - 0x2993, 0x2993, - 0x2995, 0x2995, - 0x2997, 0x2997, - 0x29d8, 0x29d8, - 0x29da, 0x29da, - 0x29fc, 0x29fc, - 0x3008, 0x3008, - 0x300a, 0x300a, - 0x300c, 0x300c, - 0x300e, 0x300e, - 0x3010, 0x3010, - 0x3014, 0x3014, - 0x3016, 0x3016, - 0x3018, 0x3018, - 0x301a, 0x301a, - 0x301d, 0x301d, - 0xfd3e, 0xfd3e, - 0xfe17, 0xfe17, - 0xfe35, 0xfe35, - 0xfe37, 0xfe37, - 0xfe39, 0xfe39, - 0xfe3b, 0xfe3b, - 0xfe3d, 0xfe3d, - 0xfe3f, 0xfe3f, - 0xfe41, 0xfe41, - 0xfe43, 0xfe43, - 0xfe47, 0xfe47, - 0xfe59, 0xfe59, - 0xfe5b, 0xfe5b, - 0xfe5d, 0xfe5d, - 0xff08, 0xff08, - 0xff3b, 0xff3b, - 0xff5b, 0xff5b, - 0xff5f, 0xff5f, - 0xff62, 0xff62 -}; /* CR_Ps */ - -/* 'S': Major Category */ -static const OnigCodePoint CR_S[] = { - 162, - 0x0024, 0x0024, - 0x002b, 0x002b, - 0x003c, 0x003e, - 0x005e, 0x005e, - 0x0060, 0x0060, - 0x007c, 0x007c, - 0x007e, 0x007e, - 0x00a2, 0x00a9, - 0x00ac, 0x00ac, - 0x00ae, 0x00b1, - 0x00b4, 0x00b4, - 0x00b6, 0x00b6, - 0x00b8, 0x00b8, - 0x00d7, 0x00d7, - 0x00f7, 0x00f7, - 0x02c2, 0x02c5, - 0x02d2, 0x02df, - 0x02e5, 0x02ed, - 0x02ef, 0x02ff, - 0x0374, 0x0375, - 0x0384, 0x0385, - 0x03f6, 0x03f6, - 0x0482, 0x0482, - 0x060b, 0x060b, - 0x060e, 0x060f, - 0x06e9, 0x06e9, - 0x06fd, 0x06fe, - 0x09f2, 0x09f3, - 0x09fa, 0x09fa, - 0x0af1, 
0x0af1, - 0x0b70, 0x0b70, - 0x0bf3, 0x0bfa, - 0x0e3f, 0x0e3f, - 0x0f01, 0x0f03, - 0x0f13, 0x0f17, - 0x0f1a, 0x0f1f, - 0x0f34, 0x0f34, - 0x0f36, 0x0f36, - 0x0f38, 0x0f38, - 0x0fbe, 0x0fc5, - 0x0fc7, 0x0fcc, - 0x0fcf, 0x0fcf, - 0x1360, 0x1360, - 0x1390, 0x1399, - 0x17db, 0x17db, - 0x1940, 0x1940, - 0x19e0, 0x19ff, - 0x1fbd, 0x1fbd, - 0x1fbf, 0x1fc1, - 0x1fcd, 0x1fcf, - 0x1fdd, 0x1fdf, - 0x1fed, 0x1fef, - 0x1ffd, 0x1ffe, - 0x2044, 0x2044, - 0x2052, 0x2052, - 0x207a, 0x207c, - 0x208a, 0x208c, - 0x20a0, 0x20b5, - 0x2100, 0x2101, - 0x2103, 0x2106, - 0x2108, 0x2109, - 0x2114, 0x2114, - 0x2116, 0x2118, - 0x211e, 0x2123, - 0x2125, 0x2125, - 0x2127, 0x2127, - 0x2129, 0x2129, - 0x212e, 0x212e, - 0x2132, 0x2132, - 0x213a, 0x213b, - 0x2140, 0x2144, - 0x214a, 0x214c, - 0x2190, 0x2328, - 0x232b, 0x23b3, - 0x23b7, 0x23db, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x249c, 0x24e9, - 0x2500, 0x269c, - 0x26a0, 0x26b1, - 0x2701, 0x2704, - 0x2706, 0x2709, - 0x270c, 0x2727, - 0x2729, 0x274b, - 0x274d, 0x274d, - 0x274f, 0x2752, - 0x2756, 0x2756, - 0x2758, 0x275e, - 0x2761, 0x2767, - 0x2794, 0x2794, - 0x2798, 0x27af, - 0x27b1, 0x27be, - 0x27c0, 0x27c4, - 0x27d0, 0x27e5, - 0x27f0, 0x2982, - 0x2999, 0x29d7, - 0x29dc, 0x29fb, - 0x29fe, 0x2b13, - 0x2ce5, 0x2cea, - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x2ff0, 0x2ffb, - 0x3004, 0x3004, - 0x3012, 0x3013, - 0x3020, 0x3020, - 0x3036, 0x3037, - 0x303e, 0x303f, - 0x309b, 0x309c, - 0x3190, 0x3191, - 0x3196, 0x319f, - 0x31c0, 0x31cf, - 0x3200, 0x321e, - 0x322a, 0x3243, - 0x3250, 0x3250, - 0x3260, 0x327f, - 0x328a, 0x32b0, - 0x32c0, 0x32fe, - 0x3300, 0x33ff, - 0x4dc0, 0x4dff, - 0xa490, 0xa4c6, - 0xa700, 0xa716, - 0xa828, 0xa82b, - 0xfb29, 0xfb29, - 0xfdfc, 0xfdfd, - 0xfe62, 0xfe62, - 0xfe64, 0xfe66, - 0xfe69, 0xfe69, - 0xff04, 0xff04, - 0xff0b, 0xff0b, - 0xff1c, 0xff1e, - 0xff3e, 0xff3e, - 0xff40, 0xff40, - 0xff5c, 0xff5c, - 0xff5e, 0xff5e, - 0xffe0, 0xffe6, - 0xffe8, 0xffee, - 0xfffc, 0xfffd, - 0x10102, 0x10102, - 0x10137, 0x1013f, - 
0x10179, 0x10189, - 0x103d0, 0x103d0, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d12a, 0x1d164, - 0x1d16a, 0x1d16c, - 0x1d183, 0x1d184, - 0x1d18c, 0x1d1a9, - 0x1d1ae, 0x1d1dd, - 0x1d200, 0x1d241, - 0x1d245, 0x1d245, - 0x1d300, 0x1d356, - 0x1d6c1, 0x1d6c1, - 0x1d6db, 0x1d6db, - 0x1d6fb, 0x1d6fb, - 0x1d715, 0x1d715, - 0x1d735, 0x1d735, - 0x1d74f, 0x1d74f, - 0x1d76f, 0x1d76f, - 0x1d789, 0x1d789, - 0x1d7a9, 0x1d7a9, - 0x1d7c3, 0x1d7c3 -}; /* CR_S */ - -/* 'Sc': General Category */ -static const OnigCodePoint CR_Sc[] = { - 14, - 0x0024, 0x0024, - 0x00a2, 0x00a5, - 0x060b, 0x060b, - 0x09f2, 0x09f3, - 0x0af1, 0x0af1, - 0x0bf9, 0x0bf9, - 0x0e3f, 0x0e3f, - 0x17db, 0x17db, - 0x20a0, 0x20b5, - 0xfdfc, 0xfdfc, - 0xfe69, 0xfe69, - 0xff04, 0xff04, - 0xffe0, 0xffe1, - 0xffe5, 0xffe6 -}; /* CR_Sc */ - -/* 'Sk': General Category */ -static const OnigCodePoint CR_Sk[] = { - 23, - 0x005e, 0x005e, - 0x0060, 0x0060, - 0x00a8, 0x00a8, - 0x00af, 0x00af, - 0x00b4, 0x00b4, - 0x00b8, 0x00b8, - 0x02c2, 0x02c5, - 0x02d2, 0x02df, - 0x02e5, 0x02ed, - 0x02ef, 0x02ff, - 0x0374, 0x0375, - 0x0384, 0x0385, - 0x1fbd, 0x1fbd, - 0x1fbf, 0x1fc1, - 0x1fcd, 0x1fcf, - 0x1fdd, 0x1fdf, - 0x1fed, 0x1fef, - 0x1ffd, 0x1ffe, - 0x309b, 0x309c, - 0xa700, 0xa716, - 0xff3e, 0xff3e, - 0xff40, 0xff40, - 0xffe3, 0xffe3 -}; /* CR_Sk */ - -/* 'Sm': General Category */ -static const OnigCodePoint CR_Sm[] = { - 59, - 0x002b, 0x002b, - 0x003c, 0x003e, - 0x007c, 0x007c, - 0x007e, 0x007e, - 0x00ac, 0x00ac, - 0x00b1, 0x00b1, - 0x00d7, 0x00d7, - 0x00f7, 0x00f7, - 0x03f6, 0x03f6, - 0x2044, 0x2044, - 0x2052, 0x2052, - 0x207a, 0x207c, - 0x208a, 0x208c, - 0x2140, 0x2144, - 0x214b, 0x214b, - 0x2190, 0x2194, - 0x219a, 0x219b, - 0x21a0, 0x21a0, - 0x21a3, 0x21a3, - 0x21a6, 0x21a6, - 0x21ae, 0x21ae, - 0x21ce, 0x21cf, - 0x21d2, 0x21d2, - 0x21d4, 0x21d4, - 0x21f4, 0x22ff, - 0x2308, 0x230b, - 0x2320, 0x2321, - 0x237c, 0x237c, - 0x239b, 0x23b3, - 0x25b7, 0x25b7, - 0x25c1, 0x25c1, - 0x25f8, 0x25ff, - 0x266f, 0x266f, - 0x27c0, 0x27c4, - 
0x27d0, 0x27e5, - 0x27f0, 0x27ff, - 0x2900, 0x2982, - 0x2999, 0x29d7, - 0x29dc, 0x29fb, - 0x29fe, 0x2aff, - 0xfb29, 0xfb29, - 0xfe62, 0xfe62, - 0xfe64, 0xfe66, - 0xff0b, 0xff0b, - 0xff1c, 0xff1e, - 0xff5c, 0xff5c, - 0xff5e, 0xff5e, - 0xffe2, 0xffe2, - 0xffe9, 0xffec, - 0x1d6c1, 0x1d6c1, - 0x1d6db, 0x1d6db, - 0x1d6fb, 0x1d6fb, - 0x1d715, 0x1d715, - 0x1d735, 0x1d735, - 0x1d74f, 0x1d74f, - 0x1d76f, 0x1d76f, - 0x1d789, 0x1d789, - 0x1d7a9, 0x1d7a9, - 0x1d7c3, 0x1d7c3 -}; /* CR_Sm */ - -/* 'So': General Category */ -static const OnigCodePoint CR_So[] = { - 120, - 0x00a6, 0x00a7, - 0x00a9, 0x00a9, - 0x00ae, 0x00ae, - 0x00b0, 0x00b0, - 0x00b6, 0x00b6, - 0x0482, 0x0482, - 0x060e, 0x060f, - 0x06e9, 0x06e9, - 0x06fd, 0x06fe, - 0x09fa, 0x09fa, - 0x0b70, 0x0b70, - 0x0bf3, 0x0bf8, - 0x0bfa, 0x0bfa, - 0x0f01, 0x0f03, - 0x0f13, 0x0f17, - 0x0f1a, 0x0f1f, - 0x0f34, 0x0f34, - 0x0f36, 0x0f36, - 0x0f38, 0x0f38, - 0x0fbe, 0x0fc5, - 0x0fc7, 0x0fcc, - 0x0fcf, 0x0fcf, - 0x1360, 0x1360, - 0x1390, 0x1399, - 0x1940, 0x1940, - 0x19e0, 0x19ff, - 0x2100, 0x2101, - 0x2103, 0x2106, - 0x2108, 0x2109, - 0x2114, 0x2114, - 0x2116, 0x2118, - 0x211e, 0x2123, - 0x2125, 0x2125, - 0x2127, 0x2127, - 0x2129, 0x2129, - 0x212e, 0x212e, - 0x2132, 0x2132, - 0x213a, 0x213b, - 0x214a, 0x214a, - 0x214c, 0x214c, - 0x2195, 0x2199, - 0x219c, 0x219f, - 0x21a1, 0x21a2, - 0x21a4, 0x21a5, - 0x21a7, 0x21ad, - 0x21af, 0x21cd, - 0x21d0, 0x21d1, - 0x21d3, 0x21d3, - 0x21d5, 0x21f3, - 0x2300, 0x2307, - 0x230c, 0x231f, - 0x2322, 0x2328, - 0x232b, 0x237b, - 0x237d, 0x239a, - 0x23b7, 0x23db, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x249c, 0x24e9, - 0x2500, 0x25b6, - 0x25b8, 0x25c0, - 0x25c2, 0x25f7, - 0x2600, 0x266e, - 0x2670, 0x269c, - 0x26a0, 0x26b1, - 0x2701, 0x2704, - 0x2706, 0x2709, - 0x270c, 0x2727, - 0x2729, 0x274b, - 0x274d, 0x274d, - 0x274f, 0x2752, - 0x2756, 0x2756, - 0x2758, 0x275e, - 0x2761, 0x2767, - 0x2794, 0x2794, - 0x2798, 0x27af, - 0x27b1, 0x27be, - 0x2800, 0x28ff, - 0x2b00, 0x2b13, - 0x2ce5, 0x2cea, - 0x2e80, 
0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x2ff0, 0x2ffb, - 0x3004, 0x3004, - 0x3012, 0x3013, - 0x3020, 0x3020, - 0x3036, 0x3037, - 0x303e, 0x303f, - 0x3190, 0x3191, - 0x3196, 0x319f, - 0x31c0, 0x31cf, - 0x3200, 0x321e, - 0x322a, 0x3243, - 0x3250, 0x3250, - 0x3260, 0x327f, - 0x328a, 0x32b0, - 0x32c0, 0x32fe, - 0x3300, 0x33ff, - 0x4dc0, 0x4dff, - 0xa490, 0xa4c6, - 0xa828, 0xa82b, - 0xfdfd, 0xfdfd, - 0xffe4, 0xffe4, - 0xffe8, 0xffe8, - 0xffed, 0xffee, - 0xfffc, 0xfffd, - 0x10102, 0x10102, - 0x10137, 0x1013f, - 0x10179, 0x10189, - 0x103d0, 0x103d0, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d12a, 0x1d164, - 0x1d16a, 0x1d16c, - 0x1d183, 0x1d184, - 0x1d18c, 0x1d1a9, - 0x1d1ae, 0x1d1dd, - 0x1d200, 0x1d241, - 0x1d245, 0x1d245, - 0x1d300, 0x1d356 -}; /* CR_So */ - -/* 'Z': Major Category */ -static const OnigCodePoint CR_Z[] = { - 9, - 0x0020, 0x0020, - 0x00a0, 0x00a0, - 0x1680, 0x1680, - 0x180e, 0x180e, - 0x2000, 0x200a, - 0x2028, 0x2029, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000 -}; /* CR_Z */ - -/* 'Zl': General Category */ -static const OnigCodePoint CR_Zl[] = { - 1, - 0x2028, 0x2028 -}; /* CR_Zl */ - -/* 'Zp': General Category */ -static const OnigCodePoint CR_Zp[] = { - 1, - 0x2029, 0x2029 -}; /* CR_Zp */ - -/* 'Zs': General Category */ -static const OnigCodePoint CR_Zs[] = { - 8, - 0x0020, 0x0020, - 0x00a0, 0x00a0, - 0x1680, 0x1680, - 0x180e, 0x180e, - 0x2000, 0x200a, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000 -}; /* CR_Zs */ - -/* 'Arabic': Script */ -static const OnigCodePoint CR_Arabic[] = { - 17, - 0x060b, 0x060b, - 0x060d, 0x0615, - 0x061e, 0x061e, - 0x0621, 0x063a, - 0x0641, 0x064a, - 0x0656, 0x065e, - 0x066a, 0x066f, - 0x0671, 0x06dc, - 0x06de, 0x06ff, - 0x0750, 0x076d, - 0xfb50, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfc, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc -}; /* CR_Arabic */ - -/* 'Armenian': Script */ -static const OnigCodePoint CR_Armenian[] = { - 5, - 0x0531, 0x0556, - 0x0559, 0x055f, - 
0x0561, 0x0587, - 0x058a, 0x058a, - 0xfb13, 0xfb17 -}; /* CR_Armenian */ - -/* 'Bengali': Script */ -static const OnigCodePoint CR_Bengali[] = { - 14, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09ce, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09fa -}; /* CR_Bengali */ - -/* 'Bopomofo': Script */ -static const OnigCodePoint CR_Bopomofo[] = { - 2, - 0x3105, 0x312c, - 0x31a0, 0x31b7 -}; /* CR_Bopomofo */ - -/* 'Braille': Script */ -static const OnigCodePoint CR_Braille[] = { - 1, - 0x2800, 0x28ff -}; /* CR_Braille */ - -/* 'Buginese': Script */ -static const OnigCodePoint CR_Buginese[] = { - 2, - 0x1a00, 0x1a1b, - 0x1a1e, 0x1a1f -}; /* CR_Buginese */ - -/* 'Buhid': Script */ -static const OnigCodePoint CR_Buhid[] = { - 1, - 0x1740, 0x1753 -}; /* CR_Buhid */ - -/* 'Canadian_Aboriginal': Script */ -static const OnigCodePoint CR_Canadian_Aboriginal[] = { - 1, - 0x1401, 0x1676 -}; /* CR_Canadian_Aboriginal */ - -/* 'Cherokee': Script */ -static const OnigCodePoint CR_Cherokee[] = { - 1, - 0x13a0, 0x13f4 -}; /* CR_Cherokee */ - -/* 'Common': Script */ -static const OnigCodePoint CR_Common[] = { - 126, - 0x0000, 0x0040, - 0x005b, 0x0060, - 0x007b, 0x00a9, - 0x00ab, 0x00b9, - 0x00bb, 0x00bf, - 0x00d7, 0x00d7, - 0x00f7, 0x00f7, - 0x02b9, 0x02df, - 0x02e5, 0x02ff, - 0x037e, 0x037e, - 0x0387, 0x0387, - 0x0589, 0x0589, - 0x0600, 0x0603, - 0x060c, 0x060c, - 0x061b, 0x061b, - 0x061f, 0x061f, - 0x0640, 0x0640, - 0x0660, 0x0669, - 0x06dd, 0x06dd, - 0x0964, 0x0965, - 0x0970, 0x0970, - 0x0e3f, 0x0e3f, - 0x10fb, 0x10fb, - 0x16eb, 0x16ed, - 0x1735, 0x1736, - 0x2000, 0x200b, - 0x200e, 0x2063, - 0x206a, 0x2070, - 0x2074, 0x207e, - 0x2080, 0x208e, - 0x20a0, 0x20b5, - 0x2100, 0x2125, - 0x2127, 0x2129, - 0x212c, 0x214c, - 0x2153, 0x2183, - 0x2190, 0x23db, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x2460, 0x269c, - 0x26a0, 0x26b1, - 0x2701, 
0x2704, - 0x2706, 0x2709, - 0x270c, 0x2727, - 0x2729, 0x274b, - 0x274d, 0x274d, - 0x274f, 0x2752, - 0x2756, 0x2756, - 0x2758, 0x275e, - 0x2761, 0x2794, - 0x2798, 0x27af, - 0x27b1, 0x27be, - 0x27c0, 0x27c6, - 0x27d0, 0x27eb, - 0x27f0, 0x27ff, - 0x2900, 0x2b13, - 0x2e00, 0x2e17, - 0x2e1c, 0x2e1d, - 0x2ff0, 0x2ffb, - 0x3000, 0x3004, - 0x3006, 0x3006, - 0x3008, 0x3020, - 0x3030, 0x3037, - 0x303c, 0x303f, - 0x309b, 0x309c, - 0x30a0, 0x30a0, - 0x30fb, 0x30fc, - 0x3190, 0x319f, - 0x31c0, 0x31cf, - 0x3220, 0x3243, - 0x3250, 0x325f, - 0x327e, 0x32fe, - 0x3300, 0x33ff, - 0x4dc0, 0x4dff, - 0xa700, 0xa716, - 0xe000, 0xf8ff, - 0xfd3e, 0xfd3f, - 0xfdfd, 0xfdfd, - 0xfe10, 0xfe19, - 0xfe30, 0xfe52, - 0xfe54, 0xfe66, - 0xfe68, 0xfe6b, - 0xfeff, 0xfeff, - 0xff01, 0xff20, - 0xff3b, 0xff40, - 0xff5b, 0xff65, - 0xff70, 0xff70, - 0xff9e, 0xff9f, - 0xffe0, 0xffe6, - 0xffe8, 0xffee, - 0xfff9, 0xfffd, - 0x10100, 0x10102, - 0x10107, 0x10133, - 0x10137, 0x1013f, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d12a, 0x1d166, - 0x1d16a, 0x1d17a, - 0x1d183, 0x1d184, - 0x1d18c, 0x1d1a9, - 0x1d1ae, 0x1d1dd, - 0x1d300, 0x1d356, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -}; /* CR_Common */ - -/* 'Coptic': Script */ -static const OnigCodePoint CR_Coptic[] = { - 3, - 0x03e2, 0x03ef, - 0x2c80, 0x2cea, - 0x2cf9, 0x2cff -}; /* CR_Coptic */ - -/* 'Cypriot': Script */ -static const OnigCodePoint CR_Cypriot[] = { - 6, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f -}; /* CR_Cypriot */ - 
-/* 'Cyrillic': Script */ -static const OnigCodePoint CR_Cyrillic[] = { - 6, - 0x0400, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f9, - 0x0500, 0x050f, - 0x1d2b, 0x1d2b, - 0x1d78, 0x1d78 -}; /* CR_Cyrillic */ - -/* 'Deseret': Script */ -static const OnigCodePoint CR_Deseret[] = { - 1, - 0x10400, 0x1044f -}; /* CR_Deseret */ - -/* 'Devanagari': Script */ -static const OnigCodePoint CR_Devanagari[] = { - 6, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0963, - 0x0966, 0x096f, - 0x097d, 0x097d -}; /* CR_Devanagari */ - -/* 'Ethiopic': Script */ -static const OnigCodePoint CR_Ethiopic[] = { - 27, - 0x1200, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x135f, 0x137c, - 0x1380, 0x1399, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde -}; /* CR_Ethiopic */ - -/* 'Georgian': Script */ -static const OnigCodePoint CR_Georgian[] = { - 4, - 0x10a0, 0x10c5, - 0x10d0, 0x10fa, - 0x10fc, 0x10fc, - 0x2d00, 0x2d25 -}; /* CR_Georgian */ - -/* 'Glagolitic': Script */ -static const OnigCodePoint CR_Glagolitic[] = { - 2, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e -}; /* CR_Glagolitic */ - -/* 'Gothic': Script */ -static const OnigCodePoint CR_Gothic[] = { - 1, - 0x10330, 0x1034a -}; /* CR_Gothic */ - -/* 'Greek': Script */ -static const OnigCodePoint CR_Greek[] = { - 31, - 0x0374, 0x0375, - 0x037a, 0x037a, - 0x0384, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03e1, - 0x03f0, 0x03ff, - 0x1d26, 0x1d2a, - 0x1d5d, 0x1d61, - 0x1d66, 0x1d6a, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 
0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fc4, - 0x1fc6, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fdd, 0x1fef, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffe, - 0x2126, 0x2126, - 0x10140, 0x1018a, - 0x1d200, 0x1d245 -}; /* CR_Greek */ - -/* 'Gujarati': Script */ -static const OnigCodePoint CR_Gujarati[] = { - 14, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0af1, 0x0af1 -}; /* CR_Gujarati */ - -/* 'Gurmukhi': Script */ -static const OnigCodePoint CR_Gurmukhi[] = { - 15, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74 -}; /* CR_Gurmukhi */ - -/* 'Han': Script */ -static const OnigCodePoint CR_Han[] = { - 14, - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x3005, 0x3005, - 0x3007, 0x3007, - 0x3021, 0x3029, - 0x3038, 0x303b, - 0x3400, 0x4db5, - 0x4e00, 0x9fbb, - 0xf900, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfa70, 0xfad9, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d -}; /* CR_Han */ - -/* 'Hangul': Script */ -static const OnigCodePoint CR_Hangul[] = { - 12, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x3131, 0x318e, - 0x3200, 0x321e, - 0x3260, 0x327d, - 0xac00, 0xd7a3, - 0xffa0, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc -}; /* CR_Hangul */ - -/* 'Hanunoo': Script */ -static const OnigCodePoint CR_Hanunoo[] = { - 1, - 0x1720, 0x1734 -}; /* CR_Hanunoo */ - -/* 'Hebrew': Script */ -static const OnigCodePoint CR_Hebrew[] = { - 10, - 0x0591, 0x05b9, - 0x05bb, 0x05c7, - 0x05d0, 0x05ea, - 0x05f0, 0x05f4, - 0xfb1d, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfb4f -}; /* CR_Hebrew */ - -/* 'Hiragana': 
Script */ -static const OnigCodePoint CR_Hiragana[] = { - 2, - 0x3041, 0x3096, - 0x309d, 0x309f -}; /* CR_Hiragana */ - -/* 'Inherited': Script */ -static const OnigCodePoint CR_Inherited[] = { - 15, - 0x0300, 0x036f, - 0x064b, 0x0655, - 0x0670, 0x0670, - 0x1dc0, 0x1dc3, - 0x200c, 0x200d, - 0x20d0, 0x20eb, - 0x302a, 0x302f, - 0x3099, 0x309a, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0x1d167, 0x1d169, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0xe0100, 0xe01ef -}; /* CR_Inherited */ - -/* 'Kannada': Script */ -static const OnigCodePoint CR_Kannada[] = { - 13, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef -}; /* CR_Kannada */ - -/* 'Katakana': Script */ -static const OnigCodePoint CR_Katakana[] = { - 5, - 0x30a1, 0x30fa, - 0x30fd, 0x30ff, - 0x31f0, 0x31ff, - 0xff66, 0xff6f, - 0xff71, 0xff9d -}; /* CR_Katakana */ - -/* 'Kharoshthi': Script */ -static const OnigCodePoint CR_Kharoshthi[] = { - 8, - 0x10a00, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a47, - 0x10a50, 0x10a58 -}; /* CR_Kharoshthi */ - -/* 'Khmer': Script */ -static const OnigCodePoint CR_Khmer[] = { - 4, - 0x1780, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x19e0, 0x19ff -}; /* CR_Khmer */ - -/* 'Lao': Script */ -static const OnigCodePoint CR_Lao[] = { - 18, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd -}; /* CR_Lao */ - -/* 'Latin': Script */ -static const OnigCodePoint CR_Latin[] = { - 23, - 0x0041, 0x005a, - 0x0061, 0x007a, - 0x00aa, 0x00aa, - 
0x00ba, 0x00ba, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, - 0x00f8, 0x0241, - 0x0250, 0x02b8, - 0x02e0, 0x02e4, - 0x1d00, 0x1d25, - 0x1d2c, 0x1d5c, - 0x1d62, 0x1d65, - 0x1d6b, 0x1d77, - 0x1d79, 0x1dbf, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x2090, 0x2094, - 0x212a, 0x212b, - 0xfb00, 0xfb06, - 0xff21, 0xff3a, - 0xff41, 0xff5a -}; /* CR_Latin */ - -/* 'Limbu': Script */ -static const OnigCodePoint CR_Limbu[] = { - 5, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1940, 0x1940, - 0x1944, 0x194f -}; /* CR_Limbu */ - -/* 'Linear_B': Script */ -static const OnigCodePoint CR_Linear_B[] = { - 7, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa -}; /* CR_Linear_B */ - -/* 'Malayalam': Script */ -static const OnigCodePoint CR_Malayalam[] = { - 11, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f -}; /* CR_Malayalam */ - -/* 'Mongolian': Script */ -static const OnigCodePoint CR_Mongolian[] = { - 4, - 0x1800, 0x180e, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9 -}; /* CR_Mongolian */ - -/* 'Myanmar': Script */ -static const OnigCodePoint CR_Myanmar[] = { - 6, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1059 -}; /* CR_Myanmar */ - -/* 'New_Tai_Lue': Script */ -static const OnigCodePoint CR_New_Tai_Lue[] = { - 4, - 0x1980, 0x19a9, - 0x19b0, 0x19c9, - 0x19d0, 0x19d9, - 0x19de, 0x19df -}; /* CR_New_Tai_Lue */ - -/* 'Ogham': Script */ -static const OnigCodePoint CR_Ogham[] = { - 1, - 0x1680, 0x169c -}; /* CR_Ogham */ - -/* 'Old_Italic': Script */ -static const OnigCodePoint CR_Old_Italic[] = { - 2, - 0x10300, 0x1031e, - 0x10320, 0x10323 -}; /* CR_Old_Italic */ - -/* 'Old_Persian': Script */ -static const OnigCodePoint CR_Old_Persian[] = { - 2, 
- 0x103a0, 0x103c3, - 0x103c8, 0x103d5 -}; /* CR_Old_Persian */ - -/* 'Oriya': Script */ -static const OnigCodePoint CR_Oriya[] = { - 14, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b71 -}; /* CR_Oriya */ - -/* 'Osmanya': Script */ -static const OnigCodePoint CR_Osmanya[] = { - 2, - 0x10480, 0x1049d, - 0x104a0, 0x104a9 -}; /* CR_Osmanya */ - -/* 'Runic': Script */ -static const OnigCodePoint CR_Runic[] = { - 2, - 0x16a0, 0x16ea, - 0x16ee, 0x16f0 -}; /* CR_Runic */ - -/* 'Shavian': Script */ -static const OnigCodePoint CR_Shavian[] = { - 1, - 0x10450, 0x1047f -}; /* CR_Shavian */ - -/* 'Sinhala': Script */ -static const OnigCodePoint CR_Sinhala[] = { - 11, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df4 -}; /* CR_Sinhala */ - -/* 'Syloti_Nagri': Script */ -static const OnigCodePoint CR_Syloti_Nagri[] = { - 1, - 0xa800, 0xa82b -}; /* CR_Syloti_Nagri */ - -/* 'Syriac': Script */ -static const OnigCodePoint CR_Syriac[] = { - 3, - 0x0700, 0x070d, - 0x070f, 0x074a, - 0x074d, 0x074f -}; /* CR_Syriac */ - -/* 'Tagalog': Script */ -static const OnigCodePoint CR_Tagalog[] = { - 2, - 0x1700, 0x170c, - 0x170e, 0x1714 -}; /* CR_Tagalog */ - -/* 'Tagbanwa': Script */ -static const OnigCodePoint CR_Tagbanwa[] = { - 3, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773 -}; /* CR_Tagbanwa */ - -/* 'Tai_Le': Script */ -static const OnigCodePoint CR_Tai_Le[] = { - 2, - 0x1950, 0x196d, - 0x1970, 0x1974 -}; /* CR_Tai_Le */ - -/* 'Tamil': Script */ -static const OnigCodePoint CR_Tamil[] = { - 15, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 
0x0ba8, 0x0baa, - 0x0bae, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be6, 0x0bfa -}; /* CR_Tamil */ - -/* 'Telugu': Script */ -static const OnigCodePoint CR_Telugu[] = { - 12, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f -}; /* CR_Telugu */ - -/* 'Thaana': Script */ -static const OnigCodePoint CR_Thaana[] = { - 1, - 0x0780, 0x07b1 -}; /* CR_Thaana */ - -/* 'Thai': Script */ -static const OnigCodePoint CR_Thai[] = { - 2, - 0x0e01, 0x0e3a, - 0x0e40, 0x0e5b -}; /* CR_Thai */ - -/* 'Tibetan': Script */ -static const OnigCodePoint CR_Tibetan[] = { - 7, - 0x0f00, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fbe, 0x0fcc, - 0x0fcf, 0x0fd1 -}; /* CR_Tibetan */ - -/* 'Tifinagh': Script */ -static const OnigCodePoint CR_Tifinagh[] = { - 2, - 0x2d30, 0x2d65, - 0x2d6f, 0x2d6f -}; /* CR_Tifinagh */ - -/* 'Ugaritic': Script */ -static const OnigCodePoint CR_Ugaritic[] = { - 2, - 0x10380, 0x1039d, - 0x1039f, 0x1039f -}; /* CR_Ugaritic */ - -/* 'Yi': Script */ -static const OnigCodePoint CR_Yi[] = { - 2, - 0xa000, 0xa48c, - 0xa490, 0xa4c6 -}; /* CR_Yi */ - - -#endif /* USE_UNICODE_PROPERTIES */ - - -typedef struct { - int n; - OnigCodePoint code[3]; -} CodePointList3; - -typedef struct { - OnigCodePoint from; - CodePointList3 to; -} CaseFold_11_Type; - -typedef struct { - OnigCodePoint from; - CodePointList3 to; -} CaseUnfold_11_Type; - -typedef struct { - int n; - OnigCodePoint code[2]; -} CodePointList2; - -typedef struct { - OnigCodePoint from[2]; - CodePointList2 to; -} CaseUnfold_12_Type; - -typedef struct { - OnigCodePoint from[3]; - CodePointList2 to; -} CaseUnfold_13_Type; - -static const CaseFold_11_Type CaseFold[] = { - { 0x0041, {1, {0x0061}}}, - { 0x0042, {1, {0x0062}}}, - { 0x0043, {1, {0x0063}}}, - { 0x0044, {1, 
{0x0064}}}, - { 0x0045, {1, {0x0065}}}, - { 0x0046, {1, {0x0066}}}, - { 0x0047, {1, {0x0067}}}, - { 0x0048, {1, {0x0068}}}, - { 0x004a, {1, {0x006a}}}, - { 0x004b, {1, {0x006b}}}, - { 0x004c, {1, {0x006c}}}, - { 0x004d, {1, {0x006d}}}, - { 0x004e, {1, {0x006e}}}, - { 0x004f, {1, {0x006f}}}, - { 0x0050, {1, {0x0070}}}, - { 0x0051, {1, {0x0071}}}, - { 0x0052, {1, {0x0072}}}, - { 0x0053, {1, {0x0073}}}, - { 0x0054, {1, {0x0074}}}, - { 0x0055, {1, {0x0075}}}, - { 0x0056, {1, {0x0076}}}, - { 0x0057, {1, {0x0077}}}, - { 0x0058, {1, {0x0078}}}, - { 0x0059, {1, {0x0079}}}, - { 0x005a, {1, {0x007a}}}, - { 0x00b5, {1, {0x03bc}}}, - { 0x00c0, {1, {0x00e0}}}, - { 0x00c1, {1, {0x00e1}}}, - { 0x00c2, {1, {0x00e2}}}, - { 0x00c3, {1, {0x00e3}}}, - { 0x00c4, {1, {0x00e4}}}, - { 0x00c5, {1, {0x00e5}}}, - { 0x00c6, {1, {0x00e6}}}, - { 0x00c7, {1, {0x00e7}}}, - { 0x00c8, {1, {0x00e8}}}, - { 0x00c9, {1, {0x00e9}}}, - { 0x00ca, {1, {0x00ea}}}, - { 0x00cb, {1, {0x00eb}}}, - { 0x00cc, {1, {0x00ec}}}, - { 0x00cd, {1, {0x00ed}}}, - { 0x00ce, {1, {0x00ee}}}, - { 0x00cf, {1, {0x00ef}}}, - { 0x00d0, {1, {0x00f0}}}, - { 0x00d1, {1, {0x00f1}}}, - { 0x00d2, {1, {0x00f2}}}, - { 0x00d3, {1, {0x00f3}}}, - { 0x00d4, {1, {0x00f4}}}, - { 0x00d5, {1, {0x00f5}}}, - { 0x00d6, {1, {0x00f6}}}, - { 0x00d8, {1, {0x00f8}}}, - { 0x00d9, {1, {0x00f9}}}, - { 0x00da, {1, {0x00fa}}}, - { 0x00db, {1, {0x00fb}}}, - { 0x00dc, {1, {0x00fc}}}, - { 0x00dd, {1, {0x00fd}}}, - { 0x00de, {1, {0x00fe}}}, - { 0x00df, {2, {0x0073, 0x0073}}}, - { 0x0100, {1, {0x0101}}}, - { 0x0102, {1, {0x0103}}}, - { 0x0104, {1, {0x0105}}}, - { 0x0106, {1, {0x0107}}}, - { 0x0108, {1, {0x0109}}}, - { 0x010a, {1, {0x010b}}}, - { 0x010c, {1, {0x010d}}}, - { 0x010e, {1, {0x010f}}}, - { 0x0110, {1, {0x0111}}}, - { 0x0112, {1, {0x0113}}}, - { 0x0114, {1, {0x0115}}}, - { 0x0116, {1, {0x0117}}}, - { 0x0118, {1, {0x0119}}}, - { 0x011a, {1, {0x011b}}}, - { 0x011c, {1, {0x011d}}}, - { 0x011e, {1, {0x011f}}}, - { 0x0120, {1, {0x0121}}}, - { 0x0122, {1, 
{0x0123}}}, - { 0x0124, {1, {0x0125}}}, - { 0x0126, {1, {0x0127}}}, - { 0x0128, {1, {0x0129}}}, - { 0x012a, {1, {0x012b}}}, - { 0x012c, {1, {0x012d}}}, - { 0x012e, {1, {0x012f}}}, - { 0x0132, {1, {0x0133}}}, - { 0x0134, {1, {0x0135}}}, - { 0x0136, {1, {0x0137}}}, - { 0x0139, {1, {0x013a}}}, - { 0x013b, {1, {0x013c}}}, - { 0x013d, {1, {0x013e}}}, - { 0x013f, {1, {0x0140}}}, - { 0x0141, {1, {0x0142}}}, - { 0x0143, {1, {0x0144}}}, - { 0x0145, {1, {0x0146}}}, - { 0x0147, {1, {0x0148}}}, - { 0x0149, {2, {0x02bc, 0x006e}}}, - { 0x014a, {1, {0x014b}}}, - { 0x014c, {1, {0x014d}}}, - { 0x014e, {1, {0x014f}}}, - { 0x0150, {1, {0x0151}}}, - { 0x0152, {1, {0x0153}}}, - { 0x0154, {1, {0x0155}}}, - { 0x0156, {1, {0x0157}}}, - { 0x0158, {1, {0x0159}}}, - { 0x015a, {1, {0x015b}}}, - { 0x015c, {1, {0x015d}}}, - { 0x015e, {1, {0x015f}}}, - { 0x0160, {1, {0x0161}}}, - { 0x0162, {1, {0x0163}}}, - { 0x0164, {1, {0x0165}}}, - { 0x0166, {1, {0x0167}}}, - { 0x0168, {1, {0x0169}}}, - { 0x016a, {1, {0x016b}}}, - { 0x016c, {1, {0x016d}}}, - { 0x016e, {1, {0x016f}}}, - { 0x0170, {1, {0x0171}}}, - { 0x0172, {1, {0x0173}}}, - { 0x0174, {1, {0x0175}}}, - { 0x0176, {1, {0x0177}}}, - { 0x0178, {1, {0x00ff}}}, - { 0x0179, {1, {0x017a}}}, - { 0x017b, {1, {0x017c}}}, - { 0x017d, {1, {0x017e}}}, - { 0x017f, {1, {0x0073}}}, - { 0x0181, {1, {0x0253}}}, - { 0x0182, {1, {0x0183}}}, - { 0x0184, {1, {0x0185}}}, - { 0x0186, {1, {0x0254}}}, - { 0x0187, {1, {0x0188}}}, - { 0x0189, {1, {0x0256}}}, - { 0x018a, {1, {0x0257}}}, - { 0x018b, {1, {0x018c}}}, - { 0x018e, {1, {0x01dd}}}, - { 0x018f, {1, {0x0259}}}, - { 0x0190, {1, {0x025b}}}, - { 0x0191, {1, {0x0192}}}, - { 0x0193, {1, {0x0260}}}, - { 0x0194, {1, {0x0263}}}, - { 0x0196, {1, {0x0269}}}, - { 0x0197, {1, {0x0268}}}, - { 0x0198, {1, {0x0199}}}, - { 0x019c, {1, {0x026f}}}, - { 0x019d, {1, {0x0272}}}, - { 0x019f, {1, {0x0275}}}, - { 0x01a0, {1, {0x01a1}}}, - { 0x01a2, {1, {0x01a3}}}, - { 0x01a4, {1, {0x01a5}}}, - { 0x01a6, {1, {0x0280}}}, - { 0x01a7, {1, 
{0x01a8}}}, - { 0x01a9, {1, {0x0283}}}, - { 0x01ac, {1, {0x01ad}}}, - { 0x01ae, {1, {0x0288}}}, - { 0x01af, {1, {0x01b0}}}, - { 0x01b1, {1, {0x028a}}}, - { 0x01b2, {1, {0x028b}}}, - { 0x01b3, {1, {0x01b4}}}, - { 0x01b5, {1, {0x01b6}}}, - { 0x01b7, {1, {0x0292}}}, - { 0x01b8, {1, {0x01b9}}}, - { 0x01bc, {1, {0x01bd}}}, - { 0x01c4, {1, {0x01c6}}}, - { 0x01c5, {1, {0x01c6}}}, - { 0x01c7, {1, {0x01c9}}}, - { 0x01c8, {1, {0x01c9}}}, - { 0x01ca, {1, {0x01cc}}}, - { 0x01cb, {1, {0x01cc}}}, - { 0x01cd, {1, {0x01ce}}}, - { 0x01cf, {1, {0x01d0}}}, - { 0x01d1, {1, {0x01d2}}}, - { 0x01d3, {1, {0x01d4}}}, - { 0x01d5, {1, {0x01d6}}}, - { 0x01d7, {1, {0x01d8}}}, - { 0x01d9, {1, {0x01da}}}, - { 0x01db, {1, {0x01dc}}}, - { 0x01de, {1, {0x01df}}}, - { 0x01e0, {1, {0x01e1}}}, - { 0x01e2, {1, {0x01e3}}}, - { 0x01e4, {1, {0x01e5}}}, - { 0x01e6, {1, {0x01e7}}}, - { 0x01e8, {1, {0x01e9}}}, - { 0x01ea, {1, {0x01eb}}}, - { 0x01ec, {1, {0x01ed}}}, - { 0x01ee, {1, {0x01ef}}}, - { 0x01f0, {2, {0x006a, 0x030c}}}, - { 0x01f1, {1, {0x01f3}}}, - { 0x01f2, {1, {0x01f3}}}, - { 0x01f4, {1, {0x01f5}}}, - { 0x01f6, {1, {0x0195}}}, - { 0x01f7, {1, {0x01bf}}}, - { 0x01f8, {1, {0x01f9}}}, - { 0x01fa, {1, {0x01fb}}}, - { 0x01fc, {1, {0x01fd}}}, - { 0x01fe, {1, {0x01ff}}}, - { 0x0200, {1, {0x0201}}}, - { 0x0202, {1, {0x0203}}}, - { 0x0204, {1, {0x0205}}}, - { 0x0206, {1, {0x0207}}}, - { 0x0208, {1, {0x0209}}}, - { 0x020a, {1, {0x020b}}}, - { 0x020c, {1, {0x020d}}}, - { 0x020e, {1, {0x020f}}}, - { 0x0210, {1, {0x0211}}}, - { 0x0212, {1, {0x0213}}}, - { 0x0214, {1, {0x0215}}}, - { 0x0216, {1, {0x0217}}}, - { 0x0218, {1, {0x0219}}}, - { 0x021a, {1, {0x021b}}}, - { 0x021c, {1, {0x021d}}}, - { 0x021e, {1, {0x021f}}}, - { 0x0220, {1, {0x019e}}}, - { 0x0222, {1, {0x0223}}}, - { 0x0224, {1, {0x0225}}}, - { 0x0226, {1, {0x0227}}}, - { 0x0228, {1, {0x0229}}}, - { 0x022a, {1, {0x022b}}}, - { 0x022c, {1, {0x022d}}}, - { 0x022e, {1, {0x022f}}}, - { 0x0230, {1, {0x0231}}}, - { 0x0232, {1, {0x0233}}}, - { 0x023b, {1, 
{0x023c}}}, - { 0x023d, {1, {0x019a}}}, - { 0x0241, {1, {0x0294}}}, - { 0x0345, {1, {0x03b9}}}, - { 0x0386, {1, {0x03ac}}}, - { 0x0388, {1, {0x03ad}}}, - { 0x0389, {1, {0x03ae}}}, - { 0x038a, {1, {0x03af}}}, - { 0x038c, {1, {0x03cc}}}, - { 0x038e, {1, {0x03cd}}}, - { 0x038f, {1, {0x03ce}}}, - { 0x0390, {3, {0x03b9, 0x0308, 0x0301}}}, - { 0x0391, {1, {0x03b1}}}, - { 0x0392, {1, {0x03b2}}}, - { 0x0393, {1, {0x03b3}}}, - { 0x0394, {1, {0x03b4}}}, - { 0x0395, {1, {0x03b5}}}, - { 0x0396, {1, {0x03b6}}}, - { 0x0397, {1, {0x03b7}}}, - { 0x0398, {1, {0x03b8}}}, - { 0x0399, {1, {0x03b9}}}, - { 0x039a, {1, {0x03ba}}}, - { 0x039b, {1, {0x03bb}}}, - { 0x039c, {1, {0x03bc}}}, - { 0x039d, {1, {0x03bd}}}, - { 0x039e, {1, {0x03be}}}, - { 0x039f, {1, {0x03bf}}}, - { 0x03a0, {1, {0x03c0}}}, - { 0x03a1, {1, {0x03c1}}}, - { 0x03a3, {1, {0x03c3}}}, - { 0x03a4, {1, {0x03c4}}}, - { 0x03a5, {1, {0x03c5}}}, - { 0x03a6, {1, {0x03c6}}}, - { 0x03a7, {1, {0x03c7}}}, - { 0x03a8, {1, {0x03c8}}}, - { 0x03a9, {1, {0x03c9}}}, - { 0x03aa, {1, {0x03ca}}}, - { 0x03ab, {1, {0x03cb}}}, - { 0x03b0, {3, {0x03c5, 0x0308, 0x0301}}}, - { 0x03c2, {1, {0x03c3}}}, - { 0x03d0, {1, {0x03b2}}}, - { 0x03d1, {1, {0x03b8}}}, - { 0x03d5, {1, {0x03c6}}}, - { 0x03d6, {1, {0x03c0}}}, - { 0x03d8, {1, {0x03d9}}}, - { 0x03da, {1, {0x03db}}}, - { 0x03dc, {1, {0x03dd}}}, - { 0x03de, {1, {0x03df}}}, - { 0x03e0, {1, {0x03e1}}}, - { 0x03e2, {1, {0x03e3}}}, - { 0x03e4, {1, {0x03e5}}}, - { 0x03e6, {1, {0x03e7}}}, - { 0x03e8, {1, {0x03e9}}}, - { 0x03ea, {1, {0x03eb}}}, - { 0x03ec, {1, {0x03ed}}}, - { 0x03ee, {1, {0x03ef}}}, - { 0x03f0, {1, {0x03ba}}}, - { 0x03f1, {1, {0x03c1}}}, - { 0x03f4, {1, {0x03b8}}}, - { 0x03f5, {1, {0x03b5}}}, - { 0x03f7, {1, {0x03f8}}}, - { 0x03f9, {1, {0x03f2}}}, - { 0x03fa, {1, {0x03fb}}}, - { 0x0400, {1, {0x0450}}}, - { 0x0401, {1, {0x0451}}}, - { 0x0402, {1, {0x0452}}}, - { 0x0403, {1, {0x0453}}}, - { 0x0404, {1, {0x0454}}}, - { 0x0405, {1, {0x0455}}}, - { 0x0406, {1, {0x0456}}}, - { 0x0407, {1, 
{0x0457}}}, - { 0x0408, {1, {0x0458}}}, - { 0x0409, {1, {0x0459}}}, - { 0x040a, {1, {0x045a}}}, - { 0x040b, {1, {0x045b}}}, - { 0x040c, {1, {0x045c}}}, - { 0x040d, {1, {0x045d}}}, - { 0x040e, {1, {0x045e}}}, - { 0x040f, {1, {0x045f}}}, - { 0x0410, {1, {0x0430}}}, - { 0x0411, {1, {0x0431}}}, - { 0x0412, {1, {0x0432}}}, - { 0x0413, {1, {0x0433}}}, - { 0x0414, {1, {0x0434}}}, - { 0x0415, {1, {0x0435}}}, - { 0x0416, {1, {0x0436}}}, - { 0x0417, {1, {0x0437}}}, - { 0x0418, {1, {0x0438}}}, - { 0x0419, {1, {0x0439}}}, - { 0x041a, {1, {0x043a}}}, - { 0x041b, {1, {0x043b}}}, - { 0x041c, {1, {0x043c}}}, - { 0x041d, {1, {0x043d}}}, - { 0x041e, {1, {0x043e}}}, - { 0x041f, {1, {0x043f}}}, - { 0x0420, {1, {0x0440}}}, - { 0x0421, {1, {0x0441}}}, - { 0x0422, {1, {0x0442}}}, - { 0x0423, {1, {0x0443}}}, - { 0x0424, {1, {0x0444}}}, - { 0x0425, {1, {0x0445}}}, - { 0x0426, {1, {0x0446}}}, - { 0x0427, {1, {0x0447}}}, - { 0x0428, {1, {0x0448}}}, - { 0x0429, {1, {0x0449}}}, - { 0x042a, {1, {0x044a}}}, - { 0x042b, {1, {0x044b}}}, - { 0x042c, {1, {0x044c}}}, - { 0x042d, {1, {0x044d}}}, - { 0x042e, {1, {0x044e}}}, - { 0x042f, {1, {0x044f}}}, - { 0x0460, {1, {0x0461}}}, - { 0x0462, {1, {0x0463}}}, - { 0x0464, {1, {0x0465}}}, - { 0x0466, {1, {0x0467}}}, - { 0x0468, {1, {0x0469}}}, - { 0x046a, {1, {0x046b}}}, - { 0x046c, {1, {0x046d}}}, - { 0x046e, {1, {0x046f}}}, - { 0x0470, {1, {0x0471}}}, - { 0x0472, {1, {0x0473}}}, - { 0x0474, {1, {0x0475}}}, - { 0x0476, {1, {0x0477}}}, - { 0x0478, {1, {0x0479}}}, - { 0x047a, {1, {0x047b}}}, - { 0x047c, {1, {0x047d}}}, - { 0x047e, {1, {0x047f}}}, - { 0x0480, {1, {0x0481}}}, - { 0x048a, {1, {0x048b}}}, - { 0x048c, {1, {0x048d}}}, - { 0x048e, {1, {0x048f}}}, - { 0x0490, {1, {0x0491}}}, - { 0x0492, {1, {0x0493}}}, - { 0x0494, {1, {0x0495}}}, - { 0x0496, {1, {0x0497}}}, - { 0x0498, {1, {0x0499}}}, - { 0x049a, {1, {0x049b}}}, - { 0x049c, {1, {0x049d}}}, - { 0x049e, {1, {0x049f}}}, - { 0x04a0, {1, {0x04a1}}}, - { 0x04a2, {1, {0x04a3}}}, - { 0x04a4, {1, {0x04a5}}}, 
- { 0x04a6, {1, {0x04a7}}}, - { 0x04a8, {1, {0x04a9}}}, - { 0x04aa, {1, {0x04ab}}}, - { 0x04ac, {1, {0x04ad}}}, - { 0x04ae, {1, {0x04af}}}, - { 0x04b0, {1, {0x04b1}}}, - { 0x04b2, {1, {0x04b3}}}, - { 0x04b4, {1, {0x04b5}}}, - { 0x04b6, {1, {0x04b7}}}, - { 0x04b8, {1, {0x04b9}}}, - { 0x04ba, {1, {0x04bb}}}, - { 0x04bc, {1, {0x04bd}}}, - { 0x04be, {1, {0x04bf}}}, - { 0x04c1, {1, {0x04c2}}}, - { 0x04c3, {1, {0x04c4}}}, - { 0x04c5, {1, {0x04c6}}}, - { 0x04c7, {1, {0x04c8}}}, - { 0x04c9, {1, {0x04ca}}}, - { 0x04cb, {1, {0x04cc}}}, - { 0x04cd, {1, {0x04ce}}}, - { 0x04d0, {1, {0x04d1}}}, - { 0x04d2, {1, {0x04d3}}}, - { 0x04d4, {1, {0x04d5}}}, - { 0x04d6, {1, {0x04d7}}}, - { 0x04d8, {1, {0x04d9}}}, - { 0x04da, {1, {0x04db}}}, - { 0x04dc, {1, {0x04dd}}}, - { 0x04de, {1, {0x04df}}}, - { 0x04e0, {1, {0x04e1}}}, - { 0x04e2, {1, {0x04e3}}}, - { 0x04e4, {1, {0x04e5}}}, - { 0x04e6, {1, {0x04e7}}}, - { 0x04e8, {1, {0x04e9}}}, - { 0x04ea, {1, {0x04eb}}}, - { 0x04ec, {1, {0x04ed}}}, - { 0x04ee, {1, {0x04ef}}}, - { 0x04f0, {1, {0x04f1}}}, - { 0x04f2, {1, {0x04f3}}}, - { 0x04f4, {1, {0x04f5}}}, - { 0x04f6, {1, {0x04f7}}}, - { 0x04f8, {1, {0x04f9}}}, - { 0x0500, {1, {0x0501}}}, - { 0x0502, {1, {0x0503}}}, - { 0x0504, {1, {0x0505}}}, - { 0x0506, {1, {0x0507}}}, - { 0x0508, {1, {0x0509}}}, - { 0x050a, {1, {0x050b}}}, - { 0x050c, {1, {0x050d}}}, - { 0x050e, {1, {0x050f}}}, - { 0x0531, {1, {0x0561}}}, - { 0x0532, {1, {0x0562}}}, - { 0x0533, {1, {0x0563}}}, - { 0x0534, {1, {0x0564}}}, - { 0x0535, {1, {0x0565}}}, - { 0x0536, {1, {0x0566}}}, - { 0x0537, {1, {0x0567}}}, - { 0x0538, {1, {0x0568}}}, - { 0x0539, {1, {0x0569}}}, - { 0x053a, {1, {0x056a}}}, - { 0x053b, {1, {0x056b}}}, - { 0x053c, {1, {0x056c}}}, - { 0x053d, {1, {0x056d}}}, - { 0x053e, {1, {0x056e}}}, - { 0x053f, {1, {0x056f}}}, - { 0x0540, {1, {0x0570}}}, - { 0x0541, {1, {0x0571}}}, - { 0x0542, {1, {0x0572}}}, - { 0x0543, {1, {0x0573}}}, - { 0x0544, {1, {0x0574}}}, - { 0x0545, {1, {0x0575}}}, - { 0x0546, {1, {0x0576}}}, - { 0x0547, 
{1, {0x0577}}}, - { 0x0548, {1, {0x0578}}}, - { 0x0549, {1, {0x0579}}}, - { 0x054a, {1, {0x057a}}}, - { 0x054b, {1, {0x057b}}}, - { 0x054c, {1, {0x057c}}}, - { 0x054d, {1, {0x057d}}}, - { 0x054e, {1, {0x057e}}}, - { 0x054f, {1, {0x057f}}}, - { 0x0550, {1, {0x0580}}}, - { 0x0551, {1, {0x0581}}}, - { 0x0552, {1, {0x0582}}}, - { 0x0553, {1, {0x0583}}}, - { 0x0554, {1, {0x0584}}}, - { 0x0555, {1, {0x0585}}}, - { 0x0556, {1, {0x0586}}}, - { 0x0587, {2, {0x0565, 0x0582}}}, - { 0x10a0, {1, {0x2d00}}}, - { 0x10a1, {1, {0x2d01}}}, - { 0x10a2, {1, {0x2d02}}}, - { 0x10a3, {1, {0x2d03}}}, - { 0x10a4, {1, {0x2d04}}}, - { 0x10a5, {1, {0x2d05}}}, - { 0x10a6, {1, {0x2d06}}}, - { 0x10a7, {1, {0x2d07}}}, - { 0x10a8, {1, {0x2d08}}}, - { 0x10a9, {1, {0x2d09}}}, - { 0x10aa, {1, {0x2d0a}}}, - { 0x10ab, {1, {0x2d0b}}}, - { 0x10ac, {1, {0x2d0c}}}, - { 0x10ad, {1, {0x2d0d}}}, - { 0x10ae, {1, {0x2d0e}}}, - { 0x10af, {1, {0x2d0f}}}, - { 0x10b0, {1, {0x2d10}}}, - { 0x10b1, {1, {0x2d11}}}, - { 0x10b2, {1, {0x2d12}}}, - { 0x10b3, {1, {0x2d13}}}, - { 0x10b4, {1, {0x2d14}}}, - { 0x10b5, {1, {0x2d15}}}, - { 0x10b6, {1, {0x2d16}}}, - { 0x10b7, {1, {0x2d17}}}, - { 0x10b8, {1, {0x2d18}}}, - { 0x10b9, {1, {0x2d19}}}, - { 0x10ba, {1, {0x2d1a}}}, - { 0x10bb, {1, {0x2d1b}}}, - { 0x10bc, {1, {0x2d1c}}}, - { 0x10bd, {1, {0x2d1d}}}, - { 0x10be, {1, {0x2d1e}}}, - { 0x10bf, {1, {0x2d1f}}}, - { 0x10c0, {1, {0x2d20}}}, - { 0x10c1, {1, {0x2d21}}}, - { 0x10c2, {1, {0x2d22}}}, - { 0x10c3, {1, {0x2d23}}}, - { 0x10c4, {1, {0x2d24}}}, - { 0x10c5, {1, {0x2d25}}}, - { 0x1e00, {1, {0x1e01}}}, - { 0x1e02, {1, {0x1e03}}}, - { 0x1e04, {1, {0x1e05}}}, - { 0x1e06, {1, {0x1e07}}}, - { 0x1e08, {1, {0x1e09}}}, - { 0x1e0a, {1, {0x1e0b}}}, - { 0x1e0c, {1, {0x1e0d}}}, - { 0x1e0e, {1, {0x1e0f}}}, - { 0x1e10, {1, {0x1e11}}}, - { 0x1e12, {1, {0x1e13}}}, - { 0x1e14, {1, {0x1e15}}}, - { 0x1e16, {1, {0x1e17}}}, - { 0x1e18, {1, {0x1e19}}}, - { 0x1e1a, {1, {0x1e1b}}}, - { 0x1e1c, {1, {0x1e1d}}}, - { 0x1e1e, {1, {0x1e1f}}}, - { 0x1e20, {1, 
{0x1e21}}}, - { 0x1e22, {1, {0x1e23}}}, - { 0x1e24, {1, {0x1e25}}}, - { 0x1e26, {1, {0x1e27}}}, - { 0x1e28, {1, {0x1e29}}}, - { 0x1e2a, {1, {0x1e2b}}}, - { 0x1e2c, {1, {0x1e2d}}}, - { 0x1e2e, {1, {0x1e2f}}}, - { 0x1e30, {1, {0x1e31}}}, - { 0x1e32, {1, {0x1e33}}}, - { 0x1e34, {1, {0x1e35}}}, - { 0x1e36, {1, {0x1e37}}}, - { 0x1e38, {1, {0x1e39}}}, - { 0x1e3a, {1, {0x1e3b}}}, - { 0x1e3c, {1, {0x1e3d}}}, - { 0x1e3e, {1, {0x1e3f}}}, - { 0x1e40, {1, {0x1e41}}}, - { 0x1e42, {1, {0x1e43}}}, - { 0x1e44, {1, {0x1e45}}}, - { 0x1e46, {1, {0x1e47}}}, - { 0x1e48, {1, {0x1e49}}}, - { 0x1e4a, {1, {0x1e4b}}}, - { 0x1e4c, {1, {0x1e4d}}}, - { 0x1e4e, {1, {0x1e4f}}}, - { 0x1e50, {1, {0x1e51}}}, - { 0x1e52, {1, {0x1e53}}}, - { 0x1e54, {1, {0x1e55}}}, - { 0x1e56, {1, {0x1e57}}}, - { 0x1e58, {1, {0x1e59}}}, - { 0x1e5a, {1, {0x1e5b}}}, - { 0x1e5c, {1, {0x1e5d}}}, - { 0x1e5e, {1, {0x1e5f}}}, - { 0x1e60, {1, {0x1e61}}}, - { 0x1e62, {1, {0x1e63}}}, - { 0x1e64, {1, {0x1e65}}}, - { 0x1e66, {1, {0x1e67}}}, - { 0x1e68, {1, {0x1e69}}}, - { 0x1e6a, {1, {0x1e6b}}}, - { 0x1e6c, {1, {0x1e6d}}}, - { 0x1e6e, {1, {0x1e6f}}}, - { 0x1e70, {1, {0x1e71}}}, - { 0x1e72, {1, {0x1e73}}}, - { 0x1e74, {1, {0x1e75}}}, - { 0x1e76, {1, {0x1e77}}}, - { 0x1e78, {1, {0x1e79}}}, - { 0x1e7a, {1, {0x1e7b}}}, - { 0x1e7c, {1, {0x1e7d}}}, - { 0x1e7e, {1, {0x1e7f}}}, - { 0x1e80, {1, {0x1e81}}}, - { 0x1e82, {1, {0x1e83}}}, - { 0x1e84, {1, {0x1e85}}}, - { 0x1e86, {1, {0x1e87}}}, - { 0x1e88, {1, {0x1e89}}}, - { 0x1e8a, {1, {0x1e8b}}}, - { 0x1e8c, {1, {0x1e8d}}}, - { 0x1e8e, {1, {0x1e8f}}}, - { 0x1e90, {1, {0x1e91}}}, - { 0x1e92, {1, {0x1e93}}}, - { 0x1e94, {1, {0x1e95}}}, - { 0x1e96, {2, {0x0068, 0x0331}}}, - { 0x1e97, {2, {0x0074, 0x0308}}}, - { 0x1e98, {2, {0x0077, 0x030a}}}, - { 0x1e99, {2, {0x0079, 0x030a}}}, - { 0x1e9a, {2, {0x0061, 0x02be}}}, - { 0x1e9b, {1, {0x1e61}}}, - { 0x1ea0, {1, {0x1ea1}}}, - { 0x1ea2, {1, {0x1ea3}}}, - { 0x1ea4, {1, {0x1ea5}}}, - { 0x1ea6, {1, {0x1ea7}}}, - { 0x1ea8, {1, {0x1ea9}}}, - { 0x1eaa, {1, 
{0x1eab}}}, - { 0x1eac, {1, {0x1ead}}}, - { 0x1eae, {1, {0x1eaf}}}, - { 0x1eb0, {1, {0x1eb1}}}, - { 0x1eb2, {1, {0x1eb3}}}, - { 0x1eb4, {1, {0x1eb5}}}, - { 0x1eb6, {1, {0x1eb7}}}, - { 0x1eb8, {1, {0x1eb9}}}, - { 0x1eba, {1, {0x1ebb}}}, - { 0x1ebc, {1, {0x1ebd}}}, - { 0x1ebe, {1, {0x1ebf}}}, - { 0x1ec0, {1, {0x1ec1}}}, - { 0x1ec2, {1, {0x1ec3}}}, - { 0x1ec4, {1, {0x1ec5}}}, - { 0x1ec6, {1, {0x1ec7}}}, - { 0x1ec8, {1, {0x1ec9}}}, - { 0x1eca, {1, {0x1ecb}}}, - { 0x1ecc, {1, {0x1ecd}}}, - { 0x1ece, {1, {0x1ecf}}}, - { 0x1ed0, {1, {0x1ed1}}}, - { 0x1ed2, {1, {0x1ed3}}}, - { 0x1ed4, {1, {0x1ed5}}}, - { 0x1ed6, {1, {0x1ed7}}}, - { 0x1ed8, {1, {0x1ed9}}}, - { 0x1eda, {1, {0x1edb}}}, - { 0x1edc, {1, {0x1edd}}}, - { 0x1ede, {1, {0x1edf}}}, - { 0x1ee0, {1, {0x1ee1}}}, - { 0x1ee2, {1, {0x1ee3}}}, - { 0x1ee4, {1, {0x1ee5}}}, - { 0x1ee6, {1, {0x1ee7}}}, - { 0x1ee8, {1, {0x1ee9}}}, - { 0x1eea, {1, {0x1eeb}}}, - { 0x1eec, {1, {0x1eed}}}, - { 0x1eee, {1, {0x1eef}}}, - { 0x1ef0, {1, {0x1ef1}}}, - { 0x1ef2, {1, {0x1ef3}}}, - { 0x1ef4, {1, {0x1ef5}}}, - { 0x1ef6, {1, {0x1ef7}}}, - { 0x1ef8, {1, {0x1ef9}}}, - { 0x1f08, {1, {0x1f00}}}, - { 0x1f09, {1, {0x1f01}}}, - { 0x1f0a, {1, {0x1f02}}}, - { 0x1f0b, {1, {0x1f03}}}, - { 0x1f0c, {1, {0x1f04}}}, - { 0x1f0d, {1, {0x1f05}}}, - { 0x1f0e, {1, {0x1f06}}}, - { 0x1f0f, {1, {0x1f07}}}, - { 0x1f18, {1, {0x1f10}}}, - { 0x1f19, {1, {0x1f11}}}, - { 0x1f1a, {1, {0x1f12}}}, - { 0x1f1b, {1, {0x1f13}}}, - { 0x1f1c, {1, {0x1f14}}}, - { 0x1f1d, {1, {0x1f15}}}, - { 0x1f28, {1, {0x1f20}}}, - { 0x1f29, {1, {0x1f21}}}, - { 0x1f2a, {1, {0x1f22}}}, - { 0x1f2b, {1, {0x1f23}}}, - { 0x1f2c, {1, {0x1f24}}}, - { 0x1f2d, {1, {0x1f25}}}, - { 0x1f2e, {1, {0x1f26}}}, - { 0x1f2f, {1, {0x1f27}}}, - { 0x1f38, {1, {0x1f30}}}, - { 0x1f39, {1, {0x1f31}}}, - { 0x1f3a, {1, {0x1f32}}}, - { 0x1f3b, {1, {0x1f33}}}, - { 0x1f3c, {1, {0x1f34}}}, - { 0x1f3d, {1, {0x1f35}}}, - { 0x1f3e, {1, {0x1f36}}}, - { 0x1f3f, {1, {0x1f37}}}, - { 0x1f48, {1, {0x1f40}}}, - { 0x1f49, {1, {0x1f41}}}, 
- { 0x1f4a, {1, {0x1f42}}}, - { 0x1f4b, {1, {0x1f43}}}, - { 0x1f4c, {1, {0x1f44}}}, - { 0x1f4d, {1, {0x1f45}}}, - { 0x1f50, {2, {0x03c5, 0x0313}}}, - { 0x1f52, {3, {0x03c5, 0x0313, 0x0300}}}, - { 0x1f54, {3, {0x03c5, 0x0313, 0x0301}}}, - { 0x1f56, {3, {0x03c5, 0x0313, 0x0342}}}, - { 0x1f59, {1, {0x1f51}}}, - { 0x1f5b, {1, {0x1f53}}}, - { 0x1f5d, {1, {0x1f55}}}, - { 0x1f5f, {1, {0x1f57}}}, - { 0x1f68, {1, {0x1f60}}}, - { 0x1f69, {1, {0x1f61}}}, - { 0x1f6a, {1, {0x1f62}}}, - { 0x1f6b, {1, {0x1f63}}}, - { 0x1f6c, {1, {0x1f64}}}, - { 0x1f6d, {1, {0x1f65}}}, - { 0x1f6e, {1, {0x1f66}}}, - { 0x1f6f, {1, {0x1f67}}}, - { 0x1f80, {2, {0x1f00, 0x03b9}}}, - { 0x1f81, {2, {0x1f01, 0x03b9}}}, - { 0x1f82, {2, {0x1f02, 0x03b9}}}, - { 0x1f83, {2, {0x1f03, 0x03b9}}}, - { 0x1f84, {2, {0x1f04, 0x03b9}}}, - { 0x1f85, {2, {0x1f05, 0x03b9}}}, - { 0x1f86, {2, {0x1f06, 0x03b9}}}, - { 0x1f87, {2, {0x1f07, 0x03b9}}}, - { 0x1f88, {2, {0x1f00, 0x03b9}}}, - { 0x1f89, {2, {0x1f01, 0x03b9}}}, - { 0x1f8a, {2, {0x1f02, 0x03b9}}}, - { 0x1f8b, {2, {0x1f03, 0x03b9}}}, - { 0x1f8c, {2, {0x1f04, 0x03b9}}}, - { 0x1f8d, {2, {0x1f05, 0x03b9}}}, - { 0x1f8e, {2, {0x1f06, 0x03b9}}}, - { 0x1f8f, {2, {0x1f07, 0x03b9}}}, - { 0x1f90, {2, {0x1f20, 0x03b9}}}, - { 0x1f91, {2, {0x1f21, 0x03b9}}}, - { 0x1f92, {2, {0x1f22, 0x03b9}}}, - { 0x1f93, {2, {0x1f23, 0x03b9}}}, - { 0x1f94, {2, {0x1f24, 0x03b9}}}, - { 0x1f95, {2, {0x1f25, 0x03b9}}}, - { 0x1f96, {2, {0x1f26, 0x03b9}}}, - { 0x1f97, {2, {0x1f27, 0x03b9}}}, - { 0x1f98, {2, {0x1f20, 0x03b9}}}, - { 0x1f99, {2, {0x1f21, 0x03b9}}}, - { 0x1f9a, {2, {0x1f22, 0x03b9}}}, - { 0x1f9b, {2, {0x1f23, 0x03b9}}}, - { 0x1f9c, {2, {0x1f24, 0x03b9}}}, - { 0x1f9d, {2, {0x1f25, 0x03b9}}}, - { 0x1f9e, {2, {0x1f26, 0x03b9}}}, - { 0x1f9f, {2, {0x1f27, 0x03b9}}}, - { 0x1fa0, {2, {0x1f60, 0x03b9}}}, - { 0x1fa1, {2, {0x1f61, 0x03b9}}}, - { 0x1fa2, {2, {0x1f62, 0x03b9}}}, - { 0x1fa3, {2, {0x1f63, 0x03b9}}}, - { 0x1fa4, {2, {0x1f64, 0x03b9}}}, - { 0x1fa5, {2, {0x1f65, 0x03b9}}}, - { 0x1fa6, {2, 
{0x1f66, 0x03b9}}}, - { 0x1fa7, {2, {0x1f67, 0x03b9}}}, - { 0x1fa8, {2, {0x1f60, 0x03b9}}}, - { 0x1fa9, {2, {0x1f61, 0x03b9}}}, - { 0x1faa, {2, {0x1f62, 0x03b9}}}, - { 0x1fab, {2, {0x1f63, 0x03b9}}}, - { 0x1fac, {2, {0x1f64, 0x03b9}}}, - { 0x1fad, {2, {0x1f65, 0x03b9}}}, - { 0x1fae, {2, {0x1f66, 0x03b9}}}, - { 0x1faf, {2, {0x1f67, 0x03b9}}}, - { 0x1fb2, {2, {0x1f70, 0x03b9}}}, - { 0x1fb3, {2, {0x03b1, 0x03b9}}}, - { 0x1fb4, {2, {0x03ac, 0x03b9}}}, - { 0x1fb6, {2, {0x03b1, 0x0342}}}, - { 0x1fb7, {3, {0x03b1, 0x0342, 0x03b9}}}, - { 0x1fb8, {1, {0x1fb0}}}, - { 0x1fb9, {1, {0x1fb1}}}, - { 0x1fba, {1, {0x1f70}}}, - { 0x1fbb, {1, {0x1f71}}}, - { 0x1fbc, {2, {0x03b1, 0x03b9}}}, - { 0x1fbe, {1, {0x03b9}}}, - { 0x1fc2, {2, {0x1f74, 0x03b9}}}, - { 0x1fc3, {2, {0x03b7, 0x03b9}}}, - { 0x1fc4, {2, {0x03ae, 0x03b9}}}, - { 0x1fc6, {2, {0x03b7, 0x0342}}}, - { 0x1fc7, {3, {0x03b7, 0x0342, 0x03b9}}}, - { 0x1fc8, {1, {0x1f72}}}, - { 0x1fc9, {1, {0x1f73}}}, - { 0x1fca, {1, {0x1f74}}}, - { 0x1fcb, {1, {0x1f75}}}, - { 0x1fcc, {2, {0x03b7, 0x03b9}}}, - { 0x1fd2, {3, {0x03b9, 0x0308, 0x0300}}}, - { 0x1fd3, {3, {0x03b9, 0x0308, 0x0301}}}, - { 0x1fd6, {2, {0x03b9, 0x0342}}}, - { 0x1fd7, {3, {0x03b9, 0x0308, 0x0342}}}, - { 0x1fd8, {1, {0x1fd0}}}, - { 0x1fd9, {1, {0x1fd1}}}, - { 0x1fda, {1, {0x1f76}}}, - { 0x1fdb, {1, {0x1f77}}}, - { 0x1fe2, {3, {0x03c5, 0x0308, 0x0300}}}, - { 0x1fe3, {3, {0x03c5, 0x0308, 0x0301}}}, - { 0x1fe4, {2, {0x03c1, 0x0313}}}, - { 0x1fe6, {2, {0x03c5, 0x0342}}}, - { 0x1fe7, {3, {0x03c5, 0x0308, 0x0342}}}, - { 0x1fe8, {1, {0x1fe0}}}, - { 0x1fe9, {1, {0x1fe1}}}, - { 0x1fea, {1, {0x1f7a}}}, - { 0x1feb, {1, {0x1f7b}}}, - { 0x1fec, {1, {0x1fe5}}}, - { 0x1ff2, {2, {0x1f7c, 0x03b9}}}, - { 0x1ff3, {2, {0x03c9, 0x03b9}}}, - { 0x1ff4, {2, {0x03ce, 0x03b9}}}, - { 0x1ff6, {2, {0x03c9, 0x0342}}}, - { 0x1ff7, {3, {0x03c9, 0x0342, 0x03b9}}}, - { 0x1ff8, {1, {0x1f78}}}, - { 0x1ff9, {1, {0x1f79}}}, - { 0x1ffa, {1, {0x1f7c}}}, - { 0x1ffb, {1, {0x1f7d}}}, - { 0x1ffc, {2, {0x03c9, 
0x03b9}}}, - { 0x2126, {1, {0x03c9}}}, - { 0x212a, {1, {0x006b}}}, - { 0x212b, {1, {0x00e5}}}, - { 0x2160, {1, {0x2170}}}, - { 0x2161, {1, {0x2171}}}, - { 0x2162, {1, {0x2172}}}, - { 0x2163, {1, {0x2173}}}, - { 0x2164, {1, {0x2174}}}, - { 0x2165, {1, {0x2175}}}, - { 0x2166, {1, {0x2176}}}, - { 0x2167, {1, {0x2177}}}, - { 0x2168, {1, {0x2178}}}, - { 0x2169, {1, {0x2179}}}, - { 0x216a, {1, {0x217a}}}, - { 0x216b, {1, {0x217b}}}, - { 0x216c, {1, {0x217c}}}, - { 0x216d, {1, {0x217d}}}, - { 0x216e, {1, {0x217e}}}, - { 0x216f, {1, {0x217f}}}, - { 0x24b6, {1, {0x24d0}}}, - { 0x24b7, {1, {0x24d1}}}, - { 0x24b8, {1, {0x24d2}}}, - { 0x24b9, {1, {0x24d3}}}, - { 0x24ba, {1, {0x24d4}}}, - { 0x24bb, {1, {0x24d5}}}, - { 0x24bc, {1, {0x24d6}}}, - { 0x24bd, {1, {0x24d7}}}, - { 0x24be, {1, {0x24d8}}}, - { 0x24bf, {1, {0x24d9}}}, - { 0x24c0, {1, {0x24da}}}, - { 0x24c1, {1, {0x24db}}}, - { 0x24c2, {1, {0x24dc}}}, - { 0x24c3, {1, {0x24dd}}}, - { 0x24c4, {1, {0x24de}}}, - { 0x24c5, {1, {0x24df}}}, - { 0x24c6, {1, {0x24e0}}}, - { 0x24c7, {1, {0x24e1}}}, - { 0x24c8, {1, {0x24e2}}}, - { 0x24c9, {1, {0x24e3}}}, - { 0x24ca, {1, {0x24e4}}}, - { 0x24cb, {1, {0x24e5}}}, - { 0x24cc, {1, {0x24e6}}}, - { 0x24cd, {1, {0x24e7}}}, - { 0x24ce, {1, {0x24e8}}}, - { 0x24cf, {1, {0x24e9}}}, - { 0x2c00, {1, {0x2c30}}}, - { 0x2c01, {1, {0x2c31}}}, - { 0x2c02, {1, {0x2c32}}}, - { 0x2c03, {1, {0x2c33}}}, - { 0x2c04, {1, {0x2c34}}}, - { 0x2c05, {1, {0x2c35}}}, - { 0x2c06, {1, {0x2c36}}}, - { 0x2c07, {1, {0x2c37}}}, - { 0x2c08, {1, {0x2c38}}}, - { 0x2c09, {1, {0x2c39}}}, - { 0x2c0a, {1, {0x2c3a}}}, - { 0x2c0b, {1, {0x2c3b}}}, - { 0x2c0c, {1, {0x2c3c}}}, - { 0x2c0d, {1, {0x2c3d}}}, - { 0x2c0e, {1, {0x2c3e}}}, - { 0x2c0f, {1, {0x2c3f}}}, - { 0x2c10, {1, {0x2c40}}}, - { 0x2c11, {1, {0x2c41}}}, - { 0x2c12, {1, {0x2c42}}}, - { 0x2c13, {1, {0x2c43}}}, - { 0x2c14, {1, {0x2c44}}}, - { 0x2c15, {1, {0x2c45}}}, - { 0x2c16, {1, {0x2c46}}}, - { 0x2c17, {1, {0x2c47}}}, - { 0x2c18, {1, {0x2c48}}}, - { 0x2c19, {1, {0x2c49}}}, 
- { 0x2c1a, {1, {0x2c4a}}}, - { 0x2c1b, {1, {0x2c4b}}}, - { 0x2c1c, {1, {0x2c4c}}}, - { 0x2c1d, {1, {0x2c4d}}}, - { 0x2c1e, {1, {0x2c4e}}}, - { 0x2c1f, {1, {0x2c4f}}}, - { 0x2c20, {1, {0x2c50}}}, - { 0x2c21, {1, {0x2c51}}}, - { 0x2c22, {1, {0x2c52}}}, - { 0x2c23, {1, {0x2c53}}}, - { 0x2c24, {1, {0x2c54}}}, - { 0x2c25, {1, {0x2c55}}}, - { 0x2c26, {1, {0x2c56}}}, - { 0x2c27, {1, {0x2c57}}}, - { 0x2c28, {1, {0x2c58}}}, - { 0x2c29, {1, {0x2c59}}}, - { 0x2c2a, {1, {0x2c5a}}}, - { 0x2c2b, {1, {0x2c5b}}}, - { 0x2c2c, {1, {0x2c5c}}}, - { 0x2c2d, {1, {0x2c5d}}}, - { 0x2c2e, {1, {0x2c5e}}}, - { 0x2c80, {1, {0x2c81}}}, - { 0x2c82, {1, {0x2c83}}}, - { 0x2c84, {1, {0x2c85}}}, - { 0x2c86, {1, {0x2c87}}}, - { 0x2c88, {1, {0x2c89}}}, - { 0x2c8a, {1, {0x2c8b}}}, - { 0x2c8c, {1, {0x2c8d}}}, - { 0x2c8e, {1, {0x2c8f}}}, - { 0x2c90, {1, {0x2c91}}}, - { 0x2c92, {1, {0x2c93}}}, - { 0x2c94, {1, {0x2c95}}}, - { 0x2c96, {1, {0x2c97}}}, - { 0x2c98, {1, {0x2c99}}}, - { 0x2c9a, {1, {0x2c9b}}}, - { 0x2c9c, {1, {0x2c9d}}}, - { 0x2c9e, {1, {0x2c9f}}}, - { 0x2ca0, {1, {0x2ca1}}}, - { 0x2ca2, {1, {0x2ca3}}}, - { 0x2ca4, {1, {0x2ca5}}}, - { 0x2ca6, {1, {0x2ca7}}}, - { 0x2ca8, {1, {0x2ca9}}}, - { 0x2caa, {1, {0x2cab}}}, - { 0x2cac, {1, {0x2cad}}}, - { 0x2cae, {1, {0x2caf}}}, - { 0x2cb0, {1, {0x2cb1}}}, - { 0x2cb2, {1, {0x2cb3}}}, - { 0x2cb4, {1, {0x2cb5}}}, - { 0x2cb6, {1, {0x2cb7}}}, - { 0x2cb8, {1, {0x2cb9}}}, - { 0x2cba, {1, {0x2cbb}}}, - { 0x2cbc, {1, {0x2cbd}}}, - { 0x2cbe, {1, {0x2cbf}}}, - { 0x2cc0, {1, {0x2cc1}}}, - { 0x2cc2, {1, {0x2cc3}}}, - { 0x2cc4, {1, {0x2cc5}}}, - { 0x2cc6, {1, {0x2cc7}}}, - { 0x2cc8, {1, {0x2cc9}}}, - { 0x2cca, {1, {0x2ccb}}}, - { 0x2ccc, {1, {0x2ccd}}}, - { 0x2cce, {1, {0x2ccf}}}, - { 0x2cd0, {1, {0x2cd1}}}, - { 0x2cd2, {1, {0x2cd3}}}, - { 0x2cd4, {1, {0x2cd5}}}, - { 0x2cd6, {1, {0x2cd7}}}, - { 0x2cd8, {1, {0x2cd9}}}, - { 0x2cda, {1, {0x2cdb}}}, - { 0x2cdc, {1, {0x2cdd}}}, - { 0x2cde, {1, {0x2cdf}}}, - { 0x2ce0, {1, {0x2ce1}}}, - { 0x2ce2, {1, {0x2ce3}}}, - { 0xfb00, 
{2, {0x0066, 0x0066}}}, - { 0xfb01, {2, {0x0066, 0x0069}}}, - { 0xfb02, {2, {0x0066, 0x006c}}}, - { 0xfb03, {3, {0x0066, 0x0066, 0x0069}}}, - { 0xfb04, {3, {0x0066, 0x0066, 0x006c}}}, - { 0xfb05, {2, {0x0073, 0x0074}}}, - { 0xfb06, {2, {0x0073, 0x0074}}}, - { 0xfb13, {2, {0x0574, 0x0576}}}, - { 0xfb14, {2, {0x0574, 0x0565}}}, - { 0xfb15, {2, {0x0574, 0x056b}}}, - { 0xfb16, {2, {0x057e, 0x0576}}}, - { 0xfb17, {2, {0x0574, 0x056d}}}, - { 0xff21, {1, {0xff41}}}, - { 0xff22, {1, {0xff42}}}, - { 0xff23, {1, {0xff43}}}, - { 0xff24, {1, {0xff44}}}, - { 0xff25, {1, {0xff45}}}, - { 0xff26, {1, {0xff46}}}, - { 0xff27, {1, {0xff47}}}, - { 0xff28, {1, {0xff48}}}, - { 0xff29, {1, {0xff49}}}, - { 0xff2a, {1, {0xff4a}}}, - { 0xff2b, {1, {0xff4b}}}, - { 0xff2c, {1, {0xff4c}}}, - { 0xff2d, {1, {0xff4d}}}, - { 0xff2e, {1, {0xff4e}}}, - { 0xff2f, {1, {0xff4f}}}, - { 0xff30, {1, {0xff50}}}, - { 0xff31, {1, {0xff51}}}, - { 0xff32, {1, {0xff52}}}, - { 0xff33, {1, {0xff53}}}, - { 0xff34, {1, {0xff54}}}, - { 0xff35, {1, {0xff55}}}, - { 0xff36, {1, {0xff56}}}, - { 0xff37, {1, {0xff57}}}, - { 0xff38, {1, {0xff58}}}, - { 0xff39, {1, {0xff59}}}, - { 0xff3a, {1, {0xff5a}}}, - { 0x10400, {1, {0x10428}}}, - { 0x10401, {1, {0x10429}}}, - { 0x10402, {1, {0x1042a}}}, - { 0x10403, {1, {0x1042b}}}, - { 0x10404, {1, {0x1042c}}}, - { 0x10405, {1, {0x1042d}}}, - { 0x10406, {1, {0x1042e}}}, - { 0x10407, {1, {0x1042f}}}, - { 0x10408, {1, {0x10430}}}, - { 0x10409, {1, {0x10431}}}, - { 0x1040a, {1, {0x10432}}}, - { 0x1040b, {1, {0x10433}}}, - { 0x1040c, {1, {0x10434}}}, - { 0x1040d, {1, {0x10435}}}, - { 0x1040e, {1, {0x10436}}}, - { 0x1040f, {1, {0x10437}}}, - { 0x10410, {1, {0x10438}}}, - { 0x10411, {1, {0x10439}}}, - { 0x10412, {1, {0x1043a}}}, - { 0x10413, {1, {0x1043b}}}, - { 0x10414, {1, {0x1043c}}}, - { 0x10415, {1, {0x1043d}}}, - { 0x10416, {1, {0x1043e}}}, - { 0x10417, {1, {0x1043f}}}, - { 0x10418, {1, {0x10440}}}, - { 0x10419, {1, {0x10441}}}, - { 0x1041a, {1, {0x10442}}}, - { 0x1041b, {1, 
{0x10443}}}, - { 0x1041c, {1, {0x10444}}}, - { 0x1041d, {1, {0x10445}}}, - { 0x1041e, {1, {0x10446}}}, - { 0x1041f, {1, {0x10447}}}, - { 0x10420, {1, {0x10448}}}, - { 0x10421, {1, {0x10449}}}, - { 0x10422, {1, {0x1044a}}}, - { 0x10423, {1, {0x1044b}}}, - { 0x10424, {1, {0x1044c}}}, - { 0x10425, {1, {0x1044d}}}, - { 0x10426, {1, {0x1044e}}}, - { 0x10427, {1, {0x1044f}}} -}; - -static const CaseFold_11_Type CaseFold_Locale[] = { - { 0x0049, {1, {0x0069}}}, - { 0x0130, {2, {0x0069, 0x0307}}} -}; - -static const CaseUnfold_11_Type CaseUnfold_11[] = { - { 0x0061, {1, {0x0041 }}}, - { 0x0062, {1, {0x0042 }}}, - { 0x0063, {1, {0x0043 }}}, - { 0x0064, {1, {0x0044 }}}, - { 0x0065, {1, {0x0045 }}}, - { 0x0066, {1, {0x0046 }}}, - { 0x0067, {1, {0x0047 }}}, - { 0x0068, {1, {0x0048 }}}, - { 0x006a, {1, {0x004a }}}, - { 0x006b, {2, {0x212a, 0x004b }}}, - { 0x006c, {1, {0x004c }}}, - { 0x006d, {1, {0x004d }}}, - { 0x006e, {1, {0x004e }}}, - { 0x006f, {1, {0x004f }}}, - { 0x0070, {1, {0x0050 }}}, - { 0x0071, {1, {0x0051 }}}, - { 0x0072, {1, {0x0052 }}}, - { 0x0073, {2, {0x0053, 0x017f }}}, - { 0x0074, {1, {0x0054 }}}, - { 0x0075, {1, {0x0055 }}}, - { 0x0076, {1, {0x0056 }}}, - { 0x0077, {1, {0x0057 }}}, - { 0x0078, {1, {0x0058 }}}, - { 0x0079, {1, {0x0059 }}}, - { 0x007a, {1, {0x005a }}}, - { 0x00e0, {1, {0x00c0 }}}, - { 0x00e1, {1, {0x00c1 }}}, - { 0x00e2, {1, {0x00c2 }}}, - { 0x00e3, {1, {0x00c3 }}}, - { 0x00e4, {1, {0x00c4 }}}, - { 0x00e5, {2, {0x212b, 0x00c5 }}}, - { 0x00e6, {1, {0x00c6 }}}, - { 0x00e7, {1, {0x00c7 }}}, - { 0x00e8, {1, {0x00c8 }}}, - { 0x00e9, {1, {0x00c9 }}}, - { 0x00ea, {1, {0x00ca }}}, - { 0x00eb, {1, {0x00cb }}}, - { 0x00ec, {1, {0x00cc }}}, - { 0x00ed, {1, {0x00cd }}}, - { 0x00ee, {1, {0x00ce }}}, - { 0x00ef, {1, {0x00cf }}}, - { 0x00f0, {1, {0x00d0 }}}, - { 0x00f1, {1, {0x00d1 }}}, - { 0x00f2, {1, {0x00d2 }}}, - { 0x00f3, {1, {0x00d3 }}}, - { 0x00f4, {1, {0x00d4 }}}, - { 0x00f5, {1, {0x00d5 }}}, - { 0x00f6, {1, {0x00d6 }}}, - { 0x00f8, {1, {0x00d8 }}}, - 
{ 0x00f9, {1, {0x00d9 }}}, - { 0x00fa, {1, {0x00da }}}, - { 0x00fb, {1, {0x00db }}}, - { 0x00fc, {1, {0x00dc }}}, - { 0x00fd, {1, {0x00dd }}}, - { 0x00fe, {1, {0x00de }}}, - { 0x00ff, {1, {0x0178 }}}, - { 0x0101, {1, {0x0100 }}}, - { 0x0103, {1, {0x0102 }}}, - { 0x0105, {1, {0x0104 }}}, - { 0x0107, {1, {0x0106 }}}, - { 0x0109, {1, {0x0108 }}}, - { 0x010b, {1, {0x010a }}}, - { 0x010d, {1, {0x010c }}}, - { 0x010f, {1, {0x010e }}}, - { 0x0111, {1, {0x0110 }}}, - { 0x0113, {1, {0x0112 }}}, - { 0x0115, {1, {0x0114 }}}, - { 0x0117, {1, {0x0116 }}}, - { 0x0119, {1, {0x0118 }}}, - { 0x011b, {1, {0x011a }}}, - { 0x011d, {1, {0x011c }}}, - { 0x011f, {1, {0x011e }}}, - { 0x0121, {1, {0x0120 }}}, - { 0x0123, {1, {0x0122 }}}, - { 0x0125, {1, {0x0124 }}}, - { 0x0127, {1, {0x0126 }}}, - { 0x0129, {1, {0x0128 }}}, - { 0x012b, {1, {0x012a }}}, - { 0x012d, {1, {0x012c }}}, - { 0x012f, {1, {0x012e }}}, - { 0x0133, {1, {0x0132 }}}, - { 0x0135, {1, {0x0134 }}}, - { 0x0137, {1, {0x0136 }}}, - { 0x013a, {1, {0x0139 }}}, - { 0x013c, {1, {0x013b }}}, - { 0x013e, {1, {0x013d }}}, - { 0x0140, {1, {0x013f }}}, - { 0x0142, {1, {0x0141 }}}, - { 0x0144, {1, {0x0143 }}}, - { 0x0146, {1, {0x0145 }}}, - { 0x0148, {1, {0x0147 }}}, - { 0x014b, {1, {0x014a }}}, - { 0x014d, {1, {0x014c }}}, - { 0x014f, {1, {0x014e }}}, - { 0x0151, {1, {0x0150 }}}, - { 0x0153, {1, {0x0152 }}}, - { 0x0155, {1, {0x0154 }}}, - { 0x0157, {1, {0x0156 }}}, - { 0x0159, {1, {0x0158 }}}, - { 0x015b, {1, {0x015a }}}, - { 0x015d, {1, {0x015c }}}, - { 0x015f, {1, {0x015e }}}, - { 0x0161, {1, {0x0160 }}}, - { 0x0163, {1, {0x0162 }}}, - { 0x0165, {1, {0x0164 }}}, - { 0x0167, {1, {0x0166 }}}, - { 0x0169, {1, {0x0168 }}}, - { 0x016b, {1, {0x016a }}}, - { 0x016d, {1, {0x016c }}}, - { 0x016f, {1, {0x016e }}}, - { 0x0171, {1, {0x0170 }}}, - { 0x0173, {1, {0x0172 }}}, - { 0x0175, {1, {0x0174 }}}, - { 0x0177, {1, {0x0176 }}}, - { 0x017a, {1, {0x0179 }}}, - { 0x017c, {1, {0x017b }}}, - { 0x017e, {1, {0x017d }}}, - { 0x0183, {1, {0x0182 }}}, 
- { 0x0185, {1, {0x0184 }}}, - { 0x0188, {1, {0x0187 }}}, - { 0x018c, {1, {0x018b }}}, - { 0x0192, {1, {0x0191 }}}, - { 0x0195, {1, {0x01f6 }}}, - { 0x0199, {1, {0x0198 }}}, - { 0x019a, {1, {0x023d }}}, - { 0x019e, {1, {0x0220 }}}, - { 0x01a1, {1, {0x01a0 }}}, - { 0x01a3, {1, {0x01a2 }}}, - { 0x01a5, {1, {0x01a4 }}}, - { 0x01a8, {1, {0x01a7 }}}, - { 0x01ad, {1, {0x01ac }}}, - { 0x01b0, {1, {0x01af }}}, - { 0x01b4, {1, {0x01b3 }}}, - { 0x01b6, {1, {0x01b5 }}}, - { 0x01b9, {1, {0x01b8 }}}, - { 0x01bd, {1, {0x01bc }}}, - { 0x01bf, {1, {0x01f7 }}}, - { 0x01c6, {2, {0x01c4, 0x01c5 }}}, - { 0x01c9, {2, {0x01c7, 0x01c8 }}}, - { 0x01cc, {2, {0x01ca, 0x01cb }}}, - { 0x01ce, {1, {0x01cd }}}, - { 0x01d0, {1, {0x01cf }}}, - { 0x01d2, {1, {0x01d1 }}}, - { 0x01d4, {1, {0x01d3 }}}, - { 0x01d6, {1, {0x01d5 }}}, - { 0x01d8, {1, {0x01d7 }}}, - { 0x01da, {1, {0x01d9 }}}, - { 0x01dc, {1, {0x01db }}}, - { 0x01dd, {1, {0x018e }}}, - { 0x01df, {1, {0x01de }}}, - { 0x01e1, {1, {0x01e0 }}}, - { 0x01e3, {1, {0x01e2 }}}, - { 0x01e5, {1, {0x01e4 }}}, - { 0x01e7, {1, {0x01e6 }}}, - { 0x01e9, {1, {0x01e8 }}}, - { 0x01eb, {1, {0x01ea }}}, - { 0x01ed, {1, {0x01ec }}}, - { 0x01ef, {1, {0x01ee }}}, - { 0x01f3, {2, {0x01f1, 0x01f2 }}}, - { 0x01f5, {1, {0x01f4 }}}, - { 0x01f9, {1, {0x01f8 }}}, - { 0x01fb, {1, {0x01fa }}}, - { 0x01fd, {1, {0x01fc }}}, - { 0x01ff, {1, {0x01fe }}}, - { 0x0201, {1, {0x0200 }}}, - { 0x0203, {1, {0x0202 }}}, - { 0x0205, {1, {0x0204 }}}, - { 0x0207, {1, {0x0206 }}}, - { 0x0209, {1, {0x0208 }}}, - { 0x020b, {1, {0x020a }}}, - { 0x020d, {1, {0x020c }}}, - { 0x020f, {1, {0x020e }}}, - { 0x0211, {1, {0x0210 }}}, - { 0x0213, {1, {0x0212 }}}, - { 0x0215, {1, {0x0214 }}}, - { 0x0217, {1, {0x0216 }}}, - { 0x0219, {1, {0x0218 }}}, - { 0x021b, {1, {0x021a }}}, - { 0x021d, {1, {0x021c }}}, - { 0x021f, {1, {0x021e }}}, - { 0x0223, {1, {0x0222 }}}, - { 0x0225, {1, {0x0224 }}}, - { 0x0227, {1, {0x0226 }}}, - { 0x0229, {1, {0x0228 }}}, - { 0x022b, {1, {0x022a }}}, - { 0x022d, {1, {0x022c 
}}}, - { 0x022f, {1, {0x022e }}}, - { 0x0231, {1, {0x0230 }}}, - { 0x0233, {1, {0x0232 }}}, - { 0x023c, {1, {0x023b }}}, - { 0x0253, {1, {0x0181 }}}, - { 0x0254, {1, {0x0186 }}}, - { 0x0256, {1, {0x0189 }}}, - { 0x0257, {1, {0x018a }}}, - { 0x0259, {1, {0x018f }}}, - { 0x025b, {1, {0x0190 }}}, - { 0x0260, {1, {0x0193 }}}, - { 0x0263, {1, {0x0194 }}}, - { 0x0268, {1, {0x0197 }}}, - { 0x0269, {1, {0x0196 }}}, - { 0x026f, {1, {0x019c }}}, - { 0x0272, {1, {0x019d }}}, - { 0x0275, {1, {0x019f }}}, - { 0x0280, {1, {0x01a6 }}}, - { 0x0283, {1, {0x01a9 }}}, - { 0x0288, {1, {0x01ae }}}, - { 0x028a, {1, {0x01b1 }}}, - { 0x028b, {1, {0x01b2 }}}, - { 0x0292, {1, {0x01b7 }}}, - { 0x0294, {1, {0x0241 }}}, - { 0x03ac, {1, {0x0386 }}}, - { 0x03ad, {1, {0x0388 }}}, - { 0x03ae, {1, {0x0389 }}}, - { 0x03af, {1, {0x038a }}}, - { 0x03b1, {1, {0x0391 }}}, - { 0x03b2, {2, {0x0392, 0x03d0 }}}, - { 0x03b3, {1, {0x0393 }}}, - { 0x03b4, {1, {0x0394 }}}, - { 0x03b5, {2, {0x03f5, 0x0395 }}}, - { 0x03b6, {1, {0x0396 }}}, - { 0x03b7, {1, {0x0397 }}}, - { 0x03b8, {3, {0x03f4, 0x0398, 0x03d1 }}}, - { 0x03b9, {3, {0x1fbe, 0x0399, 0x0345 }}}, - { 0x03ba, {2, {0x03f0, 0x039a }}}, - { 0x03bb, {1, {0x039b }}}, - { 0x03bc, {2, {0x00b5, 0x039c }}}, - { 0x03bd, {1, {0x039d }}}, - { 0x03be, {1, {0x039e }}}, - { 0x03bf, {1, {0x039f }}}, - { 0x03c0, {2, {0x03a0, 0x03d6 }}}, - { 0x03c1, {2, {0x03f1, 0x03a1 }}}, - { 0x03c3, {2, {0x03a3, 0x03c2 }}}, - { 0x03c4, {1, {0x03a4 }}}, - { 0x03c5, {1, {0x03a5 }}}, - { 0x03c6, {2, {0x03a6, 0x03d5 }}}, - { 0x03c7, {1, {0x03a7 }}}, - { 0x03c8, {1, {0x03a8 }}}, - { 0x03c9, {2, {0x03a9, 0x2126 }}}, - { 0x03ca, {1, {0x03aa }}}, - { 0x03cb, {1, {0x03ab }}}, - { 0x03cc, {1, {0x038c }}}, - { 0x03cd, {1, {0x038e }}}, - { 0x03ce, {1, {0x038f }}}, - { 0x03d9, {1, {0x03d8 }}}, - { 0x03db, {1, {0x03da }}}, - { 0x03dd, {1, {0x03dc }}}, - { 0x03df, {1, {0x03de }}}, - { 0x03e1, {1, {0x03e0 }}}, - { 0x03e3, {1, {0x03e2 }}}, - { 0x03e5, {1, {0x03e4 }}}, - { 0x03e7, {1, {0x03e6 }}}, - { 
0x03e9, {1, {0x03e8 }}}, - { 0x03eb, {1, {0x03ea }}}, - { 0x03ed, {1, {0x03ec }}}, - { 0x03ef, {1, {0x03ee }}}, - { 0x03f2, {1, {0x03f9 }}}, - { 0x03f8, {1, {0x03f7 }}}, - { 0x03fb, {1, {0x03fa }}}, - { 0x0430, {1, {0x0410 }}}, - { 0x0431, {1, {0x0411 }}}, - { 0x0432, {1, {0x0412 }}}, - { 0x0433, {1, {0x0413 }}}, - { 0x0434, {1, {0x0414 }}}, - { 0x0435, {1, {0x0415 }}}, - { 0x0436, {1, {0x0416 }}}, - { 0x0437, {1, {0x0417 }}}, - { 0x0438, {1, {0x0418 }}}, - { 0x0439, {1, {0x0419 }}}, - { 0x043a, {1, {0x041a }}}, - { 0x043b, {1, {0x041b }}}, - { 0x043c, {1, {0x041c }}}, - { 0x043d, {1, {0x041d }}}, - { 0x043e, {1, {0x041e }}}, - { 0x043f, {1, {0x041f }}}, - { 0x0440, {1, {0x0420 }}}, - { 0x0441, {1, {0x0421 }}}, - { 0x0442, {1, {0x0422 }}}, - { 0x0443, {1, {0x0423 }}}, - { 0x0444, {1, {0x0424 }}}, - { 0x0445, {1, {0x0425 }}}, - { 0x0446, {1, {0x0426 }}}, - { 0x0447, {1, {0x0427 }}}, - { 0x0448, {1, {0x0428 }}}, - { 0x0449, {1, {0x0429 }}}, - { 0x044a, {1, {0x042a }}}, - { 0x044b, {1, {0x042b }}}, - { 0x044c, {1, {0x042c }}}, - { 0x044d, {1, {0x042d }}}, - { 0x044e, {1, {0x042e }}}, - { 0x044f, {1, {0x042f }}}, - { 0x0450, {1, {0x0400 }}}, - { 0x0451, {1, {0x0401 }}}, - { 0x0452, {1, {0x0402 }}}, - { 0x0453, {1, {0x0403 }}}, - { 0x0454, {1, {0x0404 }}}, - { 0x0455, {1, {0x0405 }}}, - { 0x0456, {1, {0x0406 }}}, - { 0x0457, {1, {0x0407 }}}, - { 0x0458, {1, {0x0408 }}}, - { 0x0459, {1, {0x0409 }}}, - { 0x045a, {1, {0x040a }}}, - { 0x045b, {1, {0x040b }}}, - { 0x045c, {1, {0x040c }}}, - { 0x045d, {1, {0x040d }}}, - { 0x045e, {1, {0x040e }}}, - { 0x045f, {1, {0x040f }}}, - { 0x0461, {1, {0x0460 }}}, - { 0x0463, {1, {0x0462 }}}, - { 0x0465, {1, {0x0464 }}}, - { 0x0467, {1, {0x0466 }}}, - { 0x0469, {1, {0x0468 }}}, - { 0x046b, {1, {0x046a }}}, - { 0x046d, {1, {0x046c }}}, - { 0x046f, {1, {0x046e }}}, - { 0x0471, {1, {0x0470 }}}, - { 0x0473, {1, {0x0472 }}}, - { 0x0475, {1, {0x0474 }}}, - { 0x0477, {1, {0x0476 }}}, - { 0x0479, {1, {0x0478 }}}, - { 0x047b, {1, {0x047a }}}, - 
{ 0x047d, {1, {0x047c }}}, - { 0x047f, {1, {0x047e }}}, - { 0x0481, {1, {0x0480 }}}, - { 0x048b, {1, {0x048a }}}, - { 0x048d, {1, {0x048c }}}, - { 0x048f, {1, {0x048e }}}, - { 0x0491, {1, {0x0490 }}}, - { 0x0493, {1, {0x0492 }}}, - { 0x0495, {1, {0x0494 }}}, - { 0x0497, {1, {0x0496 }}}, - { 0x0499, {1, {0x0498 }}}, - { 0x049b, {1, {0x049a }}}, - { 0x049d, {1, {0x049c }}}, - { 0x049f, {1, {0x049e }}}, - { 0x04a1, {1, {0x04a0 }}}, - { 0x04a3, {1, {0x04a2 }}}, - { 0x04a5, {1, {0x04a4 }}}, - { 0x04a7, {1, {0x04a6 }}}, - { 0x04a9, {1, {0x04a8 }}}, - { 0x04ab, {1, {0x04aa }}}, - { 0x04ad, {1, {0x04ac }}}, - { 0x04af, {1, {0x04ae }}}, - { 0x04b1, {1, {0x04b0 }}}, - { 0x04b3, {1, {0x04b2 }}}, - { 0x04b5, {1, {0x04b4 }}}, - { 0x04b7, {1, {0x04b6 }}}, - { 0x04b9, {1, {0x04b8 }}}, - { 0x04bb, {1, {0x04ba }}}, - { 0x04bd, {1, {0x04bc }}}, - { 0x04bf, {1, {0x04be }}}, - { 0x04c2, {1, {0x04c1 }}}, - { 0x04c4, {1, {0x04c3 }}}, - { 0x04c6, {1, {0x04c5 }}}, - { 0x04c8, {1, {0x04c7 }}}, - { 0x04ca, {1, {0x04c9 }}}, - { 0x04cc, {1, {0x04cb }}}, - { 0x04ce, {1, {0x04cd }}}, - { 0x04d1, {1, {0x04d0 }}}, - { 0x04d3, {1, {0x04d2 }}}, - { 0x04d5, {1, {0x04d4 }}}, - { 0x04d7, {1, {0x04d6 }}}, - { 0x04d9, {1, {0x04d8 }}}, - { 0x04db, {1, {0x04da }}}, - { 0x04dd, {1, {0x04dc }}}, - { 0x04df, {1, {0x04de }}}, - { 0x04e1, {1, {0x04e0 }}}, - { 0x04e3, {1, {0x04e2 }}}, - { 0x04e5, {1, {0x04e4 }}}, - { 0x04e7, {1, {0x04e6 }}}, - { 0x04e9, {1, {0x04e8 }}}, - { 0x04eb, {1, {0x04ea }}}, - { 0x04ed, {1, {0x04ec }}}, - { 0x04ef, {1, {0x04ee }}}, - { 0x04f1, {1, {0x04f0 }}}, - { 0x04f3, {1, {0x04f2 }}}, - { 0x04f5, {1, {0x04f4 }}}, - { 0x04f7, {1, {0x04f6 }}}, - { 0x04f9, {1, {0x04f8 }}}, - { 0x0501, {1, {0x0500 }}}, - { 0x0503, {1, {0x0502 }}}, - { 0x0505, {1, {0x0504 }}}, - { 0x0507, {1, {0x0506 }}}, - { 0x0509, {1, {0x0508 }}}, - { 0x050b, {1, {0x050a }}}, - { 0x050d, {1, {0x050c }}}, - { 0x050f, {1, {0x050e }}}, - { 0x0561, {1, {0x0531 }}}, - { 0x0562, {1, {0x0532 }}}, - { 0x0563, {1, {0x0533 }}}, 
- { 0x0564, {1, {0x0534 }}}, - { 0x0565, {1, {0x0535 }}}, - { 0x0566, {1, {0x0536 }}}, - { 0x0567, {1, {0x0537 }}}, - { 0x0568, {1, {0x0538 }}}, - { 0x0569, {1, {0x0539 }}}, - { 0x056a, {1, {0x053a }}}, - { 0x056b, {1, {0x053b }}}, - { 0x056c, {1, {0x053c }}}, - { 0x056d, {1, {0x053d }}}, - { 0x056e, {1, {0x053e }}}, - { 0x056f, {1, {0x053f }}}, - { 0x0570, {1, {0x0540 }}}, - { 0x0571, {1, {0x0541 }}}, - { 0x0572, {1, {0x0542 }}}, - { 0x0573, {1, {0x0543 }}}, - { 0x0574, {1, {0x0544 }}}, - { 0x0575, {1, {0x0545 }}}, - { 0x0576, {1, {0x0546 }}}, - { 0x0577, {1, {0x0547 }}}, - { 0x0578, {1, {0x0548 }}}, - { 0x0579, {1, {0x0549 }}}, - { 0x057a, {1, {0x054a }}}, - { 0x057b, {1, {0x054b }}}, - { 0x057c, {1, {0x054c }}}, - { 0x057d, {1, {0x054d }}}, - { 0x057e, {1, {0x054e }}}, - { 0x057f, {1, {0x054f }}}, - { 0x0580, {1, {0x0550 }}}, - { 0x0581, {1, {0x0551 }}}, - { 0x0582, {1, {0x0552 }}}, - { 0x0583, {1, {0x0553 }}}, - { 0x0584, {1, {0x0554 }}}, - { 0x0585, {1, {0x0555 }}}, - { 0x0586, {1, {0x0556 }}}, - { 0x1e01, {1, {0x1e00 }}}, - { 0x1e03, {1, {0x1e02 }}}, - { 0x1e05, {1, {0x1e04 }}}, - { 0x1e07, {1, {0x1e06 }}}, - { 0x1e09, {1, {0x1e08 }}}, - { 0x1e0b, {1, {0x1e0a }}}, - { 0x1e0d, {1, {0x1e0c }}}, - { 0x1e0f, {1, {0x1e0e }}}, - { 0x1e11, {1, {0x1e10 }}}, - { 0x1e13, {1, {0x1e12 }}}, - { 0x1e15, {1, {0x1e14 }}}, - { 0x1e17, {1, {0x1e16 }}}, - { 0x1e19, {1, {0x1e18 }}}, - { 0x1e1b, {1, {0x1e1a }}}, - { 0x1e1d, {1, {0x1e1c }}}, - { 0x1e1f, {1, {0x1e1e }}}, - { 0x1e21, {1, {0x1e20 }}}, - { 0x1e23, {1, {0x1e22 }}}, - { 0x1e25, {1, {0x1e24 }}}, - { 0x1e27, {1, {0x1e26 }}}, - { 0x1e29, {1, {0x1e28 }}}, - { 0x1e2b, {1, {0x1e2a }}}, - { 0x1e2d, {1, {0x1e2c }}}, - { 0x1e2f, {1, {0x1e2e }}}, - { 0x1e31, {1, {0x1e30 }}}, - { 0x1e33, {1, {0x1e32 }}}, - { 0x1e35, {1, {0x1e34 }}}, - { 0x1e37, {1, {0x1e36 }}}, - { 0x1e39, {1, {0x1e38 }}}, - { 0x1e3b, {1, {0x1e3a }}}, - { 0x1e3d, {1, {0x1e3c }}}, - { 0x1e3f, {1, {0x1e3e }}}, - { 0x1e41, {1, {0x1e40 }}}, - { 0x1e43, {1, {0x1e42 
}}}, - { 0x1e45, {1, {0x1e44 }}}, - { 0x1e47, {1, {0x1e46 }}}, - { 0x1e49, {1, {0x1e48 }}}, - { 0x1e4b, {1, {0x1e4a }}}, - { 0x1e4d, {1, {0x1e4c }}}, - { 0x1e4f, {1, {0x1e4e }}}, - { 0x1e51, {1, {0x1e50 }}}, - { 0x1e53, {1, {0x1e52 }}}, - { 0x1e55, {1, {0x1e54 }}}, - { 0x1e57, {1, {0x1e56 }}}, - { 0x1e59, {1, {0x1e58 }}}, - { 0x1e5b, {1, {0x1e5a }}}, - { 0x1e5d, {1, {0x1e5c }}}, - { 0x1e5f, {1, {0x1e5e }}}, - { 0x1e61, {2, {0x1e9b, 0x1e60 }}}, - { 0x1e63, {1, {0x1e62 }}}, - { 0x1e65, {1, {0x1e64 }}}, - { 0x1e67, {1, {0x1e66 }}}, - { 0x1e69, {1, {0x1e68 }}}, - { 0x1e6b, {1, {0x1e6a }}}, - { 0x1e6d, {1, {0x1e6c }}}, - { 0x1e6f, {1, {0x1e6e }}}, - { 0x1e71, {1, {0x1e70 }}}, - { 0x1e73, {1, {0x1e72 }}}, - { 0x1e75, {1, {0x1e74 }}}, - { 0x1e77, {1, {0x1e76 }}}, - { 0x1e79, {1, {0x1e78 }}}, - { 0x1e7b, {1, {0x1e7a }}}, - { 0x1e7d, {1, {0x1e7c }}}, - { 0x1e7f, {1, {0x1e7e }}}, - { 0x1e81, {1, {0x1e80 }}}, - { 0x1e83, {1, {0x1e82 }}}, - { 0x1e85, {1, {0x1e84 }}}, - { 0x1e87, {1, {0x1e86 }}}, - { 0x1e89, {1, {0x1e88 }}}, - { 0x1e8b, {1, {0x1e8a }}}, - { 0x1e8d, {1, {0x1e8c }}}, - { 0x1e8f, {1, {0x1e8e }}}, - { 0x1e91, {1, {0x1e90 }}}, - { 0x1e93, {1, {0x1e92 }}}, - { 0x1e95, {1, {0x1e94 }}}, - { 0x1ea1, {1, {0x1ea0 }}}, - { 0x1ea3, {1, {0x1ea2 }}}, - { 0x1ea5, {1, {0x1ea4 }}}, - { 0x1ea7, {1, {0x1ea6 }}}, - { 0x1ea9, {1, {0x1ea8 }}}, - { 0x1eab, {1, {0x1eaa }}}, - { 0x1ead, {1, {0x1eac }}}, - { 0x1eaf, {1, {0x1eae }}}, - { 0x1eb1, {1, {0x1eb0 }}}, - { 0x1eb3, {1, {0x1eb2 }}}, - { 0x1eb5, {1, {0x1eb4 }}}, - { 0x1eb7, {1, {0x1eb6 }}}, - { 0x1eb9, {1, {0x1eb8 }}}, - { 0x1ebb, {1, {0x1eba }}}, - { 0x1ebd, {1, {0x1ebc }}}, - { 0x1ebf, {1, {0x1ebe }}}, - { 0x1ec1, {1, {0x1ec0 }}}, - { 0x1ec3, {1, {0x1ec2 }}}, - { 0x1ec5, {1, {0x1ec4 }}}, - { 0x1ec7, {1, {0x1ec6 }}}, - { 0x1ec9, {1, {0x1ec8 }}}, - { 0x1ecb, {1, {0x1eca }}}, - { 0x1ecd, {1, {0x1ecc }}}, - { 0x1ecf, {1, {0x1ece }}}, - { 0x1ed1, {1, {0x1ed0 }}}, - { 0x1ed3, {1, {0x1ed2 }}}, - { 0x1ed5, {1, {0x1ed4 }}}, - { 0x1ed7, 
{1, {0x1ed6 }}}, - { 0x1ed9, {1, {0x1ed8 }}}, - { 0x1edb, {1, {0x1eda }}}, - { 0x1edd, {1, {0x1edc }}}, - { 0x1edf, {1, {0x1ede }}}, - { 0x1ee1, {1, {0x1ee0 }}}, - { 0x1ee3, {1, {0x1ee2 }}}, - { 0x1ee5, {1, {0x1ee4 }}}, - { 0x1ee7, {1, {0x1ee6 }}}, - { 0x1ee9, {1, {0x1ee8 }}}, - { 0x1eeb, {1, {0x1eea }}}, - { 0x1eed, {1, {0x1eec }}}, - { 0x1eef, {1, {0x1eee }}}, - { 0x1ef1, {1, {0x1ef0 }}}, - { 0x1ef3, {1, {0x1ef2 }}}, - { 0x1ef5, {1, {0x1ef4 }}}, - { 0x1ef7, {1, {0x1ef6 }}}, - { 0x1ef9, {1, {0x1ef8 }}}, - { 0x1f00, {1, {0x1f08 }}}, - { 0x1f01, {1, {0x1f09 }}}, - { 0x1f02, {1, {0x1f0a }}}, - { 0x1f03, {1, {0x1f0b }}}, - { 0x1f04, {1, {0x1f0c }}}, - { 0x1f05, {1, {0x1f0d }}}, - { 0x1f06, {1, {0x1f0e }}}, - { 0x1f07, {1, {0x1f0f }}}, - { 0x1f10, {1, {0x1f18 }}}, - { 0x1f11, {1, {0x1f19 }}}, - { 0x1f12, {1, {0x1f1a }}}, - { 0x1f13, {1, {0x1f1b }}}, - { 0x1f14, {1, {0x1f1c }}}, - { 0x1f15, {1, {0x1f1d }}}, - { 0x1f20, {1, {0x1f28 }}}, - { 0x1f21, {1, {0x1f29 }}}, - { 0x1f22, {1, {0x1f2a }}}, - { 0x1f23, {1, {0x1f2b }}}, - { 0x1f24, {1, {0x1f2c }}}, - { 0x1f25, {1, {0x1f2d }}}, - { 0x1f26, {1, {0x1f2e }}}, - { 0x1f27, {1, {0x1f2f }}}, - { 0x1f30, {1, {0x1f38 }}}, - { 0x1f31, {1, {0x1f39 }}}, - { 0x1f32, {1, {0x1f3a }}}, - { 0x1f33, {1, {0x1f3b }}}, - { 0x1f34, {1, {0x1f3c }}}, - { 0x1f35, {1, {0x1f3d }}}, - { 0x1f36, {1, {0x1f3e }}}, - { 0x1f37, {1, {0x1f3f }}}, - { 0x1f40, {1, {0x1f48 }}}, - { 0x1f41, {1, {0x1f49 }}}, - { 0x1f42, {1, {0x1f4a }}}, - { 0x1f43, {1, {0x1f4b }}}, - { 0x1f44, {1, {0x1f4c }}}, - { 0x1f45, {1, {0x1f4d }}}, - { 0x1f51, {1, {0x1f59 }}}, - { 0x1f53, {1, {0x1f5b }}}, - { 0x1f55, {1, {0x1f5d }}}, - { 0x1f57, {1, {0x1f5f }}}, - { 0x1f60, {1, {0x1f68 }}}, - { 0x1f61, {1, {0x1f69 }}}, - { 0x1f62, {1, {0x1f6a }}}, - { 0x1f63, {1, {0x1f6b }}}, - { 0x1f64, {1, {0x1f6c }}}, - { 0x1f65, {1, {0x1f6d }}}, - { 0x1f66, {1, {0x1f6e }}}, - { 0x1f67, {1, {0x1f6f }}}, - { 0x1f70, {1, {0x1fba }}}, - { 0x1f71, {1, {0x1fbb }}}, - { 0x1f72, {1, {0x1fc8 }}}, - { 
0x1f73, {1, {0x1fc9 }}}, - { 0x1f74, {1, {0x1fca }}}, - { 0x1f75, {1, {0x1fcb }}}, - { 0x1f76, {1, {0x1fda }}}, - { 0x1f77, {1, {0x1fdb }}}, - { 0x1f78, {1, {0x1ff8 }}}, - { 0x1f79, {1, {0x1ff9 }}}, - { 0x1f7a, {1, {0x1fea }}}, - { 0x1f7b, {1, {0x1feb }}}, - { 0x1f7c, {1, {0x1ffa }}}, - { 0x1f7d, {1, {0x1ffb }}}, - { 0x1fb0, {1, {0x1fb8 }}}, - { 0x1fb1, {1, {0x1fb9 }}}, - { 0x1fd0, {1, {0x1fd8 }}}, - { 0x1fd1, {1, {0x1fd9 }}}, - { 0x1fe0, {1, {0x1fe8 }}}, - { 0x1fe1, {1, {0x1fe9 }}}, - { 0x1fe5, {1, {0x1fec }}}, - { 0x2170, {1, {0x2160 }}}, - { 0x2171, {1, {0x2161 }}}, - { 0x2172, {1, {0x2162 }}}, - { 0x2173, {1, {0x2163 }}}, - { 0x2174, {1, {0x2164 }}}, - { 0x2175, {1, {0x2165 }}}, - { 0x2176, {1, {0x2166 }}}, - { 0x2177, {1, {0x2167 }}}, - { 0x2178, {1, {0x2168 }}}, - { 0x2179, {1, {0x2169 }}}, - { 0x217a, {1, {0x216a }}}, - { 0x217b, {1, {0x216b }}}, - { 0x217c, {1, {0x216c }}}, - { 0x217d, {1, {0x216d }}}, - { 0x217e, {1, {0x216e }}}, - { 0x217f, {1, {0x216f }}}, - { 0x24d0, {1, {0x24b6 }}}, - { 0x24d1, {1, {0x24b7 }}}, - { 0x24d2, {1, {0x24b8 }}}, - { 0x24d3, {1, {0x24b9 }}}, - { 0x24d4, {1, {0x24ba }}}, - { 0x24d5, {1, {0x24bb }}}, - { 0x24d6, {1, {0x24bc }}}, - { 0x24d7, {1, {0x24bd }}}, - { 0x24d8, {1, {0x24be }}}, - { 0x24d9, {1, {0x24bf }}}, - { 0x24da, {1, {0x24c0 }}}, - { 0x24db, {1, {0x24c1 }}}, - { 0x24dc, {1, {0x24c2 }}}, - { 0x24dd, {1, {0x24c3 }}}, - { 0x24de, {1, {0x24c4 }}}, - { 0x24df, {1, {0x24c5 }}}, - { 0x24e0, {1, {0x24c6 }}}, - { 0x24e1, {1, {0x24c7 }}}, - { 0x24e2, {1, {0x24c8 }}}, - { 0x24e3, {1, {0x24c9 }}}, - { 0x24e4, {1, {0x24ca }}}, - { 0x24e5, {1, {0x24cb }}}, - { 0x24e6, {1, {0x24cc }}}, - { 0x24e7, {1, {0x24cd }}}, - { 0x24e8, {1, {0x24ce }}}, - { 0x24e9, {1, {0x24cf }}}, - { 0x2c30, {1, {0x2c00 }}}, - { 0x2c31, {1, {0x2c01 }}}, - { 0x2c32, {1, {0x2c02 }}}, - { 0x2c33, {1, {0x2c03 }}}, - { 0x2c34, {1, {0x2c04 }}}, - { 0x2c35, {1, {0x2c05 }}}, - { 0x2c36, {1, {0x2c06 }}}, - { 0x2c37, {1, {0x2c07 }}}, - { 0x2c38, {1, {0x2c08 }}}, - 
{ 0x2c39, {1, {0x2c09 }}}, - { 0x2c3a, {1, {0x2c0a }}}, - { 0x2c3b, {1, {0x2c0b }}}, - { 0x2c3c, {1, {0x2c0c }}}, - { 0x2c3d, {1, {0x2c0d }}}, - { 0x2c3e, {1, {0x2c0e }}}, - { 0x2c3f, {1, {0x2c0f }}}, - { 0x2c40, {1, {0x2c10 }}}, - { 0x2c41, {1, {0x2c11 }}}, - { 0x2c42, {1, {0x2c12 }}}, - { 0x2c43, {1, {0x2c13 }}}, - { 0x2c44, {1, {0x2c14 }}}, - { 0x2c45, {1, {0x2c15 }}}, - { 0x2c46, {1, {0x2c16 }}}, - { 0x2c47, {1, {0x2c17 }}}, - { 0x2c48, {1, {0x2c18 }}}, - { 0x2c49, {1, {0x2c19 }}}, - { 0x2c4a, {1, {0x2c1a }}}, - { 0x2c4b, {1, {0x2c1b }}}, - { 0x2c4c, {1, {0x2c1c }}}, - { 0x2c4d, {1, {0x2c1d }}}, - { 0x2c4e, {1, {0x2c1e }}}, - { 0x2c4f, {1, {0x2c1f }}}, - { 0x2c50, {1, {0x2c20 }}}, - { 0x2c51, {1, {0x2c21 }}}, - { 0x2c52, {1, {0x2c22 }}}, - { 0x2c53, {1, {0x2c23 }}}, - { 0x2c54, {1, {0x2c24 }}}, - { 0x2c55, {1, {0x2c25 }}}, - { 0x2c56, {1, {0x2c26 }}}, - { 0x2c57, {1, {0x2c27 }}}, - { 0x2c58, {1, {0x2c28 }}}, - { 0x2c59, {1, {0x2c29 }}}, - { 0x2c5a, {1, {0x2c2a }}}, - { 0x2c5b, {1, {0x2c2b }}}, - { 0x2c5c, {1, {0x2c2c }}}, - { 0x2c5d, {1, {0x2c2d }}}, - { 0x2c5e, {1, {0x2c2e }}}, - { 0x2c81, {1, {0x2c80 }}}, - { 0x2c83, {1, {0x2c82 }}}, - { 0x2c85, {1, {0x2c84 }}}, - { 0x2c87, {1, {0x2c86 }}}, - { 0x2c89, {1, {0x2c88 }}}, - { 0x2c8b, {1, {0x2c8a }}}, - { 0x2c8d, {1, {0x2c8c }}}, - { 0x2c8f, {1, {0x2c8e }}}, - { 0x2c91, {1, {0x2c90 }}}, - { 0x2c93, {1, {0x2c92 }}}, - { 0x2c95, {1, {0x2c94 }}}, - { 0x2c97, {1, {0x2c96 }}}, - { 0x2c99, {1, {0x2c98 }}}, - { 0x2c9b, {1, {0x2c9a }}}, - { 0x2c9d, {1, {0x2c9c }}}, - { 0x2c9f, {1, {0x2c9e }}}, - { 0x2ca1, {1, {0x2ca0 }}}, - { 0x2ca3, {1, {0x2ca2 }}}, - { 0x2ca5, {1, {0x2ca4 }}}, - { 0x2ca7, {1, {0x2ca6 }}}, - { 0x2ca9, {1, {0x2ca8 }}}, - { 0x2cab, {1, {0x2caa }}}, - { 0x2cad, {1, {0x2cac }}}, - { 0x2caf, {1, {0x2cae }}}, - { 0x2cb1, {1, {0x2cb0 }}}, - { 0x2cb3, {1, {0x2cb2 }}}, - { 0x2cb5, {1, {0x2cb4 }}}, - { 0x2cb7, {1, {0x2cb6 }}}, - { 0x2cb9, {1, {0x2cb8 }}}, - { 0x2cbb, {1, {0x2cba }}}, - { 0x2cbd, {1, {0x2cbc }}}, 
- { 0x2cbf, {1, {0x2cbe }}}, - { 0x2cc1, {1, {0x2cc0 }}}, - { 0x2cc3, {1, {0x2cc2 }}}, - { 0x2cc5, {1, {0x2cc4 }}}, - { 0x2cc7, {1, {0x2cc6 }}}, - { 0x2cc9, {1, {0x2cc8 }}}, - { 0x2ccb, {1, {0x2cca }}}, - { 0x2ccd, {1, {0x2ccc }}}, - { 0x2ccf, {1, {0x2cce }}}, - { 0x2cd1, {1, {0x2cd0 }}}, - { 0x2cd3, {1, {0x2cd2 }}}, - { 0x2cd5, {1, {0x2cd4 }}}, - { 0x2cd7, {1, {0x2cd6 }}}, - { 0x2cd9, {1, {0x2cd8 }}}, - { 0x2cdb, {1, {0x2cda }}}, - { 0x2cdd, {1, {0x2cdc }}}, - { 0x2cdf, {1, {0x2cde }}}, - { 0x2ce1, {1, {0x2ce0 }}}, - { 0x2ce3, {1, {0x2ce2 }}}, - { 0x2d00, {1, {0x10a0 }}}, - { 0x2d01, {1, {0x10a1 }}}, - { 0x2d02, {1, {0x10a2 }}}, - { 0x2d03, {1, {0x10a3 }}}, - { 0x2d04, {1, {0x10a4 }}}, - { 0x2d05, {1, {0x10a5 }}}, - { 0x2d06, {1, {0x10a6 }}}, - { 0x2d07, {1, {0x10a7 }}}, - { 0x2d08, {1, {0x10a8 }}}, - { 0x2d09, {1, {0x10a9 }}}, - { 0x2d0a, {1, {0x10aa }}}, - { 0x2d0b, {1, {0x10ab }}}, - { 0x2d0c, {1, {0x10ac }}}, - { 0x2d0d, {1, {0x10ad }}}, - { 0x2d0e, {1, {0x10ae }}}, - { 0x2d0f, {1, {0x10af }}}, - { 0x2d10, {1, {0x10b0 }}}, - { 0x2d11, {1, {0x10b1 }}}, - { 0x2d12, {1, {0x10b2 }}}, - { 0x2d13, {1, {0x10b3 }}}, - { 0x2d14, {1, {0x10b4 }}}, - { 0x2d15, {1, {0x10b5 }}}, - { 0x2d16, {1, {0x10b6 }}}, - { 0x2d17, {1, {0x10b7 }}}, - { 0x2d18, {1, {0x10b8 }}}, - { 0x2d19, {1, {0x10b9 }}}, - { 0x2d1a, {1, {0x10ba }}}, - { 0x2d1b, {1, {0x10bb }}}, - { 0x2d1c, {1, {0x10bc }}}, - { 0x2d1d, {1, {0x10bd }}}, - { 0x2d1e, {1, {0x10be }}}, - { 0x2d1f, {1, {0x10bf }}}, - { 0x2d20, {1, {0x10c0 }}}, - { 0x2d21, {1, {0x10c1 }}}, - { 0x2d22, {1, {0x10c2 }}}, - { 0x2d23, {1, {0x10c3 }}}, - { 0x2d24, {1, {0x10c4 }}}, - { 0x2d25, {1, {0x10c5 }}}, - { 0xff41, {1, {0xff21 }}}, - { 0xff42, {1, {0xff22 }}}, - { 0xff43, {1, {0xff23 }}}, - { 0xff44, {1, {0xff24 }}}, - { 0xff45, {1, {0xff25 }}}, - { 0xff46, {1, {0xff26 }}}, - { 0xff47, {1, {0xff27 }}}, - { 0xff48, {1, {0xff28 }}}, - { 0xff49, {1, {0xff29 }}}, - { 0xff4a, {1, {0xff2a }}}, - { 0xff4b, {1, {0xff2b }}}, - { 0xff4c, {1, {0xff2c 
}}}, - { 0xff4d, {1, {0xff2d }}}, - { 0xff4e, {1, {0xff2e }}}, - { 0xff4f, {1, {0xff2f }}}, - { 0xff50, {1, {0xff30 }}}, - { 0xff51, {1, {0xff31 }}}, - { 0xff52, {1, {0xff32 }}}, - { 0xff53, {1, {0xff33 }}}, - { 0xff54, {1, {0xff34 }}}, - { 0xff55, {1, {0xff35 }}}, - { 0xff56, {1, {0xff36 }}}, - { 0xff57, {1, {0xff37 }}}, - { 0xff58, {1, {0xff38 }}}, - { 0xff59, {1, {0xff39 }}}, - { 0xff5a, {1, {0xff3a }}}, - { 0x10428, {1, {0x10400 }}}, - { 0x10429, {1, {0x10401 }}}, - { 0x1042a, {1, {0x10402 }}}, - { 0x1042b, {1, {0x10403 }}}, - { 0x1042c, {1, {0x10404 }}}, - { 0x1042d, {1, {0x10405 }}}, - { 0x1042e, {1, {0x10406 }}}, - { 0x1042f, {1, {0x10407 }}}, - { 0x10430, {1, {0x10408 }}}, - { 0x10431, {1, {0x10409 }}}, - { 0x10432, {1, {0x1040a }}}, - { 0x10433, {1, {0x1040b }}}, - { 0x10434, {1, {0x1040c }}}, - { 0x10435, {1, {0x1040d }}}, - { 0x10436, {1, {0x1040e }}}, - { 0x10437, {1, {0x1040f }}}, - { 0x10438, {1, {0x10410 }}}, - { 0x10439, {1, {0x10411 }}}, - { 0x1043a, {1, {0x10412 }}}, - { 0x1043b, {1, {0x10413 }}}, - { 0x1043c, {1, {0x10414 }}}, - { 0x1043d, {1, {0x10415 }}}, - { 0x1043e, {1, {0x10416 }}}, - { 0x1043f, {1, {0x10417 }}}, - { 0x10440, {1, {0x10418 }}}, - { 0x10441, {1, {0x10419 }}}, - { 0x10442, {1, {0x1041a }}}, - { 0x10443, {1, {0x1041b }}}, - { 0x10444, {1, {0x1041c }}}, - { 0x10445, {1, {0x1041d }}}, - { 0x10446, {1, {0x1041e }}}, - { 0x10447, {1, {0x1041f }}}, - { 0x10448, {1, {0x10420 }}}, - { 0x10449, {1, {0x10421 }}}, - { 0x1044a, {1, {0x10422 }}}, - { 0x1044b, {1, {0x10423 }}}, - { 0x1044c, {1, {0x10424 }}}, - { 0x1044d, {1, {0x10425 }}}, - { 0x1044e, {1, {0x10426 }}}, - { 0x1044f, {1, {0x10427 }}} -}; - -static const CaseUnfold_11_Type CaseUnfold_11_Locale[] = { - { 0x0069, {1, {0x0049 }}} -}; - -static const CaseUnfold_12_Type CaseUnfold_12[] = { - { {0x0061, 0x02be}, {1, {0x1e9a }}}, - { {0x0066, 0x0066}, {1, {0xfb00 }}}, - { {0x0066, 0x0069}, {1, {0xfb01 }}}, - { {0x0066, 0x006c}, {1, {0xfb02 }}}, - { {0x0068, 0x0331}, {1, {0x1e96 }}}, - 
{ {0x006a, 0x030c}, {1, {0x01f0 }}}, - { {0x0073, 0x0073}, {1, {0x00df }}}, - { {0x0073, 0x0074}, {2, {0xfb05, 0xfb06 }}}, - { {0x0074, 0x0308}, {1, {0x1e97 }}}, - { {0x0077, 0x030a}, {1, {0x1e98 }}}, - { {0x0079, 0x030a}, {1, {0x1e99 }}}, - { {0x02bc, 0x006e}, {1, {0x0149 }}}, - { {0x03ac, 0x03b9}, {1, {0x1fb4 }}}, - { {0x03ae, 0x03b9}, {1, {0x1fc4 }}}, - { {0x03b1, 0x0342}, {1, {0x1fb6 }}}, - { {0x03b1, 0x03b9}, {2, {0x1fb3, 0x1fbc }}}, - { {0x03b7, 0x0342}, {1, {0x1fc6 }}}, - { {0x03b7, 0x03b9}, {2, {0x1fc3, 0x1fcc }}}, - { {0x03b9, 0x0342}, {1, {0x1fd6 }}}, - { {0x03c1, 0x0313}, {1, {0x1fe4 }}}, - { {0x03c5, 0x0313}, {1, {0x1f50 }}}, - { {0x03c5, 0x0342}, {1, {0x1fe6 }}}, - { {0x03c9, 0x0342}, {1, {0x1ff6 }}}, - { {0x03c9, 0x03b9}, {2, {0x1ff3, 0x1ffc }}}, - { {0x03ce, 0x03b9}, {1, {0x1ff4 }}}, - { {0x0565, 0x0582}, {1, {0x0587 }}}, - { {0x0574, 0x0565}, {1, {0xfb14 }}}, - { {0x0574, 0x056b}, {1, {0xfb15 }}}, - { {0x0574, 0x056d}, {1, {0xfb17 }}}, - { {0x0574, 0x0576}, {1, {0xfb13 }}}, - { {0x057e, 0x0576}, {1, {0xfb16 }}}, - { {0x1f00, 0x03b9}, {2, {0x1f88, 0x1f80 }}}, - { {0x1f01, 0x03b9}, {2, {0x1f81, 0x1f89 }}}, - { {0x1f02, 0x03b9}, {2, {0x1f82, 0x1f8a }}}, - { {0x1f03, 0x03b9}, {2, {0x1f83, 0x1f8b }}}, - { {0x1f04, 0x03b9}, {2, {0x1f84, 0x1f8c }}}, - { {0x1f05, 0x03b9}, {2, {0x1f85, 0x1f8d }}}, - { {0x1f06, 0x03b9}, {2, {0x1f86, 0x1f8e }}}, - { {0x1f07, 0x03b9}, {2, {0x1f87, 0x1f8f }}}, - { {0x1f20, 0x03b9}, {2, {0x1f90, 0x1f98 }}}, - { {0x1f21, 0x03b9}, {2, {0x1f91, 0x1f99 }}}, - { {0x1f22, 0x03b9}, {2, {0x1f92, 0x1f9a }}}, - { {0x1f23, 0x03b9}, {2, {0x1f93, 0x1f9b }}}, - { {0x1f24, 0x03b9}, {2, {0x1f94, 0x1f9c }}}, - { {0x1f25, 0x03b9}, {2, {0x1f95, 0x1f9d }}}, - { {0x1f26, 0x03b9}, {2, {0x1f96, 0x1f9e }}}, - { {0x1f27, 0x03b9}, {2, {0x1f97, 0x1f9f }}}, - { {0x1f60, 0x03b9}, {2, {0x1fa0, 0x1fa8 }}}, - { {0x1f61, 0x03b9}, {2, {0x1fa1, 0x1fa9 }}}, - { {0x1f62, 0x03b9}, {2, {0x1fa2, 0x1faa }}}, - { {0x1f63, 0x03b9}, {2, {0x1fa3, 0x1fab }}}, - { {0x1f64, 
0x03b9}, {2, {0x1fa4, 0x1fac }}}, - { {0x1f65, 0x03b9}, {2, {0x1fa5, 0x1fad }}}, - { {0x1f66, 0x03b9}, {2, {0x1fa6, 0x1fae }}}, - { {0x1f67, 0x03b9}, {2, {0x1fa7, 0x1faf }}}, - { {0x1f70, 0x03b9}, {1, {0x1fb2 }}}, - { {0x1f74, 0x03b9}, {1, {0x1fc2 }}}, - { {0x1f7c, 0x03b9}, {1, {0x1ff2 }}} -}; - -static const CaseUnfold_12_Type CaseUnfold_12_Locale[] = { - { {0x0069, 0x0307}, {1, {0x0130 }}} -}; - -static const CaseUnfold_13_Type CaseUnfold_13[] = { - { {0x0066, 0x0066, 0x0069}, {1, {0xfb03 }}}, - { {0x0066, 0x0066, 0x006c}, {1, {0xfb04 }}}, - { {0x03b1, 0x0342, 0x03b9}, {1, {0x1fb7 }}}, - { {0x03b7, 0x0342, 0x03b9}, {1, {0x1fc7 }}}, - { {0x03b9, 0x0308, 0x0300}, {1, {0x1fd2 }}}, - { {0x03b9, 0x0308, 0x0301}, {2, {0x0390, 0x1fd3 }}}, - { {0x03b9, 0x0308, 0x0342}, {1, {0x1fd7 }}}, - { {0x03c5, 0x0308, 0x0300}, {1, {0x1fe2 }}}, - { {0x03c5, 0x0308, 0x0301}, {2, {0x03b0, 0x1fe3 }}}, - { {0x03c5, 0x0308, 0x0342}, {1, {0x1fe7 }}}, - { {0x03c5, 0x0313, 0x0300}, {1, {0x1f52 }}}, - { {0x03c5, 0x0313, 0x0301}, {1, {0x1f54 }}}, - { {0x03c5, 0x0313, 0x0342}, {1, {0x1f56 }}}, - { {0x03c9, 0x0342, 0x03b9}, {1, {0x1ff7 }}} -}; - - -static PosixBracketEntryType HashEntryData[] = { - { (UChar* )"NEWLINE", 0, 7 }, - { (UChar* )"Alpha", 1, 5 }, - { (UChar* )"Blank", 2, 5 }, - { (UChar* )"Cntrl", 3, 5 }, - { (UChar* )"Digit", 4, 5 }, - { (UChar* )"Graph", 5, 5 }, - { (UChar* )"Lower", 6, 5 }, - { (UChar* )"Print", 7, 5 }, - { (UChar* )"Punct", 8, 5 }, - { (UChar* )"Space", 9, 5 }, - { (UChar* )"Upper", 10, 5 }, - { (UChar* )"XDigit", 11, 6 }, - { (UChar* )"Word", 12, 4 }, - { (UChar* )"Alnum", 13, 5 }, - { (UChar* )"ASCII", 14, 5 }, - -#ifdef USE_UNICODE_PROPERTIES - { (UChar* )"Any", 15, 3 }, - { (UChar* )"Assigned", 16, 8 }, - { (UChar* )"C", 17, 1 }, - { (UChar* )"Cc", 18, 2 }, - { (UChar* )"Cf", 19, 2 }, - { (UChar* )"Cn", 20, 2 }, - { (UChar* )"Co", 21, 2 }, - { (UChar* )"Cs", 22, 2 }, - { (UChar* )"L", 23, 1 }, - { (UChar* )"Ll", 24, 2 }, - { (UChar* )"Lm", 25, 2 }, - { (UChar* 
)"Lo", 26, 2 }, - { (UChar* )"Lt", 27, 2 }, - { (UChar* )"Lu", 28, 2 }, - { (UChar* )"M", 29, 1 }, - { (UChar* )"Mc", 30, 2 }, - { (UChar* )"Me", 31, 2 }, - { (UChar* )"Mn", 32, 2 }, - { (UChar* )"N", 33, 1 }, - { (UChar* )"Nd", 34, 2 }, - { (UChar* )"Nl", 35, 2 }, - { (UChar* )"No", 36, 2 }, - { (UChar* )"P", 37, 1 }, - { (UChar* )"Pc", 38, 2 }, - { (UChar* )"Pd", 39, 2 }, - { (UChar* )"Pe", 40, 2 }, - { (UChar* )"Pf", 41, 2 }, - { (UChar* )"Pi", 42, 2 }, - { (UChar* )"Po", 43, 2 }, - { (UChar* )"Ps", 44, 2 }, - { (UChar* )"S", 45, 1 }, - { (UChar* )"Sc", 46, 2 }, - { (UChar* )"Sk", 47, 2 }, - { (UChar* )"Sm", 48, 2 }, - { (UChar* )"So", 49, 2 }, - { (UChar* )"Z", 50, 1 }, - { (UChar* )"Zl", 51, 2 }, - { (UChar* )"Zp", 52, 2 }, - { (UChar* )"Zs", 53, 2 }, - { (UChar* )"Arabic", 54, 6 }, - { (UChar* )"Armenian", 55, 8 }, - { (UChar* )"Bengali", 56, 7 }, - { (UChar* )"Bopomofo", 57, 8 }, - { (UChar* )"Braille", 58, 7 }, - { (UChar* )"Buginese", 59, 8 }, - { (UChar* )"Buhid", 60, 5 }, - { (UChar* )"Canadian_Aboriginal", 61, 19 }, - { (UChar* )"Cherokee", 62, 8 }, - { (UChar* )"Common", 63, 6 }, - { (UChar* )"Coptic", 64, 6 }, - { (UChar* )"Cypriot", 65, 7 }, - { (UChar* )"Cyrillic", 66, 8 }, - { (UChar* )"Deseret", 67, 7 }, - { (UChar* )"Devanagari", 68, 10 }, - { (UChar* )"Ethiopic", 69, 8 }, - { (UChar* )"Georgian", 70, 8 }, - { (UChar* )"Glagolitic", 71, 10 }, - { (UChar* )"Gothic", 72, 6 }, - { (UChar* )"Greek", 73, 5 }, - { (UChar* )"Gujarati", 74, 8 }, - { (UChar* )"Gurmukhi", 75, 8 }, - { (UChar* )"Han", 76, 3 }, - { (UChar* )"Hangul", 77, 6 }, - { (UChar* )"Hanunoo", 78, 7 }, - { (UChar* )"Hebrew", 79, 6 }, - { (UChar* )"Hiragana", 80, 8 }, - { (UChar* )"Inherited", 81, 9 }, - { (UChar* )"Kannada", 82, 7 }, - { (UChar* )"Katakana", 83, 8 }, - { (UChar* )"Kharoshthi", 84, 10 }, - { (UChar* )"Khmer", 85, 5 }, - { (UChar* )"Lao", 86, 3 }, - { (UChar* )"Latin", 87, 5 }, - { (UChar* )"Limbu", 88, 5 }, - { (UChar* )"Linear_B", 89, 8 }, - { (UChar* )"Malayalam", 90, 
9 }, - { (UChar* )"Mongolian", 91, 9 }, - { (UChar* )"Myanmar", 92, 7 }, - { (UChar* )"New_Tai_Lue", 93, 11 }, - { (UChar* )"Ogham", 94, 5 }, - { (UChar* )"Old_Italic", 95, 10 }, - { (UChar* )"Old_Persian", 96, 11 }, - { (UChar* )"Oriya", 97, 5 }, - { (UChar* )"Osmanya", 98, 7 }, - { (UChar* )"Runic", 99, 5 }, - { (UChar* )"Shavian", 100, 7 }, - { (UChar* )"Sinhala", 101, 7 }, - { (UChar* )"Syloti_Nagri", 102, 12 }, - { (UChar* )"Syriac", 103, 6 }, - { (UChar* )"Tagalog", 104, 7 }, - { (UChar* )"Tagbanwa", 105, 8 }, - { (UChar* )"Tai_Le", 106, 6 }, - { (UChar* )"Tamil", 107, 5 }, - { (UChar* )"Telugu", 108, 6 }, - { (UChar* )"Thaana", 109, 6 }, - { (UChar* )"Thai", 110, 4 }, - { (UChar* )"Tibetan", 111, 7 }, - { (UChar* )"Tifinagh", 112, 8 }, - { (UChar* )"Ugaritic", 113, 8 }, - { (UChar* )"Yi", 114, 2 }, -#endif /* USE_UNICODE_PROPERTIES */ - { (UChar* )NULL, -1, 0 } -}; - -#ifdef USE_UNICODE_PROPERTIES -#define CODE_RANGES_NUM 115 -#else -#define CODE_RANGES_NUM 15 -#endif - -static const OnigCodePoint* CodeRanges[CODE_RANGES_NUM]; -static int CodeRangeTableInited = 0; - -static void init_code_range_array(void) { - THREAD_ATOMIC_START; - - CodeRanges[0] = CR_NEWLINE; - CodeRanges[1] = CR_Alpha; - CodeRanges[2] = CR_Blank; - CodeRanges[3] = CR_Cntrl; - CodeRanges[4] = CR_Digit; - CodeRanges[5] = CR_Graph; - CodeRanges[6] = CR_Lower; - CodeRanges[7] = CR_Print; - CodeRanges[8] = CR_Punct; - CodeRanges[9] = CR_Space; - CodeRanges[10] = CR_Upper; - CodeRanges[11] = CR_XDigit; - CodeRanges[12] = CR_Word; - CodeRanges[13] = CR_Alnum; - CodeRanges[14] = CR_ASCII; - -#ifdef USE_UNICODE_PROPERTIES - CodeRanges[15] = CR_Any; - CodeRanges[16] = CR_Assigned; - CodeRanges[17] = CR_C; - CodeRanges[18] = CR_Cc; - CodeRanges[19] = CR_Cf; - CodeRanges[20] = CR_Cn; - CodeRanges[21] = CR_Co; - CodeRanges[22] = CR_Cs; - CodeRanges[23] = CR_L; - CodeRanges[24] = CR_Ll; - CodeRanges[25] = CR_Lm; - CodeRanges[26] = CR_Lo; - CodeRanges[27] = CR_Lt; - CodeRanges[28] = CR_Lu; - 
CodeRanges[29] = CR_M; - CodeRanges[30] = CR_Mc; - CodeRanges[31] = CR_Me; - CodeRanges[32] = CR_Mn; - CodeRanges[33] = CR_N; - CodeRanges[34] = CR_Nd; - CodeRanges[35] = CR_Nl; - CodeRanges[36] = CR_No; - CodeRanges[37] = CR_P; - CodeRanges[38] = CR_Pc; - CodeRanges[39] = CR_Pd; - CodeRanges[40] = CR_Pe; - CodeRanges[41] = CR_Pf; - CodeRanges[42] = CR_Pi; - CodeRanges[43] = CR_Po; - CodeRanges[44] = CR_Ps; - CodeRanges[45] = CR_S; - CodeRanges[46] = CR_Sc; - CodeRanges[47] = CR_Sk; - CodeRanges[48] = CR_Sm; - CodeRanges[49] = CR_So; - CodeRanges[50] = CR_Z; - CodeRanges[51] = CR_Zl; - CodeRanges[52] = CR_Zp; - CodeRanges[53] = CR_Zs; - CodeRanges[54] = CR_Arabic; - CodeRanges[55] = CR_Armenian; - CodeRanges[56] = CR_Bengali; - CodeRanges[57] = CR_Bopomofo; - CodeRanges[58] = CR_Braille; - CodeRanges[59] = CR_Buginese; - CodeRanges[60] = CR_Buhid; - CodeRanges[61] = CR_Canadian_Aboriginal; - CodeRanges[62] = CR_Cherokee; - CodeRanges[63] = CR_Common; - CodeRanges[64] = CR_Coptic; - CodeRanges[65] = CR_Cypriot; - CodeRanges[66] = CR_Cyrillic; - CodeRanges[67] = CR_Deseret; - CodeRanges[68] = CR_Devanagari; - CodeRanges[69] = CR_Ethiopic; - CodeRanges[70] = CR_Georgian; - CodeRanges[71] = CR_Glagolitic; - CodeRanges[72] = CR_Gothic; - CodeRanges[73] = CR_Greek; - CodeRanges[74] = CR_Gujarati; - CodeRanges[75] = CR_Gurmukhi; - CodeRanges[76] = CR_Han; - CodeRanges[77] = CR_Hangul; - CodeRanges[78] = CR_Hanunoo; - CodeRanges[79] = CR_Hebrew; - CodeRanges[80] = CR_Hiragana; - CodeRanges[81] = CR_Inherited; - CodeRanges[82] = CR_Kannada; - CodeRanges[83] = CR_Katakana; - CodeRanges[84] = CR_Kharoshthi; - CodeRanges[85] = CR_Khmer; - CodeRanges[86] = CR_Lao; - CodeRanges[87] = CR_Latin; - CodeRanges[88] = CR_Limbu; - CodeRanges[89] = CR_Linear_B; - CodeRanges[90] = CR_Malayalam; - CodeRanges[91] = CR_Mongolian; - CodeRanges[92] = CR_Myanmar; - CodeRanges[93] = CR_New_Tai_Lue; - CodeRanges[94] = CR_Ogham; - CodeRanges[95] = CR_Old_Italic; - CodeRanges[96] = CR_Old_Persian; 
- CodeRanges[97] = CR_Oriya; - CodeRanges[98] = CR_Osmanya; - CodeRanges[99] = CR_Runic; - CodeRanges[100] = CR_Shavian; - CodeRanges[101] = CR_Sinhala; - CodeRanges[102] = CR_Syloti_Nagri; - CodeRanges[103] = CR_Syriac; - CodeRanges[104] = CR_Tagalog; - CodeRanges[105] = CR_Tagbanwa; - CodeRanges[106] = CR_Tai_Le; - CodeRanges[107] = CR_Tamil; - CodeRanges[108] = CR_Telugu; - CodeRanges[109] = CR_Thaana; - CodeRanges[110] = CR_Thai; - CodeRanges[111] = CR_Tibetan; - CodeRanges[112] = CR_Tifinagh; - CodeRanges[113] = CR_Ugaritic; - CodeRanges[114] = CR_Yi; -#endif /* USE_UNICODE_PROPERTIES */ - - CodeRangeTableInited = 1; - THREAD_ATOMIC_END; -} - -extern int -onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if ( -#ifdef USE_UNICODE_PROPERTIES - ctype <= ONIGENC_MAX_STD_CTYPE && -#endif - code < 256) { - return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype); - } - - if (ctype >= CODE_RANGES_NUM) { - return ONIGERR_TYPE_BUG; - } - - if (CodeRangeTableInited == 0) init_code_range_array(); - - return onig_is_in_code_range((UChar* )CodeRanges[ctype], code); -} - - -extern int -onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[]) -{ - if (ctype >= CODE_RANGES_NUM) { - return ONIGERR_TYPE_BUG; - } - - if (CodeRangeTableInited == 0) init_code_range_array(); - - *ranges = CodeRanges[ctype]; - - return 0; -} - -extern int -onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, - const OnigCodePoint* ranges[]) -{ - *sb_out = 0x00; - return onigenc_unicode_ctype_code_range(ctype, ranges); -} - -#include "st.h" - -#define PROPERTY_NAME_MAX_SIZE 20 - -static st_table* NameCtypeTable; -static int NameTableInited = 0; - -static int init_name_ctype_table(void) -{ - PosixBracketEntryType *pb; - - THREAD_ATOMIC_START; - - NameCtypeTable = onig_st_init_strend_table_with_size(100); - if (ONIG_IS_NULL(NameCtypeTable)) return ONIGERR_MEMORY; - - for (pb = HashEntryData; ONIG_IS_NOT_NULL(pb->name); pb++) { - 
onig_st_insert_strend(NameCtypeTable, pb->name, pb->name + pb->len, - (st_data_t )pb->ctype); - } - - NameTableInited = 1; - THREAD_ATOMIC_END; - return 0; -} - -extern int -onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end) -{ - int len; - hash_data_type ctype; - UChar buf[PROPERTY_NAME_MAX_SIZE]; - UChar *p; - OnigCodePoint code; - - p = name; - len = 0; - while (p < end) { - code = ONIGENC_MBC_TO_CODE(enc, p, end); - if (code >= 0x80) - return ONIGERR_INVALID_CHAR_PROPERTY_NAME; - - buf[len++] = (UChar )code; - if (len >= PROPERTY_NAME_MAX_SIZE) - return ONIGERR_INVALID_CHAR_PROPERTY_NAME; - - p += enclen(enc, p); - } - - buf[len] = 0; - - if (NameTableInited == 0) init_name_ctype_table(); - - if (onig_st_lookup_strend(NameCtypeTable, buf, buf + len, &ctype) == 0) { - return ONIGERR_INVALID_CHAR_PROPERTY_NAME; - } - - return (int )ctype; -} - - -static int -code2_cmp(OnigCodePoint* x, OnigCodePoint* y) -{ - if (x[0] == y[0] && x[1] == y[1]) return 0; - return 1; -} - -static int -code2_hash(OnigCodePoint* x) -{ - return (int )(x[0] + x[1]); -} - -static struct st_hash_type type_code2_hash = { - code2_cmp, - code2_hash, -}; - -static int -code3_cmp(OnigCodePoint* x, OnigCodePoint* y) -{ - if (x[0] == y[0] && x[1] == y[1] && x[2] == y[2]) return 0; - return 1; -} - -static int -code3_hash(OnigCodePoint* x) -{ - return (int )(x[0] + x[1] + x[2]); -} - -static struct st_hash_type type_code3_hash = { - code3_cmp, - code3_hash, -}; - - -static st_table* FoldTable; /* fold-1, fold-2, fold-3 */ -static st_table* Unfold1Table; -static st_table* Unfold2Table; -static st_table* Unfold3Table; -static int CaseFoldInited = 0; - - -extern void onigenc_end_unicode(void) -{ - THREAD_ATOMIC_START; - - if (FoldTable != 0) st_free_table(FoldTable); - if (Unfold1Table != 0) st_free_table(Unfold1Table); - if (Unfold2Table != 0) st_free_table(Unfold2Table); - if (Unfold3Table != 0) st_free_table(Unfold3Table); - - CaseFoldInited = 0; - - 
THREAD_ATOMIC_END; -} - -static int init_case_fold_table(void) -{ - const CaseFold_11_Type *p; - const CaseUnfold_11_Type *p1; - const CaseUnfold_12_Type *p2; - const CaseUnfold_13_Type *p3; - int i; - - THREAD_ATOMIC_START; - - FoldTable = st_init_numtable_with_size(1200); - if (ONIG_IS_NULL(FoldTable)) return ONIGERR_MEMORY; - for (i = 0; i < (int )(sizeof(CaseFold)/sizeof(CaseFold_11_Type)); i++) { - p = &CaseFold[i]; - st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to)); - } - for (i = 0; i < (int )(sizeof(CaseFold_Locale)/sizeof(CaseFold_11_Type)); - i++) { - p = &CaseFold_Locale[i]; - st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to)); - } - - Unfold1Table = st_init_numtable_with_size(1000); - if (ONIG_IS_NULL(Unfold1Table)) return ONIGERR_MEMORY; - - for (i = 0; i < (int )(sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type)); - i++) { - p1 = &CaseUnfold_11[i]; - st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to)); - } - for (i = 0; - i < (int )(sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type)); - i++) { - p1 = &CaseUnfold_11_Locale[i]; - st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to)); - } - - Unfold2Table = st_init_table_with_size(&type_code2_hash, 200); - if (ONIG_IS_NULL(Unfold2Table)) return ONIGERR_MEMORY; - - for (i = 0; i < (int )(sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type)); - i++) { - p2 = &CaseUnfold_12[i]; - st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to)); - } - for (i = 0; - i < (int )(sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type)); - i++) { - p2 = &CaseUnfold_12_Locale[i]; - st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to)); - } - - Unfold3Table = st_init_table_with_size(&type_code3_hash, 30); - if (ONIG_IS_NULL(Unfold3Table)) return ONIGERR_MEMORY; - - for (i = 0; i < (int )(sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type)); - i++) { - p3 = &CaseUnfold_13[i]; - st_add_direct(Unfold3Table, (st_data_t 
)p3->from, (st_data_t )(&p3->to)); - } - - - onig_add_end_call(onigenc_end_unicode); - - CaseFoldInited = 1; - THREAD_ATOMIC_END; - return 0; -} - -extern int -onigenc_unicode_mbc_case_fold(OnigEncoding enc, - OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end, - UChar* fold) -{ - CodePointList3 *to; - OnigCodePoint code; - int i, len, rlen; - const UChar *p = *pp; - - if (CaseFoldInited == 0) init_case_fold_table(); - - code = ONIGENC_MBC_TO_CODE(enc, p, end); - len = enclen(enc, p); - *pp += len; - -#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI - if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { - if (code == 0x0049) { - return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold); - } - else if (code == 0x0130) { - return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold); - } - } -#endif - - if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) { - if (to->n == 1) { - return ONIGENC_CODE_TO_MBC(enc, to->code[0], fold); - } -#if 0 - /* NO NEEDS TO CHECK */ - else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { -#else - else { -#endif - rlen = 0; - for (i = 0; i < to->n; i++) { - len = ONIGENC_CODE_TO_MBC(enc, to->code[i], fold); - fold += len; - rlen += len; - } - return rlen; - } - } - - for (i = 0; i < len; i++) { - *fold++ = *p++; - } - return len; -} - -extern int -onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, - OnigApplyAllCaseFoldFunc f, void* arg) -{ - const CaseUnfold_11_Type* p11; - OnigCodePoint code; - int i, j, k, r; - - /* if (CaseFoldInited == 0) init_case_fold_table(); */ - - for (i = 0; i < (int )(sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type)); - i++) { - p11 = &CaseUnfold_11[i]; - for (j = 0; j < p11->to.n; j++) { - code = p11->from; - r = (*f)(p11->to.code[j], &code, 1, arg); - if (r != 0) return r; - - code = p11->to.code[j]; - r = (*f)(p11->from, &code, 1, arg); - if (r != 0) return r; - - for (k = 0; k < j; k++) { - r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]), 1, arg); - if (r != 0) return r; 
- - r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]), 1, arg); - if (r != 0) return r; - } - } - } - -#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI - if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { - code = 0x0131; - r = (*f)(0x0049, &code, 1, arg); - if (r != 0) return r; - code = 0x0049; - r = (*f)(0x0131, &code, 1, arg); - if (r != 0) return r; - - code = 0x0130; - r = (*f)(0x0069, &code, 1, arg); - if (r != 0) return r; - code = 0x0069; - r = (*f)(0x0130, &code, 1, arg); - if (r != 0) return r; - } - else { -#endif - for (i = 0; - i < (int )(sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type)); - i++) { - p11 = &CaseUnfold_11_Locale[i]; - for (j = 0; j < p11->to.n; j++) { - code = p11->from; - r = (*f)(p11->to.code[j], &code, 1, arg); - if (r != 0) return r; - - code = p11->to.code[j]; - r = (*f)(p11->from, &code, 1, arg); - if (r != 0) return r; - - for (k = 0; k < j; k++) { - r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]), - 1, arg); - if (r != 0) return r; - - r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]), - 1, arg); - if (r != 0) return r; - } - } - } -#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI - } -#endif - - if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { - for (i = 0; i < (int )(sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type)); - i++) { - for (j = 0; j < CaseUnfold_12[i].to.n; j++) { - r = (*f)(CaseUnfold_12[i].to.code[j], - (OnigCodePoint* )CaseUnfold_12[i].from, 2, arg); - if (r != 0) return r; - - for (k = 0; k < CaseUnfold_12[i].to.n; k++) { - if (k == j) continue; - - r = (*f)(CaseUnfold_12[i].to.code[j], - (OnigCodePoint* )(&CaseUnfold_12[i].to.code[k]), 1, arg); - if (r != 0) return r; - } - } - } - -#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI - if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) { -#endif - for (i = 0; - i < (int )(sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type)); - i++) { - for (j = 0; j < CaseUnfold_12_Locale[i].to.n; j++) { - r = 
(*f)(CaseUnfold_12_Locale[i].to.code[j], - (OnigCodePoint* )CaseUnfold_12_Locale[i].from, 2, arg); - if (r != 0) return r; - - for (k = 0; k < CaseUnfold_12_Locale[i].to.n; k++) { - if (k == j) continue; - - r = (*f)(CaseUnfold_12_Locale[i].to.code[j], - (OnigCodePoint* )(&CaseUnfold_12_Locale[i].to.code[k]), - 1, arg); - if (r != 0) return r; - } - } - } -#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI - } -#endif - - for (i = 0; i < (int )(sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type)); - i++) { - for (j = 0; j < CaseUnfold_13[i].to.n; j++) { - r = (*f)(CaseUnfold_13[i].to.code[j], - (OnigCodePoint* )CaseUnfold_13[i].from, 3, arg); - if (r != 0) return r; - - for (k = 0; k < CaseUnfold_13[i].to.n; k++) { - if (k == j) continue; - - r = (*f)(CaseUnfold_13[i].to.code[j], - (OnigCodePoint* )(&CaseUnfold_13[i].to.code[k]), 1, arg); - if (r != 0) return r; - } - } - } - } - - return 0; -} - -extern int -onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, - OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, - OnigCaseFoldCodeItem items[]) -{ - int n, i, j, k, len; - OnigCodePoint code, codes[3]; - CodePointList3 *to, *z3; - CodePointList2 *z2; - - if (CaseFoldInited == 0) init_case_fold_table(); - - n = 0; - - code = ONIGENC_MBC_TO_CODE(enc, p, end); - len = enclen(enc, p); - -#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI - if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { - if (code == 0x0049) { - items[0].byte_len = len; - items[0].code_len = 1; - items[0].code[0] = 0x0131; - return 1; - } - else if (code == 0x0130) { - items[0].byte_len = len; - items[0].code_len = 1; - items[0].code[0] = 0x0069; - return 1; - } - else if (code == 0x0131) { - items[0].byte_len = len; - items[0].code_len = 1; - items[0].code[0] = 0x0049; - return 1; - } - else if (code == 0x0069) { - items[0].byte_len = len; - items[0].code_len = 1; - items[0].code[0] = 0x0130; - return 1; - } - } -#endif - - if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) { - 
if (to->n == 1) { - OnigCodePoint orig_code = code; - - items[0].byte_len = len; - items[0].code_len = 1; - items[0].code[0] = to->code[0]; - n++; - - code = to->code[0]; - if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) { - for (i = 0; i < to->n; i++) { - if (to->code[i] != orig_code) { - items[n].byte_len = len; - items[n].code_len = 1; - items[n].code[0] = to->code[i]; - n++; - } - } - } - } - else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { - OnigCodePoint cs[3][4]; - int fn, ncs[3]; - - for (fn = 0; fn < to->n; fn++) { - cs[fn][0] = to->code[fn]; - if (onig_st_lookup(Unfold1Table, (st_data_t )cs[fn][0], - (void* )&z3) != 0) { - for (i = 0; i < z3->n; i++) { - cs[fn][i+1] = z3->code[i]; - } - ncs[fn] = z3->n + 1; - } - else - ncs[fn] = 1; - } - - if (fn == 2) { - for (i = 0; i < ncs[0]; i++) { - for (j = 0; j < ncs[1]; j++) { - items[n].byte_len = len; - items[n].code_len = 2; - items[n].code[0] = cs[0][i]; - items[n].code[1] = cs[1][j]; - n++; - } - } - - if (onig_st_lookup(Unfold2Table, (st_data_t )to->code, - (void* )&z2) != 0) { - for (i = 0; i < z2->n; i++) { - if (z2->code[i] == code) continue; - - items[n].byte_len = len; - items[n].code_len = 1; - items[n].code[0] = z2->code[i]; - n++; - } - } - } - else { - for (i = 0; i < ncs[0]; i++) { - for (j = 0; j < ncs[1]; j++) { - for (k = 0; k < ncs[2]; k++) { - items[n].byte_len = len; - items[n].code_len = 3; - items[n].code[0] = cs[0][i]; - items[n].code[1] = cs[1][j]; - items[n].code[2] = cs[2][k]; - n++; - } - } - } - - if (onig_st_lookup(Unfold3Table, (st_data_t )to->code, - (void* )&z2) != 0) { - for (i = 0; i < z2->n; i++) { - if (z2->code[i] == code) continue; - - items[n].byte_len = len; - items[n].code_len = 1; - items[n].code[0] = z2->code[i]; - n++; - } - } - } - - /* multi char folded code is not head of another folded multi char */ - flag = 0; /* DISABLE_CASE_FOLD_MULTI_CHAR(flag); */ - } - } - else { - if (onig_st_lookup(Unfold1Table, (st_data_t )code, 
(void* )&to) != 0) { - for (i = 0; i < to->n; i++) { - items[n].byte_len = len; - items[n].code_len = 1; - items[n].code[0] = to->code[i]; - n++; - } - } - } - - - if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { - p += len; - if (p < end) { - int clen; - - codes[0] = code; - code = ONIGENC_MBC_TO_CODE(enc, p, end); - if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0 - && to->n == 1) { - codes[1] = to->code[0]; - } - else - codes[1] = code; - - clen = enclen(enc, p); - len += clen; - if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) { - for (i = 0; i < z2->n; i++) { - items[n].byte_len = len; - items[n].code_len = 1; - items[n].code[0] = z2->code[i]; - n++; - } - } - - p += clen; - if (p < end) { - code = ONIGENC_MBC_TO_CODE(enc, p, end); - if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0 - && to->n == 1) { - codes[2] = to->code[0]; - } - else - codes[2] = code; - - clen = enclen(enc, p); - len += clen; - if (onig_st_lookup(Unfold3Table, (st_data_t )codes, - (void* )&z2) != 0) { - for (i = 0; i < z2->n; i++) { - items[n].byte_len = len; - items[n].code_len = 1; - items[n].code[0] = z2->code[i]; - n++; - } - } - } - } - } - - return n; -} diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/enc/utf16_le.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/enc/utf16_le.c deleted file mode 100644 index b8685c4393..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/enc/utf16_le.c +++ /dev/null @@ -1,226 +0,0 @@ -/********************************************************************** - utf16_le.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "regenc.h" - -static const int EncLen_UTF16[] = { - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 -}; - -static int -utf16le_code_to_mbclen(OnigCodePoint code) -{ - return (code > 0xffff ? 4 : 2); -} - -static int -utf16le_mbc_enc_len(const UChar* p) -{ - return EncLen_UTF16[*(p+1)]; -} - -static int -utf16le_is_mbc_newline(const UChar* p, const UChar* end) -{ - if (p + 1 < end) { - if (*p == 0x0a && *(p+1) == 0x00) - return 1; -#ifdef USE_UNICODE_ALL_LINE_TERMINATORS - if (( -#ifndef USE_CRNL_AS_LINE_TERMINATOR - *p == 0x0d || -#endif - *p == 0x85) && *(p+1) == 0x00) - return 1; - if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)) - return 1; -#endif - } - return 0; -} - -static OnigCodePoint -utf16le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) -{ - OnigCodePoint code; - UChar c0 = *p; - UChar c1 = *(p+1); - - if (UTF16_IS_SURROGATE_FIRST(c1)) { - code = ((((c1 - 0xd8) << 2) + ((c0 & 0xc0) >> 6) + 1) << 16) - + ((((c0 & 0x3f) << 2) + (p[3] - 0xdc)) << 8) - + p[2]; - } - else { - code = c1 * 256 + p[0]; - } - return code; -} - -static int -utf16le_code_to_mbc(OnigCodePoint code, UChar *buf) -{ - UChar* p = buf; - - if (code > 0xffff) { - unsigned int plane, high; - - plane = (code >> 16) - 1; - high 
= (code & 0xff00) >> 8; - - *p++ = (UChar)(((plane & 0x03) << 6) + (high >> 2)); - *p++ = (UChar)((plane >> 2) + 0xd8); - *p++ = (UChar )(code & 0xff); - *p = (high & 0x03) + 0xdc; - return 4; - } - else { - *p++ = (UChar )(code & 0xff); - *p++ = (UChar )((code & 0xff00) >> 8); - return 2; - } -} - -static int -utf16le_mbc_case_fold(OnigCaseFoldType flag, - const UChar** pp, const UChar* end, UChar* fold) -{ - const UChar* p = *pp; - - if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0) { -#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI - if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { - if (*p == 0x49) { - *fold++ = 0x31; - *fold = 0x01; - (*pp) += 2; - return 2; - } - } -#endif - - *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); - *fold = 0; - *pp += 2; - return 2; - } - else - return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_LE, flag, pp, end, - fold); -} - -#if 0 -static int -utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, - const UChar* end) -{ - const UChar* p = *pp; - - (*pp) += EncLen_UTF16[*(p+1)]; - - if (*(p+1) == 0) { - int c, v; - - if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { - return TRUE; - } - - c = *p; - v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, - (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); - if ((v | BIT_CTYPE_LOWER) != 0) { - /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ - if (c >= 0xaa && c <= 0xba) - return FALSE; - else - return TRUE; - } - return (v != 0 ? 
TRUE : FALSE); - } - - return FALSE; -} -#endif - -static UChar* -utf16le_left_adjust_char_head(const UChar* start, const UChar* s) -{ - if (s <= start) return (UChar* )s; - - if ((s - start) % 2 == 1) { - s--; - } - - if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1) - s -= 2; - - return (UChar* )s; -} - -static int -utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag, - const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) -{ - return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_LE, - flag, p, end, items); -} - -OnigEncodingType OnigEncodingUTF16_LE = { - utf16le_mbc_enc_len, - "UTF-16LE", /* name */ - 4, /* max byte length */ - 2, /* min byte length */ - utf16le_is_mbc_newline, - utf16le_mbc_to_code, - utf16le_code_to_mbclen, - utf16le_code_to_mbc, - utf16le_mbc_case_fold, - onigenc_unicode_apply_all_case_fold, - utf16le_get_case_fold_codes_by_str, - onigenc_unicode_property_name_to_ctype, - onigenc_unicode_is_code_ctype, - onigenc_utf16_32_get_ctype_code_range, - utf16le_left_adjust_char_head, - onigenc_always_false_is_allowed_reverse_match -}; diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/oniggnu.h b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/oniggnu.h deleted file mode 100644 index 7ec8e2e7f1..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/oniggnu.h +++ /dev/null @@ -1,85 +0,0 @@ -#ifndef ONIGGNU_H -#define ONIGGNU_H -/********************************************************************** - oniggnu.h - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "oniguruma.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define RE_MBCTYPE_ASCII 0 -#define RE_MBCTYPE_EUC 1 -#define RE_MBCTYPE_SJIS 2 -#define RE_MBCTYPE_UTF8 3 - -/* GNU regex options */ -#ifndef RE_NREGS -#define RE_NREGS ONIG_NREGION -#endif - -#define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE -#define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND -#define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE -#define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE -#define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST -#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE) -#define RE_OPTION_FIND_NOT_EMPTY ONIG_OPTION_FIND_NOT_EMPTY -#define RE_OPTION_NEGATE_SINGLELINE ONIG_OPTION_NEGATE_SINGLELINE -#define RE_OPTION_DONT_CAPTURE_GROUP ONIG_OPTION_DONT_CAPTURE_GROUP -#define RE_OPTION_CAPTURE_GROUP ONIG_OPTION_CAPTURE_GROUP - - -ONIG_EXTERN -void re_mbcinit P_((int)); -ONIG_EXTERN -int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); -ONIG_EXTERN -int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); -ONIG_EXTERN -void re_free_pattern P_((struct re_pattern_buffer*)); -ONIG_EXTERN -int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int)); -ONIG_EXTERN -int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*)); -ONIG_EXTERN -int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*)); -ONIG_EXTERN -void re_set_casetable P_((const char*)); -ONIG_EXTERN -void re_free_registers P_((struct re_registers*)); -ONIG_EXTERN -int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */ - -#ifdef __cplusplus -} -#endif - -#endif /* ONIGGNU_H */ diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/onigposix.h b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/onigposix.h deleted file mode 100644 index 7c2cad123b..0000000000 --- 
a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/onigposix.h +++ /dev/null @@ -1,169 +0,0 @@ -#ifndef ONIGPOSIX_H -#define ONIGPOSIX_H -/********************************************************************** - onigposix.h - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ -#include "OnigurumaUefiPort.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* options */ -#define REG_ICASE (1<<0) -#define REG_NEWLINE (1<<1) -#define REG_NOTBOL (1<<2) -#define REG_NOTEOL (1<<3) -#define REG_EXTENDED (1<<4) /* if not setted, Basic Onigular Expression */ -#define REG_NOSUB (1<<5) - -/* POSIX error codes */ -#define REG_NOMATCH 1 -#define REG_BADPAT 2 -#define REG_ECOLLATE 3 -#define REG_ECTYPE 4 -#define REG_EESCAPE 5 -#define REG_ESUBREG 6 -#define REG_EBRACK 7 -#define REG_EPAREN 8 -#define REG_EBRACE 9 -#define REG_BADBR 10 -#define REG_ERANGE 11 -#define REG_ESPACE 12 -#define REG_BADRPT 13 - -/* extended error codes */ -#define REG_EONIG_INTERNAL 14 -#define REG_EONIG_BADWC 15 -#define REG_EONIG_BADARG 16 -#define REG_EONIG_THREAD 17 - -/* character encodings (for reg_set_encoding()) */ -#define REG_POSIX_ENCODING_ASCII 0 -#define REG_POSIX_ENCODING_EUC_JP 1 -#define REG_POSIX_ENCODING_SJIS 2 -#define REG_POSIX_ENCODING_UTF8 3 -#define REG_POSIX_ENCODING_UTF16_BE 4 -#define REG_POSIX_ENCODING_UTF16_LE 5 - - -typedef int regoff_t; - -typedef struct { - regoff_t rm_so; - regoff_t rm_eo; -} regmatch_t; - -/* POSIX regex_t */ -typedef struct { - void* onig; /* Oniguruma regex_t* */ - size_t re_nsub; - int comp_options; -} regex_t; - - -#ifndef P_ -#if defined(__STDC__) || defined(_WIN32) -# define P_(args) args -#else -# define P_(args) () -#endif -#endif - -#ifndef ONIG_EXTERN -#if defined(_WIN32) && !defined(__GNUC__) -#if defined(EXPORT) -#define ONIG_EXTERN extern __declspec(dllexport) -#else -#define ONIG_EXTERN extern __declspec(dllimport) -#endif -#endif -#endif - -#ifndef ONIG_EXTERN -#define ONIG_EXTERN extern -#endif - -#ifndef ONIGURUMA_H -typedef unsigned int OnigOptionType; - -/* syntax */ -typedef struct { - unsigned int op; - unsigned int op2; - unsigned int behavior; - OnigOptionType options; /* default option */ -} OnigSyntaxType; - -ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic; -ONIG_EXTERN OnigSyntaxType 
OnigSyntaxPosixExtended; -ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs; -ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep; -ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex; -ONIG_EXTERN OnigSyntaxType OnigSyntaxJava; -ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl; -ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; - -/* predefined syntaxes (see regsyntax.c) */ -#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic) -#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended) -#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs) -#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep) -#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex) -#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava) -#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) -#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) -/* default syntax */ -#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax - -ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; - -ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax)); -ONIG_EXTERN void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from)); -ONIG_EXTERN const char* onig_version P_((void)); -ONIG_EXTERN const char* onig_copyright P_((void)); - -#endif /* ONIGURUMA_H */ - - -ONIG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options)); -ONIG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options)); -ONIG_EXTERN void regfree P_((regex_t* reg)); -ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t size)); - -/* extended API */ -ONIG_EXTERN void reg_set_encoding P_((int enc)); -ONIG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, const unsigned char* name, const unsigned char* name_end, int** nums)); -ONIG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), void* arg)); -ONIG_EXTERN int reg_number_of_names P_((regex_t* reg)); - -#ifdef __cplusplus -} -#endif - -#endif /* ONIGPOSIX_H */ diff --git 
a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/oniguruma.h b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/oniguruma.h deleted file mode 100644 index 034a2ccff1..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/oniguruma.h +++ /dev/null @@ -1,829 +0,0 @@ -#ifndef ONIGURUMA_H -#define ONIGURUMA_H -/********************************************************************** - oniguruma.h - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2009 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "OnigurumaUefiPort.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define ONIGURUMA -#define ONIGURUMA_VERSION_MAJOR 5 -#define ONIGURUMA_VERSION_MINOR 9 -#define ONIGURUMA_VERSION_TEENY 6 - -#ifdef __cplusplus -# ifndef HAVE_PROTOTYPES -# define HAVE_PROTOTYPES 1 -# endif -# ifndef HAVE_STDARG_PROTOTYPES -# define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif - -/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */ -#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4 -# ifndef HAVE_STDARG_PROTOTYPES -# define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif - -#ifdef HAVE_STDARG_H -# ifndef HAVE_STDARG_PROTOTYPES -# define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif - -#ifndef P_ -#if defined(__STDC__) || defined(_WIN32) -# define P_(args) args -#else -# define P_(args) () -#endif -#endif - -#ifndef PV_ -#ifdef HAVE_STDARG_PROTOTYPES -# define PV_(args) args -#else -# define PV_(args) () -#endif -#endif - -#ifndef ONIG_EXTERN -#if defined(_WIN32) && !defined(__GNUC__) -#if defined(EXPORT) || defined(RUBY_EXPORT) -#define ONIG_EXTERN extern __declspec(dllexport) -#else -#define ONIG_EXTERN extern __declspec(dllimport) -#endif -#endif -#endif - -#ifndef ONIG_EXTERN -#define ONIG_EXTERN extern -#endif - -/* PART: character encoding */ - -#ifndef ONIG_ESCAPE_UCHAR_COLLISION -#define UChar OnigUChar -#endif - -#ifdef _WIN32 -# include -typedef ULONG_PTR OnigCodePoint; -#else -typedef unsigned long OnigCodePoint; -#endif -typedef unsigned char OnigUChar; -typedef unsigned int OnigCtype; -typedef unsigned int OnigDistance; - -#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0) - -typedef unsigned int OnigCaseFoldType; /* case fold flag */ - -ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag; - -/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */ -/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */ -#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20) -#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30) - -#define ONIGENC_CASE_FOLD_MIN 
INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR -#define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag - - -#define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3 -#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13 -/* 13 => Unicode:0x1ffc */ - -/* code range */ -#define ONIGENC_CODE_RANGE_NUM(range) ((int )range[0]) -#define ONIGENC_CODE_RANGE_FROM(range,i) range[((i)*2) + 1] -#define ONIGENC_CODE_RANGE_TO(range,i) range[((i)*2) + 2] - -typedef struct { - int byte_len; /* argument(original) character(s) byte length */ - int code_len; /* number of code */ - OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN]; -} OnigCaseFoldCodeItem; - -typedef struct { - OnigCodePoint esc; - OnigCodePoint anychar; - OnigCodePoint anytime; - OnigCodePoint zero_or_one_time; - OnigCodePoint one_or_more_time; - OnigCodePoint anychar_anytime; -} OnigMetaCharTableType; - -typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg); - -typedef struct OnigEncodingTypeST { - int (*mbc_enc_len)(const OnigUChar* p); - const char* name; - int max_enc_len; - int min_enc_len; - int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end); - OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end); - int (*code_to_mbclen)(OnigCodePoint code); - int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf); - int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to); - int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg); - int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[]); - int (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end); - int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype); - int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]); - OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* 
p); - int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end); -} OnigEncodingType; - -typedef OnigEncodingType* OnigEncoding; - -ONIG_EXTERN OnigEncodingType OnigEncodingASCII; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_1; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_2; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_3; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_4; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_5; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_6; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_7; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_8; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_9; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_10; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_11; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_13; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16; -ONIG_EXTERN OnigEncodingType OnigEncodingUTF8; -ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_BE; -ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_LE; -ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_BE; -ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_LE; -ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP; -ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW; -ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR; -ONIG_EXTERN OnigEncodingType OnigEncodingEUC_CN; -ONIG_EXTERN OnigEncodingType OnigEncodingSJIS; -ONIG_EXTERN OnigEncodingType OnigEncodingKOI8; -ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R; -ONIG_EXTERN OnigEncodingType OnigEncodingCP1251; -ONIG_EXTERN OnigEncodingType OnigEncodingBIG5; -ONIG_EXTERN OnigEncodingType OnigEncodingGB18030; - -#define ONIG_ENCODING_ASCII (&OnigEncodingASCII) -#define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1) -#define ONIG_ENCODING_ISO_8859_2 (&OnigEncodingISO_8859_2) -#define ONIG_ENCODING_ISO_8859_3 (&OnigEncodingISO_8859_3) 
-#define ONIG_ENCODING_ISO_8859_4 (&OnigEncodingISO_8859_4) -#define ONIG_ENCODING_ISO_8859_5 (&OnigEncodingISO_8859_5) -#define ONIG_ENCODING_ISO_8859_6 (&OnigEncodingISO_8859_6) -#define ONIG_ENCODING_ISO_8859_7 (&OnigEncodingISO_8859_7) -#define ONIG_ENCODING_ISO_8859_8 (&OnigEncodingISO_8859_8) -#define ONIG_ENCODING_ISO_8859_9 (&OnigEncodingISO_8859_9) -#define ONIG_ENCODING_ISO_8859_10 (&OnigEncodingISO_8859_10) -#define ONIG_ENCODING_ISO_8859_11 (&OnigEncodingISO_8859_11) -#define ONIG_ENCODING_ISO_8859_13 (&OnigEncodingISO_8859_13) -#define ONIG_ENCODING_ISO_8859_14 (&OnigEncodingISO_8859_14) -#define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15) -#define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16) -#define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8) -#define ONIG_ENCODING_UTF16_BE (&OnigEncodingUTF16_BE) -#define ONIG_ENCODING_UTF16_LE (&OnigEncodingUTF16_LE) -#define ONIG_ENCODING_UTF32_BE (&OnigEncodingUTF32_BE) -#define ONIG_ENCODING_UTF32_LE (&OnigEncodingUTF32_LE) -#define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP) -#define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW) -#define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR) -#define ONIG_ENCODING_EUC_CN (&OnigEncodingEUC_CN) -#define ONIG_ENCODING_SJIS (&OnigEncodingSJIS) -#define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8) -#define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R) -#define ONIG_ENCODING_CP1251 (&OnigEncodingCP1251) -#define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5) -#define ONIG_ENCODING_GB18030 (&OnigEncodingGB18030) - -#define ONIG_ENCODING_UNDEF ((OnigEncoding )0) - - -/* work size */ -#define ONIGENC_CODE_TO_MBC_MAXLEN 7 -#define ONIGENC_MBC_CASE_FOLD_MAXLEN 18 -/* 18: 6(max-byte) * 3(case-fold chars) */ - -/* character types */ -#define ONIGENC_CTYPE_NEWLINE 0 -#define ONIGENC_CTYPE_ALPHA 1 -#define ONIGENC_CTYPE_BLANK 2 -#define ONIGENC_CTYPE_CNTRL 3 -#define ONIGENC_CTYPE_DIGIT 4 -#define ONIGENC_CTYPE_GRAPH 5 -#define ONIGENC_CTYPE_LOWER 6 -#define ONIGENC_CTYPE_PRINT 7 -#define 
ONIGENC_CTYPE_PUNCT 8 -#define ONIGENC_CTYPE_SPACE 9 -#define ONIGENC_CTYPE_UPPER 10 -#define ONIGENC_CTYPE_XDIGIT 11 -#define ONIGENC_CTYPE_WORD 12 -#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */ -#define ONIGENC_CTYPE_ASCII 14 -#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII - - -#define onig_enc_len(enc,p,end) ONIGENC_MBC_ENC_LEN(enc,p) - -#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF) -#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1) -#define ONIGENC_IS_MBC_HEAD(enc,p) (ONIGENC_MBC_ENC_LEN(enc,p) != 1) -#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128) -#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128) -#define ONIGENC_IS_MBC_WORD(enc,s,end) \ - ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end)) - - -#define ONIGENC_NAME(enc) ((enc)->name) - -#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \ - (enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf) -#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \ - (enc)->is_allowed_reverse_match(s,end) -#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \ - (enc)->left_adjust_char_head(start, s) -#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \ - (enc)->apply_all_case_fold(case_fold_flag,f,arg) -#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \ - (enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs) -#define ONIGENC_STEP_BACK(enc,start,s,n) \ - onigenc_step_back((enc),(start),(s),(n)) - -#define ONIGENC_MBC_ENC_LEN(enc,p) (enc)->mbc_enc_len(p) -#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len) -#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc) -#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len) -#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end)) -#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end)) -#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code) -#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf) -#define 
ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \ - (enc)->property_name_to_ctype(enc,p,end) - -#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype) - -#define ONIGENC_IS_CODE_NEWLINE(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE) -#define ONIGENC_IS_CODE_GRAPH(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH) -#define ONIGENC_IS_CODE_PRINT(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT) -#define ONIGENC_IS_CODE_ALNUM(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM) -#define ONIGENC_IS_CODE_ALPHA(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA) -#define ONIGENC_IS_CODE_LOWER(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER) -#define ONIGENC_IS_CODE_UPPER(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER) -#define ONIGENC_IS_CODE_CNTRL(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL) -#define ONIGENC_IS_CODE_PUNCT(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT) -#define ONIGENC_IS_CODE_SPACE(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE) -#define ONIGENC_IS_CODE_BLANK(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK) -#define ONIGENC_IS_CODE_DIGIT(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT) -#define ONIGENC_IS_CODE_XDIGIT(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT) -#define ONIGENC_IS_CODE_WORD(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD) - -#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \ - (enc)->get_ctype_code_range(ctype,sbout,ranges) - -ONIG_EXTERN -OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, int n)); - - -/* encoding API */ -ONIG_EXTERN -int onigenc_init P_((void)); -ONIG_EXTERN -int onigenc_set_default_encoding P_((OnigEncoding enc)); -ONIG_EXTERN -OnigEncoding onigenc_get_default_encoding P_((void)); -ONIG_EXTERN -void 
onigenc_set_default_caseconv_table P_((const OnigUChar* table)); -ONIG_EXTERN -OnigUChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar** prev)); -ONIG_EXTERN -OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s)); -ONIG_EXTERN -OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s)); -ONIG_EXTERN -OnigUChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s)); -ONIG_EXTERN -int onigenc_strlen P_((OnigEncoding enc, const OnigUChar* p, const OnigUChar* end)); -ONIG_EXTERN -int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p)); -ONIG_EXTERN -int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p)); - - - -/* PART: regular expression */ - -/* config parameters */ -#define ONIG_NREGION 10 -#define ONIG_MAX_BACKREF_NUM 1000 -#define ONIG_MAX_REPEAT_NUM 100000 -#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000 -/* constants */ -#define ONIG_MAX_ERROR_MESSAGE_LEN 90 - -typedef unsigned int OnigOptionType; - -#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE - -/* options */ -#define ONIG_OPTION_NONE 0U -#define ONIG_OPTION_IGNORECASE 1U -#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1) -#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1) -#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1) -#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1) -#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1) -#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1) -#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1) -#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1) -/* options (search time) */ -#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1) -#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1) -#define 
ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1) -#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */ - -#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt)) -#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt)) -#define ONIG_IS_OPTION_ON(options,option) ((options) & (option)) - -/* syntax */ -typedef struct { - unsigned int op; - unsigned int op2; - unsigned int behavior; - OnigOptionType options; /* default option */ - OnigMetaCharTableType meta_char_table; -} OnigSyntaxType; - -ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS; -ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic; -ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended; -ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs; -ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep; -ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex; -ONIG_EXTERN OnigSyntaxType OnigSyntaxJava; -ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl; -ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl_NG; -ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; - -/* predefined syntaxes (see regsyntax.c) */ -#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS) -#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic) -#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended) -#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs) -#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep) -#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex) -#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava) -#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) -#define ONIG_SYNTAX_PERL_NG (&OnigSyntaxPerl_NG) -#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) - -/* default syntax */ -ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; -#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax - -/* syntax (operators) */ -#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0) -#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . 
*/ -#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */ -#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3) -#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */ -#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5) -#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */ -#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7) -#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */ -#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */ -#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */ -#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */ -#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */ -#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */ -#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */ -#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */ -#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */ -#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */ -#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */ -#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<. \> */ -#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */ -#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */ -#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */ -#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */ -#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */ -#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */ -#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */ -#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */ -#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */ -#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */ -#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */ - -#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */ -#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) 
*/ -#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsx),(?-imsx) */ -#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imx), (?-imx) */ -#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */ -#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */ -#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */ -#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?...) */ -#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k */ -#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g, \g */ -#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@..) */ -#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */ -#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */ -#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */ -#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */ -#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */ -#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */ -#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */ -/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */ -#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */ -#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */ - -/* syntax (behavior) */ -#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */ -#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */ -#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */ -#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */ -#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? */ -#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */ -#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/ -#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */ -#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */ -#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?)(?) 
*/ -#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */ - -/* syntax (behavior) in char class [...] */ -#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */ -#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */ -#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22) -#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */ -/* syntax (behavior) warning */ -#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */ -#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */ - -/* meta character specifiers (onig_set_meta_char()) */ -#define ONIG_META_CHAR_ESCAPE 0 -#define ONIG_META_CHAR_ANYCHAR 1 -#define ONIG_META_CHAR_ANYTIME 2 -#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3 -#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4 -#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5 - -#define ONIG_INEFFECTIVE_META_CHAR 0 - -/* error codes */ -#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000) -/* normal return */ -#define ONIG_NORMAL 0 -#define ONIG_MISMATCH -1 -#define ONIG_NO_SUPPORT_CONFIG -2 - -/* internal error */ -#define ONIGERR_MEMORY -5 -#define ONIGERR_TYPE_BUG -6 -#define ONIGERR_PARSER_BUG -11 -#define ONIGERR_STACK_BUG -12 -#define ONIGERR_UNDEFINED_BYTECODE -13 -#define ONIGERR_UNEXPECTED_BYTECODE -14 -#define ONIGERR_MATCH_STACK_LIMIT_OVER -15 -#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21 -#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 -/* general error */ -#define ONIGERR_INVALID_ARGUMENT -30 -/* syntax error */ -#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100 -#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101 -#define ONIGERR_EMPTY_CHAR_CLASS -102 -#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103 -#define ONIGERR_END_PATTERN_AT_ESCAPE -104 -#define ONIGERR_END_PATTERN_AT_META -105 -#define ONIGERR_END_PATTERN_AT_CONTROL -106 -#define ONIGERR_META_CODE_SYNTAX -108 -#define ONIGERR_CONTROL_CODE_SYNTAX -109 -#define 
ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110 -#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111 -#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112 -#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113 -#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114 -#define ONIGERR_NESTED_REPEAT_OPERATOR -115 -#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116 -#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117 -#define ONIGERR_END_PATTERN_IN_GROUP -118 -#define ONIGERR_UNDEFINED_GROUP_OPTION -119 -#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121 -#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122 -#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123 -/* values error (syntax error) */ -#define ONIGERR_TOO_BIG_NUMBER -200 -#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201 -#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202 -#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203 -#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204 -#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205 -#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206 -#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207 -#define ONIGERR_INVALID_BACKREF -208 -#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209 -#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212 -#define ONIGERR_EMPTY_GROUP_NAME -214 -#define ONIGERR_INVALID_GROUP_NAME -215 -#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216 -#define ONIGERR_UNDEFINED_NAME_REFERENCE -217 -#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218 -#define ONIGERR_MULTIPLEX_DEFINED_NAME -219 -#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220 -#define ONIGERR_NEVER_ENDING_RECURSION -221 -#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222 -#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223 -#define ONIGERR_INVALID_CODE_POINT_VALUE -400 -#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400 -#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401 -#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402 -#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403 - -/* errors 
related to thread */ -#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 - - -/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */ -#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31 -#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \ - ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i]) - -typedef struct OnigCaptureTreeNodeStruct { - int group; /* group number */ - int beg; - int end; - int allocated; - int num_childs; - struct OnigCaptureTreeNodeStruct** childs; -} OnigCaptureTreeNode; - -/* match result region type */ -struct re_registers { - int allocated; - int num_regs; - int* beg; - int* end; - /* extended */ - OnigCaptureTreeNode* history_root; /* capture history tree root */ -}; - -/* capture tree traverse */ -#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1 -#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2 -#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \ - ( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST ) - - -#define ONIG_REGION_NOTPOS -1 - -typedef struct re_registers OnigRegion; - -typedef struct { - OnigEncoding enc; - OnigUChar* par; - OnigUChar* par_end; -} OnigErrorInfo; - -typedef struct { - int lower; - int upper; -} OnigRepeatRange; - -typedef void (*OnigWarnFunc) P_((const char* s)); -extern void onig_null_warn P_((const char* s)); -#define ONIG_NULL_WARN onig_null_warn - -#define ONIG_CHAR_TABLE_SIZE 256 - -/* regex_t state */ -#define ONIG_STATE_NORMAL 0 -#define ONIG_STATE_SEARCHING 1 -#define ONIG_STATE_COMPILING -1 -#define ONIG_STATE_MODIFY -2 - -#define ONIG_STATE(reg) \ - ((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state) - -typedef struct re_pattern_buffer { - /* common members of BBuf(bytes-buffer) */ - unsigned char* p; /* compiled pattern */ - unsigned int used; /* used space for p */ - unsigned int alloc; /* allocated space for p */ - - int state; /* normal, searching, compiling */ - int num_mem; /* used memory(...) 
num counted from 1 */ - int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ - int num_null_check; /* OP_NULL_CHECK_START/END id counter */ - int num_comb_exp_check; /* combination explosion check */ - int num_call; /* number of subexp call */ - unsigned int capture_history; /* (?@...) flag (1-31) */ - unsigned int bt_mem_start; /* need backtrack flag */ - unsigned int bt_mem_end; /* need backtrack flag */ - int stack_pop_level; - int repeat_range_alloc; - OnigRepeatRange* repeat_range; - - OnigEncoding enc; - OnigOptionType options; - OnigSyntaxType* syntax; - OnigCaseFoldType case_fold_flag; - void* name_table; - - /* optimization info (string search, char-map and anchors) */ - int optimize; /* optimize flag */ - int threshold_len; /* search str-length for apply optimize */ - int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ - OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */ - OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */ - int sub_anchor; /* start-anchor for exact or map */ - unsigned char *exact; - unsigned char *exact_end; - unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ - int *int_map; /* BM skip for exact_len > 255 */ - int *int_map_backward; /* BM skip for backward search */ - OnigDistance dmin; /* min-distance of exact or map */ - OnigDistance dmax; /* max-distance of exact or map */ - - /* regex_t link chain */ - struct re_pattern_buffer* chain; /* escape compile-conflict */ -} OnigRegexType; - -typedef OnigRegexType* OnigRegex; - -#ifndef ONIG_ESCAPE_REGEX_T_COLLISION - typedef OnigRegexType regex_t; -#endif - - -typedef struct { - int num_of_elements; - OnigEncoding pattern_enc; - OnigEncoding target_enc; - OnigSyntaxType* syntax; - OnigOptionType option; - OnigCaseFoldType case_fold_flag; -} OnigCompileInfo; - -/* Oniguruma Native API */ -ONIG_EXTERN -int onig_init P_((void)); -ONIG_EXTERN -int onig_error_code_to_str PV_((OnigUChar* s, int err_code, ...)); -ONIG_EXTERN -void 
onig_set_warn_func P_((OnigWarnFunc f)); -ONIG_EXTERN -void onig_set_verb_warn_func P_((OnigWarnFunc f)); -ONIG_EXTERN -int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); -ONIG_EXTERN -int onig_reg_init P_((regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax)); -int onig_new_without_alloc P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); -ONIG_EXTERN -int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); -ONIG_EXTERN -void onig_free P_((OnigRegex)); -ONIG_EXTERN -void onig_free_body P_((OnigRegex)); -ONIG_EXTERN -int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); -ONIG_EXTERN -int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); -ONIG_EXTERN -int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option)); -ONIG_EXTERN -int onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option)); -ONIG_EXTERN -OnigRegion* onig_region_new P_((void)); -ONIG_EXTERN -void onig_region_init P_((OnigRegion* region)); -ONIG_EXTERN -void onig_region_free P_((OnigRegion* region, int free_self)); -ONIG_EXTERN -void onig_region_copy P_((OnigRegion* to, OnigRegion* from)); -ONIG_EXTERN -void onig_region_clear P_((OnigRegion* region)); -ONIG_EXTERN -int onig_region_resize P_((OnigRegion* region, int n)); -ONIG_EXTERN -int 
onig_region_set P_((OnigRegion* region, int at, int beg, int end)); -ONIG_EXTERN -int onig_name_to_group_numbers P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums)); -ONIG_EXTERN -int onig_name_to_backref_number P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region)); -ONIG_EXTERN -int onig_foreach_name P_((OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg)); -ONIG_EXTERN -int onig_number_of_names P_((OnigRegex reg)); -ONIG_EXTERN -int onig_number_of_captures P_((OnigRegex reg)); -ONIG_EXTERN -int onig_number_of_capture_histories P_((OnigRegex reg)); -ONIG_EXTERN -OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region)); -ONIG_EXTERN -int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg)); -ONIG_EXTERN -int onig_noname_group_capture_is_active P_((OnigRegex reg)); -ONIG_EXTERN -OnigEncoding onig_get_encoding P_((OnigRegex reg)); -ONIG_EXTERN -OnigOptionType onig_get_options P_((OnigRegex reg)); -ONIG_EXTERN -OnigCaseFoldType onig_get_case_fold_flag P_((OnigRegex reg)); -ONIG_EXTERN -OnigSyntaxType* onig_get_syntax P_((OnigRegex reg)); -ONIG_EXTERN -int onig_set_default_syntax P_((OnigSyntaxType* syntax)); -ONIG_EXTERN -void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from)); -ONIG_EXTERN -unsigned int onig_get_syntax_op P_((OnigSyntaxType* syntax)); -ONIG_EXTERN -unsigned int onig_get_syntax_op2 P_((OnigSyntaxType* syntax)); -ONIG_EXTERN -unsigned int onig_get_syntax_behavior P_((OnigSyntaxType* syntax)); -ONIG_EXTERN -OnigOptionType onig_get_syntax_options P_((OnigSyntaxType* syntax)); -ONIG_EXTERN -void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op)); -ONIG_EXTERN -void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2)); -ONIG_EXTERN -void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior)); -ONIG_EXTERN 
-void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options)); -ONIG_EXTERN -int onig_set_meta_char P_((OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code)); -ONIG_EXTERN -void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from)); -ONIG_EXTERN -OnigCaseFoldType onig_get_default_case_fold_flag P_((void)); -ONIG_EXTERN -int onig_set_default_case_fold_flag P_((OnigCaseFoldType case_fold_flag)); -ONIG_EXTERN -unsigned int onig_get_match_stack_limit_size P_((void)); -ONIG_EXTERN -int onig_set_match_stack_limit_size P_((unsigned int size)); -ONIG_EXTERN -int onig_end P_((void)); -ONIG_EXTERN -const char* onig_version P_((void)); -ONIG_EXTERN -const char* onig_copyright P_((void)); - -#ifdef __cplusplus -} -#endif - -#endif /* ONIGURUMA_H */ diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regcomp.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regcomp.c deleted file mode 100644 index 6dc6c28772..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regcomp.c +++ /dev/null @@ -1,6291 +0,0 @@ -/********************************************************************** - regcomp.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2013 K.Kosako - * All rights reserved. - * - * Copyright (c) 2015, Hewlett Packard Enterprise Development, L.P.
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "regparse.h" - -OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN; - -extern OnigCaseFoldType -onig_get_default_case_fold_flag(void) -{ - return OnigDefaultCaseFoldFlag; -} - -extern int -onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) -{ - OnigDefaultCaseFoldFlag = case_fold_flag; - return 0; -} - - -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS -static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; -#endif - -static UChar* -str_dup(UChar* s, UChar* end) -{ - int len = (int)(end - s); - - if (len > 0) { - UChar* r = (UChar* )xmalloc(len + 1); - CHECK_NULL_RETURN(r); - xmemcpy(r, s, len); - r[len] = (UChar )0; - return r; - } - else return NULL; -} - -static void -swap_node(Node* a, Node* b) -{ - Node c; - c = *a; *a = *b; *b = c; - - if (NTYPE(a) == NT_STR) { - StrNode* sn = NSTR(a); - if (sn->capa == 0) { - int len = (int)(sn->end - sn->s); - sn->s = sn->buf; - sn->end = sn->s + len; - } - } - - if (NTYPE(b) == NT_STR) { - StrNode* sn = NSTR(b); - if (sn->capa == 0) { - int len = (int)(sn->end - sn->s); - sn->s = sn->buf; - sn->end = sn->s + len; - } - } -} - -static OnigDistance -distance_add(OnigDistance d1, OnigDistance d2) -{ - if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE) - return ONIG_INFINITE_DISTANCE; - else { - if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2; - else return ONIG_INFINITE_DISTANCE; - } -} - -static OnigDistance -distance_multiply(OnigDistance d, int m) -{ - if (m == 0) return 0; - - if (d < ONIG_INFINITE_DISTANCE / m) - return d * m; - else - return ONIG_INFINITE_DISTANCE; -} - -static int -bitset_is_empty(BitSetRef bs) -{ - int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { - if (bs[i] != 0) return 0; - } - return 1; -} - -#ifdef ONIG_DEBUG -static int -bitset_on_num(BitSetRef bs) -{ - int i, n; - - n = 0; - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (BITSET_AT(bs, i)) n++; - } - return n; -} -#endif - -extern int -onig_bbuf_init(BBuf* buf, int size) -{ - if (size <= 0) { 
- size = 0; - buf->p = NULL; - } - else { - buf->p = (UChar* )xmalloc(size); - if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); - } - - buf->alloc = size; - buf->used = 0; - return 0; -} - - -#ifdef USE_SUBEXP_CALL - -static int -unset_addr_list_init(UnsetAddrList* uslist, int size) -{ - UnsetAddr* p; - - p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); - CHECK_NULL_RETURN_MEMERR(p); - uslist->num = 0; - uslist->alloc = size; - uslist->us = p; - return 0; -} - -static void -unset_addr_list_end(UnsetAddrList* uslist) -{ - if (IS_NOT_NULL(uslist->us)) - xfree(uslist->us); -} - -static int -unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node) -{ - UnsetAddr* p; - int size; - - if (uslist->num >= uslist->alloc) { - size = uslist->alloc * 2; - p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size, sizeof(UnsetAddr) * uslist->alloc); - CHECK_NULL_RETURN_MEMERR(p); - uslist->alloc = size; - uslist->us = p; - } - - uslist->us[uslist->num].offset = offset; - uslist->us[uslist->num].target = node; - uslist->num++; - return 0; -} -#endif /* USE_SUBEXP_CALL */ - - -static int -add_opcode(regex_t* reg, int opcode) -{ - BBUF_ADD1(reg, ((unsigned char)opcode)); - return 0; -} - -#ifdef USE_COMBINATION_EXPLOSION_CHECK -static int -add_state_check_num(regex_t* reg, int num) -{ - StateCheckNumType n = (StateCheckNumType )num; - - BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM); - return 0; -} -#endif - -static int -add_rel_addr(regex_t* reg, int addr) -{ - RelAddrType ra = (RelAddrType )addr; - - BBUF_ADD(reg, &ra, SIZE_RELADDR); - return 0; -} - -static int -add_abs_addr(regex_t* reg, int addr) -{ - AbsAddrType ra = (AbsAddrType )addr; - - BBUF_ADD(reg, &ra, SIZE_ABSADDR); - return 0; -} - -static int -add_length(regex_t* reg, int len) -{ - LengthType l = (LengthType )len; - - BBUF_ADD(reg, &l, SIZE_LENGTH); - return 0; -} - -static int -add_mem_num(regex_t* reg, int num) -{ - MemNumType n = (MemNumType )num; - - BBUF_ADD(reg, &n, SIZE_MEMNUM); - return 0; -} - 
-static int -add_pointer(regex_t* reg, void* addr) -{ - PointerType ptr = (PointerType )addr; - - BBUF_ADD(reg, &ptr, SIZE_POINTER); - return 0; -} - -static int -add_option(regex_t* reg, OnigOptionType option) -{ - BBUF_ADD(reg, &option, SIZE_OPTION); - return 0; -} - -static int -add_opcode_rel_addr(regex_t* reg, int opcode, int addr) -{ - int r; - - r = add_opcode(reg, opcode); - if (r) return r; - r = add_rel_addr(reg, addr); - return r; -} - -static int -add_bytes(regex_t* reg, UChar* bytes, int len) -{ - BBUF_ADD(reg, bytes, len); - return 0; -} - -static int -add_bitset(regex_t* reg, BitSetRef bs) -{ - BBUF_ADD(reg, bs, SIZE_BITSET); - return 0; -} - -static int -add_opcode_option(regex_t* reg, int opcode, OnigOptionType option) -{ - int r; - - r = add_opcode(reg, opcode); - if (r) return r; - r = add_option(reg, option); - return r; -} - -static int compile_length_tree(Node* node, regex_t* reg); -static int compile_tree(Node* node, regex_t* reg); - - -#define IS_NEED_STR_LEN_OP_EXACT(op) \ - ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\ - (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC) - -static int -select_str_opcode(int mb_len, int str_len, int ignore_case) -{ - int op; - - if (ignore_case) { - switch (str_len) { - case 1: op = OP_EXACT1_IC; break; - default: op = OP_EXACTN_IC; break; - } - } - else { - switch (mb_len) { - case 1: - switch (str_len) { - case 1: op = OP_EXACT1; break; - case 2: op = OP_EXACT2; break; - case 3: op = OP_EXACT3; break; - case 4: op = OP_EXACT4; break; - case 5: op = OP_EXACT5; break; - default: op = OP_EXACTN; break; - } - break; - - case 2: - switch (str_len) { - case 1: op = OP_EXACTMB2N1; break; - case 2: op = OP_EXACTMB2N2; break; - case 3: op = OP_EXACTMB2N3; break; - default: op = OP_EXACTMB2N; break; - } - break; - - case 3: - op = OP_EXACTMB3N; - break; - - default: - op = OP_EXACTMBN; - break; - } - } - return op; -} - -static int -compile_tree_empty_check(Node* node, regex_t* reg, int 
empty_info) -{ - int r; - int saved_num_null_check = reg->num_null_check; - - if (empty_info != 0) { - r = add_opcode(reg, OP_NULL_CHECK_START); - if (r) return r; - r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */ - if (r) return r; - reg->num_null_check++; - } - - r = compile_tree(node, reg); - if (r) return r; - - if (empty_info != 0) { - if (empty_info == NQ_TARGET_IS_EMPTY) - r = add_opcode(reg, OP_NULL_CHECK_END); - else if (empty_info == NQ_TARGET_IS_EMPTY_MEM) - r = add_opcode(reg, OP_NULL_CHECK_END_MEMST); - else if (empty_info == NQ_TARGET_IS_EMPTY_REC) - r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH); - - if (r) return r; - r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */ - } - return r; -} - -#ifdef USE_SUBEXP_CALL -static int -compile_call(CallNode* node, regex_t* reg) -{ - int r; - - r = add_opcode(reg, OP_CALL); - if (r) return r; - r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg), - node->target); - if (r) return r; - r = add_abs_addr(reg, 0 /*dummy addr.*/); - return r; -} -#endif - -static int -compile_tree_n_times(Node* node, int n, regex_t* reg) -{ - int i, r; - - for (i = 0; i < n; i++) { - r = compile_tree(node, reg); - if (r) return r; - } - return 0; -} - -static int -add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len, - regex_t* reg ARG_UNUSED, int ignore_case) -{ - int len; - int op = select_str_opcode(mb_len, str_len, ignore_case); - - len = SIZE_OPCODE; - - if (op == OP_EXACTMBN) len += SIZE_LENGTH; - if (IS_NEED_STR_LEN_OP_EXACT(op)) - len += SIZE_LENGTH; - - len += mb_len * str_len; - return len; -} - -static int -add_compile_string(UChar* s, int mb_len, int str_len, - regex_t* reg, int ignore_case) -{ - int op = select_str_opcode(mb_len, str_len, ignore_case); - add_opcode(reg, op); - - if (op == OP_EXACTMBN) - add_length(reg, mb_len); - - if (IS_NEED_STR_LEN_OP_EXACT(op)) { - if (op == OP_EXACTN_IC) - add_length(reg, mb_len * str_len); - else - add_length(reg, 
str_len); - } - - add_bytes(reg, s, mb_len * str_len); - return 0; -} - - -static int -compile_length_string_node(Node* node, regex_t* reg) -{ - int rlen, r, len, prev_len, slen, ambig; - OnigEncoding enc = reg->enc; - UChar *p, *prev; - StrNode* sn; - - sn = NSTR(node); - if (sn->end <= sn->s) - return 0; - - ambig = NSTRING_IS_AMBIG(node); - - p = prev = sn->s; - prev_len = enclen(enc, p); - p += prev_len; - slen = 1; - rlen = 0; - - for (; p < sn->end; ) { - len = enclen(enc, p); - if (len == prev_len) { - slen++; - } - else { - r = add_compile_string_length(prev, prev_len, slen, reg, ambig); - rlen += r; - prev = p; - slen = 1; - prev_len = len; - } - p += len; - } - r = add_compile_string_length(prev, prev_len, slen, reg, ambig); - rlen += r; - return rlen; -} - -static int -compile_length_string_raw_node(StrNode* sn, regex_t* reg) -{ - if (sn->end <= sn->s) - return 0; - - return add_compile_string_length(sn->s, 1 /* sb */, (int)(sn->end - sn->s), reg, 0); -} - -static int -compile_string_node(Node* node, regex_t* reg) -{ - int r, len, prev_len, slen, ambig; - OnigEncoding enc = reg->enc; - UChar *p, *prev, *end; - StrNode* sn; - - sn = NSTR(node); - if (sn->end <= sn->s) - return 0; - - end = sn->end; - ambig = NSTRING_IS_AMBIG(node); - - p = prev = sn->s; - prev_len = enclen(enc, p); - p += prev_len; - slen = 1; - - for (; p < end; ) { - len = enclen(enc, p); - if (len == prev_len) { - slen++; - } - else { - r = add_compile_string(prev, prev_len, slen, reg, ambig); - if (r) return r; - - prev = p; - slen = 1; - prev_len = len; - } - - p += len; - } - return add_compile_string(prev, prev_len, slen, reg, ambig); -} - -static int -compile_string_raw_node(StrNode* sn, regex_t* reg) -{ - if (sn->end <= sn->s) - return 0; - - return add_compile_string(sn->s, 1 /* sb */, (int)(sn->end - sn->s), reg, 0); -} - -static int -add_multi_byte_cclass(BBuf* mbuf, regex_t* reg) -{ -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS - add_length(reg, mbuf->used); - return add_bytes(reg, 
mbuf->p, mbuf->used); -#else - int r, pad_size; - UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; - - GET_ALIGNMENT_PAD_SIZE(p, pad_size); - add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1)); - if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); - - r = add_bytes(reg, mbuf->p, mbuf->used); - - /* padding for return value from compile_length_cclass_node() to be fix. */ - pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size; - if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); - return r; -#endif -} - -static int -compile_length_cclass_node(CClassNode* cc, regex_t* reg) -{ - int len; - - if (IS_NCCLASS_SHARE(cc)) { - len = SIZE_OPCODE + SIZE_POINTER; - return len; - } - - if (IS_NULL(cc->mbuf)) { - len = SIZE_OPCODE + SIZE_BITSET; - } - else { - if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { - len = SIZE_OPCODE; - } - else { - len = SIZE_OPCODE + SIZE_BITSET; - } -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS - len += SIZE_LENGTH + cc->mbuf->used; -#else - len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1); -#endif - } - - return len; -} - -static int -compile_cclass_node(CClassNode* cc, regex_t* reg) -{ - int r; - - if (IS_NCCLASS_SHARE(cc)) { - add_opcode(reg, OP_CCLASS_NODE); - r = add_pointer(reg, cc); - return r; - } - - if (IS_NULL(cc->mbuf)) { - if (IS_NCCLASS_NOT(cc)) - add_opcode(reg, OP_CCLASS_NOT); - else - add_opcode(reg, OP_CCLASS); - - r = add_bitset(reg, cc->bs); - } - else { - if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { - if (IS_NCCLASS_NOT(cc)) - add_opcode(reg, OP_CCLASS_MB_NOT); - else - add_opcode(reg, OP_CCLASS_MB); - - r = add_multi_byte_cclass(cc->mbuf, reg); - } - else { - if (IS_NCCLASS_NOT(cc)) - add_opcode(reg, OP_CCLASS_MIX_NOT); - else - add_opcode(reg, OP_CCLASS_MIX); - - r = add_bitset(reg, cc->bs); - if (r) return r; - r = add_multi_byte_cclass(cc->mbuf, reg); - } - } - - return r; -} - -static int -entry_repeat_range(regex_t* reg, int id, int lower, int upper) -{ -#define 
REPEAT_RANGE_ALLOC 4 - - OnigRepeatRange* p; - - if (reg->repeat_range_alloc == 0) { - p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); - CHECK_NULL_RETURN_MEMERR(p); - reg->repeat_range = p; - reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; - } - else if (reg->repeat_range_alloc <= id) { - int n; - n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; - p = (OnigRepeatRange* )xrealloc(reg->repeat_range, - sizeof(OnigRepeatRange) * n, - sizeof(OnigRepeatRange) * reg->repeat_range_alloc); - CHECK_NULL_RETURN_MEMERR(p); - reg->repeat_range = p; - reg->repeat_range_alloc = n; - } - else { - p = reg->repeat_range; - } - - p[id].lower = lower; - p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper); - return 0; -} - -static int -compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info, - regex_t* reg) -{ - int r; - int num_repeat = reg->num_repeat; - - r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG); - if (r) return r; - r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ - reg->num_repeat++; - if (r) return r; - r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC); - if (r) return r; - - r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper); - if (r) return r; - - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - - if ( -#ifdef USE_SUBEXP_CALL - reg->num_call > 0 || -#endif - IS_QUANTIFIER_IN_REPEAT(qn)) { - r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG); - } - else { - r = add_opcode(reg, qn->greedy ? 
OP_REPEAT_INC : OP_REPEAT_INC_NG); - } - if (r) return r; - r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ - return r; -} - -static int -is_anychar_star_quantifier(QtfrNode* qn) -{ - if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && - NTYPE(qn->target) == NT_CANY) - return 1; - else - return 0; -} - -#define QUANTIFIER_EXPAND_LIMIT_SIZE 50 -#define CKN_ON (ckn > 0) - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -static int -compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) -{ - int len, mod_tlen, cklen; - int ckn; - int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->target_empty_info; - int tlen = compile_length_tree(qn->target, reg); - - if (tlen < 0) return tlen; - - ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); - - cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0); - - /* anychar repeat */ - if (NTYPE(qn->target) == NT_CANY) { - if (qn->greedy && infinite) { - if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) - return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; - else - return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; - } - } - - if (empty_info != 0) - mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); - else - mod_tlen = tlen; - - if (infinite && qn->lower <= 1) { - if (qn->greedy) { - if (qn->lower == 1) - len = SIZE_OP_JUMP; - else - len = 0; - - len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; - } - else { - if (qn->lower == 0) - len = SIZE_OP_JUMP; - else - len = 0; - - len += mod_tlen + SIZE_OP_PUSH + cklen; - } - } - else if (qn->upper == 0) { - if (qn->is_refered != 0) /* /(?..){0}/ */ - len = SIZE_OP_JUMP + tlen; - else - len = 0; - } - else if (qn->upper == 1 && qn->greedy) { - if (qn->lower == 0) { - if (CKN_ON) { - len = SIZE_OP_STATE_CHECK_PUSH + tlen; - } - else { - len = SIZE_OP_PUSH + tlen; - } - } - else { - len = tlen; - } - } - else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' 
*/ - len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen; - } - else { - len = SIZE_OP_REPEAT_INC - + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; - if (CKN_ON) - len += SIZE_OP_STATE_CHECK; - } - - return len; -} - -static int -compile_quantifier_node(QtfrNode* qn, regex_t* reg) -{ - int r, mod_tlen; - int ckn; - int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->target_empty_info; - int tlen = compile_length_tree(qn->target, reg); - - if (tlen < 0) return tlen; - - ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); - - if (is_anychar_star_quantifier(qn)) { - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; - if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { - if (IS_MULTILINE(reg->options)) - r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); - else - r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); - if (r) return r; - if (CKN_ON) { - r = add_state_check_num(reg, ckn); - if (r) return r; - } - - return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); - } - else { - if (IS_MULTILINE(reg->options)) { - r = add_opcode(reg, (CKN_ON ? - OP_STATE_CHECK_ANYCHAR_ML_STAR - : OP_ANYCHAR_ML_STAR)); - } - else { - r = add_opcode(reg, (CKN_ON ? - OP_STATE_CHECK_ANYCHAR_STAR - : OP_ANYCHAR_STAR)); - } - if (r) return r; - if (CKN_ON) - r = add_state_check_num(reg, ckn); - - return r; - } - } - - if (empty_info != 0) - mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); - else - mod_tlen = tlen; - - if (infinite && qn->lower <= 1) { - if (qn->greedy) { - if (qn->lower == 1) { - r = add_opcode_rel_addr(reg, OP_JUMP, - (CKN_ON ? 
SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); - if (r) return r; - } - - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r) return r; - r = add_state_check_num(reg, ckn); - if (r) return r; - r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); - } - else { - r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); - } - if (r) return r; - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP - + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); - } - else { - if (qn->lower == 0) { - r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); - if (r) return r; - } - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); - if (r) return r; - r = add_state_check_num(reg, ckn); - if (r) return r; - r = add_rel_addr(reg, - -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); - } - else - r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); - } - } - else if (qn->upper == 0) { - if (qn->is_refered != 0) { /* /(?..){0}/ */ - r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r) return r; - r = compile_tree(qn->target, reg); - } - else - r = 0; - } - else if (qn->upper == 1 && qn->greedy) { - if (qn->lower == 0) { - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r) return r; - r = add_state_check_num(reg, ckn); - if (r) return r; - r = add_rel_addr(reg, tlen); - } - else { - r = add_opcode_rel_addr(reg, OP_PUSH, tlen); - } - if (r) return r; - } - - r = compile_tree(qn->target, reg); - } - else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' 
*/ - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r) return r; - r = add_state_check_num(reg, ckn); - if (r) return r; - r = add_rel_addr(reg, SIZE_OP_JUMP); - } - else { - r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); - } - - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r) return r; - r = compile_tree(qn->target, reg); - } - else { - r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); - if (CKN_ON) { - if (r) return r; - r = add_opcode(reg, OP_STATE_CHECK); - if (r) return r; - r = add_state_check_num(reg, ckn); - } - } - return r; -} - -#else /* USE_COMBINATION_EXPLOSION_CHECK */ - -static int -compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) -{ - int len, mod_tlen; - int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->target_empty_info; - int tlen = compile_length_tree(qn->target, reg); - - if (tlen < 0) return tlen; - - /* anychar repeat */ - if (NTYPE(qn->target) == NT_CANY) { - if (qn->greedy && infinite) { - if (IS_NOT_NULL(qn->next_head_exact)) - return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; - else - return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower; - } - } - - if (empty_info != 0) - mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); - else - mod_tlen = tlen; - - if (infinite && - (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { - if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { - len = SIZE_OP_JUMP; - } - else { - len = tlen * qn->lower; - } - - if (qn->greedy) { - if (IS_NOT_NULL(qn->head_exact)) - len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP; - else if (IS_NOT_NULL(qn->next_head_exact)) - len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP; - else - len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP; - } - else - len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH; - } - else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?..){0}/ */ - len = SIZE_OP_JUMP + tlen; - } - else if (!infinite && 
qn->greedy && - (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper - <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { - len = tlen * qn->lower; - len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); - } - else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ - len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen; - } - else { - len = SIZE_OP_REPEAT_INC - + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; - } - - return len; -} - -static int -compile_quantifier_node(QtfrNode* qn, regex_t* reg) -{ - int i, r, mod_tlen; - int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->target_empty_info; - int tlen = compile_length_tree(qn->target, reg); - - if (tlen < 0) return tlen; - - if (is_anychar_star_quantifier(qn)) { - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; - if (IS_NOT_NULL(qn->next_head_exact)) { - if (IS_MULTILINE(reg->options)) - r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); - else - r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); - if (r) return r; - return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); - } - else { - if (IS_MULTILINE(reg->options)) - return add_opcode(reg, OP_ANYCHAR_ML_STAR); - else - return add_opcode(reg, OP_ANYCHAR_STAR); - } - } - - if (empty_info != 0) - mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); - else - mod_tlen = tlen; - - if (infinite && - (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { - if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { - if (qn->greedy) { - if (IS_NOT_NULL(qn->head_exact)) - r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1); - else if (IS_NOT_NULL(qn->next_head_exact)) - r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT); - else - r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH); - } - else { - r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP); - } - if (r) return r; - } - else { - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; - } - - if 
(qn->greedy) { - if (IS_NOT_NULL(qn->head_exact)) { - r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, - mod_tlen + SIZE_OP_JUMP); - if (r) return r; - add_bytes(reg, NSTR(qn->head_exact)->s, 1); - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); - } - else if (IS_NOT_NULL(qn->next_head_exact)) { - r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, - mod_tlen + SIZE_OP_JUMP); - if (r) return r; - add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); - } - else { - r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); - if (r) return r; - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH)); - } - } - else { - r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); - if (r) return r; - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); - } - } - else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?..){0}/ */ - r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r) return r; - r = compile_tree(qn->target, reg); - } - else if (!infinite && qn->greedy && - (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper - <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { - int n = qn->upper - qn->lower; - - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; - - for (i = 0; i < n; i++) { - r = add_opcode_rel_addr(reg, OP_PUSH, - (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); - if (r) return r; - r = compile_tree(qn->target, reg); - if (r) return r; - } - } - else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' 
*/ - r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r) return r; - r = compile_tree(qn->target, reg); - } - else { - r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); - } - return r; -} -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - -static int -compile_length_option_node(EncloseNode* node, regex_t* reg) -{ - int tlen; - OnigOptionType prev = reg->options; - - reg->options = node->option; - tlen = compile_length_tree(node->target, reg); - reg->options = prev; - - if (tlen < 0) return tlen; - - if (IS_DYNAMIC_OPTION(prev ^ node->option)) { - return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL - + tlen + SIZE_OP_SET_OPTION; - } - else - return tlen; -} - -static int -compile_option_node(EncloseNode* node, regex_t* reg) -{ - int r; - OnigOptionType prev = reg->options; - - if (IS_DYNAMIC_OPTION(prev ^ node->option)) { - r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option); - if (r) return r; - r = add_opcode_option(reg, OP_SET_OPTION, prev); - if (r) return r; - r = add_opcode(reg, OP_FAIL); - if (r) return r; - } - - reg->options = node->option; - r = compile_tree(node->target, reg); - reg->options = prev; - - if (IS_DYNAMIC_OPTION(prev ^ node->option)) { - if (r) return r; - r = add_opcode_option(reg, OP_SET_OPTION, prev); - } - return r; -} - -static int -compile_length_enclose_node(EncloseNode* node, regex_t* reg) -{ - int len; - int tlen; - - if (node->type == ENCLOSE_OPTION) - return compile_length_option_node(node, reg); - - if (node->target) { - tlen = compile_length_tree(node->target, reg); - if (tlen < 0) return tlen; - } - else - tlen = 0; - - switch (node->type) { - case ENCLOSE_MEMORY: -#ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_CALLED(node)) { - len = SIZE_OP_MEMORY_START_PUSH + tlen - + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; - if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - len += (IS_ENCLOSE_RECURSION(node) - ? 
SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); - else - len += (IS_ENCLOSE_RECURSION(node) - ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); - } - else -#endif - { - if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) - len = SIZE_OP_MEMORY_START_PUSH; - else - len = SIZE_OP_MEMORY_START; - - len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) - ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); - } - break; - - case ENCLOSE_STOP_BACKTRACK: - if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { - QtfrNode* qn = NQTFR(node->target); - tlen = compile_length_tree(qn->target, reg); - if (tlen < 0) return tlen; - - len = tlen * qn->lower - + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; - } - else { - len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT; - } - break; - - default: - return ONIGERR_TYPE_BUG; - break; - } - - return len; -} - -static int get_char_length_tree(Node* node, regex_t* reg, int* len); - -static int -compile_enclose_node(EncloseNode* node, regex_t* reg) -{ - int r, len; - - if (node->type == ENCLOSE_OPTION) - return compile_option_node(node, reg); - - switch (node->type) { - case ENCLOSE_MEMORY: -#ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_CALLED(node)) { - r = add_opcode(reg, OP_CALL); - if (r) return r; - node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; - node->state |= NST_ADDR_FIXED; - r = add_abs_addr(reg, (int )node->call_addr); - if (r) return r; - len = compile_length_tree(node->target, reg); - len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); - if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - len += (IS_ENCLOSE_RECURSION(node) - ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); - else - len += (IS_ENCLOSE_RECURSION(node) - ? 
SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); - - r = add_opcode_rel_addr(reg, OP_JUMP, len); - if (r) return r; - } -#endif - if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) - r = add_opcode(reg, OP_MEMORY_START_PUSH); - else - r = add_opcode(reg, OP_MEMORY_START); - if (r) return r; - r = add_mem_num(reg, node->regnum); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; -#ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_CALLED(node)) { - if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) - ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); - else - r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) - ? OP_MEMORY_END_REC : OP_MEMORY_END)); - - if (r) return r; - r = add_mem_num(reg, node->regnum); - if (r) return r; - r = add_opcode(reg, OP_RETURN); - } - else -#endif - { - if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - r = add_opcode(reg, OP_MEMORY_END_PUSH); - else - r = add_opcode(reg, OP_MEMORY_END); - if (r) return r; - r = add_mem_num(reg, node->regnum); - } - break; - - case ENCLOSE_STOP_BACKTRACK: - if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { - QtfrNode* qn = NQTFR(node->target); - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; - - len = compile_length_tree(qn->target, reg); - if (len < 0) return len; - - r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP); - if (r) return r; - r = compile_tree(qn->target, reg); - if (r) return r; - r = add_opcode(reg, OP_POP); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, - -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); - } - else { - r = add_opcode(reg, OP_PUSH_STOP_BT); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; - r = add_opcode(reg, OP_POP_STOP_BT); - } - break; - - default: - return ONIGERR_TYPE_BUG; - break; - } - - return r; -} - -static int -compile_length_anchor_node(AnchorNode* node, regex_t* reg) -{ - int len; - int tlen = 0; - 
- if (node->target) { - tlen = compile_length_tree(node->target, reg); - if (tlen < 0) return tlen; - } - - switch (node->type) { - case ANCHOR_PREC_READ: - len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS; - break; - case ANCHOR_PREC_READ_NOT: - len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS; - break; - case ANCHOR_LOOK_BEHIND: - len = SIZE_OP_LOOK_BEHIND + tlen; - break; - case ANCHOR_LOOK_BEHIND_NOT: - len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT; - break; - - default: - len = SIZE_OPCODE; - break; - } - - return len; -} - -static int -compile_anchor_node(AnchorNode* node, regex_t* reg) -{ - int r, len; - - switch (node->type) { - case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break; - case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break; - case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break; - case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break; - case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break; - case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break; - - case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break; - case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break; -#ifdef USE_WORD_BEGIN_END - case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break; - case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break; -#endif - - case ANCHOR_PREC_READ: - r = add_opcode(reg, OP_PUSH_POS); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; - r = add_opcode(reg, OP_POP_POS); - break; - - case ANCHOR_PREC_READ_NOT: - len = compile_length_tree(node->target, reg); - if (len < 0) return len; - r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; - r = add_opcode(reg, OP_FAIL_POS); - break; - - case ANCHOR_LOOK_BEHIND: - { - int n; - r = add_opcode(reg, OP_LOOK_BEHIND); - if (r) return r; - if 
(node->char_len < 0) { - r = get_char_length_tree(node->target, reg, &n); - if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - } - else - n = node->char_len; - r = add_length(reg, n); - if (r) return r; - r = compile_tree(node->target, reg); - } - break; - - case ANCHOR_LOOK_BEHIND_NOT: - { - int n; - len = compile_length_tree(node->target, reg); - r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT, - len + SIZE_OP_FAIL_LOOK_BEHIND_NOT); - if (r) return r; - if (node->char_len < 0) { - r = get_char_length_tree(node->target, reg, &n); - if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - } - else - n = node->char_len; - r = add_length(reg, n); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; - r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT); - } - break; - - default: - return ONIGERR_TYPE_BUG; - break; - } - - return r; -} - -static int -compile_length_tree(Node* node, regex_t* reg) -{ - int len, type, r; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - len = 0; - do { - r = compile_length_tree(NCAR(node), reg); - if (r < 0) return r; - len += r; - } while (IS_NOT_NULL(node = NCDR(node))); - r = len; - break; - - case NT_ALT: - { - int n; - - n = r = 0; - do { - r += compile_length_tree(NCAR(node), reg); - n++; - } while (IS_NOT_NULL(node = NCDR(node))); - r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); - } - break; - - case NT_STR: - if (NSTRING_IS_RAW(node)) - r = compile_length_string_raw_node(NSTR(node), reg); - else - r = compile_length_string_node(node, reg); - break; - - case NT_CCLASS: - r = compile_length_cclass_node(NCCLASS(node), reg); - break; - - case NT_CTYPE: - case NT_CANY: - r = SIZE_OPCODE; - break; - - case NT_BREF: - { - BRefNode* br = NBREF(node); - -#ifdef USE_BACKREF_WITH_LEVEL - if (IS_BACKREF_NEST_LEVEL(br)) { - r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + - SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); - } - else -#endif - if (br->back_num == 1) { - r = ((!IS_IGNORECASE(reg->options) && 
br->back_static[0] <= 2) - ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); - } - else { - r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); - } - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - r = SIZE_OP_CALL; - break; -#endif - - case NT_QTFR: - r = compile_length_quantifier_node(NQTFR(node), reg); - break; - - case NT_ENCLOSE: - r = compile_length_enclose_node(NENCLOSE(node), reg); - break; - - case NT_ANCHOR: - r = compile_length_anchor_node(NANCHOR(node), reg); - break; - - default: - return ONIGERR_TYPE_BUG; - break; - } - - return r; -} - -static int -compile_tree(Node* node, regex_t* reg) -{ - int n, type, len, pos, r = 0; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - do { - r = compile_tree(NCAR(node), reg); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_ALT: - { - Node* x = node; - len = 0; - do { - len += compile_length_tree(NCAR(x), reg); - if (NCDR(x) != NULL) { - len += SIZE_OP_PUSH + SIZE_OP_JUMP; - } - } while (IS_NOT_NULL(x = NCDR(x))); - pos = reg->used + len; /* goal position */ - - do { - len = compile_length_tree(NCAR(node), reg); - if (IS_NOT_NULL(NCDR(node))) { - r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); - if (r) break; - } - r = compile_tree(NCAR(node), reg); - if (r) break; - if (IS_NOT_NULL(NCDR(node))) { - len = pos - (reg->used + SIZE_OP_JUMP); - r = add_opcode_rel_addr(reg, OP_JUMP, len); - if (r) break; - } - } while (IS_NOT_NULL(node = NCDR(node))); - } - break; - - case NT_STR: - if (NSTRING_IS_RAW(node)) - r = compile_string_raw_node(NSTR(node), reg); - else - r = compile_string_node(node, reg); - break; - - case NT_CCLASS: - r = compile_cclass_node(NCCLASS(node), reg); - break; - - case NT_CTYPE: - { - int op; - - switch (NCTYPE(node)->ctype) { - case ONIGENC_CTYPE_WORD: - if (NCTYPE(node)->not != 0) op = OP_NOT_WORD; - else op = OP_WORD; - break; - default: - return ONIGERR_TYPE_BUG; - break; - } - r = add_opcode(reg, op); - } - break; - - case NT_CANY: - 
if (IS_MULTILINE(reg->options)) - r = add_opcode(reg, OP_ANYCHAR_ML); - else - r = add_opcode(reg, OP_ANYCHAR); - break; - - case NT_BREF: - { - BRefNode* br = NBREF(node); - -#ifdef USE_BACKREF_WITH_LEVEL - if (IS_BACKREF_NEST_LEVEL(br)) { - r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); - if (r) return r; - r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); - if (r) return r; - r = add_length(reg, br->nest_level); - if (r) return r; - - goto add_bacref_mems; - } - else -#endif - if (br->back_num == 1) { - n = br->back_static[0]; - if (IS_IGNORECASE(reg->options)) { - r = add_opcode(reg, OP_BACKREFN_IC); - if (r) return r; - r = add_mem_num(reg, n); - } - else { - switch (n) { - case 1: r = add_opcode(reg, OP_BACKREF1); break; - case 2: r = add_opcode(reg, OP_BACKREF2); break; - default: - r = add_opcode(reg, OP_BACKREFN); - if (r) return r; - r = add_mem_num(reg, n); - break; - } - } - } - else { - int i; - int* p; - - if (IS_IGNORECASE(reg->options)) { - r = add_opcode(reg, OP_BACKREF_MULTI_IC); - } - else { - r = add_opcode(reg, OP_BACKREF_MULTI); - } - if (r) return r; - -#ifdef USE_BACKREF_WITH_LEVEL - add_bacref_mems: -#endif - r = add_length(reg, br->back_num); - if (r) return r; - p = BACKREFS_P(br); - for (i = br->back_num - 1; i >= 0; i--) { - r = add_mem_num(reg, p[i]); - if (r) return r; - } - } - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - r = compile_call(NCALL(node), reg); - break; -#endif - - case NT_QTFR: - r = compile_quantifier_node(NQTFR(node), reg); - break; - - case NT_ENCLOSE: - r = compile_enclose_node(NENCLOSE(node), reg); - break; - - case NT_ANCHOR: - r = compile_anchor_node(NANCHOR(node), reg); - break; - - default: -#ifdef ONIG_DEBUG - fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node)); -#endif - break; - } - - return r; -} - -#ifdef USE_NAMED_GROUP - -static int -noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) -{ - int r = 0; - Node* node = *plink; - - switch (NTYPE(node)) { - 
case NT_LIST: - case NT_ALT: - do { - r = noname_disable_map(&(NCAR(node)), map, counter); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_QTFR: - { - Node** ptarget = &(NQTFR(node)->target); - Node* old = *ptarget; - r = noname_disable_map(ptarget, map, counter); - if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) { - onig_reduce_nested_quantifier(node, *ptarget); - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - if (en->type == ENCLOSE_MEMORY) { - if (IS_ENCLOSE_NAMED_GROUP(en)) { - (*counter)++; - map[en->regnum].new_val = *counter; - en->regnum = *counter; - r = noname_disable_map(&(en->target), map, counter); - } - else { - *plink = en->target; - en->target = NULL_NODE; - onig_node_free(node); - r = noname_disable_map(plink, map, counter); - } - } - else - r = noname_disable_map(&(en->target), map, counter); - } - break; - - default: - break; - } - - return r; -} - -static int -renumber_node_backref(Node* node, GroupNumRemap* map) -{ - int i, pos, n, old_num; - int *backs; - BRefNode* bn = NBREF(node); - - if (! 
IS_BACKREF_NAME_REF(bn)) - return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; - - old_num = bn->back_num; - if (IS_NULL(bn->back_dynamic)) - backs = bn->back_static; - else - backs = bn->back_dynamic; - - for (i = 0, pos = 0; i < old_num; i++) { - n = map[backs[i]].new_val; - if (n > 0) { - backs[pos] = n; - pos++; - } - } - - bn->back_num = pos; - return 0; -} - -static int -renumber_by_map(Node* node, GroupNumRemap* map) -{ - int r = 0; - - switch (NTYPE(node)) { - case NT_LIST: - case NT_ALT: - do { - r = renumber_by_map(NCAR(node), map); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - case NT_QTFR: - r = renumber_by_map(NQTFR(node)->target, map); - break; - case NT_ENCLOSE: - r = renumber_by_map(NENCLOSE(node)->target, map); - break; - - case NT_BREF: - r = renumber_node_backref(node, map); - break; - - default: - break; - } - - return r; -} - -static int -numbered_ref_check(Node* node) -{ - int r = 0; - - switch (NTYPE(node)) { - case NT_LIST: - case NT_ALT: - do { - r = numbered_ref_check(NCAR(node)); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - case NT_QTFR: - r = numbered_ref_check(NQTFR(node)->target); - break; - case NT_ENCLOSE: - r = numbered_ref_check(NENCLOSE(node)->target); - break; - - case NT_BREF: - if (! 
IS_BACKREF_NAME_REF(NBREF(node))) - return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; - break; - - default: - break; - } - - return r; -} - -static int -disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) -{ - int r, i, pos, counter; - int Result; - BitStatusType loc; - GroupNumRemap* map; - - map = (GroupNumRemap* )xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1)); - CHECK_NULL_RETURN_MEMERR(map); - for (i = 1; i <= env->num_mem; i++) { - map[i].new_val = 0; - } - counter = 0; - r = noname_disable_map(root, map, &counter); - if (r != 0) return r; - - r = renumber_by_map(*root, map); - if (r != 0) return r; - - for (i = 1, pos = 1; i <= env->num_mem; i++) { - if (map[i].new_val > 0) { - SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i]; - pos++; - } - } - - loc = env->capture_history; - BIT_STATUS_CLEAR(env->capture_history); - for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { - if (BIT_STATUS_AT(loc, i)) { - BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val); - } - } - - env->num_mem = env->num_named; - reg->num_mem = env->num_named; - - Result = onig_renumber_name_table(reg, map); - xfree(map); - return Result; -} -#endif /* USE_NAMED_GROUP */ - -#ifdef USE_SUBEXP_CALL -static int -unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) -{ - int i, offset; - EncloseNode* en; - AbsAddrType addr; - - for (i = 0; i < uslist->num; i++) { - en = NENCLOSE(uslist->us[i].target); - if (! 
IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; - addr = en->call_addr; - offset = uslist->us[i].offset; - - BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); - } - return 0; -} -#endif - -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT -static int -quantifiers_memory_node_info(Node* node) -{ - int r = 0; - - switch (NTYPE(node)) { - case NT_LIST: - case NT_ALT: - { - int v; - do { - v = quantifiers_memory_node_info(NCAR(node)); - if (v > r) r = v; - } while (v >= 0 && IS_NOT_NULL(node = NCDR(node))); - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - if (IS_CALL_RECURSION(NCALL(node))) { - return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ - } - else - r = quantifiers_memory_node_info(NCALL(node)->target); - break; -#endif - - case NT_QTFR: - { - QtfrNode* qn = NQTFR(node); - if (qn->upper != 0) { - r = quantifiers_memory_node_info(qn->target); - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - switch (en->type) { - case ENCLOSE_MEMORY: - return NQ_TARGET_IS_EMPTY_MEM; - break; - - case ENCLOSE_OPTION: - case ENCLOSE_STOP_BACKTRACK: - r = quantifiers_memory_node_info(en->target); - break; - default: - break; - } - } - break; - - case NT_BREF: - case NT_STR: - case NT_CTYPE: - case NT_CCLASS: - case NT_CANY: - case NT_ANCHOR: - default: - break; - } - - return r; -} -#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */ - -static int -get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) -{ - OnigDistance tmin; - int r = 0; - - *min = 0; - switch (NTYPE(node)) { - case NT_BREF: - { - int i; - int* backs; - Node** nodes = SCANENV_MEM_NODES(env); - BRefNode* br = NBREF(node); - if (br->state & NST_RECURSION) break; - - backs = BACKREFS_P(br); - if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF; - r = get_min_match_length(nodes[backs[0]], min, env); - if (r != 0) break; - for (i = 1; i < br->back_num; i++) { - if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; - r = 
get_min_match_length(nodes[backs[i]], &tmin, env); - if (r != 0) break; - if (*min > tmin) *min = tmin; - } - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - if (IS_CALL_RECURSION(NCALL(node))) { - EncloseNode* en = NENCLOSE(NCALL(node)->target); - if (IS_ENCLOSE_MIN_FIXED(en)) - *min = en->min_len; - } - else - r = get_min_match_length(NCALL(node)->target, min, env); - break; -#endif - - case NT_LIST: - do { - r = get_min_match_length(NCAR(node), &tmin, env); - if (r == 0) *min += tmin; - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_ALT: - { - Node *x, *y; - y = node; - do { - x = NCAR(y); - r = get_min_match_length(x, &tmin, env); - if (r != 0) break; - if (y == node) *min = tmin; - else if (*min > tmin) *min = tmin; - } while (r == 0 && IS_NOT_NULL(y = NCDR(y))); - } - break; - - case NT_STR: - { - StrNode* sn = NSTR(node); - *min = (OnigDistance)(sn->end - sn->s); - } - break; - - case NT_CTYPE: - *min = 1; - break; - - case NT_CCLASS: - case NT_CANY: - *min = 1; - break; - - case NT_QTFR: - { - QtfrNode* qn = NQTFR(node); - - if (qn->lower > 0) { - r = get_min_match_length(qn->target, min, env); - if (r == 0) - *min = distance_multiply(*min, qn->lower); - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - switch (en->type) { - case ENCLOSE_MEMORY: -#ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_MIN_FIXED(en)) - *min = en->min_len; - else { - r = get_min_match_length(en->target, min, env); - if (r == 0) { - en->min_len = *min; - SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); - } - } - break; -#endif - case ENCLOSE_OPTION: - case ENCLOSE_STOP_BACKTRACK: - r = get_min_match_length(en->target, min, env); - break; - } - } - break; - - case NT_ANCHOR: - default: - break; - } - - return r; -} - -static int -get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) -{ - OnigDistance tmax; - int r = 0; - - *max = 0; - switch (NTYPE(node)) { - case NT_LIST: - do { - r = get_max_match_length(NCAR(node), &tmax, 
env); - if (r == 0) - *max = distance_add(*max, tmax); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_ALT: - do { - r = get_max_match_length(NCAR(node), &tmax, env); - if (r == 0 && *max < tmax) *max = tmax; - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_STR: - { - StrNode* sn = NSTR(node); - *max = (OnigDistance)(sn->end - sn->s); - } - break; - - case NT_CTYPE: - *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - break; - - case NT_CCLASS: - case NT_CANY: - *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - break; - - case NT_BREF: - { - int i; - int* backs; - Node** nodes = SCANENV_MEM_NODES(env); - BRefNode* br = NBREF(node); - if (br->state & NST_RECURSION) { - *max = ONIG_INFINITE_DISTANCE; - break; - } - backs = BACKREFS_P(br); - for (i = 0; i < br->back_num; i++) { - if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; - r = get_max_match_length(nodes[backs[i]], &tmax, env); - if (r != 0) break; - if (*max < tmax) *max = tmax; - } - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - if (! IS_CALL_RECURSION(NCALL(node))) - r = get_max_match_length(NCALL(node)->target, max, env); - else - *max = ONIG_INFINITE_DISTANCE; - break; -#endif - - case NT_QTFR: - { - QtfrNode* qn = NQTFR(node); - - if (qn->upper != 0) { - r = get_max_match_length(qn->target, max, env); - if (r == 0 && *max != 0) { - if (! 
IS_REPEAT_INFINITE(qn->upper)) - *max = distance_multiply(*max, qn->upper); - else - *max = ONIG_INFINITE_DISTANCE; - } - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - switch (en->type) { - case ENCLOSE_MEMORY: -#ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_MAX_FIXED(en)) - *max = en->max_len; - else { - r = get_max_match_length(en->target, max, env); - if (r == 0) { - en->max_len = *max; - SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); - } - } - break; -#endif - case ENCLOSE_OPTION: - case ENCLOSE_STOP_BACKTRACK: - r = get_max_match_length(en->target, max, env); - break; - } - } - break; - - case NT_ANCHOR: - default: - break; - } - - return r; -} - -#define GET_CHAR_LEN_VARLEN -1 -#define GET_CHAR_LEN_TOP_ALT_VARLEN -2 - -/* fixed size pattern node only */ -static int -get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) -{ - int tlen; - int r = 0; - - level++; - *len = 0; - switch (NTYPE(node)) { - case NT_LIST: - do { - r = get_char_length_tree1(NCAR(node), reg, &tlen, level); - if (r == 0) - *len = distance_add(*len, tlen); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_ALT: - { - int tlen2; - int varlen = 0; - - r = get_char_length_tree1(NCAR(node), reg, &tlen, level); - while (r == 0 && IS_NOT_NULL(node = NCDR(node))) { - r = get_char_length_tree1(NCAR(node), reg, &tlen2, level); - if (r == 0) { - if (tlen != tlen2) - varlen = 1; - } - } - if (r == 0) { - if (varlen != 0) { - if (level == 1) - r = GET_CHAR_LEN_TOP_ALT_VARLEN; - else - r = GET_CHAR_LEN_VARLEN; - } - else - *len = tlen; - } - } - break; - - case NT_STR: - { - StrNode* sn = NSTR(node); - UChar *s = sn->s; - while (s < sn->end) { - s += enclen(reg->enc, s); - (*len)++; - } - } - break; - - case NT_QTFR: - { - QtfrNode* qn = NQTFR(node); - if (qn->lower == qn->upper) { - r = get_char_length_tree1(qn->target, reg, &tlen, level); - if (r == 0) - *len = distance_multiply(tlen, qn->lower); - } - else - r = GET_CHAR_LEN_VARLEN; - } - 
break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - if (! IS_CALL_RECURSION(NCALL(node))) - r = get_char_length_tree1(NCALL(node)->target, reg, len, level); - else - r = GET_CHAR_LEN_VARLEN; - break; -#endif - - case NT_CTYPE: - *len = 1; - break; - - case NT_CCLASS: - case NT_CANY: - *len = 1; - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - switch (en->type) { - case ENCLOSE_MEMORY: -#ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_CLEN_FIXED(en)) - *len = en->char_len; - else { - r = get_char_length_tree1(en->target, reg, len, level); - if (r == 0) { - en->char_len = *len; - SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); - } - } - break; -#endif - case ENCLOSE_OPTION: - case ENCLOSE_STOP_BACKTRACK: - r = get_char_length_tree1(en->target, reg, len, level); - break; - default: - break; - } - } - break; - - case NT_ANCHOR: - break; - - default: - r = GET_CHAR_LEN_VARLEN; - break; - } - - return r; -} - -static int -get_char_length_tree(Node* node, regex_t* reg, int* len) -{ - return get_char_length_tree1(node, reg, len, 0); -} - -/* x is not included y ==> 1 : 0 */ -static int -is_not_included(Node* x, Node* y, regex_t* reg) -{ - int i, len; - OnigCodePoint code; - UChar *p; - int ytype; - - retry: - ytype = NTYPE(y); - switch (NTYPE(x)) { - case NT_CTYPE: - { - switch (ytype) { - case NT_CTYPE: - if (NCTYPE(y)->ctype == NCTYPE(x)->ctype && - NCTYPE(y)->not != NCTYPE(x)->not) - return 1; - else - return 0; - break; - - case NT_CCLASS: - swap: - { - Node* tmp; - tmp = x; x = y; y = tmp; - goto retry; - } - break; - - case NT_STR: - goto swap; - break; - - default: - break; - } - } - break; - - case NT_CCLASS: - { - CClassNode* xc = NCCLASS(x); - switch (ytype) { - case NT_CTYPE: - switch (NCTYPE(y)->ctype) { - case ONIGENC_CTYPE_WORD: - if (NCTYPE(y)->not == 0) { - if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (BITSET_AT(xc->bs, i)) { - if (IS_CODE_SB_WORD(reg->enc, i)) return 0; - } - } - return 1; - } - 
return 0; - } - else { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (! IS_CODE_SB_WORD(reg->enc, i)) { - if (!IS_NCCLASS_NOT(xc)) { - if (BITSET_AT(xc->bs, i)) - return 0; - } - else { - if (! BITSET_AT(xc->bs, i)) - return 0; - } - } - } - return 1; - } - break; - - default: - break; - } - break; - - case NT_CCLASS: - { - int v; - CClassNode* yc = NCCLASS(y); - - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - v = BITSET_AT(xc->bs, i); - if ((v != 0 && !IS_NCCLASS_NOT(xc)) || - (v == 0 && IS_NCCLASS_NOT(xc))) { - v = BITSET_AT(yc->bs, i); - if ((v != 0 && !IS_NCCLASS_NOT(yc)) || - (v == 0 && IS_NCCLASS_NOT(yc))) - return 0; - } - } - if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) || - (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc))) - return 1; - return 0; - } - break; - - case NT_STR: - goto swap; - break; - - default: - break; - } - } - break; - - case NT_STR: - { - StrNode* xs = NSTR(x); - if (NSTRING_LEN(x) == 0) - break; - - //c = *(xs->s); - switch (ytype) { - case NT_CTYPE: - switch (NCTYPE(y)->ctype) { - case ONIGENC_CTYPE_WORD: - if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) - return NCTYPE(y)->not; - else - return !(NCTYPE(y)->not); - break; - default: - break; - } - break; - - case NT_CCLASS: - { - CClassNode* cc = NCCLASS(y); - - code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, - xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); - return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 
0 : 1); - } - break; - - case NT_STR: - { - UChar *q; - StrNode* ys = NSTR(y); - len = NSTRING_LEN(x); - if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); - if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { - /* tiny version */ - return 0; - } - else { - for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) { - if (*p != *q) return 1; - } - } - } - break; - - default: - break; - } - } - break; - - default: - break; - } - - return 0; -} - -static Node* -get_head_value_node(Node* node, int exact, regex_t* reg) -{ - Node* n = NULL_NODE; - - switch (NTYPE(node)) { - case NT_BREF: - case NT_ALT: - case NT_CANY: -#ifdef USE_SUBEXP_CALL - case NT_CALL: -#endif - break; - - case NT_CTYPE: - case NT_CCLASS: - if (exact == 0) { - n = node; - } - break; - - case NT_LIST: - n = get_head_value_node(NCAR(node), exact, reg); - break; - - case NT_STR: - { - StrNode* sn = NSTR(node); - - if (sn->end <= sn->s) - break; - - if (exact != 0 && - !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { - } - else { - n = node; - } - } - break; - - case NT_QTFR: - { - QtfrNode* qn = NQTFR(node); - if (qn->lower > 0) { - if (IS_NOT_NULL(qn->head_exact)) - n = qn->head_exact; - else - n = get_head_value_node(qn->target, exact, reg); - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - switch (en->type) { - case ENCLOSE_OPTION: - { - OnigOptionType options = reg->options; - - reg->options = NENCLOSE(node)->option; - n = get_head_value_node(NENCLOSE(node)->target, exact, reg); - reg->options = options; - } - break; - - case ENCLOSE_MEMORY: - case ENCLOSE_STOP_BACKTRACK: - n = get_head_value_node(en->target, exact, reg); - break; - } - } - break; - - case NT_ANCHOR: - if (NANCHOR(node)->type == ANCHOR_PREC_READ) - n = get_head_value_node(NANCHOR(node)->target, exact, reg); - break; - - default: - break; - } - - return n; -} - -static int -check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask) -{ - int type, r = 0; - - type = NTYPE(node); - if 
((NTYPE2BIT(type) & type_mask) == 0) - return 1; - - switch (type) { - case NT_LIST: - case NT_ALT: - do { - r = check_type_tree(NCAR(node), type_mask, enclose_mask, - anchor_mask); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_QTFR: - r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask, - anchor_mask); - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - if ((en->type & enclose_mask) == 0) - return 1; - - r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask); - } - break; - - case NT_ANCHOR: - type = NANCHOR(node)->type; - if ((type & anchor_mask) == 0) - return 1; - - if (NANCHOR(node)->target) - r = check_type_tree(NANCHOR(node)->target, - type_mask, enclose_mask, anchor_mask); - break; - - default: - break; - } - return r; -} - -#ifdef USE_SUBEXP_CALL - -#define RECURSION_EXIST 1 -#define RECURSION_INFINITE 2 - -static int -subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) -{ - int type; - int r = 0; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - { - Node *x; - OnigDistance min; - int ret; - - x = node; - do { - ret = subexp_inf_recursive_check(NCAR(x), env, head); - if (ret < 0 || ret == RECURSION_INFINITE) return ret; - r |= ret; - if (head) { - ret = get_min_match_length(NCAR(x), &min, env); - if (ret != 0) return ret; - if (min != 0) head = 0; - } - } while (IS_NOT_NULL(x = NCDR(x))); - } - break; - - case NT_ALT: - { - int ret; - r = RECURSION_EXIST; - do { - ret = subexp_inf_recursive_check(NCAR(node), env, head); - if (ret < 0 || ret == RECURSION_INFINITE) return ret; - r &= ret; - } while (IS_NOT_NULL(node = NCDR(node))); - } - break; - - case NT_QTFR: - r = subexp_inf_recursive_check(NQTFR(node)->target, env, head); - if (r == RECURSION_EXIST) { - if (NQTFR(node)->lower == 0) r = 0; - } - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case 
ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = subexp_inf_recursive_check(an->target, env, head); - break; - } - } - break; - - case NT_CALL: - r = subexp_inf_recursive_check(NCALL(node)->target, env, head); - break; - - case NT_ENCLOSE: - if (IS_ENCLOSE_MARK2(NENCLOSE(node))) - return 0; - else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) - return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE); - else { - SET_ENCLOSE_STATUS(node, NST_MARK2); - r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head); - CLEAR_ENCLOSE_STATUS(node, NST_MARK2); - } - break; - - default: - break; - } - - return r; -} - -static int -subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) -{ - int type; - int r = 0; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - case NT_ALT: - do { - r = subexp_inf_recursive_check_trav(NCAR(node), env); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_QTFR: - r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env); - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = subexp_inf_recursive_check_trav(an->target, env); - break; - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - - if (IS_ENCLOSE_RECURSION(en)) { - SET_ENCLOSE_STATUS(node, NST_MARK1); - r = subexp_inf_recursive_check(en->target, env, 1); - if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; - CLEAR_ENCLOSE_STATUS(node, NST_MARK1); - } - r = subexp_inf_recursive_check_trav(en->target, env); - } - - break; - - default: - break; - } - - return r; -} - -static int -subexp_recursive_check(Node* node) -{ - int r = 0; - - switch (NTYPE(node)) { - case NT_LIST: - case NT_ALT: - do { - r |= subexp_recursive_check(NCAR(node)); - } while (IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_QTFR: - r = subexp_recursive_check(NQTFR(node)->target); - break; - - 
case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = subexp_recursive_check(an->target); - break; - } - } - break; - - case NT_CALL: - r = subexp_recursive_check(NCALL(node)->target); - if (r != 0) SET_CALL_RECURSION(node); - break; - - case NT_ENCLOSE: - if (IS_ENCLOSE_MARK2(NENCLOSE(node))) - return 0; - else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) - return 1; /* recursion */ - else { - SET_ENCLOSE_STATUS(node, NST_MARK2); - r = subexp_recursive_check(NENCLOSE(node)->target); - CLEAR_ENCLOSE_STATUS(node, NST_MARK2); - } - break; - - default: - break; - } - - return r; -} - - -static int -subexp_recursive_check_trav(Node* node, ScanEnv* env) -{ -#define FOUND_CALLED_NODE 1 - - int type; - int r = 0; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - case NT_ALT: - { - int ret; - do { - ret = subexp_recursive_check_trav(NCAR(node), env); - if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; - else if (ret < 0) return ret; - } while (IS_NOT_NULL(node = NCDR(node))); - } - break; - - case NT_QTFR: - r = subexp_recursive_check_trav(NQTFR(node)->target, env); - if (NQTFR(node)->upper == 0) { - if (r == FOUND_CALLED_NODE) - NQTFR(node)->is_refered = 1; - } - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = subexp_recursive_check_trav(an->target, env); - break; - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - - if (! 
IS_ENCLOSE_RECURSION(en)) { - if (IS_ENCLOSE_CALLED(en)) { - SET_ENCLOSE_STATUS(node, NST_MARK1); - r = subexp_recursive_check(en->target); - if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION); - CLEAR_ENCLOSE_STATUS(node, NST_MARK1); - } - } - r = subexp_recursive_check_trav(en->target, env); - if (IS_ENCLOSE_CALLED(en)) - r |= FOUND_CALLED_NODE; - } - break; - - default: - break; - } - - return r; -} - -static int -setup_subexp_call(Node* node, ScanEnv* env) -{ - int type; - int r = 0; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - do { - r = setup_subexp_call(NCAR(node), env); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_ALT: - do { - r = setup_subexp_call(NCAR(node), env); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_QTFR: - r = setup_subexp_call(NQTFR(node)->target, env); - break; - case NT_ENCLOSE: - r = setup_subexp_call(NENCLOSE(node)->target, env); - break; - - case NT_CALL: - { - CallNode* cn = NCALL(node); - Node** nodes = SCANENV_MEM_NODES(env); - - if (cn->group_num != 0) { - int gnum = cn->group_num; - -#ifdef USE_NAMED_GROUP - if (env->num_named > 0 && - IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { - return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; - } -#endif - if (gnum > env->num_mem) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); - return ONIGERR_UNDEFINED_GROUP_REFERENCE; - } - -#ifdef USE_NAMED_GROUP - set_call_attr: -#endif - cn->target = nodes[cn->group_num]; - if (IS_NULL(cn->target)) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); - return ONIGERR_UNDEFINED_NAME_REFERENCE; - } - SET_ENCLOSE_STATUS(cn->target, NST_CALLED); - BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num); - cn->unset_addr_list = env->unset_addr_list; - } -#ifdef USE_NAMED_GROUP - else { - int *refs; - - int 
n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, - &refs); - if (n <= 0) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); - return ONIGERR_UNDEFINED_NAME_REFERENCE; - } - else if (n > 1) { - onig_scan_env_set_error_string(env, - ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); - return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; - } - else { - cn->group_num = refs[0]; - goto set_call_attr; - } - } -#endif - } - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = setup_subexp_call(an->target, env); - break; - } - } - break; - - default: - break; - } - - return r; -} -#endif - -/* divide different length alternatives in look-behind. - (?<=A|B) ==> (?<=A)|(?<=B) - (? (?type; - - head = an->target; - np = NCAR(head); - swap_node(node, head); - NCAR(node) = head; - NANCHOR(head)->target = np; - - np = node; - while ((np = NCDR(np)) != NULL_NODE) { - insert_node = onig_node_new_anchor(anc_type); - CHECK_NULL_RETURN_MEMERR(insert_node); - NANCHOR(insert_node)->target = NCAR(np); - NCAR(np) = insert_node; - } - - if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { - np = node; - do { - SET_NTYPE(np, NT_LIST); /* alt -> list */ - } while ((np = NCDR(np)) != NULL_NODE); - } - return 0; -} - -static int -setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) -{ - int r, len; - AnchorNode* an = NANCHOR(node); - - r = get_char_length_tree(an->target, reg, &len); - if (r == 0) - an->char_len = len; - else if (r == GET_CHAR_LEN_VARLEN) - r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) - r = divide_look_behind_alternatives(node); - else - r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - } - - return r; -} - -static int -next_setup(Node* node, Node* 
next_node, regex_t* reg) -{ - int type; - - retry: - type = NTYPE(node); - if (type == NT_QTFR) { - QtfrNode* qn = NQTFR(node); - if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { -#ifdef USE_QTFR_PEEK_NEXT - Node* n = get_head_value_node(next_node, 1, reg); - /* '\0': for UTF-16BE etc... */ - if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') { - qn->next_head_exact = n; - } -#endif - /* automatic posseivation a*b ==> (?>a*)b */ - if (qn->lower <= 1) { - int ttype = NTYPE(qn->target); - if (IS_NODE_TYPE_SIMPLE(ttype)) { - Node *x, *y; - x = get_head_value_node(qn->target, 0, reg); - if (IS_NOT_NULL(x)) { - y = get_head_value_node(next_node, 0, reg); - if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) { - Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK); - CHECK_NULL_RETURN_MEMERR(en); - SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); - swap_node(node, en); - NENCLOSE(node)->target = en; - } - } - } - } - } - } - else if (type == NT_ENCLOSE) { - EncloseNode* en = NENCLOSE(node); - if (en->type == ENCLOSE_MEMORY) { - node = en->target; - goto retry; - } - } - return 0; -} - - -static int -update_string_node_case_fold(regex_t* reg, Node *node) -{ - UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - UChar *sbuf, *ebuf, *sp; - int r, i, len, sbuf_size; - StrNode* sn = NSTR(node); - - end = sn->end; - sbuf_size = (int)(end - sn->s) * 2; - sbuf = (UChar* )xmalloc(sbuf_size); - CHECK_NULL_RETURN_MEMERR(sbuf); - ebuf = sbuf + sbuf_size; - - sp = sbuf; - p = sn->s; - while (p < end) { - len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf); - for (i = 0; i < len; i++) { - if (sp >= ebuf) { - sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2, sbuf_size); - CHECK_NULL_RETURN_MEMERR(sbuf); - sp = sbuf + sbuf_size; - sbuf_size *= 2; - ebuf = sbuf + sbuf_size; - } - - *sp++ = buf[i]; - } - } - - r = onig_node_str_set(node, sbuf, sp); - if (r != 0) { - xfree(sbuf); - return r; - } - - xfree(sbuf); - return 0; -} - -static int 
-expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, - regex_t* reg) -{ - int r; - Node *node; - - node = onig_node_new_str(s, end); - if (IS_NULL(node)) return ONIGERR_MEMORY; - - r = update_string_node_case_fold(reg, node); - if (r != 0) { - onig_node_free(node); - return r; - } - - NSTRING_SET_AMBIG(node); - NSTRING_SET_DONT_GET_OPT_INFO(node); - *rnode = node; - return 0; -} - -static int -expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], - UChar *p, int slen, UChar *end, - regex_t* reg, Node **rnode) -{ - int r, i, j, len, varlen; - Node *anode, *var_anode, *snode, *xnode, *an; - UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - - *rnode = var_anode = NULL_NODE; - - varlen = 0; - for (i = 0; i < item_num; i++) { - if (items[i].byte_len != slen) { - varlen = 1; - break; - } - } - - if (varlen != 0) { - *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE); - if (IS_NULL(var_anode)) return ONIGERR_MEMORY; - - xnode = onig_node_new_list(NULL, NULL); - if (IS_NULL(xnode)) goto mem_err; - NCAR(var_anode) = xnode; - - anode = onig_node_new_alt(NULL_NODE, NULL_NODE); - if (IS_NULL(anode)) goto mem_err; - NCAR(xnode) = anode; - } - else { - *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE); - if (IS_NULL(anode)) return ONIGERR_MEMORY; - } - - snode = onig_node_new_str(p, p + slen); - if (IS_NULL(snode)) goto mem_err; - - NCAR(anode) = snode; - - for (i = 0; i < item_num; i++) { - snode = onig_node_new_str(NULL, NULL); - if (IS_NULL(snode)) goto mem_err; - - for (j = 0; j < items[i].code_len; j++) { - len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf); - if (len < 0) { - r = len; - goto mem_err2; - } - - r = onig_node_str_cat(snode, buf, buf + len); - if (r != 0) goto mem_err2; - } - - an = onig_node_new_alt(NULL_NODE, NULL_NODE); - if (IS_NULL(an)) { - goto mem_err2; - } - - if (items[i].byte_len != slen) { - Node *rem; - UChar *q = p + items[i].byte_len; - - if (q < end) { - r = 
expand_case_fold_make_rem_string(&rem, q, end, reg); - if (r != 0) { - onig_node_free(an); - goto mem_err2; - } - - xnode = onig_node_list_add(NULL_NODE, snode); - if (IS_NULL(xnode)) { - onig_node_free(an); - onig_node_free(rem); - goto mem_err2; - } - if (IS_NULL(onig_node_list_add(xnode, rem))) { - onig_node_free(an); - onig_node_free(xnode); - onig_node_free(rem); - goto mem_err; - } - - NCAR(an) = xnode; - } - else { - NCAR(an) = snode; - } - - NCDR(var_anode) = an; - var_anode = an; - } - else { - NCAR(an) = snode; - NCDR(anode) = an; - anode = an; - } - } - - return varlen; - - mem_err2: - onig_node_free(snode); - - mem_err: - onig_node_free(*rnode); - - return ONIGERR_MEMORY; -} - -static int -expand_case_fold_string(Node* node, regex_t* reg) -{ -#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8 - - int r, n, len, alt_num; - UChar *start, *end, *p; - Node *top_root, *root, *snode, *prev_node; - OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; - StrNode* sn = NSTR(node); - - if (NSTRING_IS_AMBIG(node)) return 0; - - start = sn->s; - end = sn->end; - if (start >= end) return 0; - - r = 0; - top_root = root = prev_node = snode = NULL_NODE; - alt_num = 1; - p = start; - while (p < end) { - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, - p, end, items); - if (n < 0) { - r = n; - goto err; - } - - len = enclen(reg->enc, p); - - if (n == 0) { - if (IS_NULL(snode)) { - if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { - top_root = root = onig_node_list_add(NULL_NODE, prev_node); - if (IS_NULL(root)) { - onig_node_free(prev_node); - goto mem_err; - } - } - - prev_node = snode = onig_node_new_str(NULL, NULL); - if (IS_NULL(snode)) goto mem_err; - if (IS_NOT_NULL(root)) { - if (IS_NULL(onig_node_list_add(root, snode))) { - onig_node_free(snode); - goto mem_err; - } - } - } - - r = onig_node_str_cat(snode, p, p + len); - if (r != 0) goto err; - } - else { - alt_num *= (n + 1); - if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) 
break; - - if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { - top_root = root = onig_node_list_add(NULL_NODE, prev_node); - if (IS_NULL(root)) { - onig_node_free(prev_node); - goto mem_err; - } - } - - r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node); - if (r < 0) goto mem_err; - if (r == 1) { - if (IS_NULL(root)) { - top_root = prev_node; - } - else { - if (IS_NULL(onig_node_list_add(root, prev_node))) { - onig_node_free(prev_node); - goto mem_err; - } - } - - root = NCAR(prev_node); - } - else { /* r == 0 */ - if (IS_NOT_NULL(root)) { - if (IS_NULL(onig_node_list_add(root, prev_node))) { - onig_node_free(prev_node); - goto mem_err; - } - } - } - - snode = NULL_NODE; - } - - p += len; - } - - if (p < end) { - Node *srem; - - r = expand_case_fold_make_rem_string(&srem, p, end, reg); - if (r != 0) goto mem_err; - - if (IS_NOT_NULL(prev_node) && IS_NULL(root)) { - top_root = root = onig_node_list_add(NULL_NODE, prev_node); - if (IS_NULL(root)) { - onig_node_free(srem); - onig_node_free(prev_node); - goto mem_err; - } - } - - if (IS_NULL(root)) { - prev_node = srem; - } - else { - if (IS_NULL(onig_node_list_add(root, srem))) { - onig_node_free(srem); - goto mem_err; - } - } - } - - /* ending */ - top_root = (IS_NOT_NULL(top_root) ? 
top_root : prev_node); - swap_node(node, top_root); - onig_node_free(top_root); - return 0; - - mem_err: - r = ONIGERR_MEMORY; - - err: - onig_node_free(top_root); - return r; -} - - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -#define CEC_THRES_NUM_BIG_REPEAT 512 -#define CEC_INFINITE_NUM 0x7fffffff - -#define CEC_IN_INFINITE_REPEAT (1<<0) -#define CEC_IN_FINITE_REPEAT (1<<1) -#define CEC_CONT_BIG_REPEAT (1<<2) - -static int -setup_comb_exp_check(Node* node, int state, ScanEnv* env) -{ - int type; - int r = state; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - { - Node* prev = NULL_NODE; - do { - r = setup_comb_exp_check(NCAR(node), r, env); - prev = NCAR(node); - } while (r >= 0 && IS_NOT_NULL(node = NCDR(node))); - } - break; - - case NT_ALT: - { - int ret; - do { - ret = setup_comb_exp_check(NCAR(node), state, env); - r |= ret; - } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node))); - } - break; - - case NT_QTFR: - { - int child_state = state; - int add_state = 0; - QtfrNode* qn = NQTFR(node); - Node* target = qn->target; - int var_num; - - if (! IS_REPEAT_INFINITE(qn->upper)) { - if (qn->upper > 1) { - /* {0,1}, {1,1} are allowed */ - child_state |= CEC_IN_FINITE_REPEAT; - - /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ - if (env->backrefed_mem == 0) { - if (NTYPE(qn->target) == NT_ENCLOSE) { - EncloseNode* en = NENCLOSE(qn->target); - if (en->type == ENCLOSE_MEMORY) { - if (NTYPE(en->target) == NT_QTFR) { - QtfrNode* q = NQTFR(en->target); - if (IS_REPEAT_INFINITE(q->upper) - && q->greedy == qn->greedy) { - qn->upper = (qn->lower == 0 ? 
1 : qn->lower); - if (qn->upper == 1) - child_state = state; - } - } - } - } - } - } - } - - if (state & CEC_IN_FINITE_REPEAT) { - qn->comb_exp_check_num = -1; - } - else { - if (IS_REPEAT_INFINITE(qn->upper)) { - var_num = CEC_INFINITE_NUM; - child_state |= CEC_IN_INFINITE_REPEAT; - } - else { - var_num = qn->upper - qn->lower; - } - - if (var_num >= CEC_THRES_NUM_BIG_REPEAT) - add_state |= CEC_CONT_BIG_REPEAT; - - if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) || - ((state & CEC_CONT_BIG_REPEAT) != 0 && - var_num >= CEC_THRES_NUM_BIG_REPEAT)) { - if (qn->comb_exp_check_num == 0) { - env->num_comb_exp_check++; - qn->comb_exp_check_num = env->num_comb_exp_check; - if (env->curr_max_regnum > env->comb_exp_max_regnum) - env->comb_exp_max_regnum = env->curr_max_regnum; - } - } - } - - r = setup_comb_exp_check(target, child_state, env); - r |= add_state; - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - - switch (en->type) { - case ENCLOSE_MEMORY: - { - if (env->curr_max_regnum < en->regnum) - env->curr_max_regnum = en->regnum; - - r = setup_comb_exp_check(en->target, state, env); - } - break; - - default: - r = setup_comb_exp_check(en->target, state, env); - break; - } - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - if (IS_CALL_RECURSION(NCALL(node))) - env->has_recursion = 1; - else - r = setup_comb_exp_check(NCALL(node)->target, state, env); - break; -#endif - - default: - break; - } - - return r; -} -#endif - -#define IN_ALT (1<<0) -#define IN_NOT (1<<1) -#define IN_REPEAT (1<<2) -#define IN_VAR_REPEAT (1<<3) - -/* setup_tree does the following work. - 1. check empty loop. (set qn->target_empty_info) - 2. expand ignore-case in char class. - 3. set memory status bit flags. (reg->mem_stats) - 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. - 5. find invalid patterns in look-behind. - 6. expand repeated string. 
- */ -static int -setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) -{ - int type; - int r = 0; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - { - Node* prev = NULL_NODE; - do { - r = setup_tree(NCAR(node), reg, state, env); - if (IS_NOT_NULL(prev) && r == 0) { - r = next_setup(prev, NCAR(node), reg); - } - prev = NCAR(node); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - } - break; - - case NT_ALT: - do { - r = setup_tree(NCAR(node), reg, (state | IN_ALT), env); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_CCLASS: - break; - - case NT_STR: - if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { - r = expand_case_fold_string(node, reg); - } - break; - - case NT_CTYPE: - case NT_CANY: - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - break; -#endif - - case NT_BREF: - { - int i; - int* p; - Node** nodes = SCANENV_MEM_NODES(env); - BRefNode* br = NBREF(node); - p = BACKREFS_P(br); - for (i = 0; i < br->back_num; i++) { - if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; - BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); - BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); -#ifdef USE_BACKREF_WITH_LEVEL - if (IS_BACKREF_NEST_LEVEL(br)) { - BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); - } -#endif - SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED); - } - } - break; - - case NT_QTFR: - { - OnigDistance d; - QtfrNode* qn = NQTFR(node); - Node* target = qn->target; - - if ((state & IN_REPEAT) != 0) { - qn->state |= NST_IN_REPEAT; - } - - if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { - r = get_min_match_length(target, &d, env); - if (r) break; - if (d == 0) { - qn->target_empty_info = NQ_TARGET_IS_EMPTY; -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT - r = quantifiers_memory_node_info(target); - if (r < 0) break; - if (r > 0) { - qn->target_empty_info = r; - } -#endif -#if 0 - r = get_max_match_length(target, &d, env); - if (r == 0 && d == 0) { - /* ()* ==> ()?, ()+ ==> () */ - qn->upper = 1; 
- if (qn->lower > 1) qn->lower = 1; - if (NTYPE(target) == NT_STR) { - qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */ - } - } -#endif - } - } - - state |= IN_REPEAT; - if (qn->lower != qn->upper) - state |= IN_VAR_REPEAT; - r = setup_tree(target, reg, state, env); - if (r) break; - - /* expand string */ -#define EXPAND_STRING_MAX_LENGTH 100 - if (NTYPE(target) == NT_STR) { - if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && - qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { - int len = NSTRING_LEN(target); - StrNode* sn = NSTR(target); - - if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { - int i, n = qn->lower; - onig_node_conv_to_str_node(node, NSTR(target)->flag); - for (i = 0; i < n; i++) { - r = onig_node_str_cat(node, sn->s, sn->end); - if (r) break; - } - onig_node_free(target); - break; /* break case NT_QTFR: */ - } - } - } - -#ifdef USE_OP_PUSH_OR_JUMP_EXACT - if (qn->greedy && (qn->target_empty_info != 0)) { - if (NTYPE(target) == NT_QTFR) { - QtfrNode* tqn = NQTFR(target); - if (IS_NOT_NULL(tqn->head_exact)) { - qn->head_exact = tqn->head_exact; - tqn->head_exact = NULL; - } - } - else { - qn->head_exact = get_head_value_node(qn->target, 1, reg); - } - } -#endif - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - - switch (en->type) { - case ENCLOSE_OPTION: - { - OnigOptionType options = reg->options; - reg->options = NENCLOSE(node)->option; - r = setup_tree(NENCLOSE(node)->target, reg, state, env); - reg->options = options; - } - break; - - case ENCLOSE_MEMORY: - if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { - BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); - /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ - } - r = setup_tree(en->target, reg, state, env); - break; - - case ENCLOSE_STOP_BACKTRACK: - { - Node* target = en->target; - r = setup_tree(target, reg, state, env); - if (NTYPE(target) == NT_QTFR) { - QtfrNode* tqn = NQTFR(target); - if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 
1 && - tqn->greedy != 0) { /* (?>a*), a*+ etc... */ - int qtype = NTYPE(tqn->target); - if (IS_NODE_TYPE_SIMPLE(qtype)) - SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); - } - } - } - break; - } - } - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - - switch (an->type) { - case ANCHOR_PREC_READ: - r = setup_tree(an->target, reg, state, env); - break; - case ANCHOR_PREC_READ_NOT: - r = setup_tree(an->target, reg, (state | IN_NOT), env); - break; - -/* allowed node types in look-behind */ -#define ALLOWED_TYPE_IN_LB \ - ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \ - BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL ) - -#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY ) -#define ALLOWED_ENCLOSE_IN_LB_NOT 0 - -#define ALLOWED_ANCHOR_IN_LB \ -( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) -#define ALLOWED_ANCHOR_IN_LB_NOT \ -( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) - - case ANCHOR_LOOK_BEHIND: - { - r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, - ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB); - if (r < 0) return r; - if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - r = setup_look_behind(node, reg, env); - if (r != 0) return r; - r = setup_tree(an->target, reg, state, env); - } - break; - - case ANCHOR_LOOK_BEHIND_NOT: - { - r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, - ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); - if (r < 0) return r; - if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - r = setup_look_behind(node, reg, env); - if (r != 0) return r; - r = setup_tree(an->target, reg, (state | IN_NOT), env); - } - break; - } - } - break; - - default: - break; - } - - return r; -} - -/* set skip map for Boyer-Moor search */ -static int -set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, - UChar skip[], int** 
int_skip) -{ - int i, len; - - len = (int)(end - s); - if (len < ONIG_CHAR_TABLE_SIZE) { - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar)len; - - for (i = 0; i < len - 1; i++) - skip[s[i]] = (UChar)(len - 1 - i); - } - else { - if (IS_NULL(*int_skip)) { - *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); - if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; - } - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len; - - for (i = 0; i < len - 1; i++) - (*int_skip)[s[i]] = len - 1 - i; - } - return 0; -} - -#define OPT_EXACT_MAXLEN 24 - -typedef struct { - OnigDistance min; /* min byte length */ - OnigDistance max; /* max byte length */ -} MinMaxLen; - -typedef struct { - MinMaxLen mmd; - OnigEncoding enc; - OnigOptionType options; - OnigCaseFoldType case_fold_flag; - ScanEnv* scan_env; -} OptEnv; - -typedef struct { - int left_anchor; - int right_anchor; -} OptAncInfo; - -typedef struct { - MinMaxLen mmd; /* info position */ - OptAncInfo anc; - - int reach_end; - int ignore_case; - int len; - UChar s[OPT_EXACT_MAXLEN]; -} OptExactInfo; - -typedef struct { - MinMaxLen mmd; /* info position */ - OptAncInfo anc; - - int value; /* weighted value */ - UChar map[ONIG_CHAR_TABLE_SIZE]; -} OptMapInfo; - -typedef struct { - MinMaxLen len; - - OptAncInfo anc; - OptExactInfo exb; /* boundary */ - OptExactInfo exm; /* middle */ - OptExactInfo expr; /* prec read (?=...) 
*/ - - OptMapInfo map; /* boundary */ -} NodeOptInfo; - - -static int -map_position_value(OnigEncoding enc, int i) -{ - static const short int ByteValTable[] = { - 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, - 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5, - 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 - }; - - if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { - if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) - return 20; - else - return (int )ByteValTable[i]; - } - else - return 4; /* Take it easy. */ -} - -static int -distance_value(MinMaxLen* mm) -{ - /* 1000 / (min-max-dist + 1) */ - static const short int dist_vals[] = { - 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, - 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, - 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, - 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, - 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, - 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, - 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, - 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, - 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 10, 10, 10, 10, 10 - }; - - int d; - - if (mm->max == ONIG_INFINITE_DISTANCE) return 0; - - d = mm->max - mm->min; - if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0]))) - /* return dist_vals[d] * 16 / (mm->min + 12); */ - return (int )dist_vals[d]; - else - return 1; -} - -static int -comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) -{ - if (v2 <= 0) return -1; - if (v1 <= 0) return 1; - - v1 *= distance_value(d1); - v2 *= distance_value(d2); - - if (v2 > v1) return 1; - if (v2 < v1) return -1; - - if (d2->min < d1->min) return 1; - if (d2->min > d1->min) return -1; - return 0; -} - -static int -is_equal_mml(MinMaxLen* a, MinMaxLen* b) -{ - return (a->min 
== b->min && a->max == b->max) ? 1 : 0; -} - - -static void -set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max) -{ - mml->min = min; - mml->max = max; -} - -static void -clear_mml(MinMaxLen* mml) -{ - mml->min = mml->max = 0; -} - -static void -copy_mml(MinMaxLen* to, MinMaxLen* from) -{ - to->min = from->min; - to->max = from->max; -} - -static void -add_mml(MinMaxLen* to, MinMaxLen* from) -{ - to->min = distance_add(to->min, from->min); - to->max = distance_add(to->max, from->max); -} - -#if 0 -static void -add_len_mml(MinMaxLen* to, OnigDistance len) -{ - to->min = distance_add(to->min, len); - to->max = distance_add(to->max, len); -} -#endif - -static void -alt_merge_mml(MinMaxLen* to, MinMaxLen* from) -{ - if (to->min > from->min) to->min = from->min; - if (to->max < from->max) to->max = from->max; -} - -static void -copy_opt_env(OptEnv* to, OptEnv* from) -{ - *to = *from; -} - -static void -clear_opt_anc_info(OptAncInfo* anc) -{ - anc->left_anchor = 0; - anc->right_anchor = 0; -} - -static void -copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from) -{ - *to = *from; -} - -static void -concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, - OnigDistance left_len, OnigDistance right_len) -{ - clear_opt_anc_info(to); - - to->left_anchor = left->left_anchor; - if (left_len == 0) { - to->left_anchor |= right->left_anchor; - } - - to->right_anchor = right->right_anchor; - if (right_len == 0) { - to->right_anchor |= left->right_anchor; - } -} - -static int -is_left_anchor(int anc) -{ - if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || - anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ || - anc == ANCHOR_PREC_READ_NOT) - return 0; - - return 1; -} - -static int -is_set_opt_anc_info(OptAncInfo* to, int anc) -{ - if ((to->left_anchor & anc) != 0) return 1; - - return ((to->right_anchor & anc) != 0 ? 
1 : 0); -} - -static void -add_opt_anc_info(OptAncInfo* to, int anc) -{ - if (is_left_anchor(anc)) - to->left_anchor |= anc; - else - to->right_anchor |= anc; -} - -static void -remove_opt_anc_info(OptAncInfo* to, int anc) -{ - if (is_left_anchor(anc)) - to->left_anchor &= ~anc; - else - to->right_anchor &= ~anc; -} - -static void -alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add) -{ - to->left_anchor &= add->left_anchor; - to->right_anchor &= add->right_anchor; -} - -static int -is_full_opt_exact_info(OptExactInfo* ex) -{ - return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0); -} - -static void -clear_opt_exact_info(OptExactInfo* ex) -{ - clear_mml(&ex->mmd); - clear_opt_anc_info(&ex->anc); - ex->reach_end = 0; - ex->ignore_case = 0; - ex->len = 0; - ex->s[0] = '\0'; -} - -static void -copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from) -{ - *to = *from; -} - -static void -concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) -{ - int i, j, len; - UChar *p, *end; - OptAncInfo tanc; - - if (! to->ignore_case && add->ignore_case) { - if (to->len >= add->len) return ; /* avoid */ - - to->ignore_case = 1; - } - - p = add->s; - end = p + add->len; - for (i = to->len; p < end; ) { - len = enclen(enc, p); - if (i + len > OPT_EXACT_MAXLEN) break; - for (j = 0; j < len && p < end; j++) - to->s[i++] = *p++; - } - - to->len = i; - to->reach_end = (p == end ? add->reach_end : 0); - - concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); - if (! 
to->reach_end) tanc.right_anchor = 0; - copy_opt_anc_info(&to->anc, &tanc); -} - -static void -concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, - int raw ARG_UNUSED, OnigEncoding enc) -{ - int i, j, len; - UChar *p; - - for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { - len = enclen(enc, p); - if (i + len > OPT_EXACT_MAXLEN) break; - for (j = 0; j < len && p < end; j++) - to->s[i++] = *p++; - } - - to->len = i; -} - -static void -alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) -{ - int i, j, len; - - if (add->len == 0 || to->len == 0) { - clear_opt_exact_info(to); - return ; - } - - if (! is_equal_mml(&to->mmd, &add->mmd)) { - clear_opt_exact_info(to); - return ; - } - - for (i = 0; i < to->len && i < add->len; ) { - if (to->s[i] != add->s[i]) break; - len = enclen(env->enc, to->s + i); - - for (j = 1; j < len; j++) { - if (to->s[i+j] != add->s[i+j]) break; - } - if (j < len) break; - i += len; - } - - if (! add->reach_end || i < add->len || i < to->len) { - to->reach_end = 0; - } - to->len = i; - to->ignore_case |= add->ignore_case; - - alt_merge_opt_anc_info(&to->anc, &add->anc); - if (! 
to->reach_end) to->anc.right_anchor = 0; -} - -static void -select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) -{ - int v1, v2; - - v1 = now->len; - v2 = alt->len; - - if (v2 == 0) { - return ; - } - else if (v1 == 0) { - copy_opt_exact_info(now, alt); - return ; - } - else if (v1 <= 2 && v2 <= 2) { - /* ByteValTable[x] is big value --> low price */ - v2 = map_position_value(enc, now->s[0]); - v1 = map_position_value(enc, alt->s[0]); - - if (now->len > 1) v1 += 5; - if (alt->len > 1) v2 += 5; - } - - if (now->ignore_case == 0) v1 *= 2; - if (alt->ignore_case == 0) v2 *= 2; - - if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) - copy_opt_exact_info(now, alt); -} - -static void -clear_opt_map_info(OptMapInfo* map) -{ - static const OptMapInfo clean_info = { - {0, 0}, {0, 0}, 0, - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - } - }; - - xmemcpy(map, &clean_info, sizeof(OptMapInfo)); -} - -static void -copy_opt_map_info(OptMapInfo* to, OptMapInfo* from) -{ - *to = *from; -} - -static void -add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) -{ - if (map->map[c] == 0) { - map->map[c] = 1; - map->value += map_position_value(enc, c); - } -} - -static int 
-add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, - OnigEncoding enc, OnigCaseFoldType case_fold_flag) -{ - OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; - UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - int i, n; - - add_char_opt_map_info(map, p[0], enc); - - case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag); - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items); - if (n < 0) return n; - - for (i = 0; i < n; i++) { - ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf); - add_char_opt_map_info(map, buf[0], enc); - } - - return 0; -} - -static void -select_opt_map_info(OptMapInfo* now, OptMapInfo* alt) -{ - static int z = 1<<15; /* 32768: something big value */ - - int v1, v2; - - if (alt->value == 0) return ; - if (now->value == 0) { - copy_opt_map_info(now, alt); - return ; - } - - v1 = z / now->value; - v2 = z / alt->value; - if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) - copy_opt_map_info(now, alt); -} - -static int -comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) -{ -#define COMP_EM_BASE 20 - int ve, vm; - - if (m->value <= 0) return -1; - - ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); - vm = COMP_EM_BASE * 5 * 2 / m->value; - return comp_distance_value(&e->mmd, &m->mmd, ve, vm); -} - -static void -alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add) -{ - int i, val; - - /* if (! 
is_equal_mml(&to->mmd, &add->mmd)) return ; */ - if (to->value == 0) return ; - if (add->value == 0 || to->mmd.max < add->mmd.min) { - clear_opt_map_info(to); - return ; - } - - alt_merge_mml(&to->mmd, &add->mmd); - - val = 0; - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { - if (add->map[i]) - to->map[i] = 1; - - if (to->map[i]) - val += map_position_value(enc, i); - } - to->value = val; - - alt_merge_opt_anc_info(&to->anc, &add->anc); -} - -static void -set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd) -{ - copy_mml(&(opt->exb.mmd), mmd); - copy_mml(&(opt->expr.mmd), mmd); - copy_mml(&(opt->map.mmd), mmd); -} - -static void -clear_node_opt_info(NodeOptInfo* opt) -{ - clear_mml(&opt->len); - clear_opt_anc_info(&opt->anc); - clear_opt_exact_info(&opt->exb); - clear_opt_exact_info(&opt->exm); - clear_opt_exact_info(&opt->expr); - clear_opt_map_info(&opt->map); -} - -static void -copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from) -{ - *to = *from; -} - -static void -concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) -{ - int exb_reach, exm_reach; - OptAncInfo tanc; - - concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max); - copy_opt_anc_info(&to->anc, &tanc); - - if (add->exb.len > 0 && to->len.max == 0) { - concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, - to->len.max, add->len.max); - copy_opt_anc_info(&add->exb.anc, &tanc); - } - - if (add->map.value > 0 && to->len.max == 0) { - if (add->map.mmd.max == 0) - add->map.anc.left_anchor |= to->anc.left_anchor; - } - - exb_reach = to->exb.reach_end; - exm_reach = to->exm.reach_end; - - if (add->len.max != 0) - to->exb.reach_end = to->exm.reach_end = 0; - - if (add->exb.len > 0) { - if (exb_reach) { - concat_opt_exact_info(&to->exb, &add->exb, enc); - clear_opt_exact_info(&add->exb); - } - else if (exm_reach) { - concat_opt_exact_info(&to->exm, &add->exb, enc); - clear_opt_exact_info(&add->exb); - } - } - select_opt_exact_info(enc, &to->exm, &add->exb); - 
select_opt_exact_info(enc, &to->exm, &add->exm); - - if (to->expr.len > 0) { - if (add->len.max > 0) { - if (to->expr.len > (int )add->len.max) - to->expr.len = add->len.max; - - if (to->expr.mmd.max == 0) - select_opt_exact_info(enc, &to->exb, &to->expr); - else - select_opt_exact_info(enc, &to->exm, &to->expr); - } - } - else if (add->expr.len > 0) { - copy_opt_exact_info(&to->expr, &add->expr); - } - - select_opt_map_info(&to->map, &add->map); - - add_mml(&to->len, &add->len); -} - -static void -alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env) -{ - alt_merge_opt_anc_info (&to->anc, &add->anc); - alt_merge_opt_exact_info(&to->exb, &add->exb, env); - alt_merge_opt_exact_info(&to->exm, &add->exm, env); - alt_merge_opt_exact_info(&to->expr, &add->expr, env); - alt_merge_opt_map_info(env->enc, &to->map, &add->map); - - alt_merge_mml(&to->len, &add->len); -} - - -#define MAX_NODE_OPT_INFO_REF_COUNT 5 - -static int -optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) -{ - int type; - int r = 0; - - clear_node_opt_info(opt); - set_bound_node_opt_info(opt, &env->mmd); - - type = NTYPE(node); - switch (type) { - case NT_LIST: - { - OptEnv nenv; - NodeOptInfo nopt; - Node* nd = node; - - copy_opt_env(&nenv, env); - do { - r = optimize_node_left(NCAR(nd), &nopt, &nenv); - if (r == 0) { - add_mml(&nenv.mmd, &nopt.len); - concat_left_node_opt_info(env->enc, opt, &nopt); - } - } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd))); - } - break; - - case NT_ALT: - { - NodeOptInfo nopt; - Node* nd = node; - - do { - r = optimize_node_left(NCAR(nd), &nopt, env); - if (r == 0) { - if (nd == node) copy_node_opt_info(opt, &nopt); - else alt_merge_node_opt_info(opt, &nopt, env); - } - } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd))); - } - break; - - case NT_STR: - { - StrNode* sn = NSTR(node); - int slen = (int)(sn->end - sn->s); - int is_raw = NSTRING_IS_RAW(node); - - if (! 
NSTRING_IS_AMBIG(node)) { - concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, - NSTRING_IS_RAW(node), env->enc); - if (slen > 0) { - add_char_opt_map_info(&opt->map, *(sn->s), env->enc); - } - set_mml(&opt->len, slen, slen); - } - else { - int max; - - if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { - int n = onigenc_strlen(env->enc, sn->s, sn->end); - max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; - } - else { - concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, - is_raw, env->enc); - opt->exb.ignore_case = 1; - - if (slen > 0) { - r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, - env->enc, env->case_fold_flag); - if (r != 0) break; - } - - max = slen; - } - - set_mml(&opt->len, slen, max); - } - - if (opt->exb.len == slen) - opt->exb.reach_end = 1; - } - break; - - case NT_CCLASS: - { - int i, z; - CClassNode* cc = NCCLASS(node); - - /* no need to check ignore case. (setted in setup_tree()) */ - - if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { - OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); - OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - - set_mml(&opt->len, min, max); - } - else { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - z = BITSET_AT(cc->bs, i); - if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); - } - } - set_mml(&opt->len, 1, 1); - } - } - break; - - case NT_CTYPE: - { - int i, min, max; - - max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - - if (max == 1) { - min = 1; - - switch (NCTYPE(node)->ctype) { - case ONIGENC_CTYPE_WORD: - if (NCTYPE(node)->not != 0) { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (! 
ONIGENC_IS_CODE_WORD(env->enc, i)) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); - } - } - } - else { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (ONIGENC_IS_CODE_WORD(env->enc, i)) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); - } - } - } - break; - } - } - else { - min = ONIGENC_MBC_MINLEN(env->enc); - } - set_mml(&opt->len, min, max); - } - break; - - case NT_CANY: - { - OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); - OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - set_mml(&opt->len, min, max); - } - break; - - case NT_ANCHOR: - switch (NANCHOR(node)->type) { - case ANCHOR_BEGIN_BUF: - case ANCHOR_BEGIN_POSITION: - case ANCHOR_BEGIN_LINE: - case ANCHOR_END_BUF: - case ANCHOR_SEMI_END_BUF: - case ANCHOR_END_LINE: - add_opt_anc_info(&opt->anc, NANCHOR(node)->type); - break; - - case ANCHOR_PREC_READ: - { - NodeOptInfo nopt; - - r = optimize_node_left(NANCHOR(node)->target, &nopt, env); - if (r == 0) { - if (nopt.exb.len > 0) - copy_opt_exact_info(&opt->expr, &nopt.exb); - else if (nopt.exm.len > 0) - copy_opt_exact_info(&opt->expr, &nopt.exm); - - opt->expr.reach_end = 0; - - if (nopt.map.value > 0) - copy_opt_map_info(&opt->map, &nopt.map); - } - } - break; - - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. 
*/ - case ANCHOR_LOOK_BEHIND_NOT: - break; - } - break; - - case NT_BREF: - { - int i; - int* backs; - OnigDistance min, max, tmin, tmax; - Node** nodes = SCANENV_MEM_NODES(env->scan_env); - BRefNode* br = NBREF(node); - - if (br->state & NST_RECURSION) { - set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); - break; - } - backs = BACKREFS_P(br); - r = get_min_match_length(nodes[backs[0]], &min, env->scan_env); - if (r != 0) break; - r = get_max_match_length(nodes[backs[0]], &max, env->scan_env); - if (r != 0) break; - for (i = 1; i < br->back_num; i++) { - r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env); - if (r != 0) break; - r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env); - if (r != 0) break; - if (min > tmin) min = tmin; - if (max < tmax) max = tmax; - } - if (r == 0) set_mml(&opt->len, min, max); - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - if (IS_CALL_RECURSION(NCALL(node))) - set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); - else { - OnigOptionType save = env->options; - env->options = NENCLOSE(NCALL(node)->target)->option; - r = optimize_node_left(NCALL(node)->target, opt, env); - env->options = save; - } - break; -#endif - - case NT_QTFR: - { - int i; - OnigDistance min, max; - NodeOptInfo nopt; - QtfrNode* qn = NQTFR(node); - - r = optimize_node_left(qn->target, &nopt, env); - if (r) break; - - if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { - if (env->mmd.max == 0 && - NTYPE(qn->target) == NT_CANY && qn->greedy) { - if (IS_MULTILINE(env->options)) - add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); - else - add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); - } - } - else { - if (qn->lower > 0) { - copy_node_opt_info(opt, &nopt); - if (nopt.exb.len > 0) { - if (nopt.exb.reach_end) { - for (i = 2; i <= qn->lower && - ! 
is_full_opt_exact_info(&opt->exb); i++) { - concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); - } - if (i < qn->lower) { - opt->exb.reach_end = 0; - } - } - } - - if (qn->lower != qn->upper) { - opt->exb.reach_end = 0; - opt->exm.reach_end = 0; - } - if (qn->lower > 1) - opt->exm.reach_end = 0; - } - } - - min = distance_multiply(nopt.len.min, qn->lower); - if (IS_REPEAT_INFINITE(qn->upper)) - max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0); - else - max = distance_multiply(nopt.len.max, qn->upper); - - set_mml(&opt->len, min, max); - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - - switch (en->type) { - case ENCLOSE_OPTION: - { - OnigOptionType save = env->options; - - env->options = en->option; - r = optimize_node_left(en->target, opt, env); - env->options = save; - } - break; - - case ENCLOSE_MEMORY: -#ifdef USE_SUBEXP_CALL - en->opt_count++; - if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { - OnigDistance min, max; - - min = 0; - max = ONIG_INFINITE_DISTANCE; - if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len; - if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len; - set_mml(&opt->len, min, max); - } - else -#endif - { - r = optimize_node_left(en->target, opt, env); - - if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { - if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum)) - remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); - } - } - break; - - case ENCLOSE_STOP_BACKTRACK: - r = optimize_node_left(en->target, opt, env); - break; - } - } - break; - - default: -#ifdef ONIG_DEBUG - fprintf(stderr, "optimize_node_left: undefined node type %d\n", - NTYPE(node)); -#endif - r = ONIGERR_TYPE_BUG; - break; - } - - return r; -} - -static int -set_optimize_exact_info(regex_t* reg, OptExactInfo* e) -{ - int r; - - if (e->len == 0) return 0; - - if (e->ignore_case) { - reg->exact = (UChar* )xmalloc(e->len); - CHECK_NULL_RETURN_MEMERR(reg->exact); - xmemcpy(reg->exact, e->s, e->len); - reg->exact_end = 
reg->exact + e->len; - reg->optimize = ONIG_OPTIMIZE_EXACT_IC; - } - else { - int allow_reverse; - - reg->exact = str_dup(e->s, e->s + e->len); - CHECK_NULL_RETURN_MEMERR(reg->exact); - reg->exact_end = reg->exact + e->len; - - allow_reverse = - ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); - - if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { - r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, - reg->map, &(reg->int_map)); - if (r) return r; - - reg->optimize = (allow_reverse != 0 - ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); - } - else { - reg->optimize = ONIG_OPTIMIZE_EXACT; - } - } - - reg->dmin = e->mmd.min; - reg->dmax = e->mmd.max; - - if (reg->dmin != ONIG_INFINITE_DISTANCE) { - reg->threshold_len = reg->dmin + (int)(reg->exact_end - reg->exact); - } - - return 0; -} - -static void -set_optimize_map_info(regex_t* reg, OptMapInfo* m) -{ - int i; - - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) - reg->map[i] = m->map[i]; - - reg->optimize = ONIG_OPTIMIZE_MAP; - reg->dmin = m->mmd.min; - reg->dmax = m->mmd.max; - - if (reg->dmin != ONIG_INFINITE_DISTANCE) { - reg->threshold_len = reg->dmin + 1; - } -} - -static void -set_sub_anchor(regex_t* reg, OptAncInfo* anc) -{ - reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE; - reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE; -} - -#ifdef ONIG_DEBUG -static void print_optimize_info(FILE* f, regex_t* reg); -#endif - -static int -set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) -{ - - int r; - NodeOptInfo opt; - OptEnv env; - - env.enc = reg->enc; - env.options = reg->options; - env.case_fold_flag = reg->case_fold_flag; - env.scan_env = scan_env; - clear_mml(&env.mmd); - - r = optimize_node_left(node, &opt, &env); - if (r) return r; - - reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | - ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML); - - reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | 
ANCHOR_SEMI_END_BUF); - - if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { - reg->anchor_dmin = opt.len.min; - reg->anchor_dmax = opt.len.max; - } - - if (opt.exb.len > 0 || opt.exm.len > 0) { - select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); - if (opt.map.value > 0 && - comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { - goto set_map; - } - else { - r = set_optimize_exact_info(reg, &opt.exb); - set_sub_anchor(reg, &opt.exb.anc); - } - } - else if (opt.map.value > 0) { - set_map: - set_optimize_map_info(reg, &opt.map); - set_sub_anchor(reg, &opt.map.anc); - } - else { - reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE; - if (opt.len.max == 0) - reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE; - } - -#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) - print_optimize_info(stderr, reg); -#endif - return r; -} - -static void -clear_optimize_info(regex_t* reg) -{ - reg->optimize = ONIG_OPTIMIZE_NONE; - reg->anchor = 0; - reg->anchor_dmin = 0; - reg->anchor_dmax = 0; - reg->sub_anchor = 0; - reg->exact_end = (UChar* )NULL; - reg->threshold_len = 0; - if (IS_NOT_NULL(reg->exact)) { - xfree(reg->exact); - reg->exact = (UChar* )NULL; - } -} - -#ifdef ONIG_DEBUG - -static void print_enc_string(FILE* fp, OnigEncoding enc, - const UChar *s, const UChar *end) -{ - fprintf(fp, "\nPATTERN: /"); - - if (ONIGENC_MBC_MINLEN(enc) > 1) { - const UChar *p; - OnigCodePoint code; - - p = s; - while (p < end) { - code = ONIGENC_MBC_TO_CODE(enc, p, end); - if (code >= 0x80) { - fprintf(fp, " 0x%04x ", (int )code); - } - else { - fputc((int )code, fp); - } - - p += enclen(enc, p); - } - } - else { - while (s < end) { - fputc((int )*s, fp); - s++; - } - } - - fprintf(fp, "/\n"); -} - -static void -print_distance_range(FILE* f, OnigDistance a, OnigDistance b) -{ - if (a == ONIG_INFINITE_DISTANCE) - fputs("inf", f); - else - fprintf(f, "(%u)", a); - - fputs("-", f); - - if (b == ONIG_INFINITE_DISTANCE) - fputs("inf", f); - else - fprintf(f, 
"(%u)", b); -} - -static void -print_anchor(FILE* f, int anchor) -{ - int q = 0; - - fprintf(f, "["); - - if (anchor & ANCHOR_BEGIN_BUF) { - fprintf(f, "begin-buf"); - q = 1; - } - if (anchor & ANCHOR_BEGIN_LINE) { - if (q) fprintf(f, ", "); - q = 1; - fprintf(f, "begin-line"); - } - if (anchor & ANCHOR_BEGIN_POSITION) { - if (q) fprintf(f, ", "); - q = 1; - fprintf(f, "begin-pos"); - } - if (anchor & ANCHOR_END_BUF) { - if (q) fprintf(f, ", "); - q = 1; - fprintf(f, "end-buf"); - } - if (anchor & ANCHOR_SEMI_END_BUF) { - if (q) fprintf(f, ", "); - q = 1; - fprintf(f, "semi-end-buf"); - } - if (anchor & ANCHOR_END_LINE) { - if (q) fprintf(f, ", "); - q = 1; - fprintf(f, "end-line"); - } - if (anchor & ANCHOR_ANYCHAR_STAR) { - if (q) fprintf(f, ", "); - q = 1; - fprintf(f, "anychar-star"); - } - if (anchor & ANCHOR_ANYCHAR_STAR_ML) { - if (q) fprintf(f, ", "); - fprintf(f, "anychar-star-pl"); - } - - fprintf(f, "]"); -} - -static void -print_optimize_info(FILE* f, regex_t* reg) -{ - static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", - "EXACT_IC", "MAP" }; - - fprintf(f, "optimize: %s\n", on[reg->optimize]); - fprintf(f, " anchor: "); print_anchor(f, reg->anchor); - if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0) - print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax); - fprintf(f, "\n"); - - if (reg->optimize) { - fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor); - fprintf(f, "\n"); - } - fprintf(f, "\n"); - - if (reg->exact) { - UChar *p; - fprintf(f, "exact: ["); - for (p = reg->exact; p < reg->exact_end; p++) { - fputc(*p, f); - } - fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact)); - } - else if (reg->optimize & ONIG_OPTIMIZE_MAP) { - int c, i, n = 0; - - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) - if (reg->map[i]) n++; - - fprintf(f, "map: n=%d\n", n); - if (n > 0) { - c = 0; - fputc('[', f); - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { - if (reg->map[i] != 0) { - if (c > 0) fputs(", ", f); - c++; - if 
(ONIGENC_MBC_MAXLEN(reg->enc) == 1 && - ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i)) - fputc(i, f); - else - fprintf(f, "%d", i); - } - } - fprintf(f, "]\n"); - } - } -} -#endif /* ONIG_DEBUG */ - - -extern void -onig_free_body(regex_t* reg) -{ - if (IS_NOT_NULL(reg)) { - if (IS_NOT_NULL(reg->p)) xfree(reg->p); - if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); - if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); - if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); - if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); - if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); - -#ifdef USE_NAMED_GROUP - onig_names_free(reg); -#endif - } -} - -extern void -onig_free(regex_t* reg) -{ - if (IS_NOT_NULL(reg)) { - onig_free_body(reg); - xfree(reg); - } -} - -#define REGEX_TRANSFER(to,from) do {\ - (to)->state = ONIG_STATE_MODIFY;\ - onig_free_body(to);\ - xmemcpy(to, from, sizeof(regex_t));\ - xfree(from);\ -} while (0) - -extern void -onig_transfer(regex_t* to, regex_t* from) -{ - THREAD_ATOMIC_START; - REGEX_TRANSFER(to, from); - THREAD_ATOMIC_END; -} - -#define REGEX_CHAIN_HEAD(reg) do {\ - while (IS_NOT_NULL((reg)->chain)) {\ - (reg) = (reg)->chain;\ - }\ -} while (0) - -extern void -onig_chain_link_add(regex_t* to, regex_t* add) -{ - THREAD_ATOMIC_START; - REGEX_CHAIN_HEAD(to); - to->chain = add; - THREAD_ATOMIC_END; -} - -extern void -onig_chain_reduce(regex_t* reg) -{ - regex_t *head, *prev; - - prev = reg; - head = prev->chain; - if (IS_NOT_NULL(head)) { - reg->state = ONIG_STATE_MODIFY; - while (IS_NOT_NULL(head->chain)) { - prev = head; - head = head->chain; - } - prev->chain = (regex_t* )NULL; - REGEX_TRANSFER(reg, head); - } -} - -#ifdef ONIG_DEBUG -static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); -#endif -#ifdef ONIG_DEBUG_PARSE_TREE -static void print_tree P_((FILE* f, Node* node)); -#endif - -extern int -onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, - OnigErrorInfo* einfo) 
-{ -#define COMPILE_INIT_SIZE 20 - - int r, init_size; - Node* root; - ScanEnv scan_env; -#ifdef USE_SUBEXP_CALL - UnsetAddrList uslist; -#endif - - if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; - - reg->state = ONIG_STATE_COMPILING; - -#ifdef ONIG_DEBUG - print_enc_string(stderr, reg->enc, pattern, pattern_end); -#endif - - if (reg->alloc == 0) { - init_size = ((int)(pattern_end - pattern)) * 2; - if (init_size <= 0) init_size = COMPILE_INIT_SIZE; - r = BBUF_INIT(reg, init_size); - if (r != 0) goto end; - } - else - reg->used = 0; - - reg->num_mem = 0; - reg->num_repeat = 0; - reg->num_null_check = 0; - reg->repeat_range_alloc = 0; - reg->repeat_range = (OnigRepeatRange* )NULL; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - reg->num_comb_exp_check = 0; -#endif - - r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); - if (r != 0) goto err; - -#ifdef USE_NAMED_GROUP - /* mixed use named group and no-named group */ - if (scan_env.num_named > 0 && - IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { - if (scan_env.num_named != scan_env.num_mem) - r = disable_noname_group_capture(&root, reg, &scan_env); - else - r = numbered_ref_check(root); - - if (r != 0) goto err; - } -#endif - -#ifdef USE_SUBEXP_CALL - if (scan_env.num_call > 0) { - r = unset_addr_list_init(&uslist, scan_env.num_call); - if (r != 0) goto err; - scan_env.unset_addr_list = &uslist; - r = setup_subexp_call(root, &scan_env); - if (r != 0) goto err_unset; - r = subexp_recursive_check_trav(root, &scan_env); - if (r < 0) goto err_unset; - r = subexp_inf_recursive_check_trav(root, &scan_env); - if (r != 0) goto err_unset; - - reg->num_call = scan_env.num_call; - } - else - reg->num_call = 0; -#endif - - r = setup_tree(root, reg, 0, &scan_env); - if (r != 0) goto err_unset; - -#ifdef ONIG_DEBUG_PARSE_TREE - print_tree(stderr, root); -#endif - - reg->capture_history = scan_env.capture_history; - 
reg->bt_mem_start = scan_env.bt_mem_start; - reg->bt_mem_start |= reg->capture_history; - if (IS_FIND_CONDITION(reg->options)) - BIT_STATUS_ON_ALL(reg->bt_mem_end); - else { - reg->bt_mem_end = scan_env.bt_mem_end; - reg->bt_mem_end |= reg->capture_history; - } - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - if (scan_env.backrefed_mem == 0 -#ifdef USE_SUBEXP_CALL - || scan_env.num_call == 0 -#endif - ) { - setup_comb_exp_check(root, 0, &scan_env); -#ifdef USE_SUBEXP_CALL - if (scan_env.has_recursion != 0) { - scan_env.num_comb_exp_check = 0; - } - else -#endif - if (scan_env.comb_exp_max_regnum > 0) { - int i; - for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) { - if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) { - scan_env.num_comb_exp_check = 0; - break; - } - } - } - } - - reg->num_comb_exp_check = scan_env.num_comb_exp_check; -#endif - - clear_optimize_info(reg); -#ifndef ONIG_DONT_OPTIMIZE - r = set_optimize_info_from_tree(root, reg, &scan_env); - if (r != 0) goto err_unset; -#endif - - if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) { - xfree(scan_env.mem_nodes_dynamic); - scan_env.mem_nodes_dynamic = (Node** )NULL; - } - - r = compile_tree(root, reg); - if (r == 0) { - r = add_opcode(reg, OP_END); -#ifdef USE_SUBEXP_CALL - if (scan_env.num_call > 0) { - r = unset_addr_list_fix(&uslist, reg); - unset_addr_list_end(&uslist); - if (r) goto err; - } -#endif - - if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)) - reg->stack_pop_level = STACK_POP_LEVEL_ALL; - else { - if (reg->bt_mem_start != 0) - reg->stack_pop_level = STACK_POP_LEVEL_MEM_START; - else - reg->stack_pop_level = STACK_POP_LEVEL_FREE; - } - } -#ifdef USE_SUBEXP_CALL - else if (scan_env.num_call > 0) { - unset_addr_list_end(&uslist); - } -#endif - onig_node_free(root); - -#ifdef ONIG_DEBUG_COMPILE -#ifdef USE_NAMED_GROUP - onig_print_names(stderr, reg); -#endif - print_compiled_byte_code_list(stderr, reg); -#endif - - end: - reg->state = ONIG_STATE_NORMAL; - return r; - - err_unset: -#ifdef 
USE_SUBEXP_CALL - if (scan_env.num_call > 0) { - unset_addr_list_end(&uslist); - } -#endif - err: - if (IS_NOT_NULL(scan_env.error)) { - if (IS_NOT_NULL(einfo)) { - einfo->enc = scan_env.enc; - einfo->par = scan_env.error; - einfo->par_end = scan_env.error_end; - } - } - - onig_node_free(root); - if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) - xfree(scan_env.mem_nodes_dynamic); - return r; -} - -#ifdef USE_RECOMPILE_API -extern int -onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, - OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, - OnigErrorInfo* einfo) -{ - int r; - regex_t *new_reg; - - r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo); - if (r) return r; - if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - onig_transfer(reg, new_reg); - } - else { - onig_chain_link_add(reg, new_reg); - } - return 0; -} -#endif - -static int onig_inited = 0; - -extern int -onig_reg_init(regex_t* reg, OnigOptionType option, - OnigCaseFoldType case_fold_flag, - OnigEncoding enc, OnigSyntaxType* syntax) -{ - if (! 
onig_inited) - onig_init(); - - if (IS_NULL(reg)) - return ONIGERR_INVALID_ARGUMENT; - - if (ONIGENC_IS_UNDEF(enc)) - return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED; - - if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) - == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) { - return ONIGERR_INVALID_COMBINATION_OF_OPTIONS; - } - - (reg)->state = ONIG_STATE_MODIFY; - - if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) { - option |= syntax->options; - option &= ~ONIG_OPTION_SINGLELINE; - } - else - option |= syntax->options; - - (reg)->enc = enc; - (reg)->options = option; - (reg)->syntax = syntax; - (reg)->optimize = 0; - (reg)->exact = (UChar* )NULL; - (reg)->int_map = (int* )NULL; - (reg)->int_map_backward = (int* )NULL; - (reg)->chain = (regex_t* )NULL; - - (reg)->p = (UChar* )NULL; - (reg)->alloc = 0; - (reg)->used = 0; - (reg)->name_table = (void* )NULL; - - (reg)->case_fold_flag = case_fold_flag; - return 0; -} - -extern int -onig_new_without_alloc(regex_t* reg, const UChar* pattern, - const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, - OnigSyntaxType* syntax, OnigErrorInfo* einfo) -{ - int r; - - r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); - if (r) return r; - - r = onig_compile(reg, pattern, pattern_end, einfo); - return r; -} - -extern int -onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, - OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, - OnigErrorInfo* einfo) -{ - int r; - - *reg = (regex_t* )xmalloc(sizeof(regex_t)); - if (IS_NULL(*reg)) return ONIGERR_MEMORY; - - r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); - if (r) goto err; - - r = onig_compile(*reg, pattern, pattern_end, einfo); - if (r) { - err: - onig_free(*reg); - *reg = NULL; - } - return r; -} - - -extern int -onig_init(void) -{ - if (onig_inited != 0) - return 0; - - THREAD_SYSTEM_INIT; - THREAD_ATOMIC_START; - - onig_inited = 1; - - onigenc_init(); 
- /* onigenc_set_default_caseconv_table((UChar* )0); */ - -#ifdef ONIG_DEBUG_STATISTICS - onig_statistics_init(); -#endif - - THREAD_ATOMIC_END; - return 0; -} - - -static OnigEndCallListItemType* EndCallTop; - -extern void onig_add_end_call(void (*func)(void)) -{ - OnigEndCallListItemType* item; - - item = (OnigEndCallListItemType* )xmalloc(sizeof(*item)); - if (item == 0) return ; - - item->next = EndCallTop; - item->func = func; - - EndCallTop = item; -} - -static void -exec_end_call_list(void) -{ - OnigEndCallListItemType* prev; - void (*func)(void); - - while (EndCallTop != 0) { - func = EndCallTop->func; - (*func)(); - - prev = EndCallTop; - EndCallTop = EndCallTop->next; - xfree(prev); - } -} - -extern int -onig_end(void) -{ - THREAD_ATOMIC_START; - - exec_end_call_list(); - -#ifdef ONIG_DEBUG_STATISTICS - onig_print_statistics(stderr); -#endif - -#ifdef USE_SHARED_CCLASS_TABLE - onig_free_shared_cclass_table(); -#endif - -#ifdef USE_PARSE_TREE_NODE_RECYCLE - onig_free_node_list(); -#endif - - onig_inited = 0; - - THREAD_ATOMIC_END; - THREAD_SYSTEM_END; - return 0; -} - -extern int -onig_is_in_code_range(const UChar* p, OnigCodePoint code) -{ - OnigCodePoint n, *data; - OnigCodePoint low, high, x; - - GET_CODE_POINT(n, p); - data = (OnigCodePoint* )p; - data++; - - for (low = 0, high = n; low < high; ) { - x = (low + high) >> 1; - if (code > data[x * 2 + 1]) - low = x + 1; - else - high = x; - } - - return ((low < n && code >= data[low * 2]) ? 1 : 0); -} - -extern int -onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc) -{ - int found; - - if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) { - if (IS_NULL(cc->mbuf)) { - found = 0; - } - else { - found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); - } - } - else { - found = (BITSET_AT(cc->bs, code) == 0 ? 
0 : 1); - } - - if (IS_NCCLASS_NOT(cc)) - return !found; - else - return found; -} - -extern int -onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) -{ - int len; - - if (ONIGENC_MBC_MINLEN(enc) > 1) { - len = 2; - } - else { - len = ONIGENC_CODE_TO_MBCLEN(enc, code); - } - return onig_is_code_in_cc_len(len, code, cc); -} - - -#ifdef ONIG_DEBUG - -/* arguments type */ -#define ARG_SPECIAL -1 -#define ARG_NON 0 -#define ARG_RELADDR 1 -#define ARG_ABSADDR 2 -#define ARG_LENGTH 3 -#define ARG_MEMNUM 4 -#define ARG_OPTION 5 -#define ARG_STATE_CHECK 6 - -OnigOpInfoType OnigOpInfo[] = { - { OP_FINISH, "finish", ARG_NON }, - { OP_END, "end", ARG_NON }, - { OP_EXACT1, "exact1", ARG_SPECIAL }, - { OP_EXACT2, "exact2", ARG_SPECIAL }, - { OP_EXACT3, "exact3", ARG_SPECIAL }, - { OP_EXACT4, "exact4", ARG_SPECIAL }, - { OP_EXACT5, "exact5", ARG_SPECIAL }, - { OP_EXACTN, "exactn", ARG_SPECIAL }, - { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL }, - { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL }, - { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL }, - { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL }, - { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL }, - { OP_EXACTMBN, "exactmbn", ARG_SPECIAL }, - { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL }, - { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL }, - { OP_CCLASS, "cclass", ARG_SPECIAL }, - { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL }, - { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL }, - { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, - { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, - { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, - { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, - { OP_ANYCHAR, "anychar", ARG_NON }, - { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, - { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, - { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, - { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, - { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, - { OP_WORD, "word", ARG_NON }, - { 
OP_NOT_WORD, "not-word", ARG_NON }, - { OP_WORD_BOUND, "word-bound", ARG_NON }, - { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON }, - { OP_WORD_BEGIN, "word-begin", ARG_NON }, - { OP_WORD_END, "word-end", ARG_NON }, - { OP_BEGIN_BUF, "begin-buf", ARG_NON }, - { OP_END_BUF, "end-buf", ARG_NON }, - { OP_BEGIN_LINE, "begin-line", ARG_NON }, - { OP_END_LINE, "end-line", ARG_NON }, - { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, - { OP_BEGIN_POSITION, "begin-position", ARG_NON }, - { OP_BACKREF1, "backref1", ARG_NON }, - { OP_BACKREF2, "backref2", ARG_NON }, - { OP_BACKREFN, "backrefn", ARG_MEMNUM }, - { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, - { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, - { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, - { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL }, - { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, - { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, - { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, - { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, - { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, - { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, - { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, - { OP_SET_OPTION, "set-option", ARG_OPTION }, - { OP_FAIL, "fail", ARG_NON }, - { OP_JUMP, "jump", ARG_RELADDR }, - { OP_PUSH, "push", ARG_RELADDR }, - { OP_POP, "pop", ARG_NON }, - { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, - { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, - { OP_REPEAT, "repeat", ARG_SPECIAL }, - { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, - { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, - { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, - { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, - { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, - { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM }, - { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, - { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM }, - { 
OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM }, - { OP_PUSH_POS, "push-pos", ARG_NON }, - { OP_POP_POS, "pop-pos", ARG_NON }, - { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR }, - { OP_FAIL_POS, "fail-pos", ARG_NON }, - { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON }, - { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON }, - { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, - { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL }, - { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, - { OP_CALL, "call", ARG_ABSADDR }, - { OP_RETURN, "return", ARG_NON }, - { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL }, - { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, - { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK }, - { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK }, - { OP_STATE_CHECK_ANYCHAR_ML_STAR, - "state-check-anychar-ml*", ARG_STATE_CHECK }, - { -1, "", ARG_NON } -}; - -static char* -op2name(int opcode) -{ - int i; - - for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { - if (opcode == OnigOpInfo[i].opcode) - return OnigOpInfo[i].name; - } - return ""; -} - -static int -op2arg_type(int opcode) -{ - int i; - - for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { - if (opcode == OnigOpInfo[i].opcode) - return OnigOpInfo[i].arg_type; - } - return ARG_SPECIAL; -} - -static void -Indent(FILE* f, int indent) -{ - int i; - for (i = 0; i < indent; i++) putc(' ', f); -} - -static void -p_string(FILE* f, int len, UChar* s) -{ - fputs(":", f); - while (len-- > 0) { fputc(*s++, f); } -} - -static void -p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) -{ - int x = len * mb_len; - - fprintf(f, ":%d:", len); - while (x-- > 0) { fputc(*s++, f); } -} - -extern void -onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, - OnigEncoding enc) -{ - int i, n, arg_type; - RelAddrType addr; - LengthType len; - MemNumType mem; - StateCheckNumType scn; - OnigCodePoint code; - UChar *q; - - 
fprintf(f, "[%s", op2name(*bp)); - arg_type = op2arg_type(*bp); - if (arg_type != ARG_SPECIAL) { - bp++; - switch (arg_type) { - case ARG_NON: - break; - case ARG_RELADDR: - GET_RELADDR_INC(addr, bp); - fprintf(f, ":(%d)", addr); - break; - case ARG_ABSADDR: - GET_ABSADDR_INC(addr, bp); - fprintf(f, ":(%d)", addr); - break; - case ARG_LENGTH: - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d", len); - break; - case ARG_MEMNUM: - mem = *((MemNumType* )bp); - bp += SIZE_MEMNUM; - fprintf(f, ":%d", mem); - break; - case ARG_OPTION: - { - OnigOptionType option = *((OnigOptionType* )bp); - bp += SIZE_OPTION; - fprintf(f, ":%d", option); - } - break; - - case ARG_STATE_CHECK: - scn = *((StateCheckNumType* )bp); - bp += SIZE_STATE_CHECK_NUM; - fprintf(f, ":%d", scn); - break; - } - } - else { - switch (*bp++) { - case OP_EXACT1: - case OP_ANYCHAR_STAR_PEEK_NEXT: - case OP_ANYCHAR_ML_STAR_PEEK_NEXT: - p_string(f, 1, bp++); break; - case OP_EXACT2: - p_string(f, 2, bp); bp += 2; break; - case OP_EXACT3: - p_string(f, 3, bp); bp += 3; break; - case OP_EXACT4: - p_string(f, 4, bp); bp += 4; break; - case OP_EXACT5: - p_string(f, 5, bp); bp += 5; break; - case OP_EXACTN: - GET_LENGTH_INC(len, bp); - p_len_string(f, len, 1, bp); - bp += len; - break; - - case OP_EXACTMB2N1: - p_string(f, 2, bp); bp += 2; break; - case OP_EXACTMB2N2: - p_string(f, 4, bp); bp += 4; break; - case OP_EXACTMB2N3: - p_string(f, 6, bp); bp += 6; break; - case OP_EXACTMB2N: - GET_LENGTH_INC(len, bp); - p_len_string(f, len, 2, bp); - bp += len * 2; - break; - case OP_EXACTMB3N: - GET_LENGTH_INC(len, bp); - p_len_string(f, len, 3, bp); - bp += len * 3; - break; - case OP_EXACTMBN: - { - int mb_len; - - GET_LENGTH_INC(mb_len, bp); - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d:%d:", mb_len, len); - n = len * mb_len; - while (n-- > 0) { fputc(*bp++, f); } - } - break; - - case OP_EXACT1_IC: - len = enclen(enc, bp); - p_string(f, len, bp); - bp += len; - break; - case OP_EXACTN_IC: - GET_LENGTH_INC(len, bp); - 
p_len_string(f, len, 1, bp); - bp += len; - break; - - case OP_CCLASS: - n = bitset_on_num((BitSetRef )bp); - bp += SIZE_BITSET; - fprintf(f, ":%d", n); - break; - - case OP_CCLASS_NOT: - n = bitset_on_num((BitSetRef )bp); - bp += SIZE_BITSET; - fprintf(f, ":%d", n); - break; - - case OP_CCLASS_MB: - case OP_CCLASS_MB_NOT: - GET_LENGTH_INC(len, bp); - q = bp; -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS - ALIGNMENT_RIGHT(q); -#endif - GET_CODE_POINT(code, q); - bp += len; - fprintf(f, ":%d:%d", (int )code, len); - break; - - case OP_CCLASS_MIX: - case OP_CCLASS_MIX_NOT: - n = bitset_on_num((BitSetRef )bp); - bp += SIZE_BITSET; - GET_LENGTH_INC(len, bp); - q = bp; -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS - ALIGNMENT_RIGHT(q); -#endif - GET_CODE_POINT(code, q); - bp += len; - fprintf(f, ":%d:%d:%d", n, (int )code, len); - break; - - case OP_CCLASS_NODE: - { - CClassNode *cc; - - GET_POINTER_INC(cc, bp); - n = bitset_on_num(cc->bs); - fprintf(f, ":%u:%d", (unsigned int )cc, n); - } - break; - - case OP_BACKREFN_IC: - mem = *((MemNumType* )bp); - bp += SIZE_MEMNUM; - fprintf(f, ":%d", mem); - break; - - case OP_BACKREF_MULTI_IC: - case OP_BACKREF_MULTI: - fputs(" ", f); - GET_LENGTH_INC(len, bp); - for (i = 0; i < len; i++) { - GET_MEMNUM_INC(mem, bp); - if (i > 0) fputs(", ", f); - fprintf(f, "%d", mem); - } - break; - - case OP_BACKREF_WITH_LEVEL: - { - OnigOptionType option; - LengthType level; - - GET_OPTION_INC(option, bp); - fprintf(f, ":%d", option); - GET_LENGTH_INC(level, bp); - fprintf(f, ":%d", level); - - fputs(" ", f); - GET_LENGTH_INC(len, bp); - for (i = 0; i < len; i++) { - GET_MEMNUM_INC(mem, bp); - if (i > 0) fputs(", ", f); - fprintf(f, "%d", mem); - } - } - break; - - case OP_REPEAT: - case OP_REPEAT_NG: - { - mem = *((MemNumType* )bp); - bp += SIZE_MEMNUM; - addr = *((RelAddrType* )bp); - bp += SIZE_RELADDR; - fprintf(f, ":%d:%d", mem, addr); - } - break; - - case OP_PUSH_OR_JUMP_EXACT1: - case OP_PUSH_IF_PEEK_NEXT: - addr = *((RelAddrType* )bp); - bp += 
SIZE_RELADDR; - fprintf(f, ":(%d)", addr); - p_string(f, 1, bp); - bp += 1; - break; - - case OP_LOOK_BEHIND: - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d", len); - break; - - case OP_PUSH_LOOK_BEHIND_NOT: - GET_RELADDR_INC(addr, bp); - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d:(%d)", len, addr); - break; - - case OP_STATE_CHECK_PUSH: - case OP_STATE_CHECK_PUSH_OR_JUMP: - scn = *((StateCheckNumType* )bp); - bp += SIZE_STATE_CHECK_NUM; - addr = *((RelAddrType* )bp); - bp += SIZE_RELADDR; - fprintf(f, ":%d:(%d)", scn, addr); - break; - - default: - fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", - *--bp); - } - } - fputs("]", f); - if (nextp) *nextp = bp; -} - -static void -print_compiled_byte_code_list(FILE* f, regex_t* reg) -{ - int ncode; - UChar* bp = reg->p; - UChar* end = reg->p + reg->used; - - fprintf(f, "code length: %d\n", reg->used); - - ncode = 0; - while (bp < end) { - ncode++; - if (bp > reg->p) { - if (ncode % 5 == 0) - fprintf(f, "\n"); - else - fputs(" ", f); - } - onig_print_compiled_byte_code(f, bp, &bp, reg->enc); - } - - fprintf(f, "\n"); -} - -static void -print_indent_tree(FILE* f, Node* node, int indent) -{ - int i, type; - int add = 3; - UChar* p; - - Indent(f, indent); - if (IS_NULL(node)) { - fprintf(f, "ERROR: null node!!!\n"); - exit (0); - } - - type = NTYPE(node); - switch (type) { - case NT_LIST: - case NT_ALT: - if (NTYPE(node) == NT_LIST) - fprintf(f, "\n", (int )node); - else - fprintf(f, "\n", (int )node); - - print_indent_tree(f, NCAR(node), indent + add); - while (IS_NOT_NULL(node = NCDR(node))) { - if (NTYPE(node) != type) { - fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); - exit(0); - } - print_indent_tree(f, NCAR(node), indent + add); - } - break; - - case NT_STR: - fprintf(f, "", - (NSTRING_IS_RAW(node) ? 
"-raw" : ""), (int )node); - for (p = NSTR(node)->s; p < NSTR(node)->end; p++) { - if (*p >= 0x20 && *p < 0x7f) - fputc(*p, f); - else { - fprintf(f, " 0x%02x", *p); - } - } - break; - - case NT_CCLASS: - fprintf(f, "", (int )node); - if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f); - if (NCCLASS(node)->mbuf) { - BBuf* bbuf = NCCLASS(node)->mbuf; - for (i = 0; i < bbuf->used; i++) { - if (i > 0) fprintf(f, ","); - fprintf(f, "%0x", bbuf->p[i]); - } - } - break; - - case NT_CTYPE: - fprintf(f, " ", (int )node); - switch (NCTYPE(node)->ctype) { - case ONIGENC_CTYPE_WORD: - if (NCTYPE(node)->not != 0) - fputs("not word", f); - else - fputs("word", f); - break; - - default: - fprintf(f, "ERROR: undefined ctype.\n"); - exit(0); - } - break; - - case NT_CANY: - fprintf(f, "", (int )node); - break; - - case NT_ANCHOR: - fprintf(f, " ", (int )node); - switch (NANCHOR(node)->type) { - case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break; - case ANCHOR_END_BUF: fputs("end buf", f); break; - case ANCHOR_BEGIN_LINE: fputs("begin line", f); break; - case ANCHOR_END_LINE: fputs("end line", f); break; - case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break; - case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break; - - case ANCHOR_WORD_BOUND: fputs("word bound", f); break; - case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break; -#ifdef USE_WORD_BEGIN_END - case ANCHOR_WORD_BEGIN: fputs("word begin", f); break; - case ANCHOR_WORD_END: fputs("word end", f); break; -#endif - case ANCHOR_PREC_READ: fputs("prec read", f); break; - case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); break; - case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); break; - case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break; - - default: - fprintf(f, "ERROR: undefined anchor type.\n"); - break; - } - break; - - case NT_BREF: - { - int* p; - BRefNode* br = NBREF(node); - p = BACKREFS_P(br); - fprintf(f, "", (int )node); - for (i = 0; i < br->back_num; i++) { - if (i > 0) 
fputs(", ", f); - fprintf(f, "%d", p[i]); - } - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - { - CallNode* cn = NCALL(node); - fprintf(f, "", (int )node); - p_string(f, cn->name_end - cn->name, cn->name); - } - break; -#endif - - case NT_QTFR: - fprintf(f, "{%d,%d}%s\n", (int )node, - NQTFR(node)->lower, NQTFR(node)->upper, - (NQTFR(node)->greedy ? "" : "?")); - print_indent_tree(f, NQTFR(node)->target, indent + add); - break; - - case NT_ENCLOSE: - fprintf(f, " ", (int )node); - switch (NENCLOSE(node)->type) { - case ENCLOSE_OPTION: - fprintf(f, "option:%d", NENCLOSE(node)->option); - break; - case ENCLOSE_MEMORY: - fprintf(f, "memory:%d", NENCLOSE(node)->regnum); - break; - case ENCLOSE_STOP_BACKTRACK: - fprintf(f, "stop-bt"); - break; - - default: - break; - } - fprintf(f, "\n"); - print_indent_tree(f, NENCLOSE(node)->target, indent + add); - break; - - default: - fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node)); - break; - } - - if (type != NT_LIST && type != NT_ALT && type != NT_QTFR && - type != NT_ENCLOSE) - fprintf(f, "\n"); - fflush(f); -} -#endif /* ONIG_DEBUG */ - -#ifdef ONIG_DEBUG_PARSE_TREE -static void -print_tree(FILE* f, Node* node) -{ - print_indent_tree(f, node, 0); -} -#endif diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regenc.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regenc.c deleted file mode 100644 index 9e0f0010aa..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regenc.c +++ /dev/null @@ -1,904 +0,0 @@ -/********************************************************************** - regenc.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2007 K.Kosako - * All rights reserved. - * - * Copyright (c) 2015, Hewlett Packard Enterprise Development, L.P.
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "regint.h" - -OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; - -extern int -onigenc_init(void) -{ - return 0; -} - -extern OnigEncoding -onigenc_get_default_encoding(void) -{ - return OnigEncDefaultCharEncoding; -} - -extern int -onigenc_set_default_encoding(OnigEncoding enc) -{ - OnigEncDefaultCharEncoding = enc; - return 0; -} - -extern UChar* -onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) -{ - UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); - if (p < s) { - p += enclen(enc, p); - } - return p; -} - -extern UChar* -onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, - const UChar* start, const UChar* s, const UChar** prev) -{ - UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); - - if (p < s) { - if (prev) *prev = (const UChar* )p; - p += enclen(enc, p); - } - else { - if (prev) *prev = (const UChar* )NULL; /* Sorry */ - } - return p; -} - -extern UChar* -onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s) -{ - if (s <= start) - return (UChar* )NULL; - - return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1); -} - -extern UChar* -onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n) -{ - while (ONIG_IS_NOT_NULL(s) && n-- > 0) { - if (s <= start) - return (UChar* )NULL; - - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1); - } - return (UChar* )s; -} - -extern UChar* -onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n) -{ - UChar* q = (UChar* )p; - while (n-- > 0) { - q += ONIGENC_MBC_ENC_LEN(enc, q); - } - return (q <= end ? 
q : NULL); -} - -extern int -onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end) -{ - int n = 0; - UChar* q = (UChar* )p; - - while (q < end) { - q += ONIGENC_MBC_ENC_LEN(enc, q); - n++; - } - return n; -} - -extern int -onigenc_strlen_null(OnigEncoding enc, const UChar* s) -{ - int n = 0; - UChar* p = (UChar* )s; - - while (1) { - if (*p == '\0') { - UChar* q; - int len = ONIGENC_MBC_MINLEN(enc); - - if (len == 1) return n; - q = p + 1; - while (len > 1) { - if (*q != '\0') break; - q++; - len--; - } - if (len == 1) return n; - } - p += ONIGENC_MBC_ENC_LEN(enc, p); - n++; - } -} - -extern int -onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) -{ - UChar* start = (UChar* )s; - UChar* p = (UChar* )s; - - while (1) { - if (*p == '\0') { - UChar* q; - int len = ONIGENC_MBC_MINLEN(enc); - - if (len == 1) return (int )(p - start); - q = p + 1; - while (len > 1) { - if (*q != '\0') break; - q++; - len--; - } - if (len == 1) return (int )(p - start); - } - p += ONIGENC_MBC_ENC_LEN(enc, p); - } -} - -const UChar OnigEncAsciiToLowerCaseTable[] = { - 0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u, - 10u, 11u, 12u, 13u, 14u, 15u, 16u, 17u, - 20u, 21u, 22u, 23u, 24u, 25u, 26u, 27u, - 30u, 31u, 32u, 33u, 34u, 35u, 36u, 37u, - 40u, 41u, 42u, 43u, 44u, 45u, 46u, 47u, - 50u, 51u, 52u, 53u, 54u, 55u, 56u, 57u, - 60u, 61u, 62u, 63u, 64u, 65u, 66u, 67u, - 70u, 71u, 72u, 73u, 74u, 75u, 76u, 77u, - 100u, 141u, 142u, 143u, 144u, 145u, 146u, 147u, - 150u, 151u, 152u, 153u, 154u, 155u, 156u, 157u, - 160u, 161u, 162u, 163u, 164u, 165u, 166u, 167u, - 170u, 171u, 172u, 133u, 134u, 135u, 136u, 137u, - 140u, 141u, 142u, 143u, 144u, 145u, 146u, 147u, - 150u, 151u, 152u, 153u, 154u, 155u, 156u, 157u, - 160u, 161u, 162u, 163u, 164u, 165u, 166u, 167u, - 170u, 171u, 172u, 173u, 174u, 175u, 176u, 177u, - 200u, 201u, 202u, 203u, 204u, 205u, 206u, 207u, - 210u, 211u, 212u, 213u, 214u, 215u, 216u, 217u, - 220u, 221u, 222u, 223u, 224u, 225u, 226u, 227u, - 230u, 231u, 232u, 233u, 234u, 235u, 
236u, 237u, - 240u, 241u, 242u, 243u, 244u, 245u, 246u, 247u, - 250u, 251u, 252u, 253u, 254u, 255u, 256u, 257u, - 260u, 261u, 262u, 263u, 264u, 265u, 266u, 267u, - 270u, 271u, 272u, 273u, 274u, 275u, 276u, 277u, - 300u, 301u, 302u, 303u, 304u, 305u, 306u, 307u, - 310u, 311u, 312u, 313u, 314u, 315u, 316u, 317u, - 320u, 321u, 322u, 323u, 324u, 325u, 326u, 327u, - 330u, 331u, 332u, 333u, 334u, 335u, 336u, 337u, - 340u, 341u, 342u, 343u, 344u, 345u, 346u, 347u, - 350u, 351u, 352u, 353u, 354u, 355u, 356u, 357u, - 360u, 361u, 362u, 363u, 364u, 365u, 366u, 367u, - 370u, 371u, 372u, 373u, 374u, 375u, 376u, 377u, -}; - -#ifdef USE_UPPER_CASE_TABLE -const UChar OnigEncAsciiToUpperCaseTable[256] = { - 0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u, - 10u, 11u, 12u, 13u, 14u, 15u, 16u, 17u, - 20u, 21u, 22u, 23u, 24u, 25u, 26u, 27u, - 30u, 31u, 32u, 33u, 34u, 35u, 36u, 37u, - 40u, 41u, 42u, 43u, 44u, 45u, 46u, 47u, - 50u, 51u, 52u, 53u, 54u, 55u, 56u, 57u, - 60u, 61u, 62u, 63u, 64u, 65u, 66u, 67u, - 70u, 71u, 72u, 73u, 74u, 75u, 76u, 77u, - 100u, 101u, 102u, 103u, 104u, 105u, 106u, 107u, - 110u, 111u, 112u, 113u, 114u, 115u, 116u, 117u, - 120u, 121u, 122u, 123u, 124u, 125u, 126u, 127u, - 130u, 131u, 132u, 133u, 134u, 135u, 136u, 137u, - 140u, 101u, 102u, 103u, 104u, 105u, 106u, 107u, - 110u, 111u, 112u, 113u, 114u, 115u, 116u, 117u, - 120u, 121u, 122u, 123u, 124u, 125u, 126u, 127u, - 130u, 131u, 132u, 173u, 174u, 175u, 176u, 177u, - 200u, 201u, 202u, 203u, 204u, 205u, 206u, 207u, - 210u, 211u, 212u, 213u, 214u, 215u, 216u, 217u, - 220u, 221u, 222u, 223u, 224u, 225u, 226u, 227u, - 230u, 231u, 232u, 233u, 234u, 235u, 236u, 237u, - 240u, 241u, 242u, 243u, 244u, 245u, 246u, 247u, - 250u, 251u, 252u, 253u, 254u, 255u, 256u, 257u, - 260u, 261u, 262u, 263u, 264u, 265u, 266u, 267u, - 270u, 271u, 272u, 273u, 274u, 275u, 276u, 277u, - 300u, 301u, 302u, 303u, 304u, 305u, 306u, 307u, - 310u, 311u, 312u, 313u, 314u, 315u, 316u, 317u, - 320u, 321u, 322u, 323u, 324u, 325u, 326u, 327u, - 330u, 331u, 332u, 
333u, 334u, 335u, 336u, 337u, - 340u, 341u, 342u, 343u, 344u, 345u, 346u, 347u, - 350u, 351u, 352u, 353u, 354u, 355u, 356u, 357u, - 360u, 361u, 362u, 363u, 364u, 365u, 366u, 367u, - 370u, 371u, 372u, 373u, 374u, 375u, 376u, 377u, -}; -#endif - -const unsigned short OnigEncAsciiCtypeTable[256] = { - 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, - 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, - 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, - 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, - 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, - 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, - 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, - 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, - 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, - 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, - 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, - 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, - 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, - 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, - 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, - 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 -}; - -const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { - 0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u, - 10u, 11u, 12u, 13u, 14u, 15u, 16u, 17u, - 20u, 21u, 22u, 23u, 24u, 25u, 26u, 27u, - 30u, 31u, 32u, 33u, 34u, 35u, 36u, 37u, - 40u, 41u, 42u, 43u, 44u, 45u, 46u, 47u, - 50u, 51u, 52u, 53u, 54u, 55u, 56u, 57u, - 60u, 61u, 62u, 63u, 64u, 65u, 66u, 67u, - 70u, 71u, 72u, 73u, 74u, 75u, 76u, 77u, - 100u, 141u, 142u, 143u, 144u, 145u, 146u, 147u, - 150u, 151u, 152u, 153u, 154u, 155u, 156u, 157u, - 160u, 161u, 162u, 163u, 164u, 165u, 166u, 167u, - 170u, 171u, 172u, 133u, 134u, 135u, 136u, 137u, - 140u, 141u, 142u, 143u, 144u, 145u, 146u, 147u, - 150u, 151u, 152u, 153u, 154u, 155u, 156u, 157u, - 160u, 161u, 162u, 163u, 164u, 165u, 166u, 167u, - 170u, 171u, 172u, 173u, 174u, 175u, 176u, 177u, - 200u, 201u, 202u, 203u, 204u, 205u, 206u, 207u, - 210u, 211u, 212u, 213u, 214u, 215u, 216u, 217u, - 220u, 221u, 222u, 223u, 224u, 225u, 226u, 227u, - 230u, 231u, 232u, 233u, 234u, 235u, 236u, 237u, - 240u, 241u, 242u, 243u, 244u, 245u, 246u, 247u, - 250u, 251u, 252u, 253u, 254u, 255u, 256u, 257u, - 260u, 261u, 262u, 263u, 264u, 265u, 266u, 267u, - 270u, 271u, 272u, 273u, 274u, 275u, 276u, 277u, - 340u, 341u, 342u, 343u, 344u, 345u, 346u, 347u, - 350u, 351u, 352u, 353u, 354u, 355u, 356u, 357u, - 360u, 361u, 362u, 363u, 364u, 365u, 366u, 327u, - 370u, 371u, 372u, 373u, 374u, 375u, 376u, 337u, - 340u, 341u, 342u, 343u, 344u, 345u, 346u, 347u, - 350u, 351u, 352u, 353u, 354u, 355u, 356u, 357u, - 360u, 361u, 362u, 363u, 364u, 365u, 366u, 367u, - 370u, 371u, 372u, 373u, 374u, 375u, 376u, 
377u, -}; - -#ifdef USE_UPPER_CASE_TABLE -const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { - 0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u, - 10u, 11u, 12u, 13u, 14u, 15u, 16u, 17u, - 20u, 21u, 22u, 23u, 24u, 25u, 26u, 27u, - 30u, 31u, 32u, 33u, 34u, 35u, 36u, 37u, - 40u, 41u, 42u, 43u, 44u, 45u, 46u, 47u, - 50u, 51u, 52u, 53u, 54u, 55u, 56u, 57u, - 60u, 61u, 62u, 63u, 64u, 65u, 66u, 67u, - 70u, 71u, 72u, 73u, 74u, 75u, 76u, 77u, - 100u, 101u, 102u, 103u, 104u, 105u, 106u, 107u, - 110u, 111u, 112u, 113u, 114u, 115u, 116u, 117u, - 120u, 121u, 122u, 123u, 124u, 125u, 126u, 127u, - 130u, 131u, 132u, 133u, 134u, 135u, 136u, 137u, - 140u, 101u, 102u, 103u, 104u, 105u, 106u, 107u, - 110u, 111u, 112u, 113u, 114u, 115u, 116u, 117u, - 120u, 121u, 122u, 123u, 124u, 125u, 126u, 127u, - 130u, 131u, 132u, 173u, 174u, 175u, 176u, 177u, - 200u, 201u, 202u, 203u, 204u, 205u, 206u, 207u, - 210u, 211u, 212u, 213u, 214u, 215u, 216u, 217u, - 220u, 221u, 222u, 223u, 224u, 225u, 226u, 227u, - 230u, 231u, 232u, 233u, 234u, 235u, 236u, 237u, - 240u, 241u, 242u, 243u, 244u, 245u, 246u, 247u, - 250u, 251u, 252u, 253u, 254u, 255u, 256u, 257u, - 260u, 261u, 262u, 263u, 264u, 265u, 266u, 267u, - 270u, 271u, 272u, 273u, 274u, 275u, 276u, 277u, - 300u, 301u, 302u, 303u, 304u, 305u, 306u, 307u, - 310u, 311u, 312u, 313u, 314u, 315u, 316u, 317u, - 320u, 321u, 322u, 323u, 324u, 325u, 326u, 327u, - 330u, 331u, 332u, 333u, 334u, 335u, 336u, 337u, - 300u, 301u, 302u, 303u, 304u, 305u, 306u, 307u, - 310u, 311u, 312u, 313u, 314u, 315u, 316u, 317u, - 320u, 321u, 322u, 323u, 324u, 325u, 326u, 367u, - 330u, 331u, 332u, 333u, 334u, 335u, 336u, 377u, -}; -#endif - -extern void -onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED) -{ - /* nothing */ - /* obsoleted. 
*/ -} - -extern UChar* -onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) -{ - return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); -} - -const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = { - { 0x41, 0x61 }, - { 0x42, 0x62 }, - { 0x43, 0x63 }, - { 0x44, 0x64 }, - { 0x45, 0x65 }, - { 0x46, 0x66 }, - { 0x47, 0x67 }, - { 0x48, 0x68 }, - { 0x49, 0x69 }, - { 0x4a, 0x6a }, - { 0x4b, 0x6b }, - { 0x4c, 0x6c }, - { 0x4d, 0x6d }, - { 0x4e, 0x6e }, - { 0x4f, 0x6f }, - { 0x50, 0x70 }, - { 0x51, 0x71 }, - { 0x52, 0x72 }, - { 0x53, 0x73 }, - { 0x54, 0x74 }, - { 0x55, 0x75 }, - { 0x56, 0x76 }, - { 0x57, 0x77 }, - { 0x58, 0x78 }, - { 0x59, 0x79 }, - { 0x5a, 0x7a } -}; - -extern int -onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, - OnigApplyAllCaseFoldFunc f, void* arg) -{ - OnigCodePoint code; - int i, r; - - for (i = 0; - i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes)); - i++) { - code = OnigAsciiLowerMap[i].to; - r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg); - if (r != 0) return r; - - code = OnigAsciiLowerMap[i].from; - r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg); - if (r != 0) return r; - } - - return 0; -} - -extern int -onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, - const OnigUChar* p, const OnigUChar* end ARG_UNUSED, - OnigCaseFoldCodeItem items[]) -{ - if (0x41 <= *p && *p <= 0x5a) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = (OnigCodePoint )(*p + 0x20); - return 1; - } - else if (0x61 <= *p && *p <= 0x7a) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = (OnigCodePoint )(*p - 0x20); - return 1; - } - else - return 0; -} - -static int -ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, - OnigApplyAllCaseFoldFunc f, void* arg) -{ - static OnigCodePoint ss[] = { 0x73, 0x73 }; - - return (*f)((OnigCodePoint )0xdf, ss, 2, arg); -} - -extern int -onigenc_apply_all_case_fold_with_map(int map_size, - const 
OnigPairCaseFoldCodes map[], - int ess_tsett_flag, OnigCaseFoldType flag, - OnigApplyAllCaseFoldFunc f, void* arg) -{ - OnigCodePoint code; - int i, r; - - r = onigenc_ascii_apply_all_case_fold(flag, f, arg); - if (r != 0) return r; - - for (i = 0; i < map_size; i++) { - code = map[i].to; - r = (*f)(map[i].from, &code, 1, arg); - if (r != 0) return r; - - code = map[i].from; - r = (*f)(map[i].to, &code, 1, arg); - if (r != 0) return r; - } - - if (ess_tsett_flag != 0) - return ss_apply_all_case_fold(flag, f, arg); - - return 0; -} - -extern int -onigenc_get_case_fold_codes_by_str_with_map(int map_size, - const OnigPairCaseFoldCodes map[], - int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED, - const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) -{ - if (0x41 <= *p && *p <= 0x5a) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = (OnigCodePoint )(*p + 0x20); - if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1 - && (*(p+1) == 0x53 || *(p+1) == 0x73)) { - /* SS */ - items[1].byte_len = 2; - items[1].code_len = 1; - items[1].code[0] = (OnigCodePoint )0xdf; - return 2; - } - else - return 1; - } - else if (0x61 <= *p && *p <= 0x7a) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = (OnigCodePoint )(*p - 0x20); - if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1 - && (*(p+1) == 0x73 || *(p+1) == 0x53)) { - /* ss */ - items[1].byte_len = 2; - items[1].code_len = 1; - items[1].code[0] = (OnigCodePoint )0xdf; - return 2; - } - else - return 1; - } - else if (*p == 0xdf && ess_tsett_flag != 0) { - items[0].byte_len = 1; - items[0].code_len = 2; - items[0].code[0] = (OnigCodePoint )'s'; - items[0].code[1] = (OnigCodePoint )'s'; - - items[1].byte_len = 1; - items[1].code_len = 2; - items[1].code[0] = (OnigCodePoint )'S'; - items[1].code[1] = (OnigCodePoint )'S'; - - items[2].byte_len = 1; - items[2].code_len = 2; - items[2].code[0] = (OnigCodePoint )'s'; - items[2].code[1] = (OnigCodePoint )'S'; - - 
items[3].byte_len = 1; - items[3].code_len = 2; - items[3].code[0] = (OnigCodePoint )'S'; - items[3].code[1] = (OnigCodePoint )'s'; - - return 4; - } - else { - int i; - - for (i = 0; i < map_size; i++) { - if (*p == map[i].from) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = map[i].to; - return 1; - } - else if (*p == map[i].to) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = map[i].from; - return 1; - } - } - } - - return 0; -} - - -extern int -onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED, - OnigCodePoint* sb_out ARG_UNUSED, - const OnigCodePoint* ranges[] ARG_UNUSED) -{ - return ONIG_NO_SUPPORT_CONFIG; -} - -extern int -onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end) -{ - if (p < end) { - if (*p == 0x0a) return 1; - } - return 0; -} - -/* for single byte encodings */ -extern int -onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p, - const UChar*end ARG_UNUSED, UChar* lower) -{ - *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p); - - (*p)++; - return 1; /* return byte length of converted char to lower */ -} - -#if 0 -extern int -onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag, - const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - (*pp)++; - return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); -} -#endif - -extern int -onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED) -{ - return 1; -} - -extern OnigCodePoint -onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) -{ - return (OnigCodePoint )(*p); -} - -extern int -onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED) -{ - return (code < 0x100 ? 
1 : ONIGERR_INVALID_CODE_POINT_VALUE); -} - -extern int -onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf) -{ - *buf = (UChar )(code & 0xff); - return 1; -} - -extern UChar* -onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, - const UChar* s) -{ - return (UChar* )s; -} - -extern int -onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, - const UChar* end ARG_UNUSED) -{ - return TRUE; -} - -extern int -onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, - const UChar* end ARG_UNUSED) -{ - return FALSE; -} - -extern OnigCodePoint -onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end) -{ - int c, i, len; - OnigCodePoint n; - - len = enclen(enc, p); - n = (OnigCodePoint )(*p++); - if (len == 1) return n; - - for (i = 1; i < len; i++) { - if (p >= end) break; - c = *p++; - n <<= 8; n += c; - } - return n; -} - -extern int -onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, - const UChar** pp, const UChar* end ARG_UNUSED, - UChar* lower) -{ - int len; - const UChar *p = *pp; - - if (ONIGENC_IS_MBC_ASCII(p)) { - *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); - (*pp)++; - return 1; - } - else { - int i; - - len = enclen(enc, p); - for (i = 0; i < len; i++) { - *lower++ = *p++; - } - (*pp) += len; - return len; /* return byte length of converted to lower char */ - } -} - -#if 0 -extern int -onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag, - const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - if (ONIGENC_IS_MBC_ASCII(p)) { - (*pp)++; - return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); - } - - (*pp) += enclen(enc, p); - return FALSE; -} -#endif - -extern int -onigenc_mb2_code_to_mbclen(OnigCodePoint code) -{ - if ((code & 0xff00) != 0) return 2; - else return 1; -} - -extern int -onigenc_mb4_code_to_mbclen(OnigCodePoint code) -{ - if ((code & 0xff000000) != 0) return 4; - else if ((code & 0xff0000) != 0) return 3; - else 
if ((code & 0xff00) != 0) return 2; - else return 1; -} - -extern int -onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) -{ - UChar *p = buf; - - if ((code & 0xff00) != 0) { - *p++ = (UChar )((code >> 8) & 0xff); - } - *p++ = (UChar )(code & 0xff); - -#if 1 - if (enclen(enc, buf) != (p - buf)) - return ONIGERR_INVALID_CODE_POINT_VALUE; -#endif - return (int)(p - buf); -} - -extern int -onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) -{ - UChar *p = buf; - - if ((code & 0xff000000) != 0) { - *p++ = (UChar )((code >> 24) & 0xff); - } - if ((code & 0xff0000) != 0 || p != buf) { - *p++ = (UChar )((code >> 16) & 0xff); - } - if ((code & 0xff00) != 0 || p != buf) { - *p++ = (UChar )((code >> 8) & 0xff); - } - *p++ = (UChar )(code & 0xff); - -#if 1 - if (enclen(enc, buf) != (p - buf)) - return ONIGERR_INVALID_CODE_POINT_VALUE; -#endif - return (int)(p - buf); -} - -extern int -onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) -{ - static PosixBracketEntryType PBS[] = { - { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 }, - { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 }, - { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 }, - { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 }, - { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 }, - { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 }, - { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 }, - { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 }, - { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 }, - { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 }, - { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 }, - { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 }, - { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 }, - { (UChar* )"Word", ONIGENC_CTYPE_WORD, 4 }, - { (UChar* )NULL, -1, 0 } - }; - - PosixBracketEntryType *pb; - int len; - - len = onigenc_strlen(enc, p, end); - for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { - if (len == pb->len && - onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) - return pb->ctype; 
- } - - return ONIGERR_INVALID_CHAR_PROPERTY_NAME; -} - -extern int -onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, - unsigned int ctype) -{ - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else { - if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { - return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); - } - } - - return FALSE; -} - -extern int -onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, - unsigned int ctype) -{ - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else { - if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { - return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); - } - } - - return FALSE; -} - -extern int -onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, - const UChar* sascii /* ascii */, int n) -{ - int x, c; - - while (n-- > 0) { - if (p >= end) return (int )(*sascii); - - c = (int )ONIGENC_MBC_TO_CODE(enc, p, end); - x = *sascii - c; - if (x) return x; - - sascii++; - p += enclen(enc, p); - } - return 0; -} - -/* Property management */ -static int -resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize) -{ - int size; - const OnigCodePoint **list = *plist; - - size = sizeof(OnigCodePoint*) * new_size; - if (IS_NULL(list)) { - list = (const OnigCodePoint** )xmalloc(size); - } - else { - list = (const OnigCodePoint** )xrealloc((void* )list, size, *psize * sizeof(OnigCodePoint*)); - } - - if (IS_NULL(list)) return ONIGERR_MEMORY; - - *plist = list; - *psize = new_size; - - return 0; -} - -extern int -onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, - hash_table_type **table, const OnigCodePoint*** plist, int *pnum, - int *psize) -{ -#define PROP_INIT_SIZE 16 - - int r; - - if (*psize <= *pnum) { - int new_size = (*psize == 0 ? 
PROP_INIT_SIZE : *psize * 2); - r = resize_property_list(new_size, plist, psize); - if (r != 0) return r; - } - - (*plist)[*pnum] = prop; - - if (ONIG_IS_NULL(*table)) { - *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE); - if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY; - } - - *pnum = *pnum + 1; - onig_st_insert_strend(*table, name, name + strlen_s((char* )name, MAX_STRING_SIZE), - (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE)); - return 0; -} - -extern int -onigenc_property_list_init(int (*f)(void)) -{ - int r; - - THREAD_ATOMIC_START; - - r = f(); - - THREAD_ATOMIC_END; - return r; -} diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regenc.h b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regenc.h deleted file mode 100644 index 1d39f9c054..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regenc.h +++ /dev/null @@ -1,189 +0,0 @@ -#ifndef REGENC_H -#define REGENC_H -/********************************************************************** - regenc.h - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef PACKAGE -/* PACKAGE is defined in config.h */ -//#include "config.h" -#endif - -#ifdef ONIG_ESCAPE_UCHAR_COLLISION -#undef ONIG_ESCAPE_UCHAR_COLLISION -#endif - -#include "oniguruma.h" - -typedef struct { - OnigCodePoint from; - OnigCodePoint to; -} OnigPairCaseFoldCodes; - - -#ifndef NULL -#define NULL ((void* )0) -#endif - -#ifndef TRUE -#define TRUE 1 -#endif - -#ifndef FALSE -#define FALSE 0 -#endif - -#ifndef ARG_UNUSED -#if defined(__GNUC__) -# define ARG_UNUSED __attribute__ ((unused)) -#else -# define ARG_UNUSED -#endif -#endif - -#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0) -#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0) -#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL -#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val) - -#define enclen(enc,p) ONIGENC_MBC_ENC_LEN(enc,p) - -/* character types bit flag */ -#define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE) -#define BIT_CTYPE_ALPHA (1<< ONIGENC_CTYPE_ALPHA) -#define BIT_CTYPE_BLANK (1<< ONIGENC_CTYPE_BLANK) -#define BIT_CTYPE_CNTRL (1<< ONIGENC_CTYPE_CNTRL) -#define BIT_CTYPE_DIGIT (1<< ONIGENC_CTYPE_DIGIT) -#define BIT_CTYPE_GRAPH (1<< ONIGENC_CTYPE_GRAPH) -#define BIT_CTYPE_LOWER (1<< ONIGENC_CTYPE_LOWER) -#define BIT_CTYPE_PRINT (1<< ONIGENC_CTYPE_PRINT) -#define BIT_CTYPE_PUNCT (1<< ONIGENC_CTYPE_PUNCT) -#define BIT_CTYPE_SPACE (1<< ONIGENC_CTYPE_SPACE) -#define BIT_CTYPE_UPPER (1<< ONIGENC_CTYPE_UPPER) 
-#define BIT_CTYPE_XDIGIT (1<< ONIGENC_CTYPE_XDIGIT) -#define BIT_CTYPE_WORD (1<< ONIGENC_CTYPE_WORD) -#define BIT_CTYPE_ALNUM (1<< ONIGENC_CTYPE_ALNUM) -#define BIT_CTYPE_ASCII (1<< ONIGENC_CTYPE_ASCII) - -#define CTYPE_TO_BIT(ctype) (1<<(ctype)) -#define CTYPE_IS_WORD_GRAPH_PRINT(ctype) \ - ((ctype) == ONIGENC_CTYPE_WORD || (ctype) == ONIGENC_CTYPE_GRAPH ||\ - (ctype) == ONIGENC_CTYPE_PRINT) - - -typedef struct { - UChar *name; - int ctype; - short int len; -} PosixBracketEntryType; - - -/* #define USE_CRNL_AS_LINE_TERMINATOR */ -#define USE_UNICODE_PROPERTIES -/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */ -/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */ - - -#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII - -/* for encoding system implementation (internal) */ -ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); -ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); -ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); -ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); -ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[])); -ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end)); - - -/* methods for single byte encoding */ -ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); -ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p)); -ONIG_EXTERN OnigCodePoint 
onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end)); -ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code)); -ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf)); -ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s)); -ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); -ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); - -/* methods for multi byte encoding */ -ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end)); -ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); -ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code)); -ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); -ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); -ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); -ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); -ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code)); -ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); -ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); - - -/* in enc/unicode.c */ -ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype)); -ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[])); -ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[])); -ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str 
P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); -ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold)); -ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); - - -#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8) -#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc) - -#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \ - OnigEncISO_8859_1_ToLowerCaseTable[c] -#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \ - OnigEncISO_8859_1_ToUpperCaseTable[c] - -ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[]; -ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[]; - -ONIG_EXTERN int -onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n)); -ONIG_EXTERN UChar* -onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n)); - -/* defined in regexec.c, but used in enc/xxx.c */ -extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code)); - -ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; -ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[]; -ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[]; -ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[]; - -#define ONIGENC_IS_ASCII_CODE(code) ((code) < 0x80) -#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c] -#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c] -#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \ - ((OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) -#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \ - (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\ - ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER)) - - -#endif /* REGENC_H */ diff --git 
a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regerror.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regerror.c deleted file mode 100644 index 7bbcd02bcd..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regerror.c +++ /dev/null @@ -1,394 +0,0 @@ -/********************************************************************** - regerror.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2007 K.Kosako - * All rights reserved. - * - * Copyright (c) 2015, Hewlett Packard Enterprise Development, L.P.
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#define HAVE_STDARG_PROTOTYPES - -#include "regint.h" - -#if 0 -#include /* for vsnprintf() */ - -#ifdef HAVE_STDARG_PROTOTYPES -#include -#define va_init_list(a,b) va_start(a,b) -#else -#include -#define va_init_list(a,b) va_start(a) -#endif -#endif - -extern UChar* -onig_error_code_to_format(int code) -{ - char *p; - - if (code >= 0) return (UChar* )0; - - switch (code) { - case ONIG_MISMATCH: - p = "mismatch"; break; - case ONIG_NO_SUPPORT_CONFIG: - p = "no support in this configuration"; break; - case ONIGERR_MEMORY: - p = "fail to memory allocation"; break; - case ONIGERR_MATCH_STACK_LIMIT_OVER: - p = "match-stack limit over"; break; - case ONIGERR_TYPE_BUG: - p = "undefined type (bug)"; break; - case ONIGERR_PARSER_BUG: - p = "internal parser error (bug)"; break; - case ONIGERR_STACK_BUG: - p = "stack error (bug)"; break; - case ONIGERR_UNDEFINED_BYTECODE: - p = "undefined bytecode (bug)"; break; - case ONIGERR_UNEXPECTED_BYTECODE: - p = "unexpected bytecode (bug)"; break; - case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED: - p = "default multibyte-encoding is not setted"; break; - case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR: - p = "can't convert to wide-char on specified multibyte-encoding"; break; - case ONIGERR_INVALID_ARGUMENT: - p = "invalid argument"; break; - case ONIGERR_END_PATTERN_AT_LEFT_BRACE: - p = "end pattern at left brace"; break; - case ONIGERR_END_PATTERN_AT_LEFT_BRACKET: - p = "end pattern at left bracket"; break; - case ONIGERR_EMPTY_CHAR_CLASS: - p = "empty char-class"; break; - case ONIGERR_PREMATURE_END_OF_CHAR_CLASS: - p = "premature end of char-class"; break; - case ONIGERR_END_PATTERN_AT_ESCAPE: - p = "end pattern at escape"; break; - case ONIGERR_END_PATTERN_AT_META: - p = "end pattern at meta"; break; - case ONIGERR_END_PATTERN_AT_CONTROL: - p = "end pattern at control"; break; - case ONIGERR_META_CODE_SYNTAX: - p = "invalid meta-code syntax"; break; - case ONIGERR_CONTROL_CODE_SYNTAX: - p = "invalid control-code 
syntax"; break; - case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE: - p = "char-class value at end of range"; break; - case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE: - p = "char-class value at start of range"; break; - case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS: - p = "unmatched range specifier in char-class"; break; - case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED: - p = "target of repeat operator is not specified"; break; - case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID: - p = "target of repeat operator is invalid"; break; - case ONIGERR_NESTED_REPEAT_OPERATOR: - p = "nested repeat operator"; break; - case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS: - p = "unmatched close parenthesis"; break; - case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS: - p = "end pattern with unmatched parenthesis"; break; - case ONIGERR_END_PATTERN_IN_GROUP: - p = "end pattern in group"; break; - case ONIGERR_UNDEFINED_GROUP_OPTION: - p = "undefined group option"; break; - case ONIGERR_INVALID_POSIX_BRACKET_TYPE: - p = "invalid POSIX bracket type"; break; - case ONIGERR_INVALID_LOOK_BEHIND_PATTERN: - p = "invalid pattern in look-behind"; break; - case ONIGERR_INVALID_REPEAT_RANGE_PATTERN: - p = "invalid repeat range {lower,upper}"; break; - case ONIGERR_TOO_BIG_NUMBER: - p = "too big number"; break; - case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE: - p = "too big number for repeat range"; break; - case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE: - p = "upper is smaller than lower in repeat range"; break; - case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS: - p = "empty range in char class"; break; - case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE: - p = "mismatch multibyte code length in char-class range"; break; - case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES: - p = "too many multibyte code ranges are specified"; break; - case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING: - p = "too short multibyte code string"; break; - case ONIGERR_TOO_BIG_BACKREF_NUMBER: - p = "too big backref number"; break; 
- case ONIGERR_INVALID_BACKREF: -#ifdef USE_NAMED_GROUP - p = "invalid backref number/name"; break; -#else - p = "invalid backref number"; break; -#endif - case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED: - p = "numbered backref/call is not allowed. (use name)"; break; - case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE: - p = "too big wide-char value"; break; - case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE: - p = "too long wide-char value"; break; - case ONIGERR_INVALID_CODE_POINT_VALUE: - p = "invalid code point value"; break; - case ONIGERR_EMPTY_GROUP_NAME: - p = "group name is empty"; break; - case ONIGERR_INVALID_GROUP_NAME: - p = "invalid group name <%n>"; break; - case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: -#ifdef USE_NAMED_GROUP - p = "invalid char in group name <%n>"; break; -#else - p = "invalid char in group number <%n>"; break; -#endif - case ONIGERR_UNDEFINED_NAME_REFERENCE: - p = "undefined name <%n> reference"; break; - case ONIGERR_UNDEFINED_GROUP_REFERENCE: - p = "undefined group <%n> reference"; break; - case ONIGERR_MULTIPLEX_DEFINED_NAME: - p = "multiplex defined name <%n>"; break; - case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: - p = "multiplex definition name <%n> call"; break; - case ONIGERR_NEVER_ENDING_RECURSION: - p = "never ending recursion"; break; - case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY: - p = "group number is too big for capture history"; break; - case ONIGERR_INVALID_CHAR_PROPERTY_NAME: - p = "invalid character property name {%n}"; break; - case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION: - p = "not supported encoding combination"; break; - case ONIGERR_INVALID_COMBINATION_OF_OPTIONS: - p = "invalid combination of options"; break; - case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT: - p = "over thread pass limit count"; break; - - default: - p = "undefined error code"; break; - } - - return (UChar* )p; -} - -static void sprint_byte(char* s, unsigned int v) -{ - sprintf(s, "%02x", (v & 0377)); -} - -static void sprint_byte_with_x(char* s, unsigned int 
v) -{ - sprintf(s, "\\x%02x", (v & 0377)); -} - -static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, - UChar buf[], int buf_size, int *is_over) -{ - int len; - UChar *p; - OnigCodePoint code; - - if (ONIGENC_MBC_MINLEN(enc) > 1) { - p = s; - len = 0; - while (p < end) { - code = ONIGENC_MBC_TO_CODE(enc, p, end); - if (code >= 0x80) { - if (code > 0xffff && len + 10 <= buf_size) { - sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24)); - sprint_byte((char*)(&(buf[len+4])), (unsigned int)(code >> 16)); - sprint_byte((char*)(&(buf[len+6])), (unsigned int)(code >> 8)); - sprint_byte((char*)(&(buf[len+8])), (unsigned int)code); - len += 10; - } - else if (len + 6 <= buf_size) { - sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8)); - sprint_byte((char*)(&(buf[len+4])), (unsigned int)code); - len += 6; - } - else { - break; - } - } - else { - buf[len++] = (UChar )code; - } - - p += enclen(enc, p); - if (len >= buf_size) break; - } - - *is_over = ((p < end) ? 1 : 0); - } - else { - len = MIN((int)(end - s), buf_size); - xmemcpy(buf, s, (size_t )len); - *is_over = ((buf_size < (end - s)) ? 1 : 0); - } - - return len; -} - - -/* for ONIG_MAX_ERROR_MESSAGE_LEN */ -#define MAX_ERROR_PAR_LEN 30 - -extern int -#ifdef HAVE_STDARG_PROTOTYPES -onig_error_code_to_str(UChar* s, int code, ...) 
-#else -onig_error_code_to_str(s, code, va_alist) - UChar* s; - int code; - va_dcl -#endif -{ - UChar *p, *q; - OnigErrorInfo* einfo; - int len, is_over; - UChar parbuf[MAX_ERROR_PAR_LEN]; - va_list vargs; - - va_init_list(vargs, code); - - switch (code) { - case ONIGERR_UNDEFINED_NAME_REFERENCE: - case ONIGERR_UNDEFINED_GROUP_REFERENCE: - case ONIGERR_MULTIPLEX_DEFINED_NAME: - case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: - case ONIGERR_INVALID_GROUP_NAME: - case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: - case ONIGERR_INVALID_CHAR_PROPERTY_NAME: - einfo = va_arg(vargs, OnigErrorInfo*); - len = to_ascii(einfo->enc, einfo->par, einfo->par_end, - parbuf, MAX_ERROR_PAR_LEN - 3, &is_over); - q = onig_error_code_to_format(code); - p = s; - while (*q != '\0') { - if (*q == '%') { - q++; - if (*q == 'n') { /* '%n': name */ - xmemcpy(p, parbuf, len); - p += len; - if (is_over != 0) { - xmemcpy(p, "...", 3); - p += 3; - } - q++; - } - else - goto normal_char; - } - else { - normal_char: - *p++ = *q++; - } - } - *p = '\0'; - len = (int)(p - s); - break; - - default: - q = onig_error_code_to_format(code); - len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q); - xmemcpy(s, q, len); - s[len] = '\0'; - break; - } - - va_end(vargs); - return len; -} - - -void -#ifdef HAVE_STDARG_PROTOTYPES -onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, - UChar* pat, UChar* pat_end, const UChar *fmt, ...) 
-#else -onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) - UChar buf[]; - int bufsize; - OnigEncoding enc; - UChar* pat; - UChar* pat_end; - const UChar *fmt; - va_dcl -#endif -{ - int n, need, len; - UChar *p, *s, *bp; - UChar bs[6]; - va_list args; - - va_init_list(args, fmt); - n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args); - va_end(args); - - need = (int)(pat_end - pat) * 4 + 4; - - if (n + need < bufsize) { - strcat_s((char* )buf, bufsize, ": /"); - s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf); - - p = pat; - while (p < pat_end) { - if (*p == '\\') { - *s++ = *p++; - len = enclen(enc, p); - while (len-- > 0) *s++ = *p++; - } - else if (*p == '/') { - *s++ = (unsigned char )'\\'; - *s++ = *p++; - } - else if (ONIGENC_IS_MBC_HEAD(enc, p)) { - len = enclen(enc, p); - if (ONIGENC_MBC_MINLEN(enc) == 1) { - while (len-- > 0) *s++ = *p++; - } - else { /* for UTF16 */ - int blen; - - while (len-- > 0) { - sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); - blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); - bp = bs; - while (blen-- > 0) *s++ = *bp++; - } - } - } - else if (!ONIGENC_IS_CODE_PRINT(enc, *p) && - !ONIGENC_IS_CODE_SPACE(enc, *p)) { - sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); - len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); - bp = bs; - while (len-- > 0) *s++ = *bp++; - } - else { - *s++ = *p++; - } - } - - *s++ = '/'; - *s = '\0'; - } -} diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regexec.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regexec.c deleted file mode 100644 index 4bfea0bf09..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regexec.c +++ /dev/null @@ -1,3810 +0,0 @@ -/********************************************************************** - regexec.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 
K.Kosako - * All rights reserved. - * - * Copyright (c) 2015, Hewlett Packard Enterprise Development, L.P.
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "regint.h" - -#define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - -#ifdef USE_CRNL_AS_LINE_TERMINATOR -#define ONIGENC_IS_MBC_CRNL(enc,p,end) \ - (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ - ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end)) -#endif - -#ifdef USE_CAPTURE_HISTORY -static void history_tree_free(OnigCaptureTreeNode* node); - -static void -history_tree_clear(OnigCaptureTreeNode* node) -{ - int i; - - if (IS_NOT_NULL(node)) { - for (i = 0; i < node->num_childs; i++) { - if (IS_NOT_NULL(node->childs[i])) { - history_tree_free(node->childs[i]); - } - } - for (i = 0; i < node->allocated; i++) { - node->childs[i] = (OnigCaptureTreeNode* )0; - } - node->num_childs = 0; - node->beg = ONIG_REGION_NOTPOS; - node->end = ONIG_REGION_NOTPOS; - node->group = -1; - } -} - -static void -history_tree_free(OnigCaptureTreeNode* node) -{ - history_tree_clear(node); - xfree(node); -} - -static void -history_root_free(OnigRegion* r) -{ - if (IS_NOT_NULL(r->history_root)) { - history_tree_free(r->history_root); - r->history_root = (OnigCaptureTreeNode* )0; - } -} - -static OnigCaptureTreeNode* -history_node_new(void) -{ - OnigCaptureTreeNode* node; - - node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode)); - CHECK_NULL_RETURN(node); - node->childs = (OnigCaptureTreeNode** )0; - node->allocated = 0; - node->num_childs = 0; - node->group = -1; - node->beg = ONIG_REGION_NOTPOS; - node->end = ONIG_REGION_NOTPOS; - - return node; -} - -static int -history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) -{ -#define HISTORY_TREE_INIT_ALLOC_SIZE 8 - - if (parent->num_childs >= parent->allocated) { - int n, i; - - if (IS_NULL(parent->childs)) { - n = HISTORY_TREE_INIT_ALLOC_SIZE; - parent->childs = - (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n); - } - else { - n = parent->allocated * 2; - parent->childs = - (OnigCaptureTreeNode** )xrealloc(parent->childs, - sizeof(OnigCaptureTreeNode*) * n, - 
sizeof(OnigCaptureTreeNode*) * parent->allocated); - } - CHECK_NULL_RETURN_MEMERR(parent->childs); - for (i = parent->allocated; i < n; i++) { - parent->childs[i] = (OnigCaptureTreeNode* )0; - } - parent->allocated = n; - } - - parent->childs[parent->num_childs] = child; - parent->num_childs++; - return 0; -} - -static OnigCaptureTreeNode* -history_tree_clone(OnigCaptureTreeNode* node) -{ - int i; - OnigCaptureTreeNode *clone, *child; - - clone = history_node_new(); - CHECK_NULL_RETURN(clone); - - clone->beg = node->beg; - clone->end = node->end; - for (i = 0; i < node->num_childs; i++) { - child = history_tree_clone(node->childs[i]); - if (IS_NULL(child)) { - history_tree_free(clone); - return (OnigCaptureTreeNode* )0; - } - history_tree_add_child(clone, child); - } - - return clone; -} - -extern OnigCaptureTreeNode* -onig_get_capture_tree(OnigRegion* region) -{ - return region->history_root; -} -#endif /* USE_CAPTURE_HISTORY */ - -extern void -onig_region_clear(OnigRegion* region) -{ - int i; - - for (i = 0; i < region->num_regs; i++) { - region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; - } -#ifdef USE_CAPTURE_HISTORY - history_root_free(region); -#endif -} - -extern int -onig_region_resize(OnigRegion* region, int n) -{ - region->num_regs = n; - - if (n < ONIG_NREGION) - n = ONIG_NREGION; - - if (region->allocated == 0) { - region->beg = (int* )xmalloc(n * sizeof(int)); - region->end = (int* )xmalloc(n * sizeof(int)); - - if (region->beg == 0 || region->end == 0) - return ONIGERR_MEMORY; - - region->allocated = n; - } - else if (region->allocated < n) { - region->beg = (int* )xrealloc(region->beg, n * sizeof(int), region->allocated * sizeof(int)); - region->end = (int* )xrealloc(region->end, n * sizeof(int), region->allocated * sizeof(int)); - - if (region->beg == 0 || region->end == 0) - return ONIGERR_MEMORY; - - region->allocated = n; - } - - return 0; -} - -static int -onig_region_resize_clear(OnigRegion* region, int n) -{ - int r; - - r = 
onig_region_resize(region, n); - if (r != 0) return r; - onig_region_clear(region); - return 0; -} - -extern int -onig_region_set(OnigRegion* region, int at, int beg, int end) -{ - if (at < 0) return ONIGERR_INVALID_ARGUMENT; - - if (at >= region->allocated) { - int r = onig_region_resize(region, at + 1); - if (r < 0) return r; - } - - region->beg[at] = beg; - region->end[at] = end; - return 0; -} - -extern void -onig_region_init(OnigRegion* region) -{ - region->num_regs = 0; - region->allocated = 0; - region->beg = (int* )0; - region->end = (int* )0; - region->history_root = (OnigCaptureTreeNode* )0; -} - -extern OnigRegion* -onig_region_new(void) -{ - OnigRegion* r; - - r = (OnigRegion* )xmalloc(sizeof(OnigRegion)); - onig_region_init(r); - return r; -} - -extern void -onig_region_free(OnigRegion* r, int free_self) -{ - if (r) { - if (r->allocated > 0) { - if (r->beg) xfree(r->beg); - if (r->end) xfree(r->end); - r->allocated = 0; - } -#ifdef USE_CAPTURE_HISTORY - history_root_free(r); -#endif - if (free_self) xfree(r); - } -} - -extern void -onig_region_copy(OnigRegion* to, OnigRegion* from) -{ -#define RREGC_SIZE (sizeof(int) * from->num_regs) - int i; - - if (to == from) return; - - if (to->allocated == 0) { - if (from->num_regs > 0) { - to->beg = (int* )xmalloc(RREGC_SIZE); - to->end = (int* )xmalloc(RREGC_SIZE); - to->allocated = from->num_regs; - } - } - else if (to->allocated < from->num_regs) { - to->beg = (int* )xrealloc(to->beg, RREGC_SIZE, sizeof(int) * to->allocated); - to->end = (int* )xrealloc(to->end, RREGC_SIZE, sizeof(int) * to->allocated); - to->allocated = from->num_regs; - } - - for (i = 0; i < from->num_regs; i++) { - to->beg[i] = from->beg[i]; - to->end[i] = from->end[i]; - } - to->num_regs = from->num_regs; - -#ifdef USE_CAPTURE_HISTORY - history_root_free(to); - - if (IS_NOT_NULL(from->history_root)) { - to->history_root = history_tree_clone(from->history_root); - } -#endif -} - - -/** stack **/ -#define INVALID_STACK_INDEX -1 - -/* stack 
type */ -/* used by normal-POP */ -#define STK_ALT 0x0001 -#define STK_LOOK_BEHIND_NOT 0x0002 -#define STK_POS_NOT 0x0003 -/* handled by normal-POP */ -#define STK_MEM_START 0x0100 -#define STK_MEM_END 0x8200 -#define STK_REPEAT_INC 0x0300 -#define STK_STATE_CHECK_MARK 0x1000 -/* avoided by normal-POP */ -#define STK_NULL_CHECK_START 0x3000 -#define STK_NULL_CHECK_END 0x5000 /* for recursive call */ -#define STK_MEM_END_MARK 0x8400 -#define STK_POS 0x0500 /* used when POP-POS */ -#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ -#define STK_REPEAT 0x0700 -#define STK_CALL_FRAME 0x0800 -#define STK_RETURN 0x0900 -#define STK_VOID 0x0a00 /* for fill a blank */ - -/* stack type check mask */ -#define STK_MASK_POP_USED 0x00ff -#define STK_MASK_TO_VOID_TARGET 0x10ff -#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ - -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ - (msa).stack_p = (void* )0;\ - (msa).options = (arg_option);\ - (msa).region = (arg_region);\ - (msa).start = (arg_start);\ - (msa).best_len = ONIG_MISMATCH;\ -} while(0) -#else -#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ - (msa).stack_p = (void* )0;\ - (msa).options = (arg_option);\ - (msa).region = (arg_region);\ - (msa).start = (arg_start);\ -} while(0) -#endif - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 - -#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ - if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ - unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ - offset = ((offset) * (state_num)) >> 3;\ - if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ - if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \ - (msa).state_check_buff = (void* )xmalloc(size);\ - else \ - (msa).state_check_buff = (void* )xalloca(size);\ - xmemset(((char* 
)((msa).state_check_buff)+(offset)), 0, \ - (size_t )(size - (offset))); \ - (msa).state_check_buff_size = size;\ - }\ - else {\ - (msa).state_check_buff = (void* )0;\ - (msa).state_check_buff_size = 0;\ - }\ - }\ - else {\ - (msa).state_check_buff = (void* )0;\ - (msa).state_check_buff_size = 0;\ - }\ - } while(0) - -#define MATCH_ARG_FREE(msa) do {\ - if ((msa).stack_p) xfree((msa).stack_p);\ - if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ - if ((msa).state_check_buff) xfree((msa).state_check_buff);\ - }\ -} while(0) -#else -#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) -#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) -#endif - - - -#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\ - if (msa->stack_p) {\ - alloc_addr = (char* )xmalloc(sizeof(char*) * (ptr_num));\ - stk_alloc = (OnigStackType* )(msa->stack_p);\ - stk_base = stk_alloc;\ - stk = stk_base;\ - stk_end = stk_base + msa->stack_n;\ - }\ - else {\ - alloc_addr = (char* )xmalloc(sizeof(char*) * (ptr_num)\ - + sizeof(OnigStackType) * (stack_num));\ - stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\ - stk_base = stk_alloc;\ - stk = stk_base;\ - stk_end = stk_base + (stack_num);\ - }\ -} while(0) - -#define STACK_SAVE do{\ - if (stk_base != stk_alloc) {\ - msa->stack_p = stk_base;\ - msa->stack_n = (int)(stk_end - stk_base);\ - };\ -} while(0) - -static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; - -extern unsigned int -onig_get_match_stack_limit_size(void) -{ - return MatchStackLimitSize; -} - -extern int -onig_set_match_stack_limit_size(unsigned int size) -{ - MatchStackLimitSize = size; - return 0; -} - -static int -stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, - OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa) -{ - unsigned int n; - OnigStackType *x, *stk_base, *stk_end, *stk; - - stk_base = *arg_stk_base; - stk_end = *arg_stk_end; - stk 
= *arg_stk; - - n = (unsigned int)(stk_end - stk_base); - if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) { - x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2); - if (IS_NULL(x)) { - STACK_SAVE; - return ONIGERR_MEMORY; - } - xmemcpy(x, stk_base, n * sizeof(OnigStackType)); - n *= 2; - } - else { - n *= 2; - if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) { - if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize) - return ONIGERR_MATCH_STACK_LIMIT_OVER; - else - n = MatchStackLimitSize; - } - x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n, sizeof(OnigStackType) * (stk_end - stk_base)); - if (IS_NULL(x)) { - STACK_SAVE; - return ONIGERR_MEMORY; - } - } - *arg_stk = x + (stk - stk_base); - *arg_stk_base = x; - *arg_stk_end = x + n; - return 0; -} - -#define STACK_ENSURE(n) do {\ - if (stk_end - stk < (n)) {\ - int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\ - if (r != 0) { STACK_SAVE; return r; } \ - }\ -} while(0) - -#define STACK_AT(index) (stk_base + (index)) -#define GET_STACK_INDEX(stk) ((OnigStackIndex)((stk) - stk_base)) - -#define STACK_PUSH_TYPE(stack_type) do {\ - STACK_ENSURE(1);\ - stk->type = (stack_type);\ - STACK_INC;\ -} while(0) - -#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) - -#ifdef USE_COMBINATION_EXPLOSION_CHECK -#define STATE_CHECK_POS(s,snum) \ - (((s) - str) * num_comb_exp_check + ((snum) - 1)) -#define STATE_CHECK_VAL(v,snum) do {\ - if (state_check_buff != NULL) {\ - int x = STATE_CHECK_POS(s,snum);\ - (v) = state_check_buff[x/8] & (1<<(x%8));\ - }\ - else (v) = 0;\ -} while(0) - - -#define ELSE_IF_STATE_CHECK_MARK(stk) \ - else if ((stk)->type == STK_STATE_CHECK_MARK) { \ - int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ - state_check_buff[x/8] |= (1<<(x%8)); \ - } - -#define STACK_PUSH(stack_type,pat,s,sprev) do {\ - STACK_ENSURE(1);\ - stk->type = (stack_type);\ - stk->u.state.pcode = (pat);\ - stk->u.state.pstr = 
(s);\ - stk->u.state.pstr_prev = (sprev);\ - stk->u.state.state_check = 0;\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_ENSURED(stack_type,pat) do {\ - stk->type = (stack_type);\ - stk->u.state.pcode = (pat);\ - stk->u.state.state_check = 0;\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\ - STACK_ENSURE(1);\ - stk->type = STK_ALT;\ - stk->u.state.pcode = (pat);\ - stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ - stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_STATE_CHECK(s,snum) do {\ - if (state_check_buff != NULL) {\ - STACK_ENSURE(1);\ - stk->type = STK_STATE_CHECK_MARK;\ - stk->u.state.pstr = (s);\ - stk->u.state.state_check = (snum);\ - STACK_INC;\ - }\ -} while(0) - -#else /* USE_COMBINATION_EXPLOSION_CHECK */ - -#define ELSE_IF_STATE_CHECK_MARK(stk) - -#define STACK_PUSH(stack_type,pat,s,sprev) do {\ - STACK_ENSURE(1);\ - stk->type = (stack_type);\ - stk->u.state.pcode = (pat);\ - stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_ENSURED(stack_type,pat) do {\ - stk->type = (stack_type);\ - stk->u.state.pcode = (pat);\ - STACK_INC;\ -} while(0) -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - -#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) -#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev) -#define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev) -#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT) -#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \ - STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev) - -#define STACK_PUSH_REPEAT(id, pat) do {\ - STACK_ENSURE(1);\ - stk->type = STK_REPEAT;\ - stk->u.repeat.num = (id);\ - stk->u.repeat.pcode = (pat);\ - stk->u.repeat.count = 0;\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_REPEAT_INC(sindex) do {\ - STACK_ENSURE(1);\ - stk->type = STK_REPEAT_INC;\ - 
stk->u.repeat_inc.si = (sindex);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_MEM_START(mnum, s) do {\ - STACK_ENSURE(1);\ - stk->type = STK_MEM_START;\ - stk->u.mem.num = (int)(mnum);\ - stk->u.mem.pstr = (s);\ - stk->u.mem.start = mem_start_stk[mnum];\ - stk->u.mem.end = mem_end_stk[mnum];\ - mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ - mem_end_stk[mnum] = INVALID_STACK_INDEX;\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_MEM_END(mnum, s) do {\ - STACK_ENSURE(1);\ - stk->type = STK_MEM_END;\ - stk->u.mem.num = (mnum);\ - stk->u.mem.pstr = (s);\ - stk->u.mem.start = mem_start_stk[mnum];\ - stk->u.mem.end = mem_end_stk[mnum];\ - mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_MEM_END_MARK(mnum) do {\ - STACK_ENSURE(1);\ - stk->type = STK_MEM_END_MARK;\ - stk->u.mem.num = (mnum);\ - STACK_INC;\ -} while(0) - -#define STACK_GET_MEM_START(mnum, k) do {\ - int level = 0;\ - k = stk;\ - while (k > stk_base) {\ - k--;\ - if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ - && k->u.mem.num == (mnum)) {\ - level++;\ - }\ - else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ - if (level == 0) break;\ - level--;\ - }\ - }\ -} while(0) - -#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\ - int level = 0;\ - while (k < stk) {\ - if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ - if (level == 0) (start) = k->u.mem.pstr;\ - level++;\ - }\ - else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\ - level--;\ - if (level == 0) {\ - (end) = k->u.mem.pstr;\ - break;\ - }\ - }\ - k++;\ - }\ -} while(0) - -#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ - STACK_ENSURE(1);\ - stk->type = STK_NULL_CHECK_START;\ - stk->u.null_check.num = (cnum);\ - stk->u.null_check.pstr = (s);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_NULL_CHECK_END(cnum) do {\ - STACK_ENSURE(1);\ - stk->type = STK_NULL_CHECK_END;\ - stk->u.null_check.num = (cnum);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_CALL_FRAME(pat) 
do {\ - STACK_ENSURE(1);\ - stk->type = STK_CALL_FRAME;\ - stk->u.call_frame.ret_addr = (pat);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_RETURN do {\ - STACK_ENSURE(1);\ - stk->type = STK_RETURN;\ - STACK_INC;\ -} while(0) - - -#ifdef ONIG_DEBUG -#define STACK_BASE_CHECK(p, at) \ - if ((p) < stk_base) {\ - fprintf(stderr, "at %s\n", at);\ - goto stack_error;\ - } -#else -#define STACK_BASE_CHECK(p, at) -#endif - -#define STACK_POP_ONE do {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ -} while(0) - -#define STACK_POP do {\ - switch (pop_level) {\ - case STACK_POP_LEVEL_FREE:\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP"); \ - if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - ELSE_IF_STATE_CHECK_MARK(stk);\ - }\ - break;\ - case STACK_POP_LEVEL_MEM_START:\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP 2"); \ - if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ - }\ - break;\ - default:\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP 3"); \ - if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ - }\ - break;\ - }\ -} while(0) - -#define STACK_POP_TIL_POS_NOT do {\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \ - if (stk->type == STK_POS_NOT) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = 
stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ - }\ -} while(0) - -#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \ - if (stk->type == STK_LOOK_BEHIND_NOT) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ - }\ -} while(0) - -#define STACK_POS_END(k) do {\ - k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_POS_END"); \ - if (IS_TO_VOID_TARGET(k)) {\ - k->type = STK_VOID;\ - }\ - else if (k->type == STK_POS) {\ - k->type = STK_VOID;\ - break;\ - }\ - }\ -} while(0) - -#define STACK_STOP_BT_END do {\ - OnigStackType *k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ - if (IS_TO_VOID_TARGET(k)) {\ - k->type = STK_VOID;\ - }\ - else if (k->type == STK_STOP_BT) {\ - k->type = STK_VOID;\ - break;\ - }\ - }\ -} while(0) - -#define STACK_NULL_CHECK(isnull,id,s) do {\ - OnigStackType* k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ - (isnull) = (k->u.null_check.pstr == (s));\ - break;\ - }\ - }\ - }\ -} while(0) - -#define STACK_NULL_CHECK_REC(isnull,id,s) do {\ - int level = 0;\ - OnigStackType* k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \ - if (k->type == 
STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ - if (level == 0) {\ - (isnull) = (k->u.null_check.pstr == (s));\ - break;\ - }\ - else level--;\ - }\ - }\ - else if (k->type == STK_NULL_CHECK_END) {\ - level++;\ - }\ - }\ -} while(0) - -#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ - OnigStackType* k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ - if (k->u.null_check.pstr != (s)) {\ - (isnull) = 0;\ - break;\ - }\ - else {\ - UChar* endp;\ - (isnull) = 1;\ - while (k < stk) {\ - if (k->type == STK_MEM_START) {\ - if (k->u.mem.end == INVALID_STACK_INDEX) {\ - (isnull) = 0; break;\ - }\ - if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ - endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ - else\ - endp = (UChar* )k->u.mem.end;\ - if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ - (isnull) = 0; break;\ - }\ - else if (endp != s) {\ - (isnull) = -1; /* empty, but position changed */ \ - }\ - }\ - k++;\ - }\ - break;\ - }\ - }\ - }\ - }\ -} while(0) - -#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\ - int level = 0;\ - OnigStackType* k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ - if (level == 0) {\ - if (k->u.null_check.pstr != (s)) {\ - (isnull) = 0;\ - break;\ - }\ - else {\ - UChar* endp;\ - (isnull) = 1;\ - while (k < stk) {\ - if (k->type == STK_MEM_START) {\ - if (k->u.mem.end == INVALID_STACK_INDEX) {\ - (isnull) = 0; break;\ - }\ - if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ - endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ - else\ - endp = (UChar* )k->u.mem.end;\ - if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ - (isnull) = 0; break;\ - }\ - else if (endp != s) {\ - (isnull) = -1; /* empty, but position changed */ \ - }\ - }\ - k++;\ - }\ - break;\ - }\ - }\ - else {\ - level--;\ - }\ 
- }\ - }\ - else if (k->type == STK_NULL_CHECK_END) {\ - if (k->u.null_check.num == (id)) level++;\ - }\ - }\ -} while(0) - -#define STACK_GET_REPEAT(id, k) do {\ - int level = 0;\ - k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \ - if (k->type == STK_REPEAT) {\ - if (level == 0) {\ - if (k->u.repeat.num == (id)) {\ - break;\ - }\ - }\ - }\ - else if (k->type == STK_CALL_FRAME) level--;\ - else if (k->type == STK_RETURN) level++;\ - }\ -} while(0) - -#define STACK_RETURN(addr) do {\ - int level = 0;\ - OnigStackType* k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_RETURN"); \ - if (k->type == STK_CALL_FRAME) {\ - if (level == 0) {\ - (addr) = k->u.call_frame.ret_addr;\ - break;\ - }\ - else level--;\ - }\ - else if (k->type == STK_RETURN)\ - level++;\ - }\ -} while(0) - - -#define STRING_CMP(s1,s2,len) do {\ - while (len-- > 0) {\ - if (*s1++ != *s2++) goto fail;\ - }\ -} while(0) - -#define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\ - if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \ - goto fail; \ -} while(0) - -static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, - UChar* s1, UChar** ps2, int mblen) -{ - UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - UChar *p1, *p2, *end1, *s2, *end2; - int len1, len2; - - s2 = *ps2; - end1 = s1 + mblen; - end2 = s2 + mblen; - while (s1 < end1) { - len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1); - len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2); - if (len1 != len2) return 0; - p1 = buf1; - p2 = buf2; - while (len1-- > 0) { - if (*p1 != *p2) return 0; - p1++; - p2++; - } - } - - *ps2 = s2; - return 1; -} - -#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\ - is_fail = 0;\ - while (len-- > 0) {\ - if (*s1++ != *s2++) {\ - is_fail = 1; break;\ - }\ - }\ -} while(0) - -#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\ - if (string_cmp_ic(encode, case_fold_flag, s1, 
ps2, len) == 0) \ - is_fail = 1; \ - else \ - is_fail = 0; \ -} while(0) - - -#define IS_EMPTY_STR (str == end) -#define ON_STR_BEGIN(s) ((s) == str) -#define ON_STR_END(s) ((s) == end) -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE -#define DATA_ENSURE_CHECK1 (s < right_range) -#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) -#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail -#else -#define DATA_ENSURE_CHECK1 (s < end) -#define DATA_ENSURE_CHECK(n) (s + (n) <= end) -#define DATA_ENSURE(n) if (s + (n) > end) goto fail -#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ - - -#ifdef USE_CAPTURE_HISTORY -static int -make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, - OnigStackType* stk_top, UChar* str, regex_t* reg) -{ - int n, r; - OnigCaptureTreeNode* child; - OnigStackType* k = *kp; - - while (k < stk_top) { - if (k->type == STK_MEM_START) { - n = k->u.mem.num; - if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && - BIT_STATUS_AT(reg->capture_history, n) != 0) { - child = history_node_new(); - CHECK_NULL_RETURN_MEMERR(child); - child->group = n; - child->beg = (int )(k->u.mem.pstr - str); - r = history_tree_add_child(node, child); - if (r != 0) return r; - *kp = (k + 1); - r = make_capture_history_tree(child, kp, stk_top, str, reg); - if (r != 0) return r; - - k = *kp; - child->end = (int )(k->u.mem.pstr - str); - } - } - else if (k->type == STK_MEM_END) { - if (k->u.mem.num == node->group) { - node->end = (int )(k->u.mem.pstr - str); - *kp = k; - return 0; - } - } - k++; - } - - return 1; /* 1: root node ending. 
*/ -} -#endif - -#ifdef USE_BACKREF_WITH_LEVEL -static int mem_is_in_memp(int mem, int num, UChar* memp) -{ - int i; - MemNumType m; - - for (i = 0; i < num; i++) { - GET_MEMNUM_INC(m, memp); - if (mem == (int )m) return 1; - } - return 0; -} - -static int backref_match_at_nested_level(regex_t* reg - , OnigStackType* top, OnigStackType* stk_base - , int ignore_case, int case_fold_flag - , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) -{ - UChar *ss, *p, *pstart, *pend = NULL_UCHARP; - int level; - OnigStackType* k; - - level = 0; - k = top; - k--; - while (k >= stk_base) { - if (k->type == STK_CALL_FRAME) { - level--; - } - else if (k->type == STK_RETURN) { - level++; - } - else if (level == nest) { - if (k->type == STK_MEM_START) { - if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { - pstart = k->u.mem.pstr; - if (pend != NULL_UCHARP) { - if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ - p = pstart; - ss = *s; - - if (ignore_case != 0) { - if (string_cmp_ic(reg->enc, case_fold_flag, - pstart, &ss, (int )(pend - pstart)) == 0) - return 0; /* or goto next_mem; */ - } - else { - while (p < pend) { - if (*p++ != *ss++) return 0; /* or goto next_mem; */ - } - } - - *s = ss; - return 1; - } - } - } - else if (k->type == STK_MEM_END) { - if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { - pend = k->u.mem.pstr; - } - } - } - k--; - } - - return 0; -} -#endif /* USE_BACKREF_WITH_LEVEL */ - - -#ifdef ONIG_DEBUG_STATISTICS - -#define USE_TIMEOFDAY - -#ifdef USE_TIMEOFDAY -#ifdef HAVE_SYS_TIME_H -#include -#endif -#ifdef HAVE_UNISTD_H -#include -#endif -static struct timeval ts, te; -#define GETTIME(t) gettimeofday(&(t), (struct timezone* )0) -#define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \ - (((te).tv_sec - (ts).tv_sec)*1000000)) -#else -#ifdef HAVE_SYS_TIMES_H -#include -#endif -static struct tms ts, te; -#define GETTIME(t) times(&(t)) -#define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime) -#endif - -static int 
OpCounter[256]; -static int OpPrevCounter[256]; -static unsigned long OpTime[256]; -static int OpCurr = OP_FINISH; -static int OpPrevTarget = OP_FAIL; -static int MaxStackDepth = 0; - -#define MOP_IN(opcode) do {\ - if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ - OpCurr = opcode;\ - OpCounter[opcode]++;\ - GETTIME(ts);\ -} while(0) - -#define MOP_OUT do {\ - GETTIME(te);\ - OpTime[OpCurr] += TIMEDIFF(te, ts);\ -} while(0) - -extern void -onig_statistics_init(void) -{ - int i; - for (i = 0; i < 256; i++) { - OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0; - } - MaxStackDepth = 0; -} - -extern void -onig_print_statistics(FILE* f) -{ - int i; - fprintf(f, " count prev time\n"); - for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { - fprintf(f, "%8d: %8d: %10ld: %s\n", - OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); - } - fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); -} - -#define STACK_INC do {\ - stk++;\ - if (stk - stk_base > MaxStackDepth) \ - MaxStackDepth = stk - stk_base;\ -} while(0) - -#else -#define STACK_INC stk++ - -#define MOP_IN(opcode) -#define MOP_OUT -#endif - - -/* matching region of POSIX API */ -typedef int regoff_t; - -typedef struct { - regoff_t rm_so; - regoff_t rm_eo; -} posix_regmatch_t; - -/* match data(str - end) from position (sstart). */ -/* if sstart == str then set sprev to NULL. 
*/ -static int -match_at(regex_t* reg, const UChar* str, const UChar* end, -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - const UChar* right_range, -#endif - const UChar* sstart, UChar* sprev, OnigMatchArg* msa) -{ - static UChar FinishCode[] = { OP_FINISH }; - - int i, n, num_mem, best_len, pop_level; - LengthType tlen, tlen2; - MemNumType mem; - RelAddrType addr; - OnigOptionType option = reg->options; - OnigEncoding encode = reg->enc; - OnigCaseFoldType case_fold_flag = reg->case_fold_flag; - UChar *s, *q, *sbegin; - UChar *p = reg->p; - char *alloca_base; - OnigStackType *stk_alloc, *stk_base, *stk, *stk_end; - OnigStackType *stkp; /* used as any purpose. */ - OnigStackIndex si; - OnigStackIndex *repeat_stk; - OnigStackIndex *mem_start_stk, *mem_end_stk; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - int scv; - unsigned char* state_check_buff = msa->state_check_buff; - int num_comb_exp_check = reg->num_comb_exp_check; -#endif - n = reg->num_repeat + reg->num_mem * 2; - - STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); - pop_level = reg->stack_pop_level; - num_mem = reg->num_mem; - repeat_stk = (OnigStackIndex* )alloca_base; - - mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat); - mem_end_stk = mem_start_stk + num_mem; - mem_start_stk--; /* for index start from 1, - mem_start_stk[1]..mem_start_stk[num_mem] */ - mem_end_stk--; /* for index start from 1, - mem_end_stk[1]..mem_end_stk[num_mem] */ - for (i = 1; i <= num_mem; i++) { - mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; - } - -#ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n", - (int )str, (int )end, (int )sstart, (int )sprev); - fprintf(stderr, "size: %d, start offset: %d\n", - (int )(end - str), (int )(sstart - str)); -#endif - - STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */ - best_len = ONIG_MISMATCH; - s = (UChar* )sstart; - while (1) { -#ifdef ONIG_DEBUG_MATCH - { - UChar *q, *bp, buf[50]; - int len; - 
fprintf(stderr, "%4d> \"", (int )(s - str)); - bp = buf; - for (i = 0, q = s; i < 7 && q < end; i++) { - len = enclen(encode, q); - while (len-- > 0) *bp++ = *q++; - } - if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } - else { xmemcpy(bp, "\"", 1); bp += 1; } - *bp = 0; - fputs((char* )buf, stderr); - for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); - onig_print_compiled_byte_code(stderr, p, NULL, encode); - fprintf(stderr, "\n"); - } -#endif - - sbegin = s; - switch (*p++) { - case OP_END: MOP_IN(OP_END); - n = (int)(s - sstart); - if (n > best_len) { - OnigRegion* region; -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - if (IS_FIND_LONGEST(option)) { - if (n > msa->best_len) { - msa->best_len = n; - msa->best_s = (UChar* )sstart; - } - else - goto end_best_len; - } -#endif - best_len = n; - region = msa->region; - if (region) { -#ifdef USE_POSIX_API_REGION_OPTION - if (IS_POSIX_REGION(msa->options)) { - posix_regmatch_t* rmt = (posix_regmatch_t* )region; - - rmt[0].rm_so = (regoff_t)(sstart - str); - rmt[0].rm_eo = (regoff_t)(s - str); - for (i = 1; i <= num_mem; i++) { - if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (BIT_STATUS_AT(reg->bt_mem_start, i)) - rmt[i].rm_so = (regoff_t)(STACK_AT(mem_start_stk[i])->u.mem.pstr - str); - else - rmt[i].rm_so = (regoff_t)((UChar* )((void* )(mem_start_stk[i])) - str); - - rmt[i].rm_eo = (regoff_t)((BIT_STATUS_AT(reg->bt_mem_end, i) - ? 
STACK_AT(mem_end_stk[i])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[i])) - str); - } - else { - rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS; - } - } - } - else { -#endif /* USE_POSIX_API_REGION_OPTION */ - region->beg[0] = (int)(sstart - str); - region->end[0] = (int)(s - str); - for (i = 1; i <= num_mem; i++) { - if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (BIT_STATUS_AT(reg->bt_mem_start, i)) - region->beg[i] = (int)(STACK_AT(mem_start_stk[i])->u.mem.pstr - str); - else - region->beg[i] = (int)((UChar* )((void* )mem_start_stk[i]) - str); - - region->end[i] = (int)((BIT_STATUS_AT(reg->bt_mem_end, i) - ? STACK_AT(mem_end_stk[i])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[i])) - str); - } - else { - region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; - } - } - -#ifdef USE_CAPTURE_HISTORY - if (reg->capture_history != 0) { - int r; - OnigCaptureTreeNode* node; - - if (IS_NULL(region->history_root)) { - region->history_root = node = history_node_new(); - CHECK_NULL_RETURN_MEMERR(node); - } - else { - node = region->history_root; - history_tree_clear(node); - } - - node->group = 0; - node->beg = (int)(sstart - str); - node->end = (int)(s - str); - - stkp = stk_base; - r = make_capture_history_tree(region->history_root, &stkp, - stk, (UChar* )str, reg); - if (r < 0) { - best_len = r; /* error code */ - goto finish; - } - } -#endif /* USE_CAPTURE_HISTORY */ -#ifdef USE_POSIX_API_REGION_OPTION - } /* else IS_POSIX_REGION() */ -#endif - } /* if (region) */ - } /* n > best_len */ - -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - end_best_len: -#endif - MOP_OUT; - - if (IS_FIND_CONDITION(option)) { - if (IS_FIND_NOT_EMPTY(option) && s == sstart) { - best_len = ONIG_MISMATCH; - goto fail; /* for retry */ - } - if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { - goto fail; /* for retry */ - } - } - - /* default behavior: return first-matching result. 
*/ - goto finish; - break; - - case OP_EXACT1: MOP_IN(OP_EXACT1); -#if 0 - DATA_ENSURE(1); - if (*p != *s) goto fail; - p++; s++; -#endif - if (*p != *s++) goto fail; - DATA_ENSURE(0); - p++; - MOP_OUT; - break; - - case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC); - { - int len; - UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - - DATA_ENSURE(1); - len = ONIGENC_MBC_CASE_FOLD(encode, - /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ - case_fold_flag, - &s, end, lowbuf); - DATA_ENSURE(0); - q = lowbuf; - while (len-- > 0) { - if (*p != *q) { - goto fail; - } - p++; q++; - } - } - MOP_OUT; - break; - - case OP_EXACT2: MOP_IN(OP_EXACT2); - DATA_ENSURE(2); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - sprev = s; - p++; s++; - MOP_OUT; - continue; - break; - - case OP_EXACT3: MOP_IN(OP_EXACT3); - DATA_ENSURE(3); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - sprev = s; - p++; s++; - MOP_OUT; - continue; - break; - - case OP_EXACT4: MOP_IN(OP_EXACT4); - DATA_ENSURE(4); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - sprev = s; - p++; s++; - MOP_OUT; - continue; - break; - - case OP_EXACT5: MOP_IN(OP_EXACT5); - DATA_ENSURE(5); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - sprev = s; - p++; s++; - MOP_OUT; - continue; - break; - - case OP_EXACTN: MOP_IN(OP_EXACTN); - GET_LENGTH_INC(tlen, p); - DATA_ENSURE(tlen); - while (tlen-- > 0) { - if (*p++ != *s++) goto fail; - } - sprev = s - 1; - MOP_OUT; - continue; - break; - - case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC); - { - int len; - UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - - GET_LENGTH_INC(tlen, p); - endp = p + tlen; - - while (p < endp) { - sprev = s; - DATA_ENSURE(1); - len = 
ONIGENC_MBC_CASE_FOLD(encode, - /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ - case_fold_flag, - &s, end, lowbuf); - DATA_ENSURE(0); - q = lowbuf; - while (len-- > 0) { - if (*p != *q) goto fail; - p++; q++; - } - } - } - - MOP_OUT; - continue; - break; - - case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1); - DATA_ENSURE(2); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - MOP_OUT; - break; - - case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2); - DATA_ENSURE(4); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - sprev = s; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - MOP_OUT; - continue; - break; - - case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3); - DATA_ENSURE(6); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - sprev = s; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - MOP_OUT; - continue; - break; - - case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N); - GET_LENGTH_INC(tlen, p); - DATA_ENSURE(tlen * 2); - while (tlen-- > 0) { - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - } - sprev = s - 2; - MOP_OUT; - continue; - break; - - case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N); - GET_LENGTH_INC(tlen, p); - DATA_ENSURE(tlen * 3); - while (tlen-- > 0) { - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - } - sprev = s - 3; - MOP_OUT; - continue; - break; - - case OP_EXACTMBN: MOP_IN(OP_EXACTMBN); - GET_LENGTH_INC(tlen, p); /* mb-len */ - GET_LENGTH_INC(tlen2, p); /* string len */ - tlen2 *= tlen; - DATA_ENSURE(tlen2); - while (tlen2-- > 0) { - if (*p != *s) goto fail; - p++; s++; - } - sprev = s - tlen; - MOP_OUT; - continue; - break; - - case OP_CCLASS: MOP_IN(OP_CCLASS); - DATA_ENSURE(1); - if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; - p += SIZE_BITSET; - s += 
enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ - MOP_OUT; - break; - - case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB); - if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; - - cclass_mb: - GET_LENGTH_INC(tlen, p); - { - OnigCodePoint code; - UChar *ss; - int mb_len; - - DATA_ENSURE(1); - mb_len = enclen(encode, s); - DATA_ENSURE(mb_len); - ss = s; - s += mb_len; - code = ONIGENC_MBC_TO_CODE(encode, ss, s); - -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS - if (! onig_is_in_code_range(p, code)) goto fail; -#else - q = p; - ALIGNMENT_RIGHT(q); - if (! onig_is_in_code_range(q, code)) goto fail; -#endif - } - p += tlen; - MOP_OUT; - break; - - case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX); - DATA_ENSURE(1); - if (ONIGENC_IS_MBC_HEAD(encode, s)) { - p += SIZE_BITSET; - goto cclass_mb; - } - else { - if (BITSET_AT(((BitSetRef )p), *s) == 0) - goto fail; - - p += SIZE_BITSET; - GET_LENGTH_INC(tlen, p); - p += tlen; - s++; - } - MOP_OUT; - break; - - case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT); - DATA_ENSURE(1); - if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; - p += SIZE_BITSET; - s += enclen(encode, s); - MOP_OUT; - break; - - case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT); - DATA_ENSURE(1); - if (! ONIGENC_IS_MBC_HEAD(encode, s)) { - s++; - GET_LENGTH_INC(tlen, p); - p += tlen; - goto cc_mb_not_success; - } - - cclass_mb_not: - GET_LENGTH_INC(tlen, p); - { - OnigCodePoint code; - UChar *ss; - int mb_len = enclen(encode, s); - - if (! 
DATA_ENSURE_CHECK(mb_len)) { - DATA_ENSURE(1); - s = (UChar* )end; - p += tlen; - goto cc_mb_not_success; - } - - ss = s; - s += mb_len; - code = ONIGENC_MBC_TO_CODE(encode, ss, s); - -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS - if (onig_is_in_code_range(p, code)) goto fail; -#else - q = p; - ALIGNMENT_RIGHT(q); - if (onig_is_in_code_range(q, code)) goto fail; -#endif - } - p += tlen; - - cc_mb_not_success: - MOP_OUT; - break; - - case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT); - DATA_ENSURE(1); - if (ONIGENC_IS_MBC_HEAD(encode, s)) { - p += SIZE_BITSET; - goto cclass_mb_not; - } - else { - if (BITSET_AT(((BitSetRef )p), *s) != 0) - goto fail; - - p += SIZE_BITSET; - GET_LENGTH_INC(tlen, p); - p += tlen; - s++; - } - MOP_OUT; - break; - - case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE); - { - OnigCodePoint code; - void *node; - int mb_len; - UChar *ss; - - DATA_ENSURE(1); - GET_POINTER_INC(node, p); - mb_len = enclen(encode, s); - ss = s; - s += mb_len; - DATA_ENSURE(0); - code = ONIGENC_MBC_TO_CODE(encode, ss, s); - if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; - } - MOP_OUT; - break; - - case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); - DATA_ENSURE(1); - n = enclen(encode, s); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - s += n; - MOP_OUT; - break; - - case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML); - DATA_ENSURE(1); - n = enclen(encode, s); - DATA_ENSURE(n); - s += n; - MOP_OUT; - break; - - case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); - while (DATA_ENSURE_CHECK1) { - STACK_PUSH_ALT(p, s, sprev); - n = enclen(encode, s); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; - s += n; - } - MOP_OUT; - break; - - case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); - while (DATA_ENSURE_CHECK1) { - STACK_PUSH_ALT(p, s, sprev); - n = enclen(encode, s); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } - } - MOP_OUT; - break; - - case OP_ANYCHAR_STAR_PEEK_NEXT: 
MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); - while (DATA_ENSURE_CHECK1) { - if (*p == *s) { - STACK_PUSH_ALT(p + 1, s, sprev); - } - n = enclen(encode, s); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; - s += n; - } - p++; - MOP_OUT; - break; - - case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); - while (DATA_ENSURE_CHECK1) { - if (*p == *s) { - STACK_PUSH_ALT(p + 1, s, sprev); - } - n = enclen(encode, s); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } - } - p++; - MOP_OUT; - break; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); - GET_STATE_CHECK_NUM_INC(mem, p); - while (DATA_ENSURE_CHECK1) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enclen(encode, s); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; - s += n; - } - MOP_OUT; - break; - - case OP_STATE_CHECK_ANYCHAR_ML_STAR: - MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); - - GET_STATE_CHECK_NUM_INC(mem, p); - while (DATA_ENSURE_CHECK1) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enclen(encode, s); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } - } - MOP_OUT; - break; -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - - case OP_WORD: MOP_IN(OP_WORD); - DATA_ENSURE(1); - if (! ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; - - s += enclen(encode, s); - MOP_OUT; - break; - - case OP_NOT_WORD: MOP_IN(OP_NOT_WORD); - DATA_ENSURE(1); - if (ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; - - s += enclen(encode, s); - MOP_OUT; - break; - - case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND); - if (ON_STR_BEGIN(s)) { - DATA_ENSURE(1); - if (! ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; - } - else if (ON_STR_END(s)) { - if (! 
ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; - } - else { - if (ONIGENC_IS_MBC_WORD(encode, s, end) - == ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; - } - MOP_OUT; - continue; - break; - - case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND); - if (ON_STR_BEGIN(s)) { - if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; - } - else if (ON_STR_END(s)) { - if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; - } - else { - if (ONIGENC_IS_MBC_WORD(encode, s, end) - != ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; - } - MOP_OUT; - continue; - break; - -#ifdef USE_WORD_BEGIN_END - case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN); - if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { - if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { - MOP_OUT; - continue; - } - } - goto fail; - break; - - case OP_WORD_END: MOP_IN(OP_WORD_END); - if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { - if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { - MOP_OUT; - continue; - } - } - goto fail; - break; -#endif - - case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF); - if (! ON_STR_BEGIN(s)) goto fail; - - MOP_OUT; - continue; - break; - - case OP_END_BUF: MOP_IN(OP_END_BUF); - if (! 
ON_STR_END(s)) goto fail; - - MOP_OUT; - continue; - break; - - case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE); - if (ON_STR_BEGIN(s)) { - if (IS_NOTBOL(msa->options)) goto fail; - MOP_OUT; - continue; - } - else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { - MOP_OUT; - continue; - } - goto fail; - break; - - case OP_END_LINE: MOP_IN(OP_END_LINE); - if (ON_STR_END(s)) { -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { -#endif - if (IS_NOTEOL(msa->options)) goto fail; - MOP_OUT; - continue; -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - } -#endif - } - else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { - MOP_OUT; - continue; - } -#ifdef USE_CRNL_AS_LINE_TERMINATOR - else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { - MOP_OUT; - continue; - } -#endif - goto fail; - break; - - case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF); - if (ON_STR_END(s)) { -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { -#endif - if (IS_NOTEOL(msa->options)) goto fail; - MOP_OUT; - continue; -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - } -#endif - } - else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && - ON_STR_END(s + enclen(encode, s))) { - MOP_OUT; - continue; - } -#ifdef USE_CRNL_AS_LINE_TERMINATOR - else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { - UChar* ss = s + enclen(encode, s); - ss += enclen(encode, ss); - if (ON_STR_END(ss)) { - MOP_OUT; - continue; - } - } -#endif - goto fail; - break; - - case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION); - if (s != msa->start) - goto fail; - - MOP_OUT; - continue; - break; - - case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH); - GET_MEMNUM_INC(mem, p); - STACK_PUSH_MEM_START(mem, s); - MOP_OUT; - continue; - break; - - case OP_MEMORY_START: MOP_IN(OP_MEMORY_START); - GET_MEMNUM_INC(mem, p); - mem_start_stk[mem] = (OnigStackIndex )((void* )s); - MOP_OUT; - continue; 
- break; - - case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH); - GET_MEMNUM_INC(mem, p); - STACK_PUSH_MEM_END(mem, s); - MOP_OUT; - continue; - break; - - case OP_MEMORY_END: MOP_IN(OP_MEMORY_END); - GET_MEMNUM_INC(mem, p); - mem_end_stk[mem] = (OnigStackIndex )((void* )s); - MOP_OUT; - continue; - break; - -#ifdef USE_SUBEXP_CALL - case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC); - GET_MEMNUM_INC(mem, p); - STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ - STACK_PUSH_MEM_END(mem, s); - mem_start_stk[mem] = GET_STACK_INDEX(stkp); - MOP_OUT; - continue; - break; - - case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC); - GET_MEMNUM_INC(mem, p); - mem_end_stk[mem] = (OnigStackIndex )((void* )s); - STACK_GET_MEM_START(mem, stkp); - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - mem_start_stk[mem] = GET_STACK_INDEX(stkp); - else - mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); - - STACK_PUSH_MEM_END_MARK(mem); - MOP_OUT; - continue; - break; -#endif - - case OP_BACKREF1: MOP_IN(OP_BACKREF1); - mem = 1; - goto backref; - break; - - case OP_BACKREF2: MOP_IN(OP_BACKREF2); - mem = 2; - goto backref; - break; - - case OP_BACKREFN: MOP_IN(OP_BACKREFN); - GET_MEMNUM_INC(mem, p); - backref: - { - int len; - UChar *pstart, *pend; - - /* if you want to remove following line, - you should check in parse and compile time. */ - if (mem > num_mem) goto fail; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = (int)(pend - pstart); - DATA_ENSURE(n); - sprev = s; - STRING_CMP(pstart, s, n); - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; - - MOP_OUT; - continue; - } - break; - - case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC); - GET_MEMNUM_INC(mem, p); - { - int len; - UChar *pstart, *pend; - - /* if you want to remove following line, - you should check in parse and compile time. */ - if (mem > num_mem) goto fail; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = (int)(pend - pstart); - DATA_ENSURE(n); - sprev = s; - STRING_CMP_IC(case_fold_flag, pstart, &s, n); - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; - - MOP_OUT; - continue; - } - break; - - case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI); - { - int len, is_fail; - UChar *pstart, *pend, *swork; - - GET_LENGTH_INC(tlen, p); - for (i = 0; i < tlen; i++) { - GET_MEMNUM_INC(mem, p); - - if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = (int)(pend - pstart); - DATA_ENSURE(n); - sprev = s; - swork = s; - STRING_CMP_VALUE(pstart, swork, n, is_fail); - if (is_fail) continue; - s = swork; - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; - - p += (SIZE_MEMNUM * (tlen - i - 1)); - break; /* success */ - } - if (i == tlen) goto fail; - MOP_OUT; - continue; - } - break; - - case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC); - { - int len, is_fail; - UChar *pstart, *pend, *swork; - - GET_LENGTH_INC(tlen, p); - for (i = 0; i < tlen; i++) { - GET_MEMNUM_INC(mem, p); - - if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = (int)(pend - pstart); - DATA_ENSURE(n); - sprev = s; - swork = s; - STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail); - if (is_fail) continue; - s = swork; - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; - - p += (SIZE_MEMNUM * (tlen - i - 1)); - break; /* success */ - } - if (i == tlen) goto fail; - MOP_OUT; - continue; - } - break; - -#ifdef USE_BACKREF_WITH_LEVEL - case OP_BACKREF_WITH_LEVEL: - { - int len; - OnigOptionType ic; - LengthType level; - - GET_OPTION_INC(ic, p); - GET_LENGTH_INC(level, p); - GET_LENGTH_INC(tlen, p); - - sprev = s; - if (backref_match_at_nested_level(reg, stk, stk_base, ic - , case_fold_flag, (int )level, (int )tlen, p, &s, end)) { - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; - - p += (SIZE_MEMNUM * tlen); - } - else - goto fail; - - MOP_OUT; - continue; - } - - break; -#endif - -#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ - case 
OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH); - GET_OPTION_INC(option, p); - STACK_PUSH_ALT(p, s, sprev); - p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; - MOP_OUT; - continue; - break; - - case OP_SET_OPTION: MOP_IN(OP_SET_OPTION); - GET_OPTION_INC(option, p); - MOP_OUT; - continue; - break; -#endif - - case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START); - GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_PUSH_NULL_CHECK_START(mem, s); - MOP_OUT; - continue; - break; - - case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END); - { - int isnull; - - GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_NULL_CHECK(isnull, mem, s); - if (isnull) { -#ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n", - (int )mem, (int )s); -#endif - null_check_found: - /* empty loop founded, skip next instruction */ - switch (*p++) { - case OP_JUMP: - case OP_PUSH: - p += SIZE_RELADDR; - break; - case OP_REPEAT_INC: - case OP_REPEAT_INC_NG: - case OP_REPEAT_INC_SG: - case OP_REPEAT_INC_NG_SG: - p += SIZE_MEMNUM; - break; - default: - goto unexpected_bytecode_error; - break; - } - } - } - MOP_OUT; - continue; - break; - -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT - case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST); - { - int isnull; - - GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); - if (isnull) { -#ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n", - (int )mem, (int )s); -#endif - if (isnull == -1) goto fail; - goto null_check_found; - } - } - MOP_OUT; - continue; - break; -#endif - -#ifdef USE_SUBEXP_CALL - case OP_NULL_CHECK_END_MEMST_PUSH: - MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH); - { - int isnull; - - GET_MEMNUM_INC(mem, p); /* mem: null check id */ -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT - STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); -#else - STACK_NULL_CHECK_REC(isnull, mem, s); -#endif - if (isnull) { -#ifdef 
ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n", - (int )mem, (int )s); -#endif - if (isnull == -1) goto fail; - goto null_check_found; - } - else { - STACK_PUSH_NULL_CHECK_END(mem); - } - } - MOP_OUT; - continue; - break; -#endif - - case OP_JUMP: MOP_IN(OP_JUMP); - GET_RELADDR_INC(addr, p); - p += addr; - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; - continue; - break; - - case OP_PUSH: MOP_IN(OP_PUSH); - GET_RELADDR_INC(addr, p); - STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; - continue; - break; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH); - GET_STATE_CHECK_NUM_INC(mem, p); - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - GET_RELADDR_INC(addr, p); - STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); - MOP_OUT; - continue; - break; - - case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); - GET_STATE_CHECK_NUM_INC(mem, p); - GET_RELADDR_INC(addr, p); - STATE_CHECK_VAL(scv, mem); - if (scv) { - p += addr; - } - else { - STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); - } - MOP_OUT; - continue; - break; - - case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK); - GET_STATE_CHECK_NUM_INC(mem, p); - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_STATE_CHECK(s, mem); - MOP_OUT; - continue; - break; -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - - case OP_POP: MOP_IN(OP_POP); - STACK_POP_ONE; - MOP_OUT; - continue; - break; - - case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1); - GET_RELADDR_INC(addr, p); - if (*p == *s && DATA_ENSURE_CHECK1) { - p++; - STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; - continue; - } - p += (addr + 1); - MOP_OUT; - continue; - break; - - case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT); - GET_RELADDR_INC(addr, p); - if (*p == *s) { - p++; - STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; - continue; - } - p++; - MOP_OUT; - continue; - break; - - case OP_REPEAT: MOP_IN(OP_REPEAT); - { - 
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - GET_RELADDR_INC(addr, p); - - STACK_ENSURE(1); - repeat_stk[mem] = GET_STACK_INDEX(stk); - STACK_PUSH_REPEAT(mem, p); - - if (reg->repeat_range[mem].lower == 0) { - STACK_PUSH_ALT(p + addr, s, sprev); - } - } - MOP_OUT; - continue; - break; - - case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG); - { - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - GET_RELADDR_INC(addr, p); - - STACK_ENSURE(1); - repeat_stk[mem] = GET_STACK_INDEX(stk); - STACK_PUSH_REPEAT(mem, p); - - if (reg->repeat_range[mem].lower == 0) { - STACK_PUSH_ALT(p, s, sprev); - p += addr; - } - } - MOP_OUT; - continue; - break; - - case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC); - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - si = repeat_stk[mem]; - stkp = STACK_AT(si); - - repeat_inc: - stkp->u.repeat.count++; - if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) { - /* end of repeat. Nothing to do. */ - } - else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { - STACK_PUSH_ALT(p, s, sprev); - p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. 
*/ - } - else { - p = stkp->u.repeat.pcode; - } - STACK_PUSH_REPEAT_INC(si); - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; - continue; - break; - - case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG); - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - STACK_GET_REPEAT(mem, stkp); - si = GET_STACK_INDEX(stkp); - goto repeat_inc; - break; - - case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG); - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - si = repeat_stk[mem]; - stkp = STACK_AT(si); - - repeat_inc_ng: - stkp->u.repeat.count++; - if (stkp->u.repeat.count < reg->repeat_range[mem].upper) { - if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { - UChar* pcode = stkp->u.repeat.pcode; - - STACK_PUSH_REPEAT_INC(si); - STACK_PUSH_ALT(pcode, s, sprev); - } - else { - p = stkp->u.repeat.pcode; - STACK_PUSH_REPEAT_INC(si); - } - } - else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { - STACK_PUSH_REPEAT_INC(si); - } - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; - continue; - break; - - case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG); - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - STACK_GET_REPEAT(mem, stkp); - si = GET_STACK_INDEX(stkp); - goto repeat_inc_ng; - break; - - case OP_PUSH_POS: MOP_IN(OP_PUSH_POS); - STACK_PUSH_POS(s, sprev); - MOP_OUT; - continue; - break; - - case OP_POP_POS: MOP_IN(OP_POP_POS); - { - STACK_POS_END(stkp); - s = stkp->u.state.pstr; - sprev = stkp->u.state.pstr_prev; - } - MOP_OUT; - continue; - break; - - case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT); - GET_RELADDR_INC(addr, p); - STACK_PUSH_POS_NOT(p + addr, s, sprev); - MOP_OUT; - continue; - break; - - case OP_FAIL_POS: MOP_IN(OP_FAIL_POS); - STACK_POP_TIL_POS_NOT; - goto fail; - break; - - case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT); - STACK_PUSH_STOP_BT; - MOP_OUT; - continue; - break; - - case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT); - STACK_STOP_BT_END; - MOP_OUT; - continue; - break; - - case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND); - GET_LENGTH_INC(tlen, p); - s = 
(UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); - if (IS_NULL(s)) goto fail; - sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); - MOP_OUT; - continue; - break; - - case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT); - GET_RELADDR_INC(addr, p); - GET_LENGTH_INC(tlen, p); - q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); - if (IS_NULL(q)) { - /* too short case -> success. ex. /(?p + addr; - MOP_OUT; - continue; - break; - - case OP_RETURN: MOP_IN(OP_RETURN); - STACK_RETURN(p); - STACK_PUSH_RETURN; - MOP_OUT; - continue; - break; -#endif - - case OP_FINISH: - goto finish; - break; - - fail: - MOP_OUT; - /* fall */ - case OP_FAIL: MOP_IN(OP_FAIL); - STACK_POP; - p = stk->u.state.pcode; - s = stk->u.state.pstr; - sprev = stk->u.state.pstr_prev; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - if (stk->u.state.state_check != 0) { - stk->type = STK_STATE_CHECK_MARK; - stk++; - } -#endif - - MOP_OUT; - continue; - break; - - default: - goto bytecode_error; - - } /* end of switch */ - sprev = sbegin; - } /* end of while(1) */ - - finish: - STACK_SAVE; - xfree(alloca_base); - return best_len; - -#ifdef ONIG_DEBUG - stack_error: - STACK_SAVE; - xfree(alloca_base); - return ONIGERR_STACK_BUG; -#endif - - bytecode_error: - STACK_SAVE; - xfree(alloca_base); - return ONIGERR_UNDEFINED_BYTECODE; - - unexpected_bytecode_error: - STACK_SAVE; - xfree(alloca_base); - return ONIGERR_UNEXPECTED_BYTECODE; -} - - -static UChar* -slow_search(OnigEncoding enc, UChar* target, UChar* target_end, - const UChar* text, const UChar* text_end, UChar* text_range) -{ - UChar *t, *p, *s, *end; - - end = (UChar* )text_end; - end -= target_end - target - 1; - if (end > text_range) - end = text_range; - - s = (UChar* )text; - - while (s < end) { - if (*s == *target) { - p = s + 1; - t = target + 1; - while (t < target_end) { - if (*t != *p++) - break; - t++; - } - if (t == target_end) - return s; - } - s += enclen(enc, s); - } - - return (UChar* )NULL; -} - -static 
int -str_lower_case_match(OnigEncoding enc, int case_fold_flag, - const UChar* t, const UChar* tend, - const UChar* p, const UChar* end) -{ - int lowlen; - UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - - while (t < tend) { - lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf); - q = lowbuf; - while (lowlen > 0) { - if (*t++ != *q++) return 0; - lowlen--; - } - } - - return 1; -} - -static UChar* -slow_search_ic(OnigEncoding enc, int case_fold_flag, - UChar* target, UChar* target_end, - const UChar* text, const UChar* text_end, UChar* text_range) -{ - UChar *s, *end; - - end = (UChar* )text_end; - end -= target_end - target - 1; - if (end > text_range) - end = text_range; - - s = (UChar* )text; - - while (s < end) { - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, text_end)) - return s; - - s += enclen(enc, s); - } - - return (UChar* )NULL; -} - -static UChar* -slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) -{ - UChar *t, *p, *s; - - s = (UChar* )text_end; - s -= (target_end - target); - if (s > text_start) - s = (UChar* )text_start; - else - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); - - while (s >= text) { - if (*s == *target) { - p = s + 1; - t = target + 1; - while (t < target_end) { - if (*t != *p++) - break; - t++; - } - if (t == target_end) - return s; - } - s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); - } - - return (UChar* )NULL; -} - -static UChar* -slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, - UChar* target, UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) -{ - UChar *s; - - s = (UChar* )text_end; - s -= (target_end - target); - if (s > text_start) - s = (UChar* )text_start; - else - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); - - while (s >= text) { - if 
(str_lower_case_match(enc, case_fold_flag, - target, target_end, s, text_end)) - return s; - - s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); - } - - return (UChar* )NULL; -} - -static UChar* -bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, - const UChar* text_range) -{ - const UChar *s, *se, *t, *p, *end; - const UChar *tail; - int skip, tlen1; - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n", - (int )text, (int )text_end, (int )text_range); -#endif - - tail = target_end - 1; - tlen1 = (int)(tail - target); - end = text_range; - if (end + tlen1 > text_end) - end = text_end - tlen1; - - s = text; - - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - skip = reg->map[*se]; - t = s; - do { - s += enclen(reg->enc, s); - } while ((s - t) < skip && s < end); - } - } - else { - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - skip = reg->int_map[*se]; - t = s; - do { - s += enclen(reg->enc, s); - } while ((s - t) < skip && s < end); - } - } - - return (UChar* )NULL; -} - -static UChar* -bm_search(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, const UChar* text_range) -{ - const UChar *s, *t, *p, *end; - const UChar *tail; - - end = text_range + (target_end - target) - 1; - if (end > text_end) - end = text_end; - - tail = target_end - 1; - s = text + (target_end - target) - 1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - s += reg->map[*s]; - } - } - else { /* see int_map[] */ - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; 
- } - s += reg->int_map[*s]; - } - } - return (UChar* )NULL; -} - -static int -set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, - int** skip) - -{ - int i, len; - - if (IS_NULL(*skip)) { - *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); - if (IS_NULL(*skip)) return ONIGERR_MEMORY; - } - - len = (int)(end - s); - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) - (*skip)[i] = len; - - for (i = len - 1; i > 0; i--) - (*skip)[s[i]] = i; - - return 0; -} - -static UChar* -bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) -{ - const UChar *s, *t, *p; - - s = text_end - (target_end - target); - if (text_start < s) - s = text_start; - else - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); - - while (s >= text) { - p = s; - t = target; - while (t < target_end && *p == *t) { - p++; t++; - } - if (t == target_end) - return (UChar* )s; - - s -= reg->int_map_backward[*s]; - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); - } - - return (UChar* )NULL; -} - -static UChar* -map_search(OnigEncoding enc, UChar map[], - const UChar* text, const UChar* text_range) -{ - const UChar *s = text; - - while (s < text_range) { - if (map[*s]) return (UChar* )s; - - s += enclen(enc, s); - } - return (UChar* )NULL; -} - -static UChar* -map_search_backward(OnigEncoding enc, UChar map[], - const UChar* text, const UChar* adjust_text, - const UChar* text_start) -{ - const UChar *s = text_start; - - while (s >= text) { - if (map[*s]) return (UChar* )s; - - s = onigenc_get_prev_char_head(enc, adjust_text, s); - } - return (UChar* )NULL; -} - -extern int -onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, - OnigOptionType option) -{ - int r; - UChar *prev; - OnigMatchArg msa; - -#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) - start: - THREAD_ATOMIC_START; 
- if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { - ONIG_STATE_INC(reg); - if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - onig_chain_reduce(reg); - ONIG_STATE_INC(reg); - } - } - else { - int n; - - THREAD_ATOMIC_END; - n = 0; - while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { - if (++n > THREAD_PASS_LIMIT_COUNT) - return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; - THREAD_PASS; - } - goto start; - } - THREAD_ATOMIC_END; -#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ - - MATCH_ARG_INIT(msa, option, region, at); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - { - int offset = at - str; - STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); - } -#endif - - if (region -#ifdef USE_POSIX_API_REGION_OPTION - && !IS_POSIX_REGION(option) -#endif - ) { - r = onig_region_resize_clear(region, reg->num_mem + 1); - } - else - r = 0; - - if (r == 0) { - prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); - r = match_at(reg, str, end, -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - end, -#endif - at, prev, &msa); - } - - MATCH_ARG_FREE(msa); - ONIG_STATE_DEC_THREAD(reg); - return r; -} - -static int -forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, - UChar* range, UChar** low, UChar** high, UChar** low_prev) -{ - UChar *p, *pprev = (UChar* )NULL; - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n", - (int )str, (int )end, (int )s, (int )range); -#endif - - p = s; - if (reg->dmin > 0) { - if (ONIGENC_IS_SINGLEBYTE(reg->enc)) { - p += reg->dmin; - } - else { - UChar *q = p + reg->dmin; - while (p < q) p += enclen(reg->enc, p); - } - } - - retry: - switch (reg->optimize) { - case ONIG_OPTIMIZE_EXACT: - p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); - break; - case ONIG_OPTIMIZE_EXACT_IC: - p = slow_search_ic(reg->enc, reg->case_fold_flag, - reg->exact, reg->exact_end, p, end, range); - break; - - case 
ONIG_OPTIMIZE_EXACT_BM: - p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); - break; - - case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: - p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); - break; - - case ONIG_OPTIMIZE_MAP: - p = map_search(reg->enc, reg->map, p, range); - break; - } - - if (p && p < range) { - if (p - reg->dmin < s) { - retry_gate: - pprev = p; - p += enclen(reg->enc, p); - goto retry; - } - - if (reg->sub_anchor) { - UChar* prev; - - switch (reg->sub_anchor) { - case ANCHOR_BEGIN_LINE: - if (!ON_STR_BEGIN(p)) { - prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p); - if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) - goto retry_gate; - } - break; - - case ANCHOR_END_LINE: - if (ON_STR_END(p)) { -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - prev = (UChar* )onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p); - if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) - goto retry_gate; -#endif - } - else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) -#ifdef USE_CRNL_AS_LINE_TERMINATOR - && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) -#endif - ) - goto retry_gate; - break; - } - } - - if (reg->dmax == 0) { - *low = p; - if (low_prev) { - if (*low > s) - *low_prev = onigenc_get_prev_char_head(reg->enc, s, p); - else - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p); - } - } - else { - if (reg->dmax != ONIG_INFINITE_DISTANCE) { - *low = p - reg->dmax; - if (*low > s) { - *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, - *low, (const UChar** )low_prev); - if (low_prev && IS_NULL(*low_prev)) - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : s), *low); - } - else { - if (low_prev) - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? 
pprev : str), *low); - } - } - } - /* no needs to adjust *high, *high is used as range check only */ - *high = p - reg->dmin; - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, - "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n", - (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax); -#endif - return 1; /* success */ - } - - return 0; /* fail */ -} - -static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc, - int** skip)); - -#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100 - -static int -backward_search_range(regex_t* reg, const UChar* str, const UChar* end, - UChar* s, const UChar* range, UChar* adjrange, - UChar** low, UChar** high) -{ - int r; - UChar *p; - - range += reg->dmin; - p = s; - - retry: - switch (reg->optimize) { - case ONIG_OPTIMIZE_EXACT: - exact_method: - p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, - range, adjrange, end, p); - break; - - case ONIG_OPTIMIZE_EXACT_IC: - p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, - reg->exact, reg->exact_end, - range, adjrange, end, p); - break; - - case ONIG_OPTIMIZE_EXACT_BM: - case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: - if (IS_NULL(reg->int_map_backward)) { - if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) - goto exact_method; - - r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, - &(reg->int_map_backward)); - if (r) return r; - } - p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, - end, p); - break; - - case ONIG_OPTIMIZE_MAP: - p = map_search_backward(reg->enc, reg->map, range, adjrange, p); - break; - } - - if (p) { - if (reg->sub_anchor) { - UChar* prev; - - switch (reg->sub_anchor) { - case ANCHOR_BEGIN_LINE: - if (!ON_STR_BEGIN(p)) { - prev = onigenc_get_prev_char_head(reg->enc, str, p); - if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { - p = prev; - goto retry; - } - } - break; - - case ANCHOR_END_LINE: - if (ON_STR_END(p)) { -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - prev 
= onigenc_get_prev_char_head(reg->enc, adjrange, p); - if (IS_NULL(prev)) goto fail; - if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { - p = prev; - goto retry; - } -#endif - } - else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) -#ifdef USE_CRNL_AS_LINE_TERMINATOR - && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) -#endif - ) { - p = onigenc_get_prev_char_head(reg->enc, adjrange, p); - if (IS_NULL(p)) goto fail; - goto retry; - } - break; - } - } - - /* no needs to adjust *high, *high is used as range check only */ - if (reg->dmax != ONIG_INFINITE_DISTANCE) { - *low = p - reg->dmax; - *high = p - reg->dmin; - *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high); - } - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "backward_search_range: low: %d, high: %d\n", - (int )(*low - str), (int )(*high - str)); -#endif - return 1; /* success */ - } - - fail: -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "backward_search_range: fail.\n"); -#endif - return 0; /* fail */ -} - - -extern int -onig_search(regex_t* reg, const UChar* str, const UChar* end, - const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) -{ - int r; - UChar *s, *prev; - OnigMatchArg msa; - const UChar *orig_start = start; -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - const UChar *orig_range = range; -#endif - -#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) - start: - THREAD_ATOMIC_START; - if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { - ONIG_STATE_INC(reg); - if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - onig_chain_reduce(reg); - ONIG_STATE_INC(reg); - } - } - else { - int n; - - THREAD_ATOMIC_END; - n = 0; - while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { - if (++n > THREAD_PASS_LIMIT_COUNT) - return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; - THREAD_PASS; - } - goto start; - } - THREAD_ATOMIC_END; -#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, - "onig_search 
(entry point): str: %d, end: %d, start: %d, range: %d\n", - (int )str, (int )(end - str), (int )(start - str), (int )(range - str)); -#endif - - if (region -#ifdef USE_POSIX_API_REGION_OPTION - && !IS_POSIX_REGION(option) -#endif - ) { - r = onig_region_resize_clear(region, reg->num_mem + 1); - if (r) goto finish_no_msa; - } - - if (start > end || start < str) goto mismatch_no_msa; - - -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_AND_RETURN_CHECK(upper_range) \ - r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - if (! IS_FIND_LONGEST(reg->options)) {\ - goto match;\ - }\ - }\ - else goto finish; /* error */ \ - } -#else -#define MATCH_AND_RETURN_CHECK(upper_range) \ - r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - goto match;\ - }\ - else goto finish; /* error */ \ - } -#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ -#else -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_AND_RETURN_CHECK(none) \ - r = match_at(reg, str, end, s, prev, &msa);\ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - if (! 
IS_FIND_LONGEST(reg->options)) {\ - goto match;\ - }\ - }\ - else goto finish; /* error */ \ - } -#else -#define MATCH_AND_RETURN_CHECK(none) \ - r = match_at(reg, str, end, s, prev, &msa);\ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - goto match;\ - }\ - else goto finish; /* error */ \ - } -#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ -#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ - - - /* anchor optimize: resume search range */ - if (reg->anchor != 0 && str < end) { - UChar *min_semi_end, *max_semi_end; - - if (reg->anchor & ANCHOR_BEGIN_POSITION) { - /* search start-position only */ - begin_position: - if (range > start) - range = start + 1; - else - range = start; - } - else if (reg->anchor & ANCHOR_BEGIN_BUF) { - /* search str-position only */ - if (range > start) { - if (start != str) goto mismatch_no_msa; - range = str + 1; - } - else { - if (range <= str) { - start = str; - range = str; - } - else - goto mismatch_no_msa; - } - } - else if (reg->anchor & ANCHOR_END_BUF) { - min_semi_end = max_semi_end = (UChar* )end; - - end_buf: - if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin) - goto mismatch_no_msa; - - if (range > start) { - if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) { - start = min_semi_end - reg->anchor_dmax; - if (start < end) - start = onigenc_get_right_adjust_char_head(reg->enc, str, start); - else { /* match with empty at end */ - start = onigenc_get_prev_char_head(reg->enc, str, end); - } - } - if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) { - range = max_semi_end - reg->anchor_dmin + 1; - } - - if (start >= range) goto mismatch_no_msa; - } - else { - if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) { - range = min_semi_end - reg->anchor_dmax; - } - if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) { - start = max_semi_end - reg->anchor_dmin; - start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start); - } - if (range > start) goto 
mismatch_no_msa; - } - } - else if (reg->anchor & ANCHOR_SEMI_END_BUF) { - UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1); - - max_semi_end = (UChar* )end; - if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) { - min_semi_end = pre_end; - -#ifdef USE_CRNL_AS_LINE_TERMINATOR - pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1); - if (IS_NOT_NULL(pre_end) && - ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { - min_semi_end = pre_end; - } -#endif - if (min_semi_end > str && start <= min_semi_end) { - goto end_buf; - } - } - else { - min_semi_end = (UChar* )end; - goto end_buf; - } - } - else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { - goto begin_position; - } - } - else if (str == end) { /* empty string */ - static const UChar* address_for_empty_string = (UChar* )""; - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "onig_search: empty string.\n"); -#endif - - if (reg->threshold_len == 0) { - start = end = str = address_for_empty_string; - s = (UChar* )start; - prev = (UChar* )NULL; - - MATCH_ARG_INIT(msa, option, region, start); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - msa.state_check_buff = (void* )0; - msa.state_check_buff_size = 0; /* NO NEED, for valgrind */ -#endif - MATCH_AND_RETURN_CHECK(end); - goto mismatch; - } - goto mismatch_no_msa; - } - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n", - (int )(end - str), (int )(start - str), (int )(range - str)); -#endif - - MATCH_ARG_INIT(msa, option, region, orig_start); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - { - int offset = (MIN(start, range) - str); - STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); - } -#endif - - s = (UChar* )start; - if (range > start) { /* forward search */ - if (s > str) - prev = onigenc_get_prev_char_head(reg->enc, str, s); - else - prev = (UChar* )NULL; - - if (reg->optimize != ONIG_OPTIMIZE_NONE) { - UChar *sch_range, *low, *high, *low_prev; - - sch_range = (UChar* )range; - if (reg->dmax != 
0) { - if (reg->dmax == ONIG_INFINITE_DISTANCE) - sch_range = (UChar* )end; - else { - sch_range += reg->dmax; - if (sch_range > end) sch_range = (UChar* )end; - } - } - - if ((end - start) < reg->threshold_len) - goto mismatch; - - if (reg->dmax != ONIG_INFINITE_DISTANCE) { - do { - if (! forward_search_range(reg, str, end, s, sch_range, - &low, &high, &low_prev)) goto mismatch; - if (s < low) { - s = low; - prev = low_prev; - } - while (s <= high) { - MATCH_AND_RETURN_CHECK(orig_range); - prev = s; - s += enclen(reg->enc, s); - } - } while (s < range); - goto mismatch; - } - else { /* check only. */ - if (! forward_search_range(reg, str, end, s, sch_range, - &low, &high, (UChar** )NULL)) goto mismatch; - - if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { - do { - MATCH_AND_RETURN_CHECK(orig_range); - prev = s; - s += enclen(reg->enc, s); - - while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { - prev = s; - s += enclen(reg->enc, s); - } - } while (s < range); - goto mismatch; - } - } - } - - do { - MATCH_AND_RETURN_CHECK(orig_range); - prev = s; - s += enclen(reg->enc, s); - } while (s < range); - - if (s == range) { /* because empty match with /$/. 
*/ - MATCH_AND_RETURN_CHECK(orig_range); - } - } - else { /* backward search */ -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - if (orig_start < end) - orig_start += enclen(reg->enc, orig_start); /* is upper range */ -#endif - - if (reg->optimize != ONIG_OPTIMIZE_NONE) { - UChar *low, *high, *adjrange, *sch_start; - - if (range < end) - adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range); - else - adjrange = (UChar* )end; - - if (reg->dmax != ONIG_INFINITE_DISTANCE && - (end - range) >= reg->threshold_len) { - do { - sch_start = s + reg->dmax; - if (sch_start > end) sch_start = (UChar* )end; - if (backward_search_range(reg, str, end, sch_start, range, adjrange, - &low, &high) <= 0) - goto mismatch; - - if (s > high) - s = high; - - while (s >= low) { - prev = onigenc_get_prev_char_head(reg->enc, str, s); - MATCH_AND_RETURN_CHECK(orig_start); - s = prev; - } - } while (s >= range); - goto mismatch; - } - else { /* check only. */ - if ((end - range) < reg->threshold_len) goto mismatch; - - sch_start = s; - if (reg->dmax != 0) { - if (reg->dmax == ONIG_INFINITE_DISTANCE) - sch_start = (UChar* )end; - else { - sch_start += reg->dmax; - if (sch_start > end) sch_start = (UChar* )end; - else - sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, - start, sch_start); - } - } - if (backward_search_range(reg, str, end, sch_start, range, adjrange, - &low, &high) <= 0) goto mismatch; - } - } - - do { - prev = onigenc_get_prev_char_head(reg->enc, str, s); - MATCH_AND_RETURN_CHECK(orig_start); - s = prev; - } while (s >= range); - } - - mismatch: -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - if (IS_FIND_LONGEST(reg->options)) { - if (msa.best_len >= 0) { - s = msa.best_s; - goto match; - } - } -#endif - r = ONIG_MISMATCH; - - finish: - MATCH_ARG_FREE(msa); - ONIG_STATE_DEC_THREAD(reg); - - /* If result is mismatch and no FIND_NOT_EMPTY option, - then the region is not setted in match_at(). 
*/ - if (IS_FIND_NOT_EMPTY(reg->options) && region -#ifdef USE_POSIX_API_REGION_OPTION - && !IS_POSIX_REGION(option) -#endif - ) { - onig_region_clear(region); - } - -#ifdef ONIG_DEBUG - if (r != ONIG_MISMATCH) - fprintf(stderr, "onig_search: error %d\n", r); -#endif - return r; - - mismatch_no_msa: - r = ONIG_MISMATCH; - finish_no_msa: - ONIG_STATE_DEC_THREAD(reg); -#ifdef ONIG_DEBUG - if (r != ONIG_MISMATCH) - fprintf(stderr, "onig_search: error %d\n", r); -#endif - return r; - - match: - ONIG_STATE_DEC_THREAD(reg); - MATCH_ARG_FREE(msa); - return (int)(s - str); -} - -extern OnigEncoding -onig_get_encoding(regex_t* reg) -{ - return reg->enc; -} - -extern OnigOptionType -onig_get_options(regex_t* reg) -{ - return reg->options; -} - -extern OnigCaseFoldType -onig_get_case_fold_flag(regex_t* reg) -{ - return reg->case_fold_flag; -} - -extern OnigSyntaxType* -onig_get_syntax(regex_t* reg) -{ - return reg->syntax; -} - -extern int -onig_number_of_captures(regex_t* reg) -{ - return reg->num_mem; -} - -extern int -onig_number_of_capture_histories(regex_t* reg) -{ -#ifdef USE_CAPTURE_HISTORY - int i, n; - - n = 0; - for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { - if (BIT_STATUS_AT(reg->capture_history, i) != 0) - n++; - } - return n; -#else - return 0; -#endif -} - -extern void -onig_copy_encoding(OnigEncoding to, OnigEncoding from) -{ - *to = *from; -} - diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/reggnu.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/reggnu.c deleted file mode 100644 index 89cb4d6030..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/reggnu.c +++ /dev/null @@ -1,169 +0,0 @@ -/********************************************************************** - reggnu.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 K.Kosako - * All rights reserved. 
- * - * Copyright (c) 2015, Hewlett Packard Enterprise Development, L.P.
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "regint.h" - -#ifndef ONIGGNU_H -#include "oniggnu.h" -#endif - -extern void -re_free_registers(OnigRegion* r) -{ - /* 0: don't free self */ - onig_region_free(r, 0); -} - -extern int -re_adjust_startpos(regex_t* reg, const char* string, int size, - int startpos, int range) -{ - if (startpos > 0 && ONIGENC_MBC_MAXLEN(reg->enc) != 1 && startpos < size) { - UChar *p; - UChar *s = (UChar* )string + startpos; - - if (range > 0) { - p = onigenc_get_right_adjust_char_head(reg->enc, (UChar* )string, s); - } - else { - p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, (UChar* )string, s); - } - return (int)(p - (UChar* )string); - } - - return startpos; -} - -extern int -re_match(regex_t* reg, const char* str, int size, int pos, - struct re_registers* regs) -{ - return onig_match(reg, (UChar* )str, (UChar* )(str + size), - (UChar* )(str + pos), regs, ONIG_OPTION_NONE); -} - -extern int -re_search(regex_t* bufp, const char* string, int size, int startpos, int range, - struct re_registers* regs) -{ - return onig_search(bufp, (UChar* )string, (UChar* )(string + size), - (UChar* )(string + startpos), - (UChar* )(string + startpos + range), - regs, ONIG_OPTION_NONE); -} - -extern int -re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) -{ - int r; - OnigErrorInfo einfo; - - r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo); - if (r != ONIG_NORMAL) { - if (IS_NOT_NULL(ebuf)) - (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo); - } - - return r; -} - -#ifdef USE_RECOMPILE_API -extern int -re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) -{ - int r; - OnigErrorInfo einfo; - OnigEncoding enc; - - /* I think encoding and options should be arguments of this function. - But this is adapted to present re.c. 
(2002/11/29) - */ - enc = OnigEncDefaultCharEncoding; - - r = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size), - reg->options, enc, OnigDefaultSyntax, &einfo); - if (r != ONIG_NORMAL) { - if (IS_NOT_NULL(ebuf)) - (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo); - } - return r; -} -#endif - -extern void -re_free_pattern(regex_t* reg) -{ - onig_free(reg); -} - -extern int -re_alloc_pattern(regex_t** reg) -{ - *reg = (regex_t* )xmalloc(sizeof(regex_t)); - if (IS_NULL(*reg)) return ONIGERR_MEMORY; - - return onig_reg_init(*reg, ONIG_OPTION_DEFAULT, - ONIGENC_CASE_FOLD_DEFAULT, - OnigEncDefaultCharEncoding, - OnigDefaultSyntax); -} - -extern void -re_set_casetable(const char* table) -{ - onigenc_set_default_caseconv_table((UChar* )table); -} - -extern void -re_mbcinit(int mb_code) -{ - OnigEncoding enc; - - switch (mb_code) { - case RE_MBCTYPE_ASCII: - enc = ONIG_ENCODING_ASCII; - break; - case RE_MBCTYPE_EUC: - enc = ONIG_ENCODING_EUC_JP; - break; - case RE_MBCTYPE_SJIS: - enc = ONIG_ENCODING_SJIS; - break; - case RE_MBCTYPE_UTF8: - enc = ONIG_ENCODING_UTF8; - break; - default: - return ; - break; - } - - onigenc_set_default_encoding(enc); -} diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regint.h b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regint.h deleted file mode 100644 index 2db3b709e8..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regint.h +++ /dev/null @@ -1,820 +0,0 @@ -#ifndef REGINT_H -#define REGINT_H -/********************************************************************** - regint.h - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2013 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* for debug */ -/* #define ONIG_DEBUG_PARSE_TREE */ -/* #define ONIG_DEBUG_COMPILE */ -/* #define ONIG_DEBUG_SEARCH */ -/* #define ONIG_DEBUG_MATCH */ -/* #define ONIG_DONT_OPTIMIZE */ - -/* for byte-code statistical data. */ -/* #define ONIG_DEBUG_STATISTICS */ - -#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \ - defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \ - defined(ONIG_DEBUG_STATISTICS) -#ifndef ONIG_DEBUG -#define ONIG_DEBUG -#endif -#endif - -#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ - (defined(__ppc__) && defined(__APPLE__)) || \ - defined(__x86_64) || defined(__x86_64__) || \ - defined(__mc68020__) -#define PLATFORM_UNALIGNED_WORD_ACCESS -#endif - -/* config */ -/* spec. 
config */ -#define USE_NAMED_GROUP -#define USE_SUBEXP_CALL -#define USE_BACKREF_WITH_LEVEL /* \k, \k */ -#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */ -#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ -#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR -/* #define USE_RECOMPILE_API */ -/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */ - -/* internal config */ -#define USE_PARSE_TREE_NODE_RECYCLE -#define USE_OP_PUSH_OR_JUMP_EXACT -#define USE_QTFR_PEEK_NEXT -#define USE_ST_LIBRARY -#define USE_SHARED_CCLASS_TABLE - -#define INIT_MATCH_STACK_SIZE 160 -#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ - -#if defined(__GNUC__) -# define ARG_UNUSED __attribute__ ((unused)) -#else -# define ARG_UNUSED -#endif - -/* */ -/* escape other system UChar definition */ -//#include "config.h" -#ifdef ONIG_ESCAPE_UCHAR_COLLISION -#undef ONIG_ESCAPE_UCHAR_COLLISION -#endif - -#define USE_WORD_BEGIN_END /* "\<", "\>" */ -#define USE_CAPTURE_HISTORY -#define USE_VARIABLE_META_CHARS -#define USE_POSIX_API_REGION_OPTION -#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */ - -/* #define USE_MULTI_THREAD_SYSTEM */ -#define THREAD_SYSTEM_INIT /* depend on thread system */ -#define THREAD_SYSTEM_END /* depend on thread system */ -#define THREAD_ATOMIC_START /* depend on thread system */ -#define THREAD_ATOMIC_END /* depend on thread system */ -#define THREAD_PASS /* depend on thread system */ -#define xmalloc malloc -#define xrealloc realloc -#define xcalloc calloc -#define xfree free - -#define CHECK_INTERRUPT_IN_MATCH_AT - -#define st_init_table onig_st_init_table -#define st_init_table_with_size onig_st_init_table_with_size -#define st_init_numtable onig_st_init_numtable -#define st_init_numtable_with_size onig_st_init_numtable_with_size -#define st_init_strtable onig_st_init_strtable -#define st_init_strtable_with_size onig_st_init_strtable_with_size 
-#define st_delete onig_st_delete -#define st_delete_safe onig_st_delete_safe -#define st_insert onig_st_insert -#define st_lookup onig_st_lookup -#define st_foreach onig_st_foreach -#define st_add_direct onig_st_add_direct -#define st_free_table onig_st_free_table -#define st_cleanup_safe onig_st_cleanup_safe -#define st_copy onig_st_copy -#define st_nothing_key_clone onig_st_nothing_key_clone -#define st_nothing_key_free onig_st_nothing_key_free -/* */ -#define onig_st_is_member st_is_member - -#define STATE_CHECK_STRING_THRESHOLD_LEN 7 -#define STATE_CHECK_BUFF_MAX_SIZE 0x4000 - -#define THREAD_PASS_LIMIT_COUNT 8 -//#define xmemset memset -//#define xmemcpy memcpy -//#define xmemmove memmove - -#if defined(_WIN32) && !defined(__GNUC__) -#define xalloca _alloca -#define xvsnprintf _vsnprintf -#else -#define xalloca alloca -#define xvsnprintf vsnprintf -#endif - - -#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) -#define ONIG_STATE_INC(reg) (reg)->state++ -#define ONIG_STATE_DEC(reg) (reg)->state-- - -#define ONIG_STATE_INC_THREAD(reg) do {\ - THREAD_ATOMIC_START;\ - (reg)->state++;\ - THREAD_ATOMIC_END;\ -} while(0) -#define ONIG_STATE_DEC_THREAD(reg) do {\ - THREAD_ATOMIC_START;\ - (reg)->state--;\ - THREAD_ATOMIC_END;\ -} while(0) -#else -#define ONIG_STATE_INC(reg) /* Nothing */ -#define ONIG_STATE_DEC(reg) /* Nothing */ -#define ONIG_STATE_INC_THREAD(reg) /* Nothing */ -#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */ -#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ - -#if 0 -#ifdef HAVE_STDLIB_H -#include -#endif - -#if defined(HAVE_ALLOCA_H) && !defined(__GNUC__) -#include -#endif - -#ifdef HAVE_STRING_H -# include -#else -# include -#endif - -#include -#ifdef HAVE_SYS_TYPES_H -#ifndef __BORLANDC__ -#include -#endif -#endif - -#ifdef __BORLANDC__ -#include -#endif - -#ifdef ONIG_DEBUG -# include -#endif -#endif - -#include "regenc.h" - -#ifdef MIN -#undef MIN -#endif -#ifdef MAX -#undef MAX -#endif -#define MIN(a,b) 
(((a)>(b))?(b):(a)) -#define MAX(a,b) (((a)<(b))?(b):(a)) - -#define IS_NULL(p) (((void*)(p)) == (void*)0) -#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0) -#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL -#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY -#define NULL_UCHARP ((UChar* )0) - -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS - -#define PLATFORM_GET_INC(val,p,type) do{\ - val = *(type* )p;\ - (p) += sizeof(type);\ -} while(0) - -#else - -#define PLATFORM_GET_INC(val,p,type) do{\ - xmemcpy(&val, (p), sizeof(type));\ - (p) += sizeof(type);\ -} while(0) - -/* sizeof(OnigCodePoint) */ -#define WORD_ALIGNMENT_SIZE SIZEOF_LONG - -#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\ - (pad_size) = WORD_ALIGNMENT_SIZE \ - - ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\ - if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\ -} while (0) - -#define ALIGNMENT_RIGHT(addr) do {\ - (addr) += (WORD_ALIGNMENT_SIZE - 1);\ - (addr) -= ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\ -} while (0) - -#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ - -/* stack pop level */ -#define STACK_POP_LEVEL_FREE 0 -#define STACK_POP_LEVEL_MEM_START 1 -#define STACK_POP_LEVEL_ALL 2 - -/* optimize flags */ -#define ONIG_OPTIMIZE_NONE 0 -#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */ -#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */ -#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */ -#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */ -#define ONIG_OPTIMIZE_MAP 5 /* char map */ - -/* bit status */ -typedef unsigned int BitStatusType; - -#define BIT_STATUS_BITS_NUM (sizeof(BitStatusType) * 8) -#define BIT_STATUS_CLEAR(stats) (stats) = 0 -#define BIT_STATUS_ON_ALL(stats) (stats) = ~((BitStatusType )0) -#define BIT_STATUS_AT(stats,n) \ - ((n) < (int )BIT_STATUS_BITS_NUM ? 
((stats) & (1 << n)) : ((stats) & 1)) - -#define BIT_STATUS_ON_AT(stats,n) do {\ - if ((n) < (int )BIT_STATUS_BITS_NUM) \ - (stats) |= (1 << (n));\ - else\ - (stats) |= 1;\ -} while (0) - -#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\ - if ((n) < (int )BIT_STATUS_BITS_NUM)\ - (stats) |= (1 << (n));\ -} while (0) - - -#define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1) - -#define DIGITVAL(code) ((code) - '0') -#define ODIGITVAL(code) DIGITVAL(code) -#define XDIGITVAL(enc,code) \ - (ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \ - : (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10)) - -#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE) -#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE) -#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE) -#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND) -#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST) -#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY) -#define IS_FIND_CONDITION(option) ((option) & \ - (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY)) -#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL) -#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL) -#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION) - -/* OP_SET_OPTION is required for these options. -#define IS_DYNAMIC_OPTION(option) \ - (((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0) -*/ -/* ignore-case and multibyte status are included in compiled code. 
*/ -#define IS_DYNAMIC_OPTION(option) 0 - -#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \ - ((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) - -#define REPEAT_INFINITE -1 -#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE) - -/* bitset */ -#define BITS_PER_BYTE 8 -#define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE) -#define BITS_IN_ROOM (sizeof(Bits) * BITS_PER_BYTE) -#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM) - -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS -typedef unsigned int Bits; -#else -typedef unsigned char Bits; -#endif -typedef Bits BitSet[BITSET_SIZE]; -typedef Bits* BitSetRef; - -#define SIZE_BITSET sizeof(BitSet) - -#define BITSET_CLEAR(bs) do {\ - int i;\ - for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \ -} while (0) - -#define BS_ROOM(bs,pos) (bs)[pos / BITS_IN_ROOM] -#define BS_BIT(pos) (1 << (pos % BITS_IN_ROOM)) - -#define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos)) -#define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos) -#define BITSET_CLEAR_BIT(bs, pos) BS_ROOM(bs,pos) &= ~(BS_BIT(pos)) -#define BITSET_INVERT_BIT(bs, pos) BS_ROOM(bs,pos) ^= BS_BIT(pos) - -/* bytes buffer */ -typedef struct _BBuf { - UChar* p; - unsigned int used; - unsigned int alloc; -} BBuf; - -#define BBUF_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size)) - -#define BBUF_SIZE_INC(buf,inc) do{\ - (buf)->alloc += (inc);\ - (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ - if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ -} while (0) - -#define BBUF_EXPAND(buf,low) do{\ - unsigned int OldSize_ = (buf)->alloc * sizeof((buf)->p[0]);\ - do { (buf)->alloc *= 2; } while ((buf)->alloc < (unsigned int )low);\ - (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc, OldSize_);\ - if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ -} while (0) - -#define BBUF_ENSURE_SIZE(buf,size) do{\ - unsigned int new_alloc = (buf)->alloc;\ - while (new_alloc < (unsigned int )(size)) { new_alloc *= 2; }\ - if ((buf)->alloc != new_alloc) {\ - (buf)->p = 
(UChar* )xrealloc((buf)->p, new_alloc, (buf)->alloc);\ - if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ - (buf)->alloc = new_alloc;\ - }\ -} while (0) - -#define BBUF_WRITE(buf,pos,bytes,n) do{\ - int used = (pos) + (n);\ - if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\ - xmemcpy((buf)->p + (pos), (bytes), (n));\ - if ((buf)->used < (unsigned int )used) (buf)->used = used;\ -} while (0) - -#define BBUF_WRITE1(buf,pos,byte) do{\ - int used = (pos) + 1;\ - if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\ - (buf)->p[(pos)] = (byte);\ - if ((buf)->used < (unsigned int )used) (buf)->used = used;\ -} while (0) - -#define BBUF_ADD(buf,bytes,n) BBUF_WRITE((buf),(buf)->used,(bytes),(n)) -#define BBUF_ADD1(buf,byte) BBUF_WRITE1((buf),(buf)->used,(byte)) -#define BBUF_GET_ADD_ADDRESS(buf) ((buf)->p + (buf)->used) -#define BBUF_GET_OFFSET_POS(buf) ((buf)->used) - -/* from < to */ -#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\ - if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\ - xmemmove((buf)->p + (to), (buf)->p + (from), (n));\ - if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\ -} while (0) - -/* from > to */ -#define BBUF_MOVE_LEFT(buf,from,to,n) do {\ - xmemmove((buf)->p + (to), (buf)->p + (from), (n));\ -} while (0) - -/* from > to */ -#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\ - xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\ - (buf)->used -= (from - to);\ -} while (0) - -#define BBUF_INSERT(buf,pos,bytes,n) do {\ - if (pos >= (buf)->used) {\ - BBUF_WRITE(buf,pos,bytes,n);\ - }\ - else {\ - BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\ - xmemcpy((buf)->p + (pos), (bytes), (n));\ - }\ -} while (0) - -#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)] - - -#define ANCHOR_BEGIN_BUF (1<<0) -#define ANCHOR_BEGIN_LINE (1<<1) -#define ANCHOR_BEGIN_POSITION (1<<2) -#define ANCHOR_END_BUF (1<<3) -#define ANCHOR_SEMI_END_BUF (1<<4) -#define 
ANCHOR_END_LINE (1<<5) - -#define ANCHOR_WORD_BOUND (1<<6) -#define ANCHOR_NOT_WORD_BOUND (1<<7) -#define ANCHOR_WORD_BEGIN (1<<8) -#define ANCHOR_WORD_END (1<<9) -#define ANCHOR_PREC_READ (1<<10) -#define ANCHOR_PREC_READ_NOT (1<<11) -#define ANCHOR_LOOK_BEHIND (1<<12) -#define ANCHOR_LOOK_BEHIND_NOT (1<<13) - -#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */ -#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */ - -/* operation code */ -enum OpCode { - OP_FINISH = 0, /* matching process terminator (no more alternative) */ - OP_END = 1, /* pattern code terminator (success end) */ - - OP_EXACT1 = 2, /* single byte, N = 1 */ - OP_EXACT2, /* single byte, N = 2 */ - OP_EXACT3, /* single byte, N = 3 */ - OP_EXACT4, /* single byte, N = 4 */ - OP_EXACT5, /* single byte, N = 5 */ - OP_EXACTN, /* single byte */ - OP_EXACTMB2N1, /* mb-length = 2 N = 1 */ - OP_EXACTMB2N2, /* mb-length = 2 N = 2 */ - OP_EXACTMB2N3, /* mb-length = 2 N = 3 */ - OP_EXACTMB2N, /* mb-length = 2 */ - OP_EXACTMB3N, /* mb-length = 3 */ - OP_EXACTMBN, /* other length */ - - OP_EXACT1_IC, /* single byte, N = 1, ignore case */ - OP_EXACTN_IC, /* single byte, ignore case */ - - OP_CCLASS, - OP_CCLASS_MB, - OP_CCLASS_MIX, - OP_CCLASS_NOT, - OP_CCLASS_MB_NOT, - OP_CCLASS_MIX_NOT, - OP_CCLASS_NODE, /* pointer to CClassNode node */ - - OP_ANYCHAR, /* "." */ - OP_ANYCHAR_ML, /* "." 
multi-line */ - OP_ANYCHAR_STAR, /* ".*" */ - OP_ANYCHAR_ML_STAR, /* ".*" multi-line */ - OP_ANYCHAR_STAR_PEEK_NEXT, - OP_ANYCHAR_ML_STAR_PEEK_NEXT, - - OP_WORD, - OP_NOT_WORD, - OP_WORD_BOUND, - OP_NOT_WORD_BOUND, - OP_WORD_BEGIN, - OP_WORD_END, - - OP_BEGIN_BUF, - OP_END_BUF, - OP_BEGIN_LINE, - OP_END_LINE, - OP_SEMI_END_BUF, - OP_BEGIN_POSITION, - - OP_BACKREF1, - OP_BACKREF2, - OP_BACKREFN, - OP_BACKREFN_IC, - OP_BACKREF_MULTI, - OP_BACKREF_MULTI_IC, - OP_BACKREF_WITH_LEVEL, /* \k, \k */ - - OP_MEMORY_START, - OP_MEMORY_START_PUSH, /* push back-tracker to stack */ - OP_MEMORY_END_PUSH, /* push back-tracker to stack */ - OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */ - OP_MEMORY_END, - OP_MEMORY_END_REC, /* push marker to stack */ - - OP_FAIL, /* pop stack and move */ - OP_JUMP, - OP_PUSH, - OP_POP, - OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */ - OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */ - OP_REPEAT, /* {n,m} */ - OP_REPEAT_NG, /* {n,m}? (non greedy) */ - OP_REPEAT_INC, - OP_REPEAT_INC_NG, /* non greedy */ - OP_REPEAT_INC_SG, /* search and get in stack */ - OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */ - OP_NULL_CHECK_START, /* null loop checker start */ - OP_NULL_CHECK_END, /* null loop checker end */ - OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */ - OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ - - OP_PUSH_POS, /* (?=...) start */ - OP_POP_POS, /* (?=...) end */ - OP_PUSH_POS_NOT, /* (?!...) start */ - OP_FAIL_POS, /* (?!...) end */ - OP_PUSH_STOP_BT, /* (?>...) start */ - OP_POP_STOP_BT, /* (?>...) end */ - OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */ - OP_PUSH_LOOK_BEHIND_NOT, /* (? 
*/ - OP_RETURN, - - OP_STATE_CHECK_PUSH, /* combination explosion check and push */ - OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */ - OP_STATE_CHECK, /* check only */ - OP_STATE_CHECK_ANYCHAR_STAR, - OP_STATE_CHECK_ANYCHAR_ML_STAR, - - /* no need: IS_DYNAMIC_OPTION() == 0 */ - OP_SET_OPTION_PUSH, /* set option and push recover option */ - OP_SET_OPTION /* set option */ -}; - -typedef int RelAddrType; -typedef int AbsAddrType; -typedef int LengthType; -typedef int RepeatNumType; -typedef short int MemNumType; -typedef short int StateCheckNumType; -typedef void* PointerType; - -#define SIZE_OPCODE 1 -#define SIZE_RELADDR sizeof(RelAddrType) -#define SIZE_ABSADDR sizeof(AbsAddrType) -#define SIZE_LENGTH sizeof(LengthType) -#define SIZE_MEMNUM sizeof(MemNumType) -#define SIZE_STATE_CHECK_NUM sizeof(StateCheckNumType) -#define SIZE_REPEATNUM sizeof(RepeatNumType) -#define SIZE_OPTION sizeof(OnigOptionType) -#define SIZE_CODE_POINT sizeof(OnigCodePoint) -#define SIZE_POINTER sizeof(PointerType) - - -#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType) -#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType) -#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType) -#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType) -#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType) -#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType) -#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType) -#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType) - -/* code point's address must be aligned address. 
*/ -#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p)) -#define GET_BYTE_INC(byte,p) do{\ - byte = *(p);\ - (p)++;\ -} while(0) - - -/* op-code + arg size */ -#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE -#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1) -#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR) -#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR) -#define SIZE_OP_POP SIZE_OPCODE -#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1) -#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1) -#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_PUSH_POS SIZE_OPCODE -#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR) -#define SIZE_OP_POP_POS SIZE_OPCODE -#define SIZE_OP_FAIL_POS SIZE_OPCODE -#define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION) -#define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION) -#define SIZE_OP_FAIL SIZE_OPCODE -#define SIZE_OP_MEMORY_START (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_START_PUSH (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_END_PUSH (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE -#define SIZE_OP_POP_STOP_BT SIZE_OPCODE -#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH) -#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH) -#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE -#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR) -#define SIZE_OP_RETURN SIZE_OPCODE - -#ifdef USE_COMBINATION_EXPLOSION_CHECK -#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) -#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + 
SIZE_STATE_CHECK_NUM + SIZE_RELADDR) -#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) -#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) -#endif - -#define MC_ESC(syn) (syn)->meta_char_table.esc -#define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar -#define MC_ANYTIME(syn) (syn)->meta_char_table.anytime -#define MC_ZERO_OR_ONE_TIME(syn) (syn)->meta_char_table.zero_or_one_time -#define MC_ONE_OR_MORE_TIME(syn) (syn)->meta_char_table.one_or_more_time -#define MC_ANYCHAR_ANYTIME(syn) (syn)->meta_char_table.anychar_anytime - -#define IS_MC_ESC_CODE(code, syn) \ - ((code) == MC_ESC(syn) && \ - !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE)) - - -#define SYN_POSIX_COMMON_OP \ - ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \ - ONIG_SYN_OP_DECIMAL_BACKREF | \ - ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \ - ONIG_SYN_OP_LINE_ANCHOR | \ - ONIG_SYN_OP_ESC_CONTROL_CHARS ) - -#define SYN_GNU_REGEX_OP \ - ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \ - ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \ - ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \ - ONIG_SYN_OP_VBAR_ALT | \ - ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \ - ONIG_SYN_OP_QMARK_ZERO_ONE | \ - ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \ - ONIG_SYN_OP_ESC_W_WORD | \ - ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \ - ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \ - ONIG_SYN_OP_LINE_ANCHOR ) - -#define SYN_GNU_REGEX_BV \ - ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \ - ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \ - ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) - - -#define NCCLASS_FLAGS(cc) ((cc)->flags) -#define NCCLASS_FLAG_SET(cc,flag) (NCCLASS_FLAGS(cc) |= (flag)) -#define NCCLASS_FLAG_CLEAR(cc,flag) 
(NCCLASS_FLAGS(cc) &= ~(flag)) -#define IS_NCCLASS_FLAG_ON(cc,flag) ((NCCLASS_FLAGS(cc) & (flag)) != 0) - -/* cclass node */ -#define FLAG_NCCLASS_NOT (1<<0) -#define FLAG_NCCLASS_SHARE (1<<1) - -#define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT) -#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE) -#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT) -#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT) -#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE) - -typedef struct { - int type; - /* struct _Node* next; */ - /* unsigned int flags; */ -} NodeBase; - -typedef struct { - NodeBase base; - unsigned int flags; - BitSet bs; - BBuf* mbuf; /* multi-byte info or NULL */ -} CClassNode; - -typedef long OnigStackIndex; - -typedef struct _OnigStackType { - unsigned int type; - union { - struct { - UChar *pcode; /* byte code position */ - UChar *pstr; /* string position */ - UChar *pstr_prev; /* previous char position of pstr */ -#ifdef USE_COMBINATION_EXPLOSION_CHECK - unsigned int state_check; -#endif - } state; - struct { - int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ - UChar *pcode; /* byte code position (head of repeated target) */ - int num; /* repeat id */ - } repeat; - struct { - OnigStackIndex si; /* index of stack */ - } repeat_inc; - struct { - int num; /* memory num */ - UChar *pstr; /* start/end position */ - /* Following information is setted, if this stack type is MEM-START */ - OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */ - OnigStackIndex end; /* prev. 
info (for backtrack "(...)*" ) */ - } mem; - struct { - int num; /* null check id */ - UChar *pstr; /* start position */ - } null_check; -#ifdef USE_SUBEXP_CALL - struct { - UChar *ret_addr; /* byte code position */ - int num; /* null check id */ - UChar *pstr; /* string position */ - } call_frame; -#endif - } u; -} OnigStackType; - -typedef struct { - void* stack_p; - int stack_n; - OnigOptionType options; - OnigRegion* region; - const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - int best_len; /* for ONIG_OPTION_FIND_LONGEST */ - UChar* best_s; -#endif -#ifdef USE_COMBINATION_EXPLOSION_CHECK - void* state_check_buff; - int state_check_buff_size; -#endif -} OnigMatchArg; - - -#define IS_CODE_SB_WORD(enc,code) \ - (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code)) - -typedef struct OnigEndCallListItem { - struct OnigEndCallListItem* next; - void (*func)(void); -} OnigEndCallListItemType; - -extern void onig_add_end_call(void (*func)(void)); - - -#ifdef ONIG_DEBUG - -typedef struct { - short int opcode; - char* name; - short int arg_type; -} OnigOpInfoType; - -extern OnigOpInfoType OnigOpInfo[]; - - -extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, OnigEncoding enc)); - -#ifdef ONIG_DEBUG_STATISTICS -extern void onig_statistics_init P_((void)); -extern void onig_print_statistics P_((FILE* f)); -#endif -#endif - -extern UChar* onig_error_code_to_format P_((int code)); -extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...)); -extern int onig_bbuf_init P_((BBuf* buf, int size)); -extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo)); -extern void onig_chain_reduce P_((regex_t* reg)); -extern void onig_chain_link_add P_((regex_t* to, regex_t* add)); -extern void onig_transfer P_((regex_t* to, regex_t* from)); -extern 
int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); -extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, CClassNode* cc)); - -/* strend hash */ -typedef void hash_table_type; -typedef unsigned long hash_data_type; - -extern hash_table_type* onig_st_init_strend_table_with_size P_((int size)); -extern int onig_st_lookup_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value)); -extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value)); - -/* encoding property management */ -#define PROPERTY_LIST_ADD_PROP(Name, CR) \ - r = onigenc_property_list_add_property((UChar* )Name, CR,\ - &PropertyNameTable, &PropertyList, &PropertyListNum,\ - &PropertyListSize);\ - if (r != 0) goto end - -#define PROPERTY_LIST_INIT_CHECK \ - if (PropertyInited == 0) {\ - int r = onigenc_property_list_init(init_property_list);\ - if (r != 0) return r;\ - } - -extern int onigenc_property_list_add_property P_((UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize)); - -typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void); - -extern int onigenc_property_list_init P_((ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)); - -#endif /* REGINT_H */ diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.c deleted file mode 100644 index d7e645bf9a..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.c +++ /dev/null @@ -1,5556 +0,0 @@ -/********************************************************************** - regparse.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 K.Kosako - * All rights reserved. - * - * Copyright (c) 2015, Hewlett Packard Enterprise Development, L.P.
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "regparse.h" -#include "st.h" - -#define WARN_BUFSIZE 256 - -#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS - - -OnigSyntaxType OnigSyntaxRuby = { - (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | - ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | - ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | - ONIG_SYN_OP_ESC_C_CONTROL ) - & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) - , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT | - ONIG_SYN_OP2_OPTION_RUBY | - ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | - ONIG_SYN_OP2_ESC_G_SUBEXP_CALL | - ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | - ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | - ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | - ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | - ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB | - ONIG_SYN_OP2_ESC_H_XDIGIT ) - , ( SYN_GNU_REGEX_BV | - ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | - ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | - ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | - ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME | - ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY | - ONIG_SYN_WARN_CC_OP_NOT_ESCAPED | - ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT ) - , ONIG_OPTION_NONE - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - -OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY; - -extern void onig_null_warn(const char* s ARG_UNUSED) { } - -#ifdef DEFAULT_WARN_FUNCTION -static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION; -#else -static OnigWarnFunc onig_warn = onig_null_warn; -#endif - -#ifdef DEFAULT_VERB_WARN_FUNCTION -static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION; -#else -static OnigWarnFunc onig_verb_warn = onig_null_warn; -#endif - -extern void onig_set_warn_func(OnigWarnFunc f) -{ - onig_warn = f; -} - -extern void onig_set_verb_warn_func(OnigWarnFunc f) -{ - onig_verb_warn = f; -} - -static void -bbuf_free(BBuf* bbuf) -{ - if (IS_NOT_NULL(bbuf)) { - if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p); - xfree(bbuf); - } -} - -static int -bbuf_clone(BBuf** rto, BBuf* from) -{ - int r; - BBuf *to; - - *rto = to = (BBuf* )xmalloc(sizeof(BBuf)); - CHECK_NULL_RETURN_MEMERR(to); - r = BBUF_INIT(to, from->alloc); - if (r != 0) return r; - to->used = from->used; - xmemcpy(to->p, from->p, from->used); - return 0; -} - -#define BACKREF_REL_TO_ABS(rel_no, env) \ - ((env)->num_mem + 1 + (rel_no)) - -#define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f)) - -#define MBCODE_START_POS(enc) \ - (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80) - -#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \ - add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0)) - -#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\ - if (! 
ONIGENC_IS_SINGLEBYTE(enc)) {\ - r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\ - if (r) return r;\ - }\ -} while (0) - - -#define BITSET_IS_EMPTY(bs,empty) do {\ - int i;\ - empty = 1;\ - for (i = 0; i < (int )BITSET_SIZE; i++) {\ - if ((bs)[i] != 0) {\ - empty = 0; break;\ - }\ - }\ -} while (0) - -static void -bitset_set_range(BitSetRef bs, int from, int to) -{ - int i; - for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) { - BITSET_SET_BIT(bs, i); - } -} - -#if 0 -static void -bitset_set_all(BitSetRef bs) -{ - int i; - for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); } -} -#endif - -static void -bitset_invert(BitSetRef bs) -{ - int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); } -} - -static void -bitset_invert_to(BitSetRef from, BitSetRef to) -{ - int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); } -} - -static void -bitset_and(BitSetRef dest, BitSetRef bs) -{ - int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; } -} - -static void -bitset_or(BitSetRef dest, BitSetRef bs) -{ - int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; } -} - -static void -bitset_copy(BitSetRef dest, BitSetRef bs) -{ - int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; } -} - -extern int -onig_strncmp(const UChar* s1, const UChar* s2, int n) -{ - int x; - - while (n-- > 0) { - x = *s2++ - *s1++; - if (x) return x; - } - return 0; -} - -extern void -onig_strcpy(UChar* dest, const UChar* src, const UChar* end) -{ - int len = (int)(end - src); - if (len > 0) { - xmemcpy(dest, src, len); - dest[len] = (UChar )0; - } -} - -#ifdef USE_NAMED_GROUP -static UChar* -strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) -{ - int slen, term_len, i; - UChar *r; - - slen = (int)(end - s); - term_len = ONIGENC_MBC_MINLEN(enc); - - r = (UChar* )xmalloc(slen + term_len); - CHECK_NULL_RETURN(r); - xmemcpy(r, s, slen); - - for (i = 0; i < term_len; i++) - r[slen + i] = (UChar )0; - - return r; -} 
-#endif - -/* scan pattern methods */ -#define PEND_VALUE 0 - -#define PFETCH_READY UChar* pfetch_prev -#define PEND (p < end ? 0 : 1) -#define PUNFETCH p = pfetch_prev -#define PINC do { \ - pfetch_prev = p; \ - p += ONIGENC_MBC_ENC_LEN(enc, p); \ -} while (0) -#define PFETCH(c) do { \ - c = ONIGENC_MBC_TO_CODE(enc, p, end); \ - pfetch_prev = p; \ - p += ONIGENC_MBC_ENC_LEN(enc, p); \ -} while (0) - -#define PINC_S do { \ - p += ONIGENC_MBC_ENC_LEN(enc, p); \ -} while (0) -#define PFETCH_S(c) do { \ - c = ONIGENC_MBC_TO_CODE(enc, p, end); \ - p += ONIGENC_MBC_ENC_LEN(enc, p); \ -} while (0) - -#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE) -#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c) - -static UChar* -strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end, - int capa, int oldCapa) -{ - UChar* r; - - if (dest) - r = (UChar* )xrealloc(dest, capa + 1, oldCapa); - else - r = (UChar* )xmalloc(capa + 1); - - CHECK_NULL_RETURN(r); - onig_strcpy(r + (dest_end - dest), src, src_end); - return r; -} - -/* dest on static area */ -static UChar* -strcat_capa_from_static(UChar* dest, UChar* dest_end, - const UChar* src, const UChar* src_end, int capa) -{ - UChar* r; - - r = (UChar* )xmalloc(capa + 1); - CHECK_NULL_RETURN(r); - onig_strcpy(r, dest, dest_end); - onig_strcpy(r + (dest_end - dest), src, src_end); - return r; -} - - -#ifdef USE_ST_LIBRARY - -typedef struct { - UChar* s; - UChar* end; -} st_str_end_key; - -static int -str_end_cmp(st_str_end_key* x, st_str_end_key* y) -{ - UChar *p, *q; - int c; - - if ((x->end - x->s) != (y->end - y->s)) - return 1; - - p = x->s; - q = y->s; - while (p < x->end) { - c = (int )*p - (int )*q; - if (c != 0) return c; - - p++; q++; - } - - return 0; -} - -static int -str_end_hash(st_str_end_key* x) -{ - UChar *p; - int val = 0; - - p = x->s; - while (p < x->end) { - val = val * 997 + (int )*p++; - } - - return val + (val >> 5); -} - -extern hash_table_type* 
-onig_st_init_strend_table_with_size(int size) -{ - static struct st_hash_type hashType = { - str_end_cmp, - str_end_hash, - }; - - return (hash_table_type* ) - onig_st_init_table_with_size(&hashType, size); -} - -extern int -onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, - const UChar* end_key, hash_data_type *value) -{ - st_str_end_key key; - - key.s = (UChar* )str_key; - key.end = (UChar* )end_key; - - return onig_st_lookup(table, (st_data_t )(&key), value); -} - -extern int -onig_st_insert_strend(hash_table_type* table, const UChar* str_key, - const UChar* end_key, hash_data_type value) -{ - st_str_end_key* key; - int result; - - key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key)); - key->s = (UChar* )str_key; - key->end = (UChar* )end_key; - result = onig_st_insert(table, (st_data_t )key, value); - if (result) { - xfree(key); - } - return result; -} - -#endif /* USE_ST_LIBRARY */ - - -#ifdef USE_NAMED_GROUP - -#define INIT_NAME_BACKREFS_ALLOC_NUM 8 - -typedef struct { - UChar* name; - int name_len; /* byte length */ - int back_num; /* number of backrefs */ - int back_alloc; - int back_ref1; - int* back_refs; -} NameEntry; - -#ifdef USE_ST_LIBRARY - -typedef st_table NameTable; -typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ - -#define NAMEBUF_SIZE 24 -#define NAMEBUF_SIZE_1 25 - -#ifdef ONIG_DEBUG -static int -i_print_name_entry(UChar* key, NameEntry* e, void* arg) -{ - int i; - FILE* fp = (FILE* )arg; - - fprintf(fp, "%s: ", e->name); - if (e->back_num == 0) - fputs("-", fp); - else if (e->back_num == 1) - fprintf(fp, "%d", e->back_ref1); - else { - for (i = 0; i < e->back_num; i++) { - if (i > 0) fprintf(fp, ", "); - fprintf(fp, "%d", e->back_refs[i]); - } - } - fputs("\n", fp); - return ST_CONTINUE; -} - -extern int -onig_print_names(FILE* fp, regex_t* reg) -{ - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) { - fprintf(fp, "name table\n"); - onig_st_foreach(t, i_print_name_entry, 
(HashDataType )fp); - fputs("\n", fp); - } - return 0; -} -#endif /* ONIG_DEBUG */ - -static int -i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED) -{ - xfree(e->name); - if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); - xfree(key); - xfree(e); - return ST_DELETE; -} - -static int -names_clear(regex_t* reg) -{ - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) { - onig_st_foreach(t, i_free_name_entry, 0); - } - return 0; -} - -extern int -onig_names_free(regex_t* reg) -{ - int r; - NameTable* t; - - r = names_clear(reg); - if (r) return r; - - t = (NameTable* )reg->name_table; - if (IS_NOT_NULL(t)) onig_st_free_table(t); - reg->name_table = (void* )NULL; - return 0; -} - -static NameEntry* -name_find(regex_t* reg, const UChar* name, const UChar* name_end) -{ - NameEntry* e; - NameTable* t = (NameTable* )reg->name_table; - - e = (NameEntry* )NULL; - if (IS_NOT_NULL(t)) { - onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e))); - } - return e; -} - -typedef struct { - int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*); - regex_t* reg; - void* arg; - int ret; - OnigEncoding enc; -} INamesArg; - -static int -i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg) -{ - int r = (*(arg->func))(e->name, - e->name + e->name_len, - e->back_num, - (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), - arg->reg, arg->arg); - if (r != 0) { - arg->ret = r; - return ST_STOP; - } - return ST_CONTINUE; -} - -extern int -onig_foreach_name(regex_t* reg, - int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) -{ - INamesArg narg; - NameTable* t = (NameTable* )reg->name_table; - - narg.ret = 0; - if (IS_NOT_NULL(t)) { - narg.func = func; - narg.reg = reg; - narg.arg = arg; - narg.enc = reg->enc; /* should be pattern encoding. 
*/ - onig_st_foreach(t, i_names, (HashDataType )&narg); - } - return narg.ret; -} - -static int -i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map) -{ - int i; - - if (e->back_num > 1) { - for (i = 0; i < e->back_num; i++) { - e->back_refs[i] = map[e->back_refs[i]].new_val; - } - } - else if (e->back_num == 1) { - e->back_ref1 = map[e->back_ref1].new_val; - } - - return ST_CONTINUE; -} - -extern int -onig_renumber_name_table(regex_t* reg, GroupNumRemap* map) -{ - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) { - onig_st_foreach(t, i_renumber_name, (HashDataType )map); - } - return 0; -} - - -extern int -onig_number_of_names(regex_t* reg) -{ - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) - return t->num_entries; - else - return 0; -} - -#else /* USE_ST_LIBRARY */ - -#define INIT_NAMES_ALLOC_NUM 8 - -typedef struct { - NameEntry* e; - int num; - int alloc; -} NameTable; - -#ifdef ONIG_DEBUG -extern int -onig_print_names(FILE* fp, regex_t* reg) -{ - int i, j; - NameEntry* e; - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t) && t->num > 0) { - fprintf(fp, "name table\n"); - for (i = 0; i < t->num; i++) { - e = &(t->e[i]); - fprintf(fp, "%s: ", e->name); - if (e->back_num == 0) { - fputs("-", fp); - } - else if (e->back_num == 1) { - fprintf(fp, "%d", e->back_ref1); - } - else { - for (j = 0; j < e->back_num; j++) { - if (j > 0) fprintf(fp, ", "); - fprintf(fp, "%d", e->back_refs[j]); - } - } - fputs("\n", fp); - } - fputs("\n", fp); - } - return 0; -} -#endif - -static int -names_clear(regex_t* reg) -{ - int i; - NameEntry* e; - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) { - for (i = 0; i < t->num; i++) { - e = &(t->e[i]); - if (IS_NOT_NULL(e->name)) { - xfree(e->name); - e->name = NULL; - e->name_len = 0; - e->back_num = 0; - e->back_alloc = 0; - if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); - e->back_refs = (int* )NULL; - } - } - if 
(IS_NOT_NULL(t->e)) { - xfree(t->e); - t->e = NULL; - } - t->num = 0; - } - return 0; -} - -extern int -onig_names_free(regex_t* reg) -{ - int r; - NameTable* t; - - r = names_clear(reg); - if (r) return r; - - t = (NameTable* )reg->name_table; - if (IS_NOT_NULL(t)) xfree(t); - reg->name_table = NULL; - return 0; -} - -static NameEntry* -name_find(regex_t* reg, UChar* name, UChar* name_end) -{ - int i, len; - NameEntry* e; - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) { - len = name_end - name; - for (i = 0; i < t->num; i++) { - e = &(t->e[i]); - if (len == e->name_len && onig_strncmp(name, e->name, len) == 0) - return e; - } - } - return (NameEntry* )NULL; -} - -extern int -onig_foreach_name(regex_t* reg, - int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) -{ - int i, r; - NameEntry* e; - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) { - for (i = 0; i < t->num; i++) { - e = &(t->e[i]); - r = (*func)(e->name, e->name + e->name_len, e->back_num, - (e->back_num > 1 ? 
e->back_refs : &(e->back_ref1)), - reg, arg); - if (r != 0) return r; - } - } - return 0; -} - -extern int -onig_number_of_names(regex_t* reg) -{ - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) - return t->num; - else - return 0; -} - -#endif /* else USE_ST_LIBRARY */ - -static int -name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) -{ - int alloc; - NameEntry* e; - NameTable* t = (NameTable* )reg->name_table; - - if (name_end - name <= 0) - return ONIGERR_EMPTY_GROUP_NAME; - - e = name_find(reg, name, name_end); - if (IS_NULL(e)) { -#ifdef USE_ST_LIBRARY - if (IS_NULL(t)) { - t = onig_st_init_strend_table_with_size(5); - reg->name_table = (void* )t; - } - e = (NameEntry* )xmalloc(sizeof(NameEntry)); - CHECK_NULL_RETURN_MEMERR(e); - - e->name = strdup_with_null(reg->enc, name, name_end); - if (IS_NULL(e->name)) { - xfree(e); return ONIGERR_MEMORY; - } - onig_st_insert_strend(t, e->name, (e->name + (name_end - name)), - (HashDataType )e); - - e->name_len = (int)(name_end - name); - e->back_num = 0; - e->back_alloc = 0; - e->back_refs = (int* )NULL; - -#else - - if (IS_NULL(t)) { - alloc = INIT_NAMES_ALLOC_NUM; - t = (NameTable* )xmalloc(sizeof(NameTable)); - CHECK_NULL_RETURN_MEMERR(t); - t->e = NULL; - t->alloc = 0; - t->num = 0; - - t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc); - if (IS_NULL(t->e)) { - xfree(t); - return ONIGERR_MEMORY; - } - t->alloc = alloc; - reg->name_table = t; - goto clear; - } - else if (t->num == t->alloc) { - int i; - - alloc = t->alloc * 2; - t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc); - CHECK_NULL_RETURN_MEMERR(t->e); - t->alloc = alloc; - - clear: - for (i = t->num; i < t->alloc; i++) { - t->e[i].name = NULL; - t->e[i].name_len = 0; - t->e[i].back_num = 0; - t->e[i].back_alloc = 0; - t->e[i].back_refs = (int* )NULL; - } - } - e = &(t->e[t->num]); - t->num++; - e->name = strdup_with_null(reg->enc, name, name_end); - if (IS_NULL(e->name)) return 
ONIGERR_MEMORY; - e->name_len = name_end - name; -#endif - } - - if (e->back_num >= 1 && - ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) { - onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME, - name, name_end); - return ONIGERR_MULTIPLEX_DEFINED_NAME; - } - - e->back_num++; - if (e->back_num == 1) { - e->back_ref1 = backref; - } - else { - if (e->back_num == 2) { - alloc = INIT_NAME_BACKREFS_ALLOC_NUM; - e->back_refs = (int* )xmalloc(sizeof(int) * alloc); - CHECK_NULL_RETURN_MEMERR(e->back_refs); - e->back_alloc = alloc; - e->back_refs[0] = e->back_ref1; - e->back_refs[1] = backref; - } - else { - if (e->back_num > e->back_alloc) { - alloc = e->back_alloc * 2; - e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc, sizeof(int) * e->back_alloc); - CHECK_NULL_RETURN_MEMERR(e->back_refs); - e->back_alloc = alloc; - } - e->back_refs[e->back_num - 1] = backref; - } - } - - return 0; -} - -extern int -onig_name_to_group_numbers(regex_t* reg, const UChar* name, - const UChar* name_end, int** nums) -{ - NameEntry* e = name_find(reg, name, name_end); - - if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE; - - switch (e->back_num) { - case 0: - break; - case 1: - *nums = &(e->back_ref1); - break; - default: - *nums = e->back_refs; - break; - } - return e->back_num; -} - -extern int -onig_name_to_backref_number(regex_t* reg, const UChar* name, - const UChar* name_end, OnigRegion *region) -{ - int i, n, *nums; - - n = onig_name_to_group_numbers(reg, name, name_end, &nums); - if (n < 0) - return n; - else if (n == 0) - return ONIGERR_PARSER_BUG; - else if (n == 1) - return nums[0]; - else { - if (IS_NOT_NULL(region)) { - for (i = n - 1; i >= 0; i--) { - if (region->beg[nums[i]] != ONIG_REGION_NOTPOS) - return nums[i]; - } - } - return nums[n - 1]; - } -} - -#else /* USE_NAMED_GROUP */ - -extern int -onig_name_to_group_numbers(regex_t* reg, const UChar* name, - const UChar* name_end, int** nums) -{ - return 
ONIG_NO_SUPPORT_CONFIG; -} - -extern int -onig_name_to_backref_number(regex_t* reg, const UChar* name, - const UChar* name_end, OnigRegion* region) -{ - return ONIG_NO_SUPPORT_CONFIG; -} - -extern int -onig_foreach_name(regex_t* reg, - int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) -{ - return ONIG_NO_SUPPORT_CONFIG; -} - -extern int -onig_number_of_names(regex_t* reg) -{ - return 0; -} -#endif /* else USE_NAMED_GROUP */ - -extern int -onig_noname_group_capture_is_active(regex_t* reg) -{ - if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP)) - return 0; - -#ifdef USE_NAMED_GROUP - if (onig_number_of_names(reg) > 0 && - IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { - return 0; - } -#endif - - return 1; -} - - -#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16 - -static void -scan_env_clear(ScanEnv* env) -{ - int i; - - BIT_STATUS_CLEAR(env->capture_history); - BIT_STATUS_CLEAR(env->bt_mem_start); - BIT_STATUS_CLEAR(env->bt_mem_end); - BIT_STATUS_CLEAR(env->backrefed_mem); - env->error = (UChar* )NULL; - env->error_end = (UChar* )NULL; - env->num_call = 0; - env->num_mem = 0; -#ifdef USE_NAMED_GROUP - env->num_named = 0; -#endif - env->mem_alloc = 0; - env->mem_nodes_dynamic = (Node** )NULL; - - for (i = 0; i < SCANENV_MEMNODES_SIZE; i++) - env->mem_nodes_static[i] = NULL_NODE; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - env->num_comb_exp_check = 0; - env->comb_exp_max_regnum = 0; - env->curr_max_regnum = 0; - env->has_recursion = 0; -#endif -} - -static int -scan_env_add_mem_entry(ScanEnv* env) -{ - int i, need, alloc; - Node** p; - - need = env->num_mem + 1; - if (need >= SCANENV_MEMNODES_SIZE) { - if (env->mem_alloc <= need) { - if (IS_NULL(env->mem_nodes_dynamic)) { - alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE; - p = (Node** )xmalloc(sizeof(Node*) * alloc); - xmemcpy(p, env->mem_nodes_static, - sizeof(Node*) * SCANENV_MEMNODES_SIZE); - } - else 
{ - alloc = env->mem_alloc * 2; - p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc, sizeof(Node*) * env->mem_alloc); - } - CHECK_NULL_RETURN_MEMERR(p); - - for (i = env->num_mem + 1; i < alloc; i++) - p[i] = NULL_NODE; - - env->mem_nodes_dynamic = p; - env->mem_alloc = alloc; - } - } - - env->num_mem++; - return env->num_mem; -} - -static int -scan_env_set_mem_node(ScanEnv* env, int num, Node* node) -{ - if (env->num_mem >= num) - SCANENV_MEM_NODES(env)[num] = node; - else - return ONIGERR_PARSER_BUG; - return 0; -} - - -#ifdef USE_PARSE_TREE_NODE_RECYCLE -typedef struct _FreeNode { - struct _FreeNode* next; -} FreeNode; - -static FreeNode* FreeNodeList = (FreeNode* )NULL; -#endif - -extern void -onig_node_free(Node* node) -{ - start: - if (IS_NULL(node)) return ; - - switch (NTYPE(node)) { - case NT_STR: - if (NSTR(node)->capa != 0 && - IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { - xfree(NSTR(node)->s); - } - break; - - case NT_LIST: - case NT_ALT: - onig_node_free(NCAR(node)); - { - Node* next_node = NCDR(node); - -#ifdef USE_PARSE_TREE_NODE_RECYCLE - { - FreeNode* n = (FreeNode* )node; - - THREAD_ATOMIC_START; - n->next = FreeNodeList; - FreeNodeList = n; - THREAD_ATOMIC_END; - } -#else - xfree(node); -#endif - node = next_node; - goto start; - } - break; - - case NT_CCLASS: - { - CClassNode* cc = NCCLASS(node); - - if (IS_NCCLASS_SHARE(cc)) return ; - if (cc->mbuf) - bbuf_free(cc->mbuf); - } - break; - - case NT_QTFR: - if (NQTFR(node)->target) - onig_node_free(NQTFR(node)->target); - break; - - case NT_ENCLOSE: - if (NENCLOSE(node)->target) - onig_node_free(NENCLOSE(node)->target); - break; - - case NT_BREF: - if (IS_NOT_NULL(NBREF(node)->back_dynamic)) - xfree(NBREF(node)->back_dynamic); - break; - - case NT_ANCHOR: - if (NANCHOR(node)->target) - onig_node_free(NANCHOR(node)->target); - break; - } - -#ifdef USE_PARSE_TREE_NODE_RECYCLE - { - FreeNode* n = (FreeNode* )node; - - THREAD_ATOMIC_START; - n->next = 
FreeNodeList; - FreeNodeList = n; - THREAD_ATOMIC_END; - } -#else - xfree(node); -#endif -} - -#ifdef USE_PARSE_TREE_NODE_RECYCLE -extern int -onig_free_node_list(void) -{ - FreeNode* n; - - /* THREAD_ATOMIC_START; */ - while (IS_NOT_NULL(FreeNodeList)) { - n = FreeNodeList; - FreeNodeList = FreeNodeList->next; - xfree(n); - } - /* THREAD_ATOMIC_END; */ - return 0; -} -#endif - -static Node* -node_new(void) -{ - Node* node; - -#ifdef USE_PARSE_TREE_NODE_RECYCLE - THREAD_ATOMIC_START; - if (IS_NOT_NULL(FreeNodeList)) { - node = (Node* )FreeNodeList; - FreeNodeList = FreeNodeList->next; - THREAD_ATOMIC_END; - return node; - } - THREAD_ATOMIC_END; -#endif - - node = (Node* )xmalloc(sizeof(Node)); - /* xmemset(node, 0, sizeof(Node)); */ - return node; -} - - -static void -initialize_cclass(CClassNode* cc) -{ - BITSET_CLEAR(cc->bs); - /* cc->base.flags = 0; */ - cc->flags = 0; - cc->mbuf = NULL; -} - -static Node* -node_new_cclass(void) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_CCLASS); - initialize_cclass(NCCLASS(node)); - return node; -} - -static Node* -node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out, - const OnigCodePoint ranges[]) -{ - int n, i; - CClassNode* cc; - OnigCodePoint j; - - Node* node = node_new_cclass(); - CHECK_NULL_RETURN(node); - - cc = NCCLASS(node); - if (not != 0) NCCLASS_SET_NOT(cc); - - BITSET_CLEAR(cc->bs); - if (sb_out > 0 && IS_NOT_NULL(ranges)) { - n = ONIGENC_CODE_RANGE_NUM(ranges); - for (i = 0; i < n; i++) { - for (j = ONIGENC_CODE_RANGE_FROM(ranges, i); - j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) { - if (j >= sb_out) goto sb_end; - - BITSET_SET_BIT(cc->bs, j); - } - } - } - - sb_end: - if (IS_NULL(ranges)) { - is_null: - cc->mbuf = NULL; - } - else { - BBuf* bbuf; - - n = ONIGENC_CODE_RANGE_NUM(ranges); - if (n == 0) goto is_null; - - bbuf = (BBuf* )xmalloc(sizeof(BBuf)); - CHECK_NULL_RETURN(bbuf); - bbuf->alloc = n + 1; - bbuf->used = n + 1; - bbuf->p = (UChar* 
)((void* )ranges); - - cc->mbuf = bbuf; - } - - return node; -} - -static Node* -node_new_ctype(int type, int not) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_CTYPE); - NCTYPE(node)->ctype = type; - NCTYPE(node)->not = not; - return node; -} - -static Node* -node_new_anychar(void) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_CANY); - return node; -} - -static Node* -node_new_list(Node* left, Node* right) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_LIST); - NCAR(node) = left; - NCDR(node) = right; - return node; -} - -extern Node* -onig_node_new_list(Node* left, Node* right) -{ - return node_new_list(left, right); -} - -extern Node* -onig_node_list_add(Node* list, Node* x) -{ - Node *n; - - n = onig_node_new_list(x, NULL); - if (IS_NULL(n)) return NULL_NODE; - - if (IS_NOT_NULL(list)) { - while (IS_NOT_NULL(NCDR(list))) - list = NCDR(list); - - NCDR(list) = n; - } - - return n; -} - -extern Node* -onig_node_new_alt(Node* left, Node* right) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_ALT); - NCAR(node) = left; - NCDR(node) = right; - return node; -} - -extern Node* -onig_node_new_anchor(int type) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_ANCHOR); - NANCHOR(node)->type = type; - NANCHOR(node)->target = NULL; - NANCHOR(node)->char_len = -1; - return node; -} - -static Node* -node_new_backref(int back_num, int* backrefs, int by_name, -#ifdef USE_BACKREF_WITH_LEVEL - int exist_level, int nest_level, -#endif - ScanEnv* env) -{ - int i; - Node* node = node_new(); - - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_BREF); - NBREF(node)->state = 0; - NBREF(node)->back_num = back_num; - NBREF(node)->back_dynamic = (int* )NULL; - if (by_name != 0) - NBREF(node)->state |= NST_NAME_REF; - -#ifdef USE_BACKREF_WITH_LEVEL - if (exist_level != 0) { - NBREF(node)->state |= NST_NEST_LEVEL; - 
NBREF(node)->nest_level = nest_level; - } -#endif - - for (i = 0; i < back_num; i++) { - if (backrefs[i] <= env->num_mem && - IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) { - NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */ - break; - } - } - - if (back_num <= NODE_BACKREFS_SIZE) { - for (i = 0; i < back_num; i++) - NBREF(node)->back_static[i] = backrefs[i]; - } - else { - int* p = (int* )xmalloc(sizeof(int) * back_num); - if (IS_NULL(p)) { - onig_node_free(node); - return NULL; - } - NBREF(node)->back_dynamic = p; - for (i = 0; i < back_num; i++) - p[i] = backrefs[i]; - } - return node; -} - -#ifdef USE_SUBEXP_CALL -static Node* -node_new_call(UChar* name, UChar* name_end, int gnum) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_CALL); - NCALL(node)->state = 0; - NCALL(node)->target = NULL_NODE; - NCALL(node)->name = name; - NCALL(node)->name_end = name_end; - NCALL(node)->group_num = gnum; /* call by number if gnum != 0 */ - return node; -} -#endif - -static Node* -node_new_quantifier(int lower, int upper, int by_number) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_QTFR); - NQTFR(node)->state = 0; - NQTFR(node)->target = NULL; - NQTFR(node)->lower = lower; - NQTFR(node)->upper = upper; - NQTFR(node)->greedy = 1; - NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY; - NQTFR(node)->head_exact = NULL_NODE; - NQTFR(node)->next_head_exact = NULL_NODE; - NQTFR(node)->is_refered = 0; - if (by_number != 0) - NQTFR(node)->state |= NST_BY_NUMBER; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - NQTFR(node)->comb_exp_check_num = 0; -#endif - - return node; -} - -static Node* -node_new_enclose(int type) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_ENCLOSE); - NENCLOSE(node)->type = type; - NENCLOSE(node)->state = 0; - NENCLOSE(node)->regnum = 0; - NENCLOSE(node)->option = 0; - NENCLOSE(node)->target = NULL; - NENCLOSE(node)->call_addr = -1; - 
NENCLOSE(node)->opt_count = 0; - return node; -} - -extern Node* -onig_node_new_enclose(int type) -{ - return node_new_enclose(type); -} - -static Node* -node_new_enclose_memory(OnigOptionType option, int is_named) -{ - Node* node = node_new_enclose(ENCLOSE_MEMORY); - CHECK_NULL_RETURN(node); - if (is_named != 0) - SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP); - -#ifdef USE_SUBEXP_CALL - NENCLOSE(node)->option = option; -#endif - return node; -} - -static Node* -node_new_option(OnigOptionType option) -{ - Node* node = node_new_enclose(ENCLOSE_OPTION); - CHECK_NULL_RETURN(node); - NENCLOSE(node)->option = option; - return node; -} - -extern int -onig_node_str_cat(Node* node, const UChar* s, const UChar* end) -{ - int addlen = (int)(end - s); - - if (addlen > 0) { - int len = (int)(NSTR(node)->end - NSTR(node)->s); - - if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) { - UChar* p; - int capa = len + addlen + NODE_STR_MARGIN; - - if (capa <= NSTR(node)->capa) { - onig_strcpy(NSTR(node)->s + len, s, end); - } - else { - if (NSTR(node)->s == NSTR(node)->buf) - p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end, - s, end, capa); - else - p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa, NSTR(node)->capa); - - CHECK_NULL_RETURN_MEMERR(p); - NSTR(node)->s = p; - NSTR(node)->capa = capa; - } - } - else { - onig_strcpy(NSTR(node)->s + len, s, end); - } - NSTR(node)->end = NSTR(node)->s + len + addlen; - } - - return 0; -} - -extern int -onig_node_str_set(Node* node, const UChar* s, const UChar* end) -{ - onig_node_str_clear(node); - return onig_node_str_cat(node, s, end); -} - -static int -node_str_cat_char(Node* node, UChar c) -{ - UChar s[1]; - - s[0] = c; - return onig_node_str_cat(node, s, s + 1); -} - -extern void -onig_node_conv_to_str_node(Node* node, int flag) -{ - SET_NTYPE(node, NT_STR); - NSTR(node)->flag = flag; - NSTR(node)->capa = 0; - NSTR(node)->s = NSTR(node)->buf; - NSTR(node)->end = NSTR(node)->buf; -} - -extern void 
-onig_node_str_clear(Node* node) -{ - if (NSTR(node)->capa != 0 && - IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { - xfree(NSTR(node)->s); - } - - NSTR(node)->capa = 0; - NSTR(node)->flag = 0; - NSTR(node)->s = NSTR(node)->buf; - NSTR(node)->end = NSTR(node)->buf; -} - -static Node* -node_new_str(const UChar* s, const UChar* end) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_STR); - NSTR(node)->capa = 0; - NSTR(node)->flag = 0; - NSTR(node)->s = NSTR(node)->buf; - NSTR(node)->end = NSTR(node)->buf; - if (onig_node_str_cat(node, s, end)) { - onig_node_free(node); - return NULL; - } - return node; -} - -extern Node* -onig_node_new_str(const UChar* s, const UChar* end) -{ - return node_new_str(s, end); -} - -static Node* -node_new_str_raw(UChar* s, UChar* end) -{ - Node* node = node_new_str(s, end); - NSTRING_SET_RAW(node); - return node; -} - -static Node* -node_new_empty(void) -{ - return node_new_str(NULL, NULL); -} - -static Node* -node_new_str_raw_char(UChar c) -{ - UChar p[1]; - - p[0] = c; - return node_new_str_raw(p, p + 1); -} - -static Node* -str_node_split_last_char(StrNode* sn, OnigEncoding enc) -{ - const UChar *p; - Node* n = NULL_NODE; - - if (sn->end > sn->s) { - p = onigenc_get_prev_char_head(enc, sn->s, sn->end); - if (p && p > sn->s) { /* can be splitted. */ - n = node_new_str(p, sn->end); - if ((sn->flag & NSTR_RAW) != 0) - NSTRING_SET_RAW(n); - sn->end = (UChar* )p; - } - } - return n; -} - -static int -str_node_can_be_split(StrNode* sn, OnigEncoding enc) -{ - if (sn->end > sn->s) { - return ((enclen(enc, sn->s) < sn->end - sn->s) ? 
1 : 0); - } - return 0; -} - -#ifdef USE_PAD_TO_SHORT_BYTE_CHAR -static int -node_str_head_pad(StrNode* sn, int num, UChar val) -{ - UChar buf[NODE_STR_BUF_SIZE]; - int i, len; - - len = sn->end - sn->s; - onig_strcpy(buf, sn->s, sn->end); - onig_strcpy(&(sn->s[num]), buf, buf + len); - sn->end += num; - - for (i = 0; i < num; i++) { - sn->s[i] = val; - } -} -#endif - -extern int -onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc) -{ - unsigned int num, val; - OnigCodePoint c; - UChar* p = *src; - PFETCH_READY; - - num = 0; - while (!PEND) { - PFETCH(c); - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - val = (unsigned int )DIGITVAL(c); - if ((INT_MAX_LIMIT - val) / 10UL < num) - return -1; /* overflow */ - - num = num * 10 + val; - } - else { - PUNFETCH; - break; - } - } - *src = p; - return num; -} - -static int -scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen, - OnigEncoding enc) -{ - OnigCodePoint c; - unsigned int num, val; - UChar* p = *src; - PFETCH_READY; - - num = 0; - while (!PEND && maxlen-- != 0) { - PFETCH(c); - if (ONIGENC_IS_CODE_XDIGIT(enc, c)) { - val = (unsigned int )XDIGITVAL(enc,c); - if ((INT_MAX_LIMIT - val) / 16UL < num) - return -1; /* overflow */ - - num = (num << 4) + XDIGITVAL(enc,c); - } - else { - PUNFETCH; - break; - } - } - *src = p; - return num; -} - -static int -scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen, - OnigEncoding enc) -{ - OnigCodePoint c; - unsigned int num, val; - UChar* p = *src; - PFETCH_READY; - - num = 0; - while (!PEND && maxlen-- != 0) { - PFETCH(c); - if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') { - val = ODIGITVAL(c); - if ((INT_MAX_LIMIT - val) / 8UL < num) - return -1; /* overflow */ - - num = (num << 3) + val; - } - else { - PUNFETCH; - break; - } - } - *src = p; - return num; -} - - -#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \ - BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT) - -/* data format: - [n][from-1][to-1][from-2][to-2] ... 
[from-n][to-n] - (all data size is OnigCodePoint) - */ -static int -new_code_range(BBuf** pbuf) -{ -#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5) - int r; - OnigCodePoint n; - BBuf* bbuf; - - bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf)); - CHECK_NULL_RETURN_MEMERR(*pbuf); - r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE); - if (r) return r; - - n = 0; - BBUF_WRITE_CODE_POINT(bbuf, 0, n); - return 0; -} - -static int -add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to) -{ - int r, inc_n, pos; - int low, high, bound, x; - OnigCodePoint n, *data; - BBuf* bbuf; - - if (from > to) { - n = from; from = to; to = n; - } - - if (IS_NULL(*pbuf)) { - r = new_code_range(pbuf); - if (r) return r; - bbuf = *pbuf; - n = 0; - } - else { - bbuf = *pbuf; - GET_CODE_POINT(n, bbuf->p); - } - data = (OnigCodePoint* )(bbuf->p); - data++; - - for (low = 0, bound = n; low < bound; ) { - x = (low + bound) >> 1; - if (from > data[x*2 + 1]) - low = x + 1; - else - bound = x; - } - - for (high = low, bound = n; high < bound; ) { - x = (high + bound) >> 1; - if (to >= data[x*2] - 1) - high = x + 1; - else - bound = x; - } - - inc_n = low + 1 - high; - if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM) - return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES; - - if (inc_n != 1) { - if (from > data[low*2]) - from = data[low*2]; - if (to < data[(high - 1)*2 + 1]) - to = data[(high - 1)*2 + 1]; - } - - if (inc_n != 0 && (OnigCodePoint )high < n) { - int from_pos = SIZE_CODE_POINT * (1 + high * 2); - int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2); - int size = (n - high) * 2 * SIZE_CODE_POINT; - - if (inc_n > 0) { - BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size); - } - else { - BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos); - } - } - - pos = SIZE_CODE_POINT * (1 + low * 2); - BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2); - BBUF_WRITE_CODE_POINT(bbuf, pos, from); - BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to); - n += inc_n; - BBUF_WRITE_CODE_POINT(bbuf, 0, n); - - 
return 0; -} - -static int -add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to) -{ - if (from > to) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) - return 0; - else - return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; - } - - return add_code_range_to_buf(pbuf, from, to); -} - -static int -not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf) -{ - int r, i, n; - OnigCodePoint pre, from, *data, to = 0; - - *pbuf = (BBuf* )NULL; - if (IS_NULL(bbuf)) { - set_all: - return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); - } - - data = (OnigCodePoint* )(bbuf->p); - GET_CODE_POINT(n, data); - data++; - if (n <= 0) goto set_all; - - r = 0; - pre = MBCODE_START_POS(enc); - for (i = 0; i < n; i++) { - from = data[i*2]; - to = data[i*2+1]; - if (pre <= from - 1) { - r = add_code_range_to_buf(pbuf, pre, from - 1); - if (r != 0) return r; - } - if (to == ~((OnigCodePoint )0)) break; - pre = to + 1; - } - if (to < ~((OnigCodePoint )0)) { - r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0)); - } - return r; -} - -#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\ - BBuf *tbuf; \ - int tnot; \ - tnot = not1; not1 = not2; not2 = tnot; \ - tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \ -} while (0) - -static int -or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1, - BBuf* bbuf2, int not2, BBuf** pbuf) -{ - int r; - OnigCodePoint i, n1, *data1; - OnigCodePoint from, to; - - *pbuf = (BBuf* )NULL; - if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) { - if (not1 != 0 || not2 != 0) - return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); - return 0; - } - - r = 0; - if (IS_NULL(bbuf2)) - SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); - - if (IS_NULL(bbuf1)) { - if (not1 != 0) { - return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); - } - else { - if (not2 == 0) { - return bbuf_clone(pbuf, bbuf2); - } - else { - return not_code_range_buf(enc, bbuf2, pbuf); - } - } - } - - if (not1 != 0) - SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); - - data1 = (OnigCodePoint* )(bbuf1->p); 
- GET_CODE_POINT(n1, data1); - data1++; - - if (not2 == 0 && not1 == 0) { /* 1 OR 2 */ - r = bbuf_clone(pbuf, bbuf2); - } - else if (not1 == 0) { /* 1 OR (not 2) */ - r = not_code_range_buf(enc, bbuf2, pbuf); - } - if (r != 0) return r; - - for (i = 0; i < n1; i++) { - from = data1[i*2]; - to = data1[i*2+1]; - r = add_code_range_to_buf(pbuf, from, to); - if (r != 0) return r; - } - return 0; -} - -static int -and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1, - OnigCodePoint* data, int n) -{ - int i, r; - OnigCodePoint from2, to2; - - for (i = 0; i < n; i++) { - from2 = data[i*2]; - to2 = data[i*2+1]; - if (from2 < from1) { - if (to2 < from1) continue; - else { - from1 = to2 + 1; - } - } - else if (from2 <= to1) { - if (to2 < to1) { - if (from1 <= from2 - 1) { - r = add_code_range_to_buf(pbuf, from1, from2-1); - if (r != 0) return r; - } - from1 = to2 + 1; - } - else { - to1 = from2 - 1; - } - } - else { - from1 = from2; - } - if (from1 > to1) break; - } - if (from1 <= to1) { - r = add_code_range_to_buf(pbuf, from1, to1); - if (r != 0) return r; - } - return 0; -} - -static int -and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) -{ - int r; - OnigCodePoint i, j, n1, n2, *data1, *data2; - OnigCodePoint from, to, from1, to1, from2, to2; - - *pbuf = (BBuf* )NULL; - if (IS_NULL(bbuf1)) { - if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */ - return bbuf_clone(pbuf, bbuf2); - return 0; - } - else if (IS_NULL(bbuf2)) { - if (not2 != 0) - return bbuf_clone(pbuf, bbuf1); - return 0; - } - - if (not1 != 0) - SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); - - data1 = (OnigCodePoint* )(bbuf1->p); - data2 = (OnigCodePoint* )(bbuf2->p); - GET_CODE_POINT(n1, data1); - GET_CODE_POINT(n2, data2); - data1++; - data2++; - - if (not2 == 0 && not1 == 0) { /* 1 AND 2 */ - for (i = 0; i < n1; i++) { - from1 = data1[i*2]; - to1 = data1[i*2+1]; - for (j = 0; j < n2; j++) { - from2 = data2[j*2]; - to2 = data2[j*2+1]; - if (from2 > to1) 
break; - if (to2 < from1) continue; - from = MAX(from1, from2); - to = MIN(to1, to2); - r = add_code_range_to_buf(pbuf, from, to); - if (r != 0) return r; - } - } - } - else if (not1 == 0) { /* 1 AND (not 2) */ - for (i = 0; i < n1; i++) { - from1 = data1[i*2]; - to1 = data1[i*2+1]; - r = and_code_range1(pbuf, from1, to1, data2, n2); - if (r != 0) return r; - } - } - - return 0; -} - -static int -and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) -{ - int r, not1, not2; - BBuf *buf1, *buf2, *pbuf; - BitSetRef bsr1, bsr2; - BitSet bs1, bs2; - - not1 = IS_NCCLASS_NOT(dest); - bsr1 = dest->bs; - buf1 = dest->mbuf; - not2 = IS_NCCLASS_NOT(cc); - bsr2 = cc->bs; - buf2 = cc->mbuf; - - if (not1 != 0) { - bitset_invert_to(bsr1, bs1); - bsr1 = bs1; - } - if (not2 != 0) { - bitset_invert_to(bsr2, bs2); - bsr2 = bs2; - } - bitset_and(bsr1, bsr2); - if (bsr1 != dest->bs) { - bitset_copy(dest->bs, bsr1); - bsr1 = dest->bs; - } - if (not1 != 0) { - bitset_invert(dest->bs); - } - - if (! ONIGENC_IS_SINGLEBYTE(enc)) { - if (not1 != 0 && not2 != 0) { - r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf); - } - else { - r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf); - if (r == 0 && not1 != 0) { - BBuf *tbuf; - r = not_code_range_buf(enc, pbuf, &tbuf); - if (r != 0) { - bbuf_free(pbuf); - return r; - } - bbuf_free(pbuf); - pbuf = tbuf; - } - } - if (r != 0) return r; - - dest->mbuf = pbuf; - bbuf_free(buf1); - return r; - } - return 0; -} - -static int -or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) -{ - int r, not1, not2; - BBuf *buf1, *buf2, *pbuf; - BitSetRef bsr1, bsr2; - BitSet bs1, bs2; - - not1 = IS_NCCLASS_NOT(dest); - bsr1 = dest->bs; - buf1 = dest->mbuf; - not2 = IS_NCCLASS_NOT(cc); - bsr2 = cc->bs; - buf2 = cc->mbuf; - - if (not1 != 0) { - bitset_invert_to(bsr1, bs1); - bsr1 = bs1; - } - if (not2 != 0) { - bitset_invert_to(bsr2, bs2); - bsr2 = bs2; - } - bitset_or(bsr1, bsr2); - if (bsr1 != dest->bs) { - bitset_copy(dest->bs, bsr1); - 
bsr1 = dest->bs; - } - if (not1 != 0) { - bitset_invert(dest->bs); - } - - if (! ONIGENC_IS_SINGLEBYTE(enc)) { - if (not1 != 0 && not2 != 0) { - r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf); - } - else { - r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf); - if (r == 0 && not1 != 0) { - BBuf *tbuf; - r = not_code_range_buf(enc, pbuf, &tbuf); - if (r != 0) { - bbuf_free(pbuf); - return r; - } - bbuf_free(pbuf); - pbuf = tbuf; - } - } - if (r != 0) return r; - - dest->mbuf = pbuf; - bbuf_free(buf1); - return r; - } - else - return 0; -} - -static int -conv_backslash_value(int c, ScanEnv* env) -{ - if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) { - switch (c) { - case 'n': return '\n'; - case 't': return '\t'; - case 'r': return '\r'; - case 'f': return '\f'; - case 'a': return '\007'; - case 'b': return '\010'; - case 'e': return '\033'; - case 'v': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB)) - return '\v'; - break; - - default: - break; - } - } - return c; -} - -static int -is_invalid_quantifier_target(Node* node) -{ - switch (NTYPE(node)) { - case NT_ANCHOR: - return 1; - break; - - case NT_ENCLOSE: - /* allow enclosed elements */ - /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */ - break; - - case NT_LIST: - do { - if (! 
is_invalid_quantifier_target(NCAR(node))) return 0; - } while (IS_NOT_NULL(node = NCDR(node))); - return 0; - break; - - case NT_ALT: - do { - if (is_invalid_quantifier_target(NCAR(node))) return 1; - } while (IS_NOT_NULL(node = NCDR(node))); - break; - - default: - break; - } - return 0; -} - -/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */ -static int -popular_quantifier_num(QtfrNode* q) -{ - if (q->greedy) { - if (q->lower == 0) { - if (q->upper == 1) return 0; - else if (IS_REPEAT_INFINITE(q->upper)) return 1; - } - else if (q->lower == 1) { - if (IS_REPEAT_INFINITE(q->upper)) return 2; - } - } - else { - if (q->lower == 0) { - if (q->upper == 1) return 3; - else if (IS_REPEAT_INFINITE(q->upper)) return 4; - } - else if (q->lower == 1) { - if (IS_REPEAT_INFINITE(q->upper)) return 5; - } - } - return -1; -} - - -enum ReduceType { - RQ_ASIS = 0, /* as is */ - RQ_DEL = 1, /* delete parent */ - RQ_A, /* to '*' */ - RQ_AQ, /* to '*?' */ - RQ_QQ, /* to '??' */ - RQ_P_QQ, /* to '+)??' */ - RQ_PQ_Q /* to '+?)?' */ -}; - -static enum ReduceType ReduceTypeTable[6][6] = { - {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */ - {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */ - {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */ - {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */ - {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */ - {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' 
*/ -}; - -extern void -onig_reduce_nested_quantifier(Node* pnode, Node* cnode) -{ - int pnum, cnum; - QtfrNode *p, *c; - - p = NQTFR(pnode); - c = NQTFR(cnode); - pnum = popular_quantifier_num(p); - cnum = popular_quantifier_num(c); - if (pnum < 0 || cnum < 0) return ; - - switch(ReduceTypeTable[cnum][pnum]) { - case RQ_DEL: - *pnode = *cnode; - break; - case RQ_A: - p->target = c->target; - p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1; - break; - case RQ_AQ: - p->target = c->target; - p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0; - break; - case RQ_QQ: - p->target = c->target; - p->lower = 0; p->upper = 1; p->greedy = 0; - break; - case RQ_P_QQ: - p->target = cnode; - p->lower = 0; p->upper = 1; p->greedy = 0; - c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1; - return ; - break; - case RQ_PQ_Q: - p->target = cnode; - p->lower = 0; p->upper = 1; p->greedy = 1; - c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0; - return ; - break; - case RQ_ASIS: - p->target = cnode; - return ; - break; - } - - c->target = NULL_NODE; - onig_node_free(cnode); -} - - -enum TokenSyms { - TK_EOT = 0, /* end of token */ - TK_RAW_BYTE = 1, - TK_CHAR, - TK_STRING, - TK_CODE_POINT, - TK_ANYCHAR, - TK_CHAR_TYPE, - TK_BACKREF, - TK_CALL, - TK_ANCHOR, - TK_OP_REPEAT, - TK_INTERVAL, - TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */ - TK_ALT, - TK_SUBEXP_OPEN, - TK_SUBEXP_CLOSE, - TK_CC_OPEN, - TK_QUOTE_OPEN, - TK_CHAR_PROPERTY, /* \p{...}, \P{...} */ - /* in cc */ - TK_CC_CLOSE, - TK_CC_RANGE, - TK_POSIX_BRACKET_OPEN, - TK_CC_AND, /* && */ - TK_CC_CC_OPEN /* [ */ -}; - -typedef struct { - enum TokenSyms type; - int escaped; - int base; /* is number: 8, 16 (used in [....]) */ - UChar* backp; - union { - UChar* s; - int c; - OnigCodePoint code; - int anchor; - int subtype; - struct { - int lower; - int upper; - int greedy; - int possessive; - } repeat; - struct { - int num; - int ref1; - int* refs; - int by_name; -#ifdef USE_BACKREF_WITH_LEVEL - int exist_level; - 
int level; /* \k */ -#endif - } backref; - struct { - UChar* name; - UChar* name_end; - int gnum; - } call; - struct { - int ctype; - int not; - } prop; - } u; -} OnigToken; - - -static int -fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env) -{ - int low, up, syn_allow, non_low = 0; - int r = 0; - OnigCodePoint c; - OnigEncoding enc = env->enc; - UChar* p = *src; - PFETCH_READY; - - syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL); - - if (PEND) { - if (syn_allow) - return 1; /* "....{" : OK! */ - else - return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */ - } - - if (! syn_allow) { - c = PPEEK; - if (c == ')' || c == '(' || c == '|') { - return ONIGERR_END_PATTERN_AT_LEFT_BRACE; - } - } - - low = onig_scan_unsigned_number(&p, end, env->enc); - if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; - if (low > ONIG_MAX_REPEAT_NUM) - return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; - - if (p == *src) { /* can't read low */ - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) { - /* allow {,n} as {0,n} */ - low = 0; - non_low = 1; - } - else - goto invalid; - } - - if (PEND) goto invalid; - PFETCH(c); - if (c == ',') { - UChar* prev = p; - up = onig_scan_unsigned_number(&p, end, env->enc); - if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; - if (up > ONIG_MAX_REPEAT_NUM) - return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; - - if (p == prev) { - if (non_low != 0) - goto invalid; - up = REPEAT_INFINITE; /* {n,} : {n,infinite} */ - } - } - else { - if (non_low != 0) - goto invalid; - - PUNFETCH; - up = low; /* {n} : exact n times */ - r = 2; /* fixed */ - } - - if (PEND) goto invalid; - PFETCH(c); - if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) { - if (c != MC_ESC(env->syntax)) goto invalid; - PFETCH(c); - } - if (c != '}') goto invalid; - - if (!IS_REPEAT_INFINITE(up) && low > up) { - return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE; - } - - tok->type 
= TK_INTERVAL; - tok->u.repeat.lower = low; - tok->u.repeat.upper = up; - *src = p; - return r; /* 0: normal {n,m}, 2: fixed {n} */ - - invalid: - if (syn_allow) - return 1; /* OK */ - else - return ONIGERR_INVALID_REPEAT_RANGE_PATTERN; -} - -/* \M-, \C-, \c, or \... */ -static int -fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) -{ - int v; - OnigCodePoint c; - OnigEncoding enc = env->enc; - UChar* p = *src; - - if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; - - PFETCH_S(c); - switch (c) { - case 'M': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) { - if (PEND) return ONIGERR_END_PATTERN_AT_META; - PFETCH_S(c); - if (c != '-') return ONIGERR_META_CODE_SYNTAX; - if (PEND) return ONIGERR_END_PATTERN_AT_META; - PFETCH_S(c); - if (c == MC_ESC(env->syntax)) { - v = fetch_escaped_value(&p, end, env); - if (v < 0) return v; - c = (OnigCodePoint )v; - } - c = ((c & 0xff) | 0x80); - } - else - goto backslash; - break; - - case 'C': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) { - if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; - PFETCH_S(c); - if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX; - goto control; - } - else - goto backslash; - - case 'c': - if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) { - control: - if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; - PFETCH_S(c); - if (c == '?') { - c = 0177; - } - else { - if (c == MC_ESC(env->syntax)) { - v = fetch_escaped_value(&p, end, env); - if (v < 0) return v; - c = (OnigCodePoint )v; - } - c &= 0x9f; - } - break; - } - /* fall through */ - - default: - { - backslash: - c = conv_backslash_value(c, env); - } - break; - } - - *src = p; - return c; -} - -static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env); - -static OnigCodePoint -get_name_end_code_point(OnigCodePoint start) -{ - switch (start) { - case '<': return (OnigCodePoint )'>'; break; - case '\'': return (OnigCodePoint )'\''; break; - default: - break; - } - - 
return (OnigCodePoint )0; -} - -#ifdef USE_NAMED_GROUP -#ifdef USE_BACKREF_WITH_LEVEL -/* - \k, \k - \k, \k - \k<-num+n>, \k<-num-n> -*/ -static int -fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, - int* rback_num, int* rlevel) -{ - int r, sign, is_num, exist_level; - OnigCodePoint end_code; - OnigCodePoint c = 0; - OnigEncoding enc = env->enc; - UChar *name_end; - UChar *pnum_head; - UChar *p = *src; - PFETCH_READY; - - *rback_num = 0; - is_num = exist_level = 0; - sign = 1; - pnum_head = *src; - - end_code = get_name_end_code_point(start_code); - - name_end = end; - r = 0; - if (PEND) { - return ONIGERR_EMPTY_GROUP_NAME; - } - else { - PFETCH(c); - if (c == end_code) - return ONIGERR_EMPTY_GROUP_NAME; - - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; - } - else if (c == '-') { - is_num = 2; - sign = -1; - pnum_head = p; - } - else if (!ONIGENC_IS_CODE_WORD(enc, c)) { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - } - - while (!PEND) { - name_end = p; - PFETCH(c); - if (c == end_code || c == ')' || c == '+' || c == '-') { - if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; - break; - } - - if (is_num != 0) { - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; - } - else { - r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; - } - } - else if (!ONIGENC_IS_CODE_WORD(enc, c)) { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - } - - if (r == 0 && c != end_code) { - if (c == '+' || c == '-') { - int level; - int flag = (c == '-' ? -1 : 1); - - PFETCH(c); - if (! 
ONIGENC_IS_CODE_DIGIT(enc, c)) goto err; - PUNFETCH; - level = onig_scan_unsigned_number(&p, end, enc); - if (level < 0) return ONIGERR_TOO_BIG_NUMBER; - *rlevel = (level * flag); - exist_level = 1; - - PFETCH(c); - if (c == end_code) - goto end; - } - - err: - r = ONIGERR_INVALID_GROUP_NAME; - name_end = end; - } - - end: - if (r == 0) { - if (is_num != 0) { - *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); - if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; - else if (*rback_num == 0) goto err; - - *rback_num *= sign; - } - - *rname_end = name_end; - *src = p; - return (exist_level ? 1 : 0); - } - else { - onig_scan_env_set_error_string(env, r, *src, name_end); - return r; - } -} -#endif /* USE_BACKREF_WITH_LEVEL */ - -/* - def: 0 -> define name (don't allow number name) - 1 -> reference name (allow number name) -*/ -static int -fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, int* rback_num, int ref) -{ - int r, is_num, sign; - OnigCodePoint end_code; - OnigCodePoint c = 0; - OnigEncoding enc = env->enc; - UChar *name_end; - UChar *pnum_head; - UChar *p = *src; - - *rback_num = 0; - - end_code = get_name_end_code_point(start_code); - - name_end = end; - pnum_head = *src; - r = 0; - is_num = 0; - sign = 1; - if (PEND) { - return ONIGERR_EMPTY_GROUP_NAME; - } - else { - PFETCH_S(c); - if (c == end_code) - return ONIGERR_EMPTY_GROUP_NAME; - - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - if (ref == 1) - is_num = 1; - else { - r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; - } - } - else if (c == '-') { - if (ref == 1) { - is_num = 2; - sign = -1; - pnum_head = p; - } - else { - r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; - } - } - else if (!ONIGENC_IS_CODE_WORD(enc, c)) { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - } - - if (r == 0) { - while (!PEND) { - name_end = p; - PFETCH_S(c); - if (c == end_code || c == ')') { - if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; - break; - } - - if (is_num != 0) 
{ - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; - } - else { - if (!ONIGENC_IS_CODE_WORD(enc, c)) - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - else - r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; - } - } - else { - if (!ONIGENC_IS_CODE_WORD(enc, c)) { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - } - } - - if (c != end_code) { - r = ONIGERR_INVALID_GROUP_NAME; - name_end = end; - } - - if (is_num != 0) { - *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); - if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; - else if (*rback_num == 0) { - r = ONIGERR_INVALID_GROUP_NAME; - goto err; - } - - *rback_num *= sign; - } - - *rname_end = name_end; - *src = p; - return 0; - } - else { - while (!PEND) { - name_end = p; - PFETCH_S(c); - if (c == end_code || c == ')') - break; - } - if (PEND) - name_end = end; - - err: - onig_scan_env_set_error_string(env, r, *src, name_end); - return r; - } -} -#else -static int -fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, int* rback_num, int ref) -{ - int r, is_num, sign; - OnigCodePoint end_code; - OnigCodePoint c = 0; - UChar *name_end; - OnigEncoding enc = env->enc; - UChar *pnum_head; - UChar *p = *src; - PFETCH_READY; - - *rback_num = 0; - - end_code = get_name_end_code_point(start_code); - - *rname_end = name_end = end; - r = 0; - pnum_head = *src; - is_num = 0; - sign = 1; - - if (PEND) { - return ONIGERR_EMPTY_GROUP_NAME; - } - else { - PFETCH(c); - if (c == end_code) - return ONIGERR_EMPTY_GROUP_NAME; - - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; - } - else if (c == '-') { - is_num = 2; - sign = -1; - pnum_head = p; - } - else { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - } - - while (!PEND) { - name_end = p; - - PFETCH(c); - if (c == end_code || c == ')') break; - if (! 
ONIGENC_IS_CODE_DIGIT(enc, c)) - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - if (r == 0 && c != end_code) { - r = ONIGERR_INVALID_GROUP_NAME; - name_end = end; - } - - if (r == 0) { - *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); - if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; - else if (*rback_num == 0) { - r = ONIGERR_INVALID_GROUP_NAME; - goto err; - } - *rback_num *= sign; - - *rname_end = name_end; - *src = p; - return 0; - } - else { - err: - onig_scan_env_set_error_string(env, r, *src, name_end); - return r; - } -} -#endif /* USE_NAMED_GROUP */ - -static void -CC_ESC_WARN(ScanEnv* env, UChar *c) -{ - if (onig_warn == onig_null_warn) return ; - - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) && - IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) { - UChar buf[WARN_BUFSIZE]; - onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, - env->pattern, env->pattern_end, - (UChar* )"character class has '%s' without escape", c); - (*onig_warn)((char* )buf); - } -} - -static void -CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c) -{ - if (onig_warn == onig_null_warn) return ; - - if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) { - UChar buf[WARN_BUFSIZE]; - onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc, - (env)->pattern, (env)->pattern_end, - (UChar* )"regular expression has '%s' without escape", c); - (*onig_warn)((char* )buf); - } -} - -static UChar* -find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, - UChar **next, OnigEncoding enc) -{ - int i; - OnigCodePoint x; - UChar *q; - UChar *p = from; - - while (p < to) { - x = ONIGENC_MBC_TO_CODE(enc, p, to); - q = p + enclen(enc, p); - if (x == s[0]) { - for (i = 1; i < n && q < to; i++) { - x = ONIGENC_MBC_TO_CODE(enc, q, to); - if (x != s[i]) break; - q += enclen(enc, q); - } - if (i >= n) { - if (IS_NOT_NULL(next)) - *next = q; - return p; - } - } - p = q; - } - return NULL_UCHARP; -} - -static int 
-str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, - OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn) -{ - int i, in_esc; - OnigCodePoint x; - UChar *q; - UChar *p = from; - - in_esc = 0; - while (p < to) { - if (in_esc) { - in_esc = 0; - p += enclen(enc, p); - } - else { - x = ONIGENC_MBC_TO_CODE(enc, p, to); - q = p + enclen(enc, p); - if (x == s[0]) { - for (i = 1; i < n && q < to; i++) { - x = ONIGENC_MBC_TO_CODE(enc, q, to); - if (x != s[i]) break; - q += enclen(enc, q); - } - if (i >= n) return 1; - p += enclen(enc, p); - } - else { - x = ONIGENC_MBC_TO_CODE(enc, p, to); - if (x == bad) return 0; - else if (x == MC_ESC(syn)) in_esc = 1; - p = q; - } - } - } - return 0; -} - -static int -fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) -{ - int num; - OnigCodePoint c, c2; - OnigSyntaxType* syn = env->syntax; - OnigEncoding enc = env->enc; - UChar* prev; - UChar* p = *src; - PFETCH_READY; - - if (PEND) { - tok->type = TK_EOT; - return tok->type; - } - - PFETCH(c); - tok->type = TK_CHAR; - tok->base = 0; - tok->u.c = c; - tok->escaped = 0; - - if (c == ']') { - tok->type = TK_CC_CLOSE; - } - else if (c == '-') { - tok->type = TK_CC_RANGE; - } - else if (c == MC_ESC(syn)) { - if (! 
IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) - goto end; - - if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; - - PFETCH(c); - tok->escaped = 1; - tok->u.c = c; - switch (c) { - case 'w': - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_WORD; - tok->u.prop.not = 0; - break; - case 'W': - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_WORD; - tok->u.prop.not = 1; - break; - case 'd': - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; - tok->u.prop.not = 0; - break; - case 'D': - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; - tok->u.prop.not = 1; - break; - case 's': - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; - tok->u.prop.not = 0; - break; - case 'S': - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; - tok->u.prop.not = 1; - break; - case 'h': - if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; - tok->u.prop.not = 0; - break; - case 'H': - if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; - tok->u.prop.not = 1; - break; - - case 'p': - case 'P': - c2 = PPEEK; - if (c2 == '{' && - IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { - PINC; - tok->type = TK_CHAR_PROPERTY; - tok->u.prop.not = (c == 'P' ? 1 : 0); - - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { - PFETCH(c2); - if (c2 == '^') { - tok->u.prop.not = (tok->u.prop.not == 0 ? 
1 : 0); - } - else - PUNFETCH; - } - } - break; - - case 'x': - if (PEND) break; - - prev = p; - if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { - PINC; - num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); - if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; - if (!PEND) { - c2 = PPEEK; - if (ONIGENC_IS_CODE_XDIGIT(enc, c2)) - return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; - } - - if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) { - PINC; - tok->type = TK_CODE_POINT; - tok->base = 16; - tok->u.code = (OnigCodePoint )num; - } - else { - /* can't read nothing or invalid format */ - p = prev; - } - } - else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { - num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_RAW_BYTE; - tok->base = 16; - tok->u.c = num; - } - break; - - case 'u': - if (PEND) break; - - prev = p; - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { - num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_CODE_POINT; - tok->base = 16; - tok->u.code = (OnigCodePoint )num; - } - break; - - case '0': - case '1': case '2': case '3': case '4': case '5': case '6': case '7': - if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { - PUNFETCH; - prev = p; - num = scan_unsigned_octal_number(&p, end, 3, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. 
*/ - num = 0; /* but, it's not error */ - } - tok->type = TK_RAW_BYTE; - tok->base = 8; - tok->u.c = num; - } - break; - - default: - PUNFETCH; - num = fetch_escaped_value(&p, end, env); - if (num < 0) return num; - if (tok->u.c != num) { - tok->u.code = (OnigCodePoint )num; - tok->type = TK_CODE_POINT; - } - break; - } - } - else if (c == '[') { - if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) { - OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' }; - tok->backp = p; /* point at '[' is readed */ - PINC; - if (str_exist_check_with_esc(send, 2, p, end, - (OnigCodePoint )']', enc, syn)) { - tok->type = TK_POSIX_BRACKET_OPEN; - } - else { - PUNFETCH; - goto cc_in_cc; - } - } - else { - cc_in_cc: - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) { - tok->type = TK_CC_CC_OPEN; - } - else { - CC_ESC_WARN(env, (UChar* )"["); - } - } - } - else if (c == '&') { - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) && - !PEND && (PPEEK_IS('&'))) { - PINC; - tok->type = TK_CC_AND; - } - } - - end: - *src = p; - return tok->type; -} - -static int -fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) -{ - int r, num; - OnigCodePoint c; - OnigEncoding enc = env->enc; - OnigSyntaxType* syn = env->syntax; - UChar* prev; - UChar* p = *src; - PFETCH_READY; - - start: - if (PEND) { - tok->type = TK_EOT; - return tok->type; - } - - tok->type = TK_STRING; - tok->base = 0; - tok->backp = p; - - PFETCH(c); - if (IS_MC_ESC_CODE(c, syn)) { - if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; - - tok->backp = p; - PFETCH(c); - - tok->u.c = c; - tok->escaped = 1; - switch (c) { - case '*': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break; - tok->type = TK_OP_REPEAT; - tok->u.repeat.lower = 0; - tok->u.repeat.upper = REPEAT_INFINITE; - goto greedy_check; - break; - - case '+': - if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break; - tok->type = TK_OP_REPEAT; - tok->u.repeat.lower = 1; - tok->u.repeat.upper = REPEAT_INFINITE; - goto greedy_check; - break; - - case '?': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break; - tok->type = TK_OP_REPEAT; - tok->u.repeat.lower = 0; - tok->u.repeat.upper = 1; - greedy_check: - if (!PEND && PPEEK_IS('?') && - IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) { - PFETCH(c); - tok->u.repeat.greedy = 0; - tok->u.repeat.possessive = 0; - } - else { - possessive_check: - if (!PEND && PPEEK_IS('+') && - ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) && - tok->type != TK_INTERVAL) || - (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) && - tok->type == TK_INTERVAL))) { - PFETCH(c); - tok->u.repeat.greedy = 1; - tok->u.repeat.possessive = 1; - } - else { - tok->u.repeat.greedy = 1; - tok->u.repeat.possessive = 0; - } - } - break; - - case '{': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break; - r = fetch_range_quantifier(&p, end, tok, env); - if (r < 0) return r; /* error */ - if (r == 0) goto greedy_check; - else if (r == 2) { /* {n} */ - if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) - goto possessive_check; - - goto greedy_check; - } - /* r == 1 : normal char */ - break; - - case '|': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break; - tok->type = TK_ALT; - break; - - case '(': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; - tok->type = TK_SUBEXP_OPEN; - break; - - case ')': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; - tok->type = TK_SUBEXP_CLOSE; - break; - - case 'w': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_WORD; - tok->u.prop.not = 0; - break; - - case 'W': - if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_WORD; - tok->u.prop.not = 1; - break; - - case 'b': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; - tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_WORD_BOUND; - break; - - case 'B': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; - tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_NOT_WORD_BOUND; - break; - -#ifdef USE_WORD_BEGIN_END - case '<': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; - tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_WORD_BEGIN; - break; - - case '>': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; - tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_WORD_END; - break; -#endif - - case 's': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; - tok->u.prop.not = 0; - break; - - case 'S': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; - tok->u.prop.not = 1; - break; - - case 'd': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; - tok->u.prop.not = 0; - break; - - case 'D': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; - tok->u.prop.not = 1; - break; - - case 'h': - if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; - tok->u.prop.not = 0; - break; - - case 'H': - if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; - tok->u.prop.not = 1; - break; - - case 'A': - if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; - begin_buf: - tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_BEGIN_BUF; - break; - - case 'Z': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; - tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_SEMI_END_BUF; - break; - - case 'z': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; - end_buf: - tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_END_BUF; - break; - - case 'G': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break; - tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_BEGIN_POSITION; - break; - - case '`': - if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; - goto begin_buf; - break; - - case '\'': - if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; - goto end_buf; - break; - - case 'x': - if (PEND) break; - - prev = p; - if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { - PINC; - num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); - if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; - if (!PEND) { - if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK)) - return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; - } - - if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) { - PINC; - tok->type = TK_CODE_POINT; - tok->u.code = (OnigCodePoint )num; - } - else { - /* can't read nothing or invalid format */ - p = prev; - } - } - else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { - num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_RAW_BYTE; - tok->base = 16; - tok->u.c = num; - } - break; - - case 'u': - if (PEND) break; - - prev = p; - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { - num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. 
*/ - num = 0; /* but, it's not error */ - } - tok->type = TK_CODE_POINT; - tok->base = 16; - tok->u.code = (OnigCodePoint )num; - } - break; - - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - PUNFETCH; - prev = p; - num = onig_scan_unsigned_number(&p, end, enc); - if (num < 0 || num > ONIG_MAX_BACKREF_NUM) { - goto skip_backref; - } - - if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && - (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */ - if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { - if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num])) - return ONIGERR_INVALID_BACKREF; - } - - tok->type = TK_BACKREF; - tok->u.backref.num = 1; - tok->u.backref.ref1 = num; - tok->u.backref.by_name = 0; -#ifdef USE_BACKREF_WITH_LEVEL - tok->u.backref.exist_level = 0; -#endif - break; - } - - skip_backref: - if (c == '8' || c == '9') { - /* normal char */ - p = prev; PINC; - break; - } - - p = prev; - /* fall through */ - case '0': - if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { - prev = p; - num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_RAW_BYTE; - tok->base = 8; - tok->u.c = num; - } - else if (c != '0') { - PINC; - } - break; - -#ifdef USE_NAMED_GROUP - case 'k': - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) { - PFETCH(c); - if (c == '<' || c == '\'') { - UChar* name_end; - int* backs; - int back_num; - - prev = p; - -#ifdef USE_BACKREF_WITH_LEVEL - name_end = NULL_UCHARP; /* no need. escape gcc warning. 
*/ - r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end, - env, &back_num, &tok->u.backref.level); - if (r == 1) tok->u.backref.exist_level = 1; - else tok->u.backref.exist_level = 0; -#else - r = fetch_name(&p, end, &name_end, env, &back_num, 1); -#endif - if (r < 0) return r; - - if (back_num != 0) { - if (back_num < 0) { - back_num = BACKREF_REL_TO_ABS(back_num, env); - if (back_num <= 0) - return ONIGERR_INVALID_BACKREF; - } - - if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { - if (back_num > env->num_mem || - IS_NULL(SCANENV_MEM_NODES(env)[back_num])) - return ONIGERR_INVALID_BACKREF; - } - tok->type = TK_BACKREF; - tok->u.backref.by_name = 0; - tok->u.backref.num = 1; - tok->u.backref.ref1 = back_num; - } - else { - num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); - if (num <= 0) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end); - return ONIGERR_UNDEFINED_NAME_REFERENCE; - } - if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { - int i; - for (i = 0; i < num; i++) { - if (backs[i] > env->num_mem || - IS_NULL(SCANENV_MEM_NODES(env)[backs[i]])) - return ONIGERR_INVALID_BACKREF; - } - } - - tok->type = TK_BACKREF; - tok->u.backref.by_name = 1; - if (num == 1) { - tok->u.backref.num = 1; - tok->u.backref.ref1 = backs[0]; - } - else { - tok->u.backref.num = num; - tok->u.backref.refs = backs; - } - } - } - else - PUNFETCH; - } - break; -#endif - -#ifdef USE_SUBEXP_CALL - case 'g': - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) { - PFETCH(c); - if (c == '<' || c == '\'') { - int gnum; - UChar* name_end; - - prev = p; - r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1); - if (r < 0) return r; - - tok->type = TK_CALL; - tok->u.call.name = prev; - tok->u.call.name_end = name_end; - tok->u.call.gnum = gnum; - } - else - PUNFETCH; - } - break; -#endif - - case 'Q': - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) { - tok->type = TK_QUOTE_OPEN; 
- } - break; - - case 'p': - case 'P': - if (PPEEK_IS('{') && - IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { - PINC; - tok->type = TK_CHAR_PROPERTY; - tok->u.prop.not = (c == 'P' ? 1 : 0); - - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { - PFETCH(c); - if (c == '^') { - tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); - } - else - PUNFETCH; - } - } - break; - - default: - PUNFETCH; - num = fetch_escaped_value(&p, end, env); - if (num < 0) return num; - /* set_raw: */ - if (tok->u.c != num) { - tok->type = TK_CODE_POINT; - tok->u.code = (OnigCodePoint )num; - } - else { /* string */ - p = tok->backp + enclen(enc, tok->backp); - } - break; - } - } - else { - tok->u.c = c; - tok->escaped = 0; - -#ifdef USE_VARIABLE_META_CHARS - if ((c != ONIG_INEFFECTIVE_META_CHAR) && - IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) { - if (c == MC_ANYCHAR(syn)) - goto any_char; - else if (c == MC_ANYTIME(syn)) - goto anytime; - else if (c == MC_ZERO_OR_ONE_TIME(syn)) - goto zero_or_one_time; - else if (c == MC_ONE_OR_MORE_TIME(syn)) - goto one_or_more_time; - else if (c == MC_ANYCHAR_ANYTIME(syn)) { - tok->type = TK_ANYCHAR_ANYTIME; - goto out; - } - } -#endif - - switch (c) { - case '.': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break; -#ifdef USE_VARIABLE_META_CHARS - any_char: -#endif - tok->type = TK_ANYCHAR; - break; - - case '*': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break; -#ifdef USE_VARIABLE_META_CHARS - anytime: -#endif - tok->type = TK_OP_REPEAT; - tok->u.repeat.lower = 0; - tok->u.repeat.upper = REPEAT_INFINITE; - goto greedy_check; - break; - - case '+': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break; -#ifdef USE_VARIABLE_META_CHARS - one_or_more_time: -#endif - tok->type = TK_OP_REPEAT; - tok->u.repeat.lower = 1; - tok->u.repeat.upper = REPEAT_INFINITE; - goto greedy_check; - break; - - case '?': - if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break; -#ifdef USE_VARIABLE_META_CHARS - zero_or_one_time: -#endif - tok->type = TK_OP_REPEAT; - tok->u.repeat.lower = 0; - tok->u.repeat.upper = 1; - goto greedy_check; - break; - - case '{': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break; - r = fetch_range_quantifier(&p, end, tok, env); - if (r < 0) return r; /* error */ - if (r == 0) goto greedy_check; - else if (r == 2) { /* {n} */ - if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) - goto possessive_check; - - goto greedy_check; - } - /* r == 1 : normal char */ - break; - - case '|': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break; - tok->type = TK_ALT; - break; - - case '(': - if (PPEEK_IS('?') && - IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { - PINC; - if (PPEEK_IS('#')) { - PFETCH(c); - while (1) { - if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; - PFETCH(c); - if (c == MC_ESC(syn)) { - if (!PEND) PFETCH(c); - } - else { - if (c == ')') break; - } - } - goto start; - } - PUNFETCH; - } - - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break; - tok->type = TK_SUBEXP_OPEN; - break; - - case ')': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break; - tok->type = TK_SUBEXP_CLOSE; - break; - - case '^': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; - tok->type = TK_ANCHOR; - tok->u.subtype = (IS_SINGLELINE(env->option) - ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE); - break; - - case '$': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; - tok->type = TK_ANCHOR; - tok->u.subtype = (IS_SINGLELINE(env->option) - ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE); - break; - - case '[': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break; - tok->type = TK_CC_OPEN; - break; - - case ']': - if (*src > env->pattern) /* /].../ is allowed. 
*/ - CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]"); - break; - - case '#': - if (IS_EXTEND(env->option)) { - while (!PEND) { - PFETCH(c); - if (ONIGENC_IS_CODE_NEWLINE(enc, c)) - break; - } - goto start; - break; - } - break; - - case ' ': case '\t': case '\n': case '\r': case '\f': - if (IS_EXTEND(env->option)) - goto start; - break; - - default: - /* string */ - break; - } - } - -#ifdef USE_VARIABLE_META_CHARS - out: -#endif - *src = p; - return tok->type; -} - -static int -add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, - OnigEncoding enc ARG_UNUSED, - OnigCodePoint sb_out, const OnigCodePoint mbr[]) -{ - int i, r; - OnigCodePoint j; - - int n = ONIGENC_CODE_RANGE_NUM(mbr); - - if (not == 0) { - for (i = 0; i < n; i++) { - for (j = ONIGENC_CODE_RANGE_FROM(mbr, i); - j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) { - if (j >= sb_out) { - if (j == ONIGENC_CODE_RANGE_TO(mbr, i)) i++; - else if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) { - r = add_code_range_to_buf(&(cc->mbuf), j, - ONIGENC_CODE_RANGE_TO(mbr, i)); - if (r != 0) return r; - i++; - } - - goto sb_end; - } - BITSET_SET_BIT(cc->bs, j); - } - } - - sb_end: - for ( ; i < n; i++) { - r = add_code_range_to_buf(&(cc->mbuf), - ONIGENC_CODE_RANGE_FROM(mbr, i), - ONIGENC_CODE_RANGE_TO(mbr, i)); - if (r != 0) return r; - } - } - else { - OnigCodePoint prev = 0; - - for (i = 0; i < n; i++) { - for (j = prev; - j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) { - if (j >= sb_out) { - goto sb_end2; - } - BITSET_SET_BIT(cc->bs, j); - } - prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; - } - for (j = prev; j < sb_out; j++) { - BITSET_SET_BIT(cc->bs, j); - } - - sb_end2: - prev = sb_out; - - for (i = 0; i < n; i++) { - if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) { - r = add_code_range_to_buf(&(cc->mbuf), prev, - ONIGENC_CODE_RANGE_FROM(mbr, i) - 1); - if (r != 0) return r; - } - prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; - } - if (prev < 0x7fffffff) { - r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff); - if 
(r != 0) return r; - } - } - - return 0; -} - -static int -add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) -{ - int c, r; - const OnigCodePoint *ranges; - OnigCodePoint sb_out; - OnigEncoding enc = env->enc; - - r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges); - if (r == 0) { - return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges); - } - else if (r != ONIG_NO_SUPPORT_CONFIG) { - return r; - } - - r = 0; - switch (ctype) { - case ONIGENC_CTYPE_ALPHA: - case ONIGENC_CTYPE_BLANK: - case ONIGENC_CTYPE_CNTRL: - case ONIGENC_CTYPE_DIGIT: - case ONIGENC_CTYPE_LOWER: - case ONIGENC_CTYPE_PUNCT: - case ONIGENC_CTYPE_SPACE: - case ONIGENC_CTYPE_UPPER: - case ONIGENC_CTYPE_XDIGIT: - case ONIGENC_CTYPE_ASCII: - case ONIGENC_CTYPE_ALNUM: - if (not != 0) { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT(cc->bs, c); - } - ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); - } - else { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT(cc->bs, c); - } - } - break; - - case ONIGENC_CTYPE_GRAPH: - case ONIGENC_CTYPE_PRINT: - if (not != 0) { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT(cc->bs, c); - } - } - else { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT(cc->bs, c); - } - ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); - } - break; - - case ONIGENC_CTYPE_WORD: - if (not == 0) { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c); - } - ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); - } - else { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */ - && ! 
ONIGENC_IS_CODE_WORD(enc, c)) - BITSET_SET_BIT(cc->bs, c); - } - } - break; - - default: - return ONIGERR_PARSER_BUG; - break; - } - - return r; -} - -static int -parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) -{ -#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20 -#define POSIX_BRACKET_NAME_MIN_LEN 4 - - static PosixBracketEntryType PBS[] = { - { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 }, - { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 }, - { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 }, - { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 }, - { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 }, - { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 }, - { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 }, - { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 }, - { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 }, - { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 }, - { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 }, - { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 }, - { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 }, - { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 }, - { (UChar* )NULL, -1, 0 } - }; - - PosixBracketEntryType *pb; - int not, i, r; - OnigCodePoint c; - OnigEncoding enc = env->enc; - UChar *p = *src; - - if (PPEEK_IS('^')) { - PINC_S; - not = 1; - } - else - not = 0; - - if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3) - goto not_posix_bracket; - - for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { - if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) { - p = (UChar* )onigenc_step(enc, p, end, pb->len); - if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0) - return ONIGERR_INVALID_POSIX_BRACKET_TYPE; - - r = add_ctype_to_cc(cc, pb->ctype, not, env); - if (r != 0) return r; - - PINC_S; PINC_S; - *src = p; - return 0; - } - } - - not_posix_bracket: - c = 0; - i = 0; - while (!PEND && ((c = PPEEK) != ':') && c != ']') { - PINC_S; - if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break; - } - if (c == ':' && ! PEND) { - PINC_S; - if (! 
PEND) { - PFETCH_S(c); - if (c == ']') - return ONIGERR_INVALID_POSIX_BRACKET_TYPE; - } - } - - return 1; /* 1: is not POSIX bracket, but no error. */ -} - -static int -fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env) -{ - int r; - OnigCodePoint c; - OnigEncoding enc = env->enc; - UChar *prev, *start, *p = *src; - - r = 0; - start = prev = p; - - while (!PEND) { - prev = p; - PFETCH_S(c); - if (c == '}') { - r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev); - if (r < 0) break; - - *src = p; - return r; - } - else if (c == '(' || c == ')' || c == '{' || c == '|') { - r = ONIGERR_INVALID_CHAR_PROPERTY_NAME; - break; - } - } - - onig_scan_env_set_error_string(env, r, *src, prev); - return r; -} - -static int -parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, - ScanEnv* env) -{ - int r, ctype; - CClassNode* cc; - - ctype = fetch_char_property_to_ctype(src, end, env); - if (ctype < 0) return ctype; - - *np = node_new_cclass(); - CHECK_NULL_RETURN_MEMERR(*np); - cc = NCCLASS(*np); - r = add_ctype_to_cc(cc, ctype, 0, env); - if (r != 0) return r; - if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); - - return 0; -} - - -enum CCSTATE { - CCS_VALUE, - CCS_RANGE, - CCS_COMPLETE, - CCS_START -}; - -enum CCVALTYPE { - CCV_SB, - CCV_CODE_POINT, - CCV_CLASS -}; - -static int -next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, - enum CCSTATE* state, ScanEnv* env) -{ - int r; - - if (*state == CCS_RANGE) - return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE; - - if (*state == CCS_VALUE && *type != CCV_CLASS) { - if (*type == CCV_SB) - BITSET_SET_BIT(cc->bs, (int )(*vs)); - else if (*type == CCV_CODE_POINT) { - r = add_code_range(&(cc->mbuf), env, *vs, *vs); - if (r < 0) return r; - } - } - - *state = CCS_VALUE; - *type = CCV_CLASS; - return 0; -} - -static int -next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, - int* vs_israw, int v_israw, - enum CCVALTYPE intype, enum CCVALTYPE* type, - enum CCSTATE* 
state, ScanEnv* env) -{ - int r; - - switch (*state) { - case CCS_VALUE: - if (*type == CCV_SB) - BITSET_SET_BIT(cc->bs, (int )(*vs)); - else if (*type == CCV_CODE_POINT) { - r = add_code_range(&(cc->mbuf), env, *vs, *vs); - if (r < 0) return r; - } - break; - - case CCS_RANGE: - if (intype == *type) { - if (intype == CCV_SB) { - if (*vs > 0xff || v > 0xff) - return ONIGERR_INVALID_CODE_POINT_VALUE; - - if (*vs > v) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) - goto ccs_range_end; - else - return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; - } - bitset_set_range(cc->bs, (int )*vs, (int )v); - } - else { - r = add_code_range(&(cc->mbuf), env, *vs, v); - if (r < 0) return r; - } - } - else { -#if 0 - if (intype == CCV_CODE_POINT && *type == CCV_SB) { -#endif - if (*vs > v) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) - goto ccs_range_end; - else - return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; - } - bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff)); - r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v); - if (r < 0) return r; -#if 0 - } - else - return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE; -#endif - } - ccs_range_end: - *state = CCS_COMPLETE; - break; - - case CCS_COMPLETE: - case CCS_START: - *state = CCS_VALUE; - break; - - default: - break; - } - - *vs_israw = v_israw; - *vs = v; - *type = intype; - return 0; -} - -static int -code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped, - ScanEnv* env) -{ - int in_esc; - OnigCodePoint code; - OnigEncoding enc = env->enc; - UChar* p = from; - - in_esc = 0; - while (! 
PEND) { - if (ignore_escaped && in_esc) { - in_esc = 0; - } - else { - PFETCH_S(code); - if (code == c) return 1; - if (code == MC_ESC(env->syntax)) in_esc = 1; - } - } - return 0; -} - -static int -parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, - ScanEnv* env) -{ - int r, neg, len, fetched, and_start; - OnigCodePoint v, vs; - UChar *p; - Node* node; - CClassNode *cc, *prev_cc; - CClassNode work_cc; - - enum CCSTATE state; - enum CCVALTYPE val_type, in_type; - int val_israw, in_israw; - - prev_cc = (CClassNode* )NULL; - *np = NULL_NODE; - r = fetch_token_in_cc(tok, src, end, env); - if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) { - neg = 1; - r = fetch_token_in_cc(tok, src, end, env); - } - else { - neg = 0; - } - - if (r < 0) return r; - if (r == TK_CC_CLOSE) { - if (! code_exist_check((OnigCodePoint )']', - *src, env->pattern_end, 1, env)) - return ONIGERR_EMPTY_CHAR_CLASS; - - CC_ESC_WARN(env, (UChar* )"]"); - r = tok->type = TK_CHAR; /* allow []...] */ - } - - *np = node = node_new_cclass(); - CHECK_NULL_RETURN_MEMERR(node); - cc = NCCLASS(node); - - and_start = 0; - state = CCS_START; - p = *src; - while (r != TK_CC_CLOSE) { - fetched = 0; - switch (r) { - case TK_CHAR: - len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c); - if (len > 1) { - in_type = CCV_CODE_POINT; - } - else if (len < 0) { - r = len; - goto err; - } - else { - sb_char: - in_type = CCV_SB; - } - v = (OnigCodePoint )tok->u.c; - in_israw = 0; - goto val_entry2; - break; - - case TK_RAW_BYTE: - /* tok->base != 0 : octal or hexadec. */ - if (! 
ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) { - UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN; - UChar* psave = p; - int i, base = tok->base; - - buf[0] = (UChar)tok->u.c; - for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) { - r = fetch_token_in_cc(tok, &p, end, env); - if (r < 0) goto err; - if (r != TK_RAW_BYTE || tok->base != base) { - fetched = 1; - break; - } - buf[i] = (UChar)tok->u.c; - } - - if (i < ONIGENC_MBC_MINLEN(env->enc)) { - r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; - goto err; - } - - len = enclen(env->enc, buf); - if (i < len) { - r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; - goto err; - } - else if (i > len) { /* fetch back */ - p = psave; - for (i = 1; i < len; i++) { - r = fetch_token_in_cc(tok, &p, end, env); - } - fetched = 0; - } - - if (i == 1) { - v = (OnigCodePoint )buf[0]; - goto raw_single; - } - else { - v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe); - in_type = CCV_CODE_POINT; - } - } - else { - v = (OnigCodePoint )tok->u.c; - raw_single: - in_type = CCV_SB; - } - in_israw = 1; - goto val_entry2; - break; - - case TK_CODE_POINT: - v = tok->u.code; - in_israw = 1; - val_entry: - len = ONIGENC_CODE_TO_MBCLEN(env->enc, v); - if (len < 0) { - r = len; - goto err; - } - in_type = (len == 1 ? 
CCV_SB : CCV_CODE_POINT); - val_entry2: - r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type, - &state, env); - if (r != 0) goto err; - break; - - case TK_POSIX_BRACKET_OPEN: - r = parse_posix_bracket(cc, &p, end, env); - if (r < 0) goto err; - if (r == 1) { /* is not POSIX bracket */ - CC_ESC_WARN(env, (UChar* )"["); - p = tok->backp; - v = (OnigCodePoint )tok->u.c; - in_israw = 0; - goto val_entry; - } - goto next_class; - break; - - case TK_CHAR_TYPE: - r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env); - if (r != 0) return r; - - next_class: - r = next_state_class(cc, &vs, &val_type, &state, env); - if (r != 0) goto err; - break; - - case TK_CHAR_PROPERTY: - { - int ctype; - - ctype = fetch_char_property_to_ctype(&p, end, env); - if (ctype < 0) return ctype; - r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env); - if (r != 0) return r; - goto next_class; - } - break; - - case TK_CC_RANGE: - if (state == CCS_VALUE) { - r = fetch_token_in_cc(tok, &p, end, env); - if (r < 0) goto err; - fetched = 1; - if (r == TK_CC_CLOSE) { /* allow [x-] */ - range_end_val: - v = (OnigCodePoint )'-'; - in_israw = 0; - goto val_entry; - } - else if (r == TK_CC_AND) { - CC_ESC_WARN(env, (UChar* )"-"); - goto range_end_val; - } - state = CCS_RANGE; - } - else if (state == CCS_START) { - /* [-xa] is allowed */ - v = (OnigCodePoint )tok->u.c; - in_israw = 0; - - r = fetch_token_in_cc(tok, &p, end, env); - if (r < 0) goto err; - fetched = 1; - /* [--x] or [a&&-x] is warned. 
*/ - if (r == TK_CC_RANGE || and_start != 0) - CC_ESC_WARN(env, (UChar* )"-"); - - goto val_entry; - } - else if (state == CCS_RANGE) { - CC_ESC_WARN(env, (UChar* )"-"); - goto sb_char; /* [!--x] is allowed */ - } - else { /* CCS_COMPLETE */ - r = fetch_token_in_cc(tok, &p, end, env); - if (r < 0) goto err; - fetched = 1; - if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */ - else if (r == TK_CC_AND) { - CC_ESC_WARN(env, (UChar* )"-"); - goto range_end_val; - } - - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { - CC_ESC_WARN(env, (UChar* )"-"); - goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */ - } - r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; - goto err; - } - break; - - case TK_CC_CC_OPEN: /* [ */ - { - Node *anode; - CClassNode* acc; - - r = parse_char_class(&anode, tok, &p, end, env); - if (r != 0) goto cc_open_err; - acc = NCCLASS(anode); - r = or_cclass(cc, acc, env->enc); - - onig_node_free(anode); - cc_open_err: - if (r != 0) goto err; - } - break; - - case TK_CC_AND: /* && */ - { - if (state == CCS_VALUE) { - r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, - &val_type, &state, env); - if (r != 0) goto err; - } - /* initialize local variables */ - and_start = 1; - state = CCS_START; - - if (IS_NOT_NULL(prev_cc)) { - r = and_cclass(prev_cc, cc, env->enc); - if (r != 0) goto err; - bbuf_free(cc->mbuf); - } - else { - prev_cc = cc; - cc = &work_cc; - } - initialize_cclass(cc); - } - break; - - case TK_EOT: - r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS; - goto err; - break; - default: - r = ONIGERR_PARSER_BUG; - goto err; - break; - } - - if (fetched) - r = tok->type; - else { - r = fetch_token_in_cc(tok, &p, end, env); - if (r < 0) goto err; - } - } - - if (state == CCS_VALUE) { - r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, - &val_type, &state, env); - if (r != 0) goto err; - } - - if (IS_NOT_NULL(prev_cc)) { - r = and_cclass(prev_cc, cc, env->enc); - if (r != 0) goto err; - bbuf_free(cc->mbuf); - cc 
= prev_cc; - } - - if (neg != 0) - NCCLASS_SET_NOT(cc); - else - NCCLASS_CLEAR_NOT(cc); - if (IS_NCCLASS_NOT(cc) && - IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) { - int is_empty; - - is_empty = (IS_NULL(cc->mbuf) ? 1 : 0); - if (is_empty != 0) - BITSET_IS_EMPTY(cc->bs, is_empty); - - if (is_empty == 0) { -#define NEWLINE_CODE 0x0a - - if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) { - if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1) - BITSET_SET_BIT(cc->bs, NEWLINE_CODE); - else - add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE); - } - } - } - *src = p; - return 0; - - err: - if (cc != NCCLASS(*np)) - bbuf_free(cc->mbuf); - onig_node_free(*np); - return r; -} - -static int parse_subexp(Node** top, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env); - -static int -parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, - ScanEnv* env) -{ - int r, num; - Node *target; - OnigOptionType option; - OnigCodePoint c; - OnigEncoding enc = env->enc; - -#ifdef USE_NAMED_GROUP - int list_capture; -#endif - - UChar* p = *src; - PFETCH_READY; - - *np = NULL; - if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; - - option = env->option; - if (PPEEK_IS('?') && - IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { - PINC; - if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; - - PFETCH(c); - switch (c) { - case ':': /* (?:...) grouping only */ - group: - r = fetch_token(tok, &p, end, env); - if (r < 0) return r; - r = parse_subexp(np, tok, term, &p, end, env); - if (r < 0) return r; - *src = p; - return 1; /* group */ - break; - - case '=': - *np = onig_node_new_anchor(ANCHOR_PREC_READ); - break; - case '!': /* preceding read */ - *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT); - break; - case '>': /* (?>...) 
stop backtrack */ - *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK); - break; - -#ifdef USE_NAMED_GROUP - case '\'': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { - goto named_group1; - } - else - return ONIGERR_UNDEFINED_GROUP_OPTION; - break; -#endif - - case '<': /* look behind (?<=...), (?syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { - UChar *name; - UChar *name_end; - - PUNFETCH; - c = '<'; - - named_group1: - list_capture = 0; - - named_group2: - name = p; - r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0); - if (r < 0) return r; - - num = scan_env_add_mem_entry(env); - if (num < 0) return num; - if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM) - return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; - - r = name_add(env->reg, name, name_end, num, env); - if (r != 0) return r; - *np = node_new_enclose_memory(env->option, 1); - CHECK_NULL_RETURN_MEMERR(*np); - NENCLOSE(*np)->regnum = num; - if (list_capture != 0) - BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); - env->num_named++; - } - else { - return ONIGERR_UNDEFINED_GROUP_OPTION; - } - } -#else - else { - return ONIGERR_UNDEFINED_GROUP_OPTION; - } -#endif - break; - - case '@': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) { -#ifdef USE_NAMED_GROUP - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { - PFETCH(c); - if (c == '<' || c == '\'') { - list_capture = 1; - goto named_group2; /* (?@...) 
*/ - } - PUNFETCH; - } -#endif - *np = node_new_enclose_memory(env->option, 0); - CHECK_NULL_RETURN_MEMERR(*np); - num = scan_env_add_mem_entry(env); - if (num < 0) { - onig_node_free(*np); - return num; - } - else if (num >= (int )BIT_STATUS_BITS_NUM) { - onig_node_free(*np); - return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; - } - NENCLOSE(*np)->regnum = num; - BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); - } - else { - return ONIGERR_UNDEFINED_GROUP_OPTION; - } - break; - -#ifdef USE_POSIXLINE_OPTION - case 'p': -#endif - case '-': case 'i': case 'm': case 's': case 'x': - { - int neg = 0; - - while (1) { - switch (c) { - case ':': - case ')': - break; - - case '-': neg = 1; break; - case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break; - case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break; - case 's': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { - ONOFF(option, ONIG_OPTION_MULTILINE, neg); - } - else - return ONIGERR_UNDEFINED_GROUP_OPTION; - break; - - case 'm': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { - ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 
1 : 0)); - } - else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) { - ONOFF(option, ONIG_OPTION_MULTILINE, neg); - } - else - return ONIGERR_UNDEFINED_GROUP_OPTION; - break; -#ifdef USE_POSIXLINE_OPTION - case 'p': - ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg); - break; -#endif - default: - return ONIGERR_UNDEFINED_GROUP_OPTION; - } - - if (c == ')') { - *np = node_new_option(option); - CHECK_NULL_RETURN_MEMERR(*np); - *src = p; - return 2; /* option only */ - } - else if (c == ':') { - OnigOptionType prev = env->option; - - env->option = option; - r = fetch_token(tok, &p, end, env); - if (r < 0) return r; - r = parse_subexp(&target, tok, term, &p, end, env); - env->option = prev; - if (r < 0) return r; - *np = node_new_option(option); - CHECK_NULL_RETURN_MEMERR(*np); - NENCLOSE(*np)->target = target; - *src = p; - return 0; - } - - if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; - PFETCH(c); - } - } - break; - - default: - return ONIGERR_UNDEFINED_GROUP_OPTION; - } - } - else { - if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP)) - goto group; - - *np = node_new_enclose_memory(env->option, 0); - CHECK_NULL_RETURN_MEMERR(*np); - num = scan_env_add_mem_entry(env); - if (num < 0) return num; - NENCLOSE(*np)->regnum = num; - } - - CHECK_NULL_RETURN_MEMERR(*np); - r = fetch_token(tok, &p, end, env); - if (r < 0) return r; - r = parse_subexp(&target, tok, term, &p, end, env); - if (r < 0) return r; - - if (NTYPE(*np) == NT_ANCHOR) - NANCHOR(*np)->target = target; - else { - NENCLOSE(*np)->target = target; - if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) { - /* Don't move this to previous of parse_subexp() */ - r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np); - if (r != 0) return r; - } - } - - *src = p; - return 0; -} - -static const char* PopularQStr[] = { - "?", "*", "+", "??", "*?", "+?" -}; - -static const char* ReduceQStr[] = { - "", "", "*", "*?", "??", "+ and ??", "+? and ?" 
-}; - -static int -set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) -{ - QtfrNode* qn; - - qn = NQTFR(qnode); - if (qn->lower == 1 && qn->upper == 1) { - return 1; - } - - switch (NTYPE(target)) { - case NT_STR: - if (! group) { - StrNode* sn = NSTR(target); - if (str_node_can_be_split(sn, env->enc)) { - Node* n = str_node_split_last_char(sn, env->enc); - if (IS_NOT_NULL(n)) { - qn->target = n; - return 2; - } - } - } - break; - - case NT_QTFR: - { /* check redundant double repeat. */ - /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */ - QtfrNode* qnt = NQTFR(target); - int nestq_num = popular_quantifier_num(qn); - int targetq_num = popular_quantifier_num(qnt); - -#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR - if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) && - IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { - UChar buf[WARN_BUFSIZE]; - - switch(ReduceTypeTable[targetq_num][nestq_num]) { - case RQ_ASIS: - break; - - case RQ_DEL: - if (onig_verb_warn != onig_null_warn) { - onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, - env->pattern, env->pattern_end, - (UChar* )"redundant nested repeat operator"); - (*onig_verb_warn)((char* )buf); - } - goto warn_exit; - break; - - default: - if (onig_verb_warn != onig_null_warn) { - onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, - env->pattern, env->pattern_end, - (UChar* )"nested repeat operator %s and %s was replaced with '%s'", - PopularQStr[targetq_num], PopularQStr[nestq_num], - ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]); - (*onig_verb_warn)((char* )buf); - } - goto warn_exit; - break; - } - } - - warn_exit: -#endif - if (targetq_num >= 0) { - if (nestq_num >= 0) { - onig_reduce_nested_quantifier(qnode, target); - goto q_exit; - } - else if (targetq_num == 1 || targetq_num == 2) { /* * or + */ - /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */ - if (! 
IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) { - qn->upper = (qn->lower == 0 ? 1 : qn->lower); - } - } - } - } - break; - - default: - break; - } - - qn->target = target; - q_exit: - return 0; -} - - -#ifdef USE_SHARED_CCLASS_TABLE - -#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8 - -/* for ctype node hash table */ - -typedef struct { - OnigEncoding enc; - int not; - int type; -} type_cclass_key; - -static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y) -{ - if (x->type != y->type) return 1; - if (x->enc != y->enc) return 1; - if (x->not != y->not) return 1; - return 0; -} - -static int type_cclass_hash(type_cclass_key* key) -{ - int i, val; - UChar *p; - - val = 0; - - p = (UChar* )&(key->enc); - for (i = 0; i < (int )sizeof(key->enc); i++) { - val = val * 997 + (int )*p++; - } - - p = (UChar* )(&key->type); - for (i = 0; i < (int )sizeof(key->type); i++) { - val = val * 997 + (int )*p++; - } - - val += key->not; - return val + (val >> 5); -} - -static struct st_hash_type type_type_cclass_hash = { - type_cclass_cmp, - type_cclass_hash, -}; - -static st_table* OnigTypeCClassTable; - - -static int -i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED) -{ - if (IS_NOT_NULL(node)) { - CClassNode* cc = NCCLASS(node); - if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf); - xfree(node); - } - - if (IS_NOT_NULL(key)) xfree(key); - return ST_DELETE; -} - -extern int -onig_free_shared_cclass_table(void) -{ - if (IS_NOT_NULL(OnigTypeCClassTable)) { - onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0); - onig_st_free_table(OnigTypeCClassTable); - OnigTypeCClassTable = NULL; - } - - return 0; -} - -#endif /* USE_SHARED_CCLASS_TABLE */ - - -#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS -static int -clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc) -{ - BBuf *tbuf; - int r; - - if (IS_NCCLASS_NOT(cc)) { - bitset_invert(cc->bs); - - if (! 
ONIGENC_IS_SINGLEBYTE(enc)) { - r = not_code_range_buf(enc, cc->mbuf, &tbuf); - if (r != 0) return r; - - bbuf_free(cc->mbuf); - cc->mbuf = tbuf; - } - - NCCLASS_CLEAR_NOT(cc); - } - - return 0; -} -#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ - -typedef struct { - ScanEnv* env; - CClassNode* cc; - Node* alt_root; - Node** ptail; -} IApplyCaseFoldArg; - -static int -i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], - int to_len, void* arg) -{ - IApplyCaseFoldArg* iarg; - ScanEnv* env; - CClassNode* cc; - BitSetRef bs; - - iarg = (IApplyCaseFoldArg* )arg; - env = iarg->env; - cc = iarg->cc; - bs = cc->bs; - - if (to_len == 1) { - int is_in = onig_is_code_in_cc(env->enc, from, cc); -#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS - if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) || - (is_in == 0 && IS_NCCLASS_NOT(cc))) { - if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { - add_code_range(&(cc->mbuf), env, *to, *to); - } - else { - BITSET_SET_BIT(bs, *to); - } - } -#else - if (is_in != 0) { - if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { - if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc); - add_code_range(&(cc->mbuf), env, *to, *to); - } - else { - if (IS_NCCLASS_NOT(cc)) { - BITSET_CLEAR_BIT(bs, *to); - } - else - BITSET_SET_BIT(bs, *to); - } - } -#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ - } - else { - int r, i, len; - UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - Node *snode = NULL_NODE; - - if (onig_is_code_in_cc(env->enc, from, cc) -#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS - && !IS_NCCLASS_NOT(cc) -#endif - ) { - for (i = 0; i < to_len; i++) { - len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf); - if (i == 0) { - snode = onig_node_new_str(buf, buf + len); - CHECK_NULL_RETURN_MEMERR(snode); - - /* char-class expanded multi-char only - compare with string folded at match time. 
*/ - NSTRING_SET_AMBIG(snode); - } - else { - r = onig_node_str_cat(snode, buf, buf + len); - if (r < 0) { - onig_node_free(snode); - return r; - } - } - } - - *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE); - CHECK_NULL_RETURN_MEMERR(*(iarg->ptail)); - iarg->ptail = &(NCDR((*(iarg->ptail)))); - } - } - - return 0; -} - -static int -parse_exp(Node** np, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env) -{ - int r, len, group = 0; - Node* qn; - Node** targetp; - - *np = NULL; - if (tok->type == (enum TokenSyms )term) - goto end_of_token; - - switch (tok->type) { - case TK_ALT: - case TK_EOT: - end_of_token: - *np = node_new_empty(); - return tok->type; - break; - - case TK_SUBEXP_OPEN: - r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env); - if (r < 0) return r; - if (r == 1) group = 1; - else if (r == 2) { /* option only */ - Node* target; - OnigOptionType prev = env->option; - - env->option = NENCLOSE(*np)->option; - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - r = parse_subexp(&target, tok, term, src, end, env); - env->option = prev; - if (r < 0) return r; - NENCLOSE(*np)->target = target; - return tok->type; - } - break; - - case TK_SUBEXP_CLOSE: - if (! 
IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP)) - return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS; - - if (tok->escaped) goto tk_raw_byte; - else goto tk_byte; - break; - - case TK_STRING: - tk_byte: - { - *np = node_new_str(tok->backp, *src); - CHECK_NULL_RETURN_MEMERR(*np); - - while (1) { - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - if (r != TK_STRING) break; - - r = onig_node_str_cat(*np, tok->backp, *src); - if (r < 0) return r; - } - - string_end: - targetp = np; - goto repeat; - } - break; - - case TK_RAW_BYTE: - tk_raw_byte: - { - *np = node_new_str_raw_char((UChar )tok->u.c); - CHECK_NULL_RETURN_MEMERR(*np); - len = 1; - while (1) { - if (len >= ONIGENC_MBC_MINLEN(env->enc)) { - if (len == enclen(env->enc, NSTR(*np)->s)) { - r = fetch_token(tok, src, end, env); - NSTRING_CLEAR_RAW(*np); - goto string_end; - } - } - - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - if (r != TK_RAW_BYTE) { - /* Don't use this, it is wrong for little endian encodings. 
*/ -#ifdef USE_PAD_TO_SHORT_BYTE_CHAR - int rem; - if (len < ONIGENC_MBC_MINLEN(env->enc)) { - rem = ONIGENC_MBC_MINLEN(env->enc) - len; - (void )node_str_head_pad(NSTR(*np), rem, (UChar )0); - if (len + rem == enclen(env->enc, NSTR(*np)->s)) { - NSTRING_CLEAR_RAW(*np); - goto string_end; - } - } -#endif - return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; - } - - r = node_str_cat_char(*np, (UChar )tok->u.c); - if (r < 0) return r; - - len++; - } - } - break; - - case TK_CODE_POINT: - { - UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf); - if (num < 0) return num; -#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG - *np = node_new_str_raw(buf, buf + num); -#else - *np = node_new_str(buf, buf + num); -#endif - CHECK_NULL_RETURN_MEMERR(*np); - } - break; - - case TK_QUOTE_OPEN: - { - OnigCodePoint end_op[2]; - UChar *qstart, *qend, *nextp; - - end_op[0] = (OnigCodePoint )MC_ESC(env->syntax); - end_op[1] = (OnigCodePoint )'E'; - qstart = *src; - qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc); - if (IS_NULL(qend)) { - nextp = qend = end; - } - *np = node_new_str(qstart, qend); - CHECK_NULL_RETURN_MEMERR(*np); - *src = nextp; - } - break; - - case TK_CHAR_TYPE: - { - switch (tok->u.prop.ctype) { - case ONIGENC_CTYPE_WORD: - *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not); - CHECK_NULL_RETURN_MEMERR(*np); - break; - - case ONIGENC_CTYPE_SPACE: - case ONIGENC_CTYPE_DIGIT: - case ONIGENC_CTYPE_XDIGIT: - { - CClassNode* cc; - -#ifdef USE_SHARED_CCLASS_TABLE - const OnigCodePoint *mbr; - OnigCodePoint sb_out; - - r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype, - &sb_out, &mbr); - if (r == 0 && - ONIGENC_CODE_RANGE_NUM(mbr) - >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) { - type_cclass_key key; - type_cclass_key* new_key; - - key.enc = env->enc; - key.not = tok->u.prop.not; - key.type = tok->u.prop.ctype; - - THREAD_ATOMIC_START; - - if (IS_NULL(OnigTypeCClassTable)) { - OnigTypeCClassTable - = 
onig_st_init_table_with_size(&type_type_cclass_hash, 10); - if (IS_NULL(OnigTypeCClassTable)) { - THREAD_ATOMIC_END; - return ONIGERR_MEMORY; - } - } - else { - if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key, - (st_data_t* )np)) { - THREAD_ATOMIC_END; - break; - } - } - - *np = node_new_cclass_by_codepoint_range(tok->u.prop.not, - sb_out, mbr); - if (IS_NULL(*np)) { - THREAD_ATOMIC_END; - return ONIGERR_MEMORY; - } - - cc = NCCLASS(*np); - NCCLASS_SET_SHARE(cc); - new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key)); - xmemcpy(new_key, &key, sizeof(type_cclass_key)); - onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key, - (st_data_t )*np); - - THREAD_ATOMIC_END; - } - else { -#endif - *np = node_new_cclass(); - CHECK_NULL_RETURN_MEMERR(*np); - cc = NCCLASS(*np); - add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env); - if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); -#ifdef USE_SHARED_CCLASS_TABLE - } -#endif - } - break; - - default: - return ONIGERR_PARSER_BUG; - break; - } - } - break; - - case TK_CHAR_PROPERTY: - r = parse_char_property(np, tok, src, end, env); - if (r != 0) return r; - break; - - case TK_CC_OPEN: - { - CClassNode* cc; - - r = parse_char_class(np, tok, src, end, env); - if (r != 0) return r; - - cc = NCCLASS(*np); - if (IS_IGNORECASE(env->option)) { - IApplyCaseFoldArg iarg; - - iarg.env = env; - iarg.cc = cc; - iarg.alt_root = NULL_NODE; - iarg.ptail = &(iarg.alt_root); - - r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag, - i_apply_case_fold, &iarg); - if (r != 0) { - onig_node_free(iarg.alt_root); - return r; - } - if (IS_NOT_NULL(iarg.alt_root)) { - Node* work = onig_node_new_alt(*np, iarg.alt_root); - if (IS_NULL(work)) { - onig_node_free(iarg.alt_root); - return ONIGERR_MEMORY; - } - *np = work; - } - } - } - break; - - case TK_ANYCHAR: - *np = node_new_anychar(); - CHECK_NULL_RETURN_MEMERR(*np); - break; - - case TK_ANYCHAR_ANYTIME: - *np = node_new_anychar(); - CHECK_NULL_RETURN_MEMERR(*np); - qn = 
node_new_quantifier(0, REPEAT_INFINITE, 0); - CHECK_NULL_RETURN_MEMERR(qn); - NQTFR(qn)->target = *np; - *np = qn; - break; - - case TK_BACKREF: - len = tok->u.backref.num; - *np = node_new_backref(len, - (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)), - tok->u.backref.by_name, -#ifdef USE_BACKREF_WITH_LEVEL - tok->u.backref.exist_level, - tok->u.backref.level, -#endif - env); - CHECK_NULL_RETURN_MEMERR(*np); - break; - -#ifdef USE_SUBEXP_CALL - case TK_CALL: - { - int gnum = tok->u.call.gnum; - - if (gnum < 0) { - gnum = BACKREF_REL_TO_ABS(gnum, env); - if (gnum <= 0) - return ONIGERR_INVALID_BACKREF; - } - *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum); - CHECK_NULL_RETURN_MEMERR(*np); - env->num_call++; - } - break; -#endif - - case TK_ANCHOR: - *np = onig_node_new_anchor(tok->u.anchor); - break; - - case TK_OP_REPEAT: - case TK_INTERVAL: - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS)) - return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED; - else - *np = node_new_empty(); - } - else { - goto tk_byte; - } - break; - - default: - return ONIGERR_PARSER_BUG; - break; - } - - { - targetp = np; - - re_entry: - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - - repeat: - if (r == TK_OP_REPEAT || r == TK_INTERVAL) { - if (is_invalid_quantifier_target(*targetp)) - return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; - - qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper, - (r == TK_INTERVAL ? 
1 : 0)); - CHECK_NULL_RETURN_MEMERR(qn); - NQTFR(qn)->greedy = tok->u.repeat.greedy; - r = set_quantifier(qn, *targetp, group, env); - if (r < 0) { - onig_node_free(qn); - return r; - } - - if (tok->u.repeat.possessive != 0) { - Node* en; - en = node_new_enclose(ENCLOSE_STOP_BACKTRACK); - if (IS_NULL(en)) { - onig_node_free(qn); - return ONIGERR_MEMORY; - } - NENCLOSE(en)->target = qn; - qn = en; - } - - if (r == 0) { - *targetp = qn; - } - else if (r == 1) { - onig_node_free(qn); - } - else if (r == 2) { /* split case: /abc+/ */ - Node *tmp; - - *targetp = node_new_list(*targetp, NULL); - if (IS_NULL(*targetp)) { - onig_node_free(qn); - return ONIGERR_MEMORY; - } - tmp = NCDR(*targetp) = node_new_list(qn, NULL); - if (IS_NULL(tmp)) { - onig_node_free(qn); - return ONIGERR_MEMORY; - } - targetp = &(NCAR(tmp)); - } - goto re_entry; - } - } - - return r; -} - -static int -parse_branch(Node** top, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env) -{ - int r; - Node *node, **headp; - - *top = NULL; - r = parse_exp(&node, tok, term, src, end, env); - if (r < 0) return r; - - if (r == TK_EOT || r == term || r == TK_ALT) { - *top = node; - } - else { - *top = node_new_list(node, NULL); - headp = &(NCDR(*top)); - while (r != TK_EOT && r != term && r != TK_ALT) { - r = parse_exp(&node, tok, term, src, end, env); - if (r < 0) return r; - - if (NTYPE(node) == NT_LIST) { - *headp = node; - while (IS_NOT_NULL(NCDR(node))) node = NCDR(node); - headp = &(NCDR(node)); - } - else { - *headp = node_new_list(node, NULL); - headp = &(NCDR(*headp)); - } - } - } - - return r; -} - -/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */ -static int -parse_subexp(Node** top, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env) -{ - int r; - Node *node, **headp; - - *top = NULL; - r = parse_branch(&node, tok, term, src, end, env); - if (r < 0) { - onig_node_free(node); - return r; - } - - if (r == term) { - *top = node; - } - else if (r == TK_ALT) { - *top = 
onig_node_new_alt(node, NULL); - headp = &(NCDR(*top)); - while (r == TK_ALT) { - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - r = parse_branch(&node, tok, term, src, end, env); - if (r < 0) return r; - - *headp = onig_node_new_alt(node, NULL); - headp = &(NCDR(*headp)); - } - - if (tok->type != (enum TokenSyms )term) - goto err; - } - else { - err: - if (term == TK_SUBEXP_CLOSE) - return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; - else - return ONIGERR_PARSER_BUG; - } - - return r; -} - -static int -parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) -{ - int r; - OnigToken tok; - - r = fetch_token(&tok, src, end, env); - if (r < 0) return r; - r = parse_subexp(top, &tok, TK_EOT, src, end, env); - if (r < 0) return r; - return 0; -} - -extern int -onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, - regex_t* reg, ScanEnv* env) -{ - int r; - UChar* p; - -#ifdef USE_NAMED_GROUP - names_clear(reg); -#endif - - scan_env_clear(env); - env->option = reg->options; - env->case_fold_flag = reg->case_fold_flag; - env->enc = reg->enc; - env->syntax = reg->syntax; - env->pattern = (UChar* )pattern; - env->pattern_end = (UChar* )end; - env->reg = reg; - - *root = NULL; - p = (UChar* )pattern; - r = parse_regexp(root, &p, (UChar* )end, env); - reg->num_mem = env->num_mem; - return r; -} - -extern void -onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED, - UChar* arg, UChar* arg_end) -{ - env->error = arg; - env->error_end = arg_end; -} diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.h b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.h deleted file mode 100644 index d5edd6e698..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.h +++ /dev/null @@ -1,353 +0,0 @@ -#ifndef REGPARSE_H -#define REGPARSE_H -/********************************************************************** - regparse.h - Oniguruma (regular expression library) 
-**********************************************************************/ -/*- - * Copyright (c) 2002-2007 K.Kosako - * All rights reserved. - * - * Copyright (c) 2015, Hewlett Packard Enterprise Development, L.P.
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "regint.h" - -/* node type */ -#define NT_STR 0 -#define NT_CCLASS 1 -#define NT_CTYPE 2 -#define NT_CANY 3 -#define NT_BREF 4 -#define NT_QTFR 5 -#define NT_ENCLOSE 6 -#define NT_ANCHOR 7 -#define NT_LIST 8 -#define NT_ALT 9 -#define NT_CALL 10 - -/* node type bit */ -#define NTYPE2BIT(type) (1<<(type)) - -#define BIT_NT_STR NTYPE2BIT(NT_STR) -#define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS) -#define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE) -#define BIT_NT_CANY NTYPE2BIT(NT_CANY) -#define BIT_NT_BREF NTYPE2BIT(NT_BREF) -#define BIT_NT_QTFR NTYPE2BIT(NT_QTFR) -#define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE) -#define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR) -#define BIT_NT_LIST NTYPE2BIT(NT_LIST) -#define BIT_NT_ALT NTYPE2BIT(NT_ALT) -#define BIT_NT_CALL NTYPE2BIT(NT_CALL) - -#define IS_NODE_TYPE_SIMPLE(type) \ - ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\ - BIT_NT_CANY | BIT_NT_BREF)) != 0) - -#define NTYPE(node) ((node)->u.base.type) -#define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype) - -#define NSTR(node) (&((node)->u.str)) -#define NCCLASS(node) (&((node)->u.cclass)) -#define NCTYPE(node) (&((node)->u.ctype)) -#define NBREF(node) (&((node)->u.bref)) -#define NQTFR(node) (&((node)->u.qtfr)) -#define NENCLOSE(node) (&((node)->u.enclose)) -#define NANCHOR(node) (&((node)->u.anchor)) -#define NCONS(node) (&((node)->u.cons)) -#define NCALL(node) (&((node)->u.call)) - -#define NCAR(node) (NCONS(node)->car) -#define NCDR(node) (NCONS(node)->cdr) - - - -#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML) -#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) - -#define ENCLOSE_MEMORY (1<<0) -#define ENCLOSE_OPTION (1<<1) -#define ENCLOSE_STOP_BACKTRACK (1<<2) - -#define NODE_STR_MARGIN 16 -#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ -#define NODE_BACKREFS_SIZE 6 - -#define NSTR_RAW (1<<0) /* by backslashed number */ -#define NSTR_AMBIG (1<<1) -#define NSTR_DONT_GET_OPT_INFO 
(1<<2) - -#define NSTRING_LEN(node) ((int)((node)->u.str.end - (node)->u.str.s)) -#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW -#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW -#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG -#define NSTRING_SET_DONT_GET_OPT_INFO(node) \ - (node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO -#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0) -#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0) -#define NSTRING_IS_DONT_GET_OPT_INFO(node) \ - (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0) - -#define BACKREFS_P(br) \ - (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static); - -#define NQ_TARGET_ISNOT_EMPTY 0 -#define NQ_TARGET_IS_EMPTY 1 -#define NQ_TARGET_IS_EMPTY_MEM 2 -#define NQ_TARGET_IS_EMPTY_REC 3 - -/* status bits */ -#define NST_MIN_FIXED (1<<0) -#define NST_MAX_FIXED (1<<1) -#define NST_CLEN_FIXED (1<<2) -#define NST_MARK1 (1<<3) -#define NST_MARK2 (1<<4) -#define NST_MEM_BACKREFED (1<<5) -#define NST_STOP_BT_SIMPLE_REPEAT (1<<6) -#define NST_RECURSION (1<<7) -#define NST_CALLED (1<<8) -#define NST_ADDR_FIXED (1<<9) -#define NST_NAMED_GROUP (1<<10) -#define NST_NAME_REF (1<<11) -#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. 
*/ -#define NST_NEST_LEVEL (1<<13) -#define NST_BY_NUMBER (1<<14) /* {n,m} */ - -#define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f) -#define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f) - -#define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0) -#define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0) -#define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0) -#define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0) -#define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0) -#define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0) -#define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0) -#define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0) -#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \ - (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0) -#define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0) - -#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION -#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0) -#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0) -#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0) -#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0) -#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0) -#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0) - -#define CALLNODE_REFNUM_UNDEF -1 - -typedef struct { - NodeBase base; - UChar* s; - UChar* end; - unsigned int flag; - int capa; /* (allocated size - 1) or 0: use buf[] */ - UChar buf[NODE_STR_BUF_SIZE]; -} StrNode; - -typedef struct { - NodeBase base; - int state; - struct _Node* target; - int lower; - int upper; - int greedy; - int target_empty_info; - struct _Node* head_exact; - struct _Node* next_head_exact; - int is_refered; /* include called node. 
don't eliminate even if {0} */ -#ifdef USE_COMBINATION_EXPLOSION_CHECK - int comb_exp_check_num; /* 1,2,3...: check, 0: no check */ -#endif -} QtfrNode; - -typedef struct { - NodeBase base; - int state; - int type; - int regnum; - OnigOptionType option; - struct _Node* target; - AbsAddrType call_addr; - /* for multiple call reference */ - OnigDistance min_len; /* min length (byte) */ - OnigDistance max_len; /* max length (byte) */ - int char_len; /* character length */ - int opt_count; /* referenced count in optimize_node_left() */ -} EncloseNode; - -#ifdef USE_SUBEXP_CALL - -typedef struct { - int offset; - struct _Node* target; -} UnsetAddr; - -typedef struct { - int num; - int alloc; - UnsetAddr* us; -} UnsetAddrList; - -typedef struct { - NodeBase base; - int state; - int group_num; - UChar* name; - UChar* name_end; - struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */ - UnsetAddrList* unset_addr_list; -} CallNode; - -#endif - -typedef struct { - NodeBase base; - int state; - int back_num; - int back_static[NODE_BACKREFS_SIZE]; - int* back_dynamic; - int nest_level; -} BRefNode; - -typedef struct { - NodeBase base; - int type; - struct _Node* target; - int char_len; -} AnchorNode; - -typedef struct { - NodeBase base; - struct _Node* car; - struct _Node* cdr; -} ConsAltNode; - -typedef struct { - NodeBase base; - int ctype; - int not; -} CtypeNode; - -typedef struct _Node { - union { - NodeBase base; - StrNode str; - CClassNode cclass; - QtfrNode qtfr; - EncloseNode enclose; - BRefNode bref; - AnchorNode anchor; - ConsAltNode cons; - CtypeNode ctype; -#ifdef USE_SUBEXP_CALL - CallNode call; -#endif - } u; -} Node; - - -#define NULL_NODE ((Node* )0) - -#define SCANENV_MEMNODES_SIZE 8 -#define SCANENV_MEM_NODES(senv) \ - (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? 
\ - (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static) - -typedef struct { - OnigOptionType option; - OnigCaseFoldType case_fold_flag; - OnigEncoding enc; - OnigSyntaxType* syntax; - BitStatusType capture_history; - BitStatusType bt_mem_start; - BitStatusType bt_mem_end; - BitStatusType backrefed_mem; - UChar* pattern; - UChar* pattern_end; - UChar* error; - UChar* error_end; - regex_t* reg; /* for reg->names only */ - int num_call; -#ifdef USE_SUBEXP_CALL - UnsetAddrList* unset_addr_list; -#endif - int num_mem; -#ifdef USE_NAMED_GROUP - int num_named; -#endif - int mem_alloc; - Node* mem_nodes_static[SCANENV_MEMNODES_SIZE]; - Node** mem_nodes_dynamic; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - int num_comb_exp_check; - int comb_exp_max_regnum; - int curr_max_regnum; - int has_recursion; -#endif -} ScanEnv; - - -#define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0) -#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0) -#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0) - -#ifdef USE_NAMED_GROUP -typedef struct { - int new_val; -} GroupNumRemap; - -extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map)); -#endif - -extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n)); -extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end)); -extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end)); -extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc)); -extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode)); -extern void onig_node_conv_to_str_node P_((Node* node, int raw)); -extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end)); -extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end)); -extern void onig_node_free P_((Node* node)); -extern Node* onig_node_new_enclose P_((int type)); -extern Node* onig_node_new_anchor P_((int type)); -extern Node* 
onig_node_new_str P_((const UChar* s, const UChar* end)); -extern Node* onig_node_new_list P_((Node* left, Node* right)); -extern Node* onig_node_list_add P_((Node* list, Node* x)); -extern Node* onig_node_new_alt P_((Node* left, Node* right)); -extern void onig_node_str_clear P_((Node* node)); -extern int onig_free_node_list P_((void)); -extern int onig_names_free P_((regex_t* reg)); -extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env)); -extern int onig_free_shared_cclass_table P_((void)); - -#ifdef ONIG_DEBUG -#ifdef USE_NAMED_GROUP -extern int onig_print_names(FILE*, regex_t*); -#endif -#endif - -#endif /* REGPARSE_H */ diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regposerr.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regposerr.c deleted file mode 100644 index e5ff3f5b40..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regposerr.c +++ /dev/null @@ -1,102 +0,0 @@ -/********************************************************************** - regposerr.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2007 K.Kosako - * All rights reserved. - * - * Copyright (c) 2015, Hewlett Packard Enterprise Development, L.P.
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -/*#include "config.h"*/ -#include "onigposix.h" - -#if 0 -#ifdef HAVE_STRING_H -# include -#else -# include -#endif -#endif - -#if defined(__GNUC__) -# define ARG_UNUSED __attribute__ ((unused)) -#else -# define ARG_UNUSED -#endif - -static char* ESTRING[] = { - NULL, - "failed to match", /* REG_NOMATCH */ - "Invalid regular expression", /* REG_BADPAT */ - "invalid collating element referenced", /* REG_ECOLLATE */ - "invalid character class type referenced", /* REG_ECTYPE */ - "bad backslash-escape sequence", /* REG_EESCAPE */ - "invalid back reference number", /* REG_ESUBREG */ - "imbalanced [ and ]", /* REG_EBRACK */ - "imbalanced ( and )", /* REG_EPAREN */ - "imbalanced { and }", /* REG_EBRACE */ - "invalid repeat range {n,m}", /* REG_BADBR */ - "invalid range", /* REG_ERANGE */ - "Out of memory", /* REG_ESPACE */ - "? * + not preceded by valid regular expression", /* REG_BADRPT */ - - /* Extended errors */ - "internal error", /* REG_EONIG_INTERNAL */ - "invalid wide char value", /* REG_EONIG_BADWC */ - "invalid argument", /* REG_EONIG_BADARG */ - "multi-thread error" /* REG_EONIG_THREAD */ -}; - -//#include - - -extern size_t -regerror(int posix_ecode, const regex_t* reg ARG_UNUSED, char* buf, - size_t size) -{ - char* s; - char tbuf[35]; - size_t len; - - if (posix_ecode > 0 - && posix_ecode < (int )(sizeof(ESTRING) / sizeof(ESTRING[0]))) { - s = ESTRING[posix_ecode]; - } - else if (posix_ecode == 0) { - s = ""; - } - else { - sprintf(tbuf, "undefined error code (%d)", posix_ecode); - s = tbuf; - } - - len = strlen_s(s, MAX_STRING_SIZE) + 1; /* use strlen() because s is ascii encoding. 
*/ - - if (buf != NULL && size > 0) { - strncpy_s(buf, size, s, size - 1); - buf[size - 1] = '\0'; - } - return len; -} diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regposix.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regposix.c deleted file mode 100644 index adc9b0affd..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regposix.c +++ /dev/null @@ -1,305 +0,0 @@ -/********************************************************************** - regposix.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 K.Kosako - * All rights reserved. - * - * Copyright (c) 2015, Hewlett Packard Enterprise Development, L.P.
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#define regex_t onig_regex_t -#include "regint.h" -#undef regex_t -#include "onigposix.h" - -#define ONIG_C(reg) ((onig_regex_t* )((reg)->onig)) -#define PONIG_C(reg) ((onig_regex_t** )(&(reg)->onig)) - -/* #define ENC_STRING_LEN(enc,s,len) len = strlen(s) */ -#define ENC_STRING_LEN(enc,s,len) do { \ - if (ONIGENC_MBC_MINLEN(enc) == 1) { \ - UChar* tmps = (UChar* )(s); \ - while (*tmps != 0) tmps++; \ - len = (int)(tmps - (UChar* )(s)); \ - } \ - else { \ - len = onigenc_str_bytelen_null(enc, (UChar* )s); \ - } \ -} while(0) - -typedef struct { - int onig_err; - int posix_err; -} O2PERR; - -static int -onig2posix_error_code(int code) -{ - static const O2PERR o2p[] = { - { ONIG_MISMATCH, REG_NOMATCH }, - { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL }, - { ONIGERR_MEMORY, REG_ESPACE }, - { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL }, - { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL }, - { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL }, - { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL }, - { ONIGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL }, - { ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL }, - { ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG }, - { ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG }, - { ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG }, - { ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE }, - { ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK }, - { ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE }, - { ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE }, - { ONIGERR_END_PATTERN_AT_ESCAPE, REG_EESCAPE }, - { ONIGERR_END_PATTERN_AT_META, REG_EESCAPE }, - { ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE }, - { ONIGERR_META_CODE_SYNTAX, REG_BADPAT }, - { ONIGERR_CONTROL_CODE_SYNTAX, REG_BADPAT }, - { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE }, - { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE }, - { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE }, - { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT }, 
- { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT }, - { ONIGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT }, - { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN }, - { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN }, - { ONIGERR_END_PATTERN_IN_GROUP, REG_BADPAT }, - { ONIGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT }, - { ONIGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT }, - { ONIGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT }, - { ONIGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT }, - { ONIGERR_TOO_BIG_NUMBER, REG_BADPAT }, - { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR }, - { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR }, - { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE }, - { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE }, - { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE }, - { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT }, - { ONIGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG }, - { ONIGERR_INVALID_BACKREF, REG_ESUBREG }, - { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT }, - { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, - { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, - { ONIGERR_INVALID_CODE_POINT_VALUE, REG_EONIG_BADWC }, - { ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT }, - { ONIGERR_INVALID_GROUP_NAME, REG_BADPAT }, - { ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT }, - { ONIGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT }, - { ONIGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT }, - { ONIGERR_MULTIPLEX_DEFINED_NAME, REG_BADPAT }, - { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT }, - { ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT }, - { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT }, - { ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT }, - { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG }, - { ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD } - - }; - - int i; - - if (code >= 0) return 0; - - for (i = 0; i < (int )(sizeof(o2p) / sizeof(o2p[0])); i++) { - if (code == 
o2p[i].onig_err) - return o2p[i].posix_err; - } - - return REG_EONIG_INTERNAL; /* but, unknown error code */ -} - -extern int -regcomp(regex_t* reg, const char* pattern, int posix_options) -{ - int r, len; - OnigSyntaxType* syntax = OnigDefaultSyntax; - OnigOptionType options; - - if ((posix_options & REG_EXTENDED) == 0) - syntax = ONIG_SYNTAX_POSIX_BASIC; - - options = syntax->options; - if ((posix_options & REG_ICASE) != 0) - ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE); - if ((posix_options & REG_NEWLINE) != 0) { - ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE); - ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE); - } - - reg->comp_options = posix_options; - - ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len); - r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len), - options, OnigEncDefaultCharEncoding, syntax, - (OnigErrorInfo* )NULL); - if (r != ONIG_NORMAL) { - return onig2posix_error_code(r); - } - - reg->re_nsub = ONIG_C(reg)->num_mem; - return 0; -} - -extern int -regexec(regex_t* reg, const char* str, size_t nmatch, - regmatch_t pmatch[], int posix_options) -{ - int r, i, len; - UChar* end; - regmatch_t* pm; - OnigOptionType options; - - options = ONIG_OPTION_POSIX_REGION; - if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL; - if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL; - - if (nmatch == 0 || (reg->comp_options & REG_NOSUB) != 0) { - pm = (regmatch_t* )NULL; - nmatch = 0; - } - else if ((int )nmatch < ONIG_C(reg)->num_mem + 1) { - pm = (regmatch_t* )xmalloc(sizeof(regmatch_t) - * (ONIG_C(reg)->num_mem + 1)); - if (pm == NULL) - return REG_ESPACE; - } - else { - pm = pmatch; - } - - ENC_STRING_LEN(ONIG_C(reg)->enc, str, len); - end = (UChar* )(str + len); - r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end, - (OnigRegion* )pm, options); - - if (r >= 0) { - r = 0; /* Match */ - if (pm != pmatch && pm != NULL) { - xmemcpy(pmatch, pm, sizeof(regmatch_t) * nmatch); 
- } - } - else if (r == ONIG_MISMATCH) { - r = REG_NOMATCH; - for (i = 0; i < (int )nmatch; i++) - pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS; - } - else { - r = onig2posix_error_code(r); - } - - if (pm != pmatch && pm != NULL) - xfree(pm); - -#if 0 - if (reg->re_nsub > nmatch - 1) - reg->re_nsub = (nmatch <= 1 ? 0 : nmatch - 1); -#endif - - return r; -} - -extern void -regfree(regex_t* reg) -{ - onig_free(ONIG_C(reg)); -} - - -extern void -reg_set_encoding(int mb_code) -{ - OnigEncoding enc; - - switch (mb_code) { - case REG_POSIX_ENCODING_ASCII: - enc = ONIG_ENCODING_ASCII; - break; - case REG_POSIX_ENCODING_EUC_JP: - enc = ONIG_ENCODING_EUC_JP; - break; - case REG_POSIX_ENCODING_SJIS: - enc = ONIG_ENCODING_SJIS; - break; - case REG_POSIX_ENCODING_UTF8: - enc = ONIG_ENCODING_UTF8; - break; - case REG_POSIX_ENCODING_UTF16_BE: - enc = ONIG_ENCODING_UTF16_BE; - break; - case REG_POSIX_ENCODING_UTF16_LE: - enc = ONIG_ENCODING_UTF16_LE; - break; - - default: - return ; - break; - } - - onigenc_set_default_encoding(enc); -} - -extern int -reg_name_to_group_numbers(regex_t* reg, - const unsigned char* name, const unsigned char* name_end, int** nums) -{ - return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums); -} - -typedef struct { - int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*); - regex_t* reg; - void* arg; -} i_wrap; - -static int -i_wrapper(const UChar* name, const UChar* name_end, int ng, int* gs, - onig_regex_t* reg ARG_UNUSED, void* arg) -{ - i_wrap* warg = (i_wrap* )arg; - - return (*warg->func)(name, name_end, ng, gs, warg->reg, warg->arg); -} - -extern int -reg_foreach_name(regex_t* reg, - int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), - void* arg) -{ - i_wrap warg; - - warg.func = func; - warg.reg = reg; - warg.arg = arg; - - return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg); -} - -extern int -reg_number_of_names(regex_t* reg) -{ - return 
onig_number_of_names(ONIG_C(reg)); -} diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regsyntax.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regsyntax.c deleted file mode 100644 index dc4d68183b..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regsyntax.c +++ /dev/null @@ -1,315 +0,0 @@ -/********************************************************************** - regsyntax.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2006 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "regint.h" - -OnigSyntaxType OnigSyntaxASIS = { - 0 - , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE - , 0 - , ONIG_OPTION_NONE - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - -OnigSyntaxType OnigSyntaxPosixBasic = { - ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | - ONIG_SYN_OP_ESC_BRACE_INTERVAL ) - , 0 - , 0 - , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - -OnigSyntaxType OnigSyntaxPosixExtended = { - ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP | - ONIG_SYN_OP_BRACE_INTERVAL | - ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT ) - , 0 - , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | - ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | - ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP | - ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) - , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - -OnigSyntaxType OnigSyntaxEmacs = { - ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | - ONIG_SYN_OP_ESC_BRACE_INTERVAL | - ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT | - ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | - ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF | - ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS ) - , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR - , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC - , ONIG_OPTION_NONE - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - -OnigSyntaxType OnigSyntaxGrep = { - ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET | - ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | - ONIG_SYN_OP_ESC_VBAR_ALT | - ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF | - ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR | - ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND | - ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF ) - , 0 - , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC ) - , ONIG_OPTION_NONE - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - -OnigSyntaxType OnigSyntaxGnuRegex = { - SYN_GNU_REGEX_OP - , 0 - , SYN_GNU_REGEX_BV - , ONIG_OPTION_NONE - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - -OnigSyntaxType OnigSyntaxJava = { - (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | - ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL | - ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 ) - & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) - , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT | - ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | - ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP | - ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 | - ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY ) - , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND ) - , ONIG_OPTION_SINGLELINE - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - -OnigSyntaxType OnigSyntaxPerl = { - (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | - ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | - ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | - ONIG_SYN_OP_ESC_C_CONTROL ) - & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) - , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | - ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | - ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | - ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT ) - , SYN_GNU_REGEX_BV - , ONIG_OPTION_SINGLELINE - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - -/* Perl + named group */ -OnigSyntaxType OnigSyntaxPerl_NG = { - (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | - ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | - ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | - ONIG_SYN_OP_ESC_C_CONTROL ) - & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) - , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | - ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | - ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | - ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | - ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | - ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | - ONIG_SYN_OP2_ESC_G_SUBEXP_CALL ) - , ( SYN_GNU_REGEX_BV | - ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | - ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME ) - , ONIG_OPTION_SINGLELINE - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' 
*/ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - - - -extern int -onig_set_default_syntax(OnigSyntaxType* syntax) -{ - if (IS_NULL(syntax)) - syntax = ONIG_SYNTAX_RUBY; - - OnigDefaultSyntax = syntax; - return 0; -} - -extern void -onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from) -{ - *to = *from; -} - -extern void -onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op) -{ - syntax->op = op; -} - -extern void -onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2) -{ - syntax->op2 = op2; -} - -extern void -onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior) -{ - syntax->behavior = behavior; -} - -extern void -onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options) -{ - syntax->options = options; -} - -extern unsigned int -onig_get_syntax_op(OnigSyntaxType* syntax) -{ - return syntax->op; -} - -extern unsigned int -onig_get_syntax_op2(OnigSyntaxType* syntax) -{ - return syntax->op2; -} - -extern unsigned int -onig_get_syntax_behavior(OnigSyntaxType* syntax) -{ - return syntax->behavior; -} - -extern OnigOptionType -onig_get_syntax_options(OnigSyntaxType* syntax) -{ - return syntax->options; -} - -#ifdef USE_VARIABLE_META_CHARS -extern int onig_set_meta_char(OnigSyntaxType* enc, - unsigned int what, OnigCodePoint code) -{ - switch (what) { - case ONIG_META_CHAR_ESCAPE: - enc->meta_char_table.esc = code; - break; - case ONIG_META_CHAR_ANYCHAR: - enc->meta_char_table.anychar = code; - break; - case ONIG_META_CHAR_ANYTIME: - enc->meta_char_table.anytime = code; - break; - case ONIG_META_CHAR_ZERO_OR_ONE_TIME: - enc->meta_char_table.zero_or_one_time = code; - break; - case ONIG_META_CHAR_ONE_OR_MORE_TIME: - enc->meta_char_table.one_or_more_time = code; - break; - case 
ONIG_META_CHAR_ANYCHAR_ANYTIME: - enc->meta_char_table.anychar_anytime = code; - break; - default: - return ONIGERR_INVALID_ARGUMENT; - break; - } - return 0; -} -#endif /* USE_VARIABLE_META_CHARS */ diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regtrav.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regtrav.c deleted file mode 100644 index fbc71d899f..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regtrav.c +++ /dev/null @@ -1,76 +0,0 @@ -/********************************************************************** - regtrav.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2004 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regint.h" - -#ifdef USE_CAPTURE_HISTORY - -static int -capture_tree_traverse(OnigCaptureTreeNode* node, int at, - int(*callback_func)(int,int,int,int,int,void*), - int level, void* arg) -{ - int r, i; - - if (node == (OnigCaptureTreeNode* )0) - return 0; - - if ((at & ONIG_TRAVERSE_CALLBACK_AT_FIRST) != 0) { - r = (*callback_func)(node->group, node->beg, node->end, - level, ONIG_TRAVERSE_CALLBACK_AT_FIRST, arg); - if (r != 0) return r; - } - - for (i = 0; i < node->num_childs; i++) { - r = capture_tree_traverse(node->childs[i], at, - callback_func, level + 1, arg); - if (r != 0) return r; - } - - if ((at & ONIG_TRAVERSE_CALLBACK_AT_LAST) != 0) { - r = (*callback_func)(node->group, node->beg, node->end, - level, ONIG_TRAVERSE_CALLBACK_AT_LAST, arg); - if (r != 0) return r; - } - - return 0; -} -#endif /* USE_CAPTURE_HISTORY */ - -extern int -onig_capture_tree_traverse(OnigRegion* region, int at, - int(*callback_func)(int,int,int,int,int,void*), void* arg) -{ -#ifdef USE_CAPTURE_HISTORY - return capture_tree_traverse(region->history_root, at, - callback_func, 0, arg); -#else - return ONIG_NO_SUPPORT_CONFIG; -#endif -} diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regversion.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regversion.c deleted file mode 100644 index 087c6ad899..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regversion.c +++ /dev/null @@ -1,56 +0,0 @@ 
-/********************************************************************** - regversion.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -//#include "config.h" -#include "oniguruma.h" -//#include - -extern const char* -onig_version(void) -{ - static char s[12]; - - sprintf(s, "%d.%d.%d", - ONIGURUMA_VERSION_MAJOR, - ONIGURUMA_VERSION_MINOR, - ONIGURUMA_VERSION_TEENY); - return s; -} - -extern const char* -onig_copyright(void) -{ - static char s[58]; - - sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2008 K.Kosako", - ONIGURUMA_VERSION_MAJOR, - ONIGURUMA_VERSION_MINOR, - ONIGURUMA_VERSION_TEENY); - return s; -} diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/st.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/st.c deleted file mode 100644 index 1527fcc439..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/st.c +++ /dev/null @@ -1,579 +0,0 @@ -/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */ - -/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */ - -//#include -//#include -//#include -#include "OnigurumaUefiPort.h" - -#ifdef _WIN32 -#include -#endif - -#include "regint.h" -#include "st.h" - -typedef struct st_table_entry st_table_entry; - -struct st_table_entry { - unsigned int hash; - st_data_t key; - st_data_t record; - st_table_entry *next; -}; - -#define ST_DEFAULT_MAX_DENSITY 5 -#define ST_DEFAULT_INIT_TABLE_SIZE 11 - - /* - * DEFAULT_MAX_DENSITY is the default for the largest we allow the - * average number of items per bin before increasing the number of - * bins - * - * DEFAULT_INIT_TABLE_SIZE is the default for the number of bins - * allocated initially - * - */ - -static int numcmp(long, long); -static int numhash(long); -static struct st_hash_type type_numhash = { - numcmp, - numhash, -}; - -/* extern int strcmp(const char *, const char *); */ -static int strhash(const char *); -static struct st_hash_type type_strhash = { - strcmp, - strhash, -}; - -static void rehash(st_table *); - -#define alloc(type) (type*)xmalloc((unsigned)sizeof(type)) -#define Calloc(n,s) 
(char*)xcalloc((n),(s)) - -#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0) - -#define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key)) -#define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins) - -/* - * MINSIZE is the minimum size of a dictionary. - */ - -#define MINSIZE 8 - -/* -Table of prime numbers 2^n+a, 2<=n<=30. -*/ -static const long primes[] = { - 8 + 3, - 16 + 3, - 32 + 5, - 64 + 3, - 128 + 3, - 256 + 27, - 512 + 9, - 1024 + 9, - 2048 + 5, - 4096 + 3, - 8192 + 27, - 16384 + 43, - 32768 + 3, - 65536 + 45, - 131072 + 29, - 262144 + 3, - 524288 + 21, - 1048576 + 7, - 2097152 + 17, - 4194304 + 15, - 8388608 + 9, - 16777216 + 43, - 33554432 + 35, - 67108864 + 15, - 134217728 + 29, - 268435456 + 3, - 536870912 + 11, - 1073741824 + 85, - 0 -}; - -static int -new_size(size) - int size; -{ - int i; - -#if 0 - for (i=3; i<31; i++) { - if ((1< size) return 1< size) return primes[i]; - } - /* Ran out of polynomials */ - return -1; /* should raise exception */ -#endif -} - -#ifdef HASH_LOG -static int collision = 0; -static int init_st = 0; - -static void -stat_col() -{ - FILE *f = fopen("/tmp/col", "w"); - fprintf(f, "collision: %d\n", collision); - fclose(f); -} -#endif - -st_table* -st_init_table_with_size(type, size) - struct st_hash_type *type; - int size; -{ - st_table *tbl; - -#ifdef HASH_LOG - if (init_st == 0) { - init_st = 1; - atexit(stat_col); - } -#endif - - size = new_size(size); /* round up to prime number */ - - tbl = alloc(st_table); - tbl->type = type; - tbl->num_entries = 0; - tbl->num_bins = size; - tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*)); - - return tbl; -} - -st_table* -st_init_table(type) - struct st_hash_type *type; -{ - return st_init_table_with_size(type, 0); -} - -st_table* -st_init_numtable(void) -{ - return st_init_table(&type_numhash); -} - -st_table* -st_init_numtable_with_size(size) - int size; -{ - return st_init_table_with_size(&type_numhash, size); 
-} - -st_table* -st_init_strtable(void) -{ - return st_init_table(&type_strhash); -} - -st_table* -st_init_strtable_with_size(size) - int size; -{ - return st_init_table_with_size(&type_strhash, size); -} - -void -st_free_table(table) - st_table *table; -{ - register st_table_entry *ptr, *next; - int i; - - for(i = 0; i < table->num_bins; i++) { - ptr = table->bins[i]; - while (ptr != 0) { - next = ptr->next; - free(ptr); - ptr = next; - } - } - free(table->bins); - free(table); -} - -#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \ -((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (key), (ptr)->key))) - -#ifdef HASH_LOG -#define COLLISION collision++ -#else -#define COLLISION -#endif - -#define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\ - bin_pos = hash_val%(table)->num_bins;\ - ptr = (table)->bins[bin_pos];\ - if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {\ - COLLISION;\ - while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\ - ptr = ptr->next;\ - }\ - ptr = ptr->next;\ - }\ -} while (0) - -int -st_lookup(table, key, value) - st_table *table; - register st_data_t key; - st_data_t *value; -{ - unsigned int hash_val, bin_pos; - register st_table_entry *ptr; - - hash_val = do_hash(key, table); - FIND_ENTRY(table, ptr, hash_val, bin_pos); - - if (ptr == 0) { - return 0; - } - else { - if (value != 0) *value = ptr->record; - return 1; - } -} - -#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\ -do {\ - st_table_entry *entry;\ - if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) {\ - rehash(table);\ - bin_pos = hash_val % table->num_bins;\ - }\ - \ - entry = alloc(st_table_entry);\ - \ - entry->hash = hash_val;\ - entry->key = key;\ - entry->record = value;\ - entry->next = table->bins[bin_pos];\ - table->bins[bin_pos] = entry;\ - table->num_entries++;\ -} while (0) - -int -st_insert(table, key, value) - register st_table *table; - register st_data_t key; - st_data_t value; -{ - unsigned int hash_val, bin_pos; - register 
st_table_entry *ptr; - - hash_val = do_hash(key, table); - FIND_ENTRY(table, ptr, hash_val, bin_pos); - - if (ptr == 0) { - ADD_DIRECT(table, key, value, hash_val, bin_pos); - return 0; - } - else { - ptr->record = value; - return 1; - } -} - -void -st_add_direct(table, key, value) - st_table *table; - st_data_t key; - st_data_t value; -{ - unsigned int hash_val, bin_pos; - - hash_val = do_hash(key, table); - bin_pos = hash_val % table->num_bins; - ADD_DIRECT(table, key, value, hash_val, bin_pos); -} - -static void -rehash(table) - register st_table *table; -{ - register st_table_entry *ptr, *next, **new_bins; - int i, old_num_bins = table->num_bins, new_num_bins; - unsigned int hash_val; - - new_num_bins = new_size(old_num_bins+1); - new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*)); - - for(i = 0; i < old_num_bins; i++) { - ptr = table->bins[i]; - while (ptr != 0) { - next = ptr->next; - hash_val = ptr->hash % new_num_bins; - ptr->next = new_bins[hash_val]; - new_bins[hash_val] = ptr; - ptr = next; - } - } - free(table->bins); - table->num_bins = new_num_bins; - table->bins = new_bins; -} - -st_table* -st_copy(old_table) - st_table *old_table; -{ - st_table *new_table; - st_table_entry *ptr, *entry; - int i, num_bins = old_table->num_bins; - - new_table = alloc(st_table); - if (new_table == 0) { - return 0; - } - - *new_table = *old_table; - new_table->bins = (st_table_entry**) - Calloc((unsigned)num_bins, sizeof(st_table_entry*)); - - if (new_table->bins == 0) { - free(new_table); - return 0; - } - - for(i = 0; i < num_bins; i++) { - new_table->bins[i] = 0; - ptr = old_table->bins[i]; - while (ptr != 0) { - entry = alloc(st_table_entry); - if (entry == 0) { - free(new_table->bins); - free(new_table); - return 0; - } - *entry = *ptr; - entry->next = new_table->bins[i]; - new_table->bins[i] = entry; - ptr = ptr->next; - } - } - return new_table; -} - -int -st_delete(table, key, value) - register st_table *table; - register st_data_t *key; 
- st_data_t *value; -{ - unsigned int hash_val; - st_table_entry *tmp; - register st_table_entry *ptr; - - hash_val = do_hash_bin(*key, table); - ptr = table->bins[hash_val]; - - if (ptr == 0) { - if (value != 0) *value = 0; - return 0; - } - - if (EQUAL(table, *key, ptr->key)) { - table->bins[hash_val] = ptr->next; - table->num_entries--; - if (value != 0) *value = ptr->record; - *key = ptr->key; - free(ptr); - return 1; - } - - for(; ptr->next != 0; ptr = ptr->next) { - if (EQUAL(table, ptr->next->key, *key)) { - tmp = ptr->next; - ptr->next = ptr->next->next; - table->num_entries--; - if (value != 0) *value = tmp->record; - *key = tmp->key; - free(tmp); - return 1; - } - } - - return 0; -} - -int -st_delete_safe(table, key, value, never) - register st_table *table; - register st_data_t *key; - st_data_t *value; - st_data_t never; -{ - unsigned int hash_val; - register st_table_entry *ptr; - - hash_val = do_hash_bin(*key, table); - ptr = table->bins[hash_val]; - - if (ptr == 0) { - if (value != 0) *value = 0; - return 0; - } - - for(; ptr != 0; ptr = ptr->next) { - if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) { - table->num_entries--; - *key = ptr->key; - if (value != 0) *value = ptr->record; - ptr->key = ptr->record = never; - return 1; - } - } - - return 0; -} - -static int -#if defined(__GNUC__) -delete_never(st_data_t key __attribute__ ((unused)), st_data_t value, - st_data_t never) -#else -delete_never(key, value, never) - st_data_t key, value, never; -#endif -{ - if (value == never) return ST_DELETE; - return ST_CONTINUE; -} - -void -st_cleanup_safe(table, never) - st_table *table; - st_data_t never; -{ - int num_entries = table->num_entries; - - st_foreach(table, delete_never, never); - table->num_entries = num_entries; -} - -int -st_foreach(table, func, arg) - st_table *table; - int (*func)(); - st_data_t arg; -{ - st_table_entry *ptr, *last, *tmp; - enum st_retval retval; - int i; - - for(i = 0; i < table->num_bins; i++) { - last = 0; - 
for(ptr = table->bins[i]; ptr != 0;) { - retval = (*func)(ptr->key, ptr->record, arg); - switch (retval) { - case ST_CHECK: /* check if hash is modified during iteration */ - tmp = 0; - if (i < table->num_bins) { - for (tmp = table->bins[i]; tmp; tmp=tmp->next) { - if (tmp == ptr) break; - } - } - if (!tmp) { - /* call func with error notice */ - return 1; - } - /* fall through */ - case ST_CONTINUE: - last = ptr; - ptr = ptr->next; - break; - case ST_STOP: - return 0; - case ST_DELETE: - tmp = ptr; - if (last == 0) { - table->bins[i] = ptr->next; - } - else { - last->next = ptr->next; - } - ptr = ptr->next; - free(tmp); - table->num_entries--; - } - } - } - return 0; -} - -static int -strhash(string) - register const char *string; -{ - register int c; - -#ifdef HASH_ELFHASH - register unsigned int h = 0, g; - - while ((c = *string++) != '\0') { - h = ( h << 4 ) + c; - if ( g = h & 0xF0000000 ) - h ^= g >> 24; - h &= ~g; - } - return h; -#elif HASH_PERL - register int val = 0; - - while ((c = *string++) != '\0') { - val += c; - val += (val << 10); - val ^= (val >> 6); - } - val += (val << 3); - val ^= (val >> 11); - - return val + (val << 15); -#else - register int val = 0; - - while ((c = *string++) != '\0') { - val = val*997 + c; - } - - return val + (val>>5); -#endif -} - -static int -numcmp(x, y) - long x, y; -{ - return x != y; -} - -static int -numhash(n) - long n; -{ - return n; -} diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/st.h b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/st.h deleted file mode 100644 index a620002449..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/st.h +++ /dev/null @@ -1,68 +0,0 @@ -/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. 
*/ - -/* @(#) st.h 5.1 89/12/14 */ - -#ifndef ST_INCLUDED - -#define ST_INCLUDED - -#ifdef _WIN32 -# include -typedef ULONG_PTR st_data_t; -#else -typedef unsigned long st_data_t; -#endif -#define ST_DATA_T_DEFINED - -typedef struct st_table st_table; - -struct st_hash_type { - int (*compare)(); - int (*hash)(); -}; - -struct st_table { - struct st_hash_type *type; - int num_bins; - int num_entries; - struct st_table_entry **bins; -}; - -#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0) - -enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK}; - -#ifndef _ -# define _(args) args -#endif -#ifndef ANYARGS -# ifdef __cplusplus -# define ANYARGS ... -# else -# define ANYARGS -# endif -#endif - -st_table *st_init_table _((struct st_hash_type *)); -st_table *st_init_table_with_size _((struct st_hash_type *, int)); -st_table *st_init_numtable _((void)); -st_table *st_init_numtable_with_size _((int)); -st_table *st_init_strtable _((void)); -st_table *st_init_strtable_with_size _((int)); -int st_delete _((st_table *, st_data_t *, st_data_t *)); -int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t)); -int st_insert _((st_table *, st_data_t, st_data_t)); -int st_lookup _((st_table *, st_data_t, st_data_t *)); -int st_foreach _((st_table *, int (*)(ANYARGS), st_data_t)); -void st_add_direct _((st_table *, st_data_t, st_data_t)); -void st_free_table _((st_table *)); -void st_cleanup_safe _((st_table *, st_data_t)); -st_table *st_copy _((st_table *)); - -#define ST_NUMCMP ((int (*)()) 0) -#define ST_NUMHASH ((int (*)()) -2) - -#define st_numcmp ST_NUMCMP -#define st_numhash ST_NUMHASH - -#endif /* ST_INCLUDED */ diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/RegularExpressionDxe.c b/MdeModulePkg/Universal/RegularExpressionDxe/RegularExpressionDxe.c deleted file mode 100644 index 6c62957ca3..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/RegularExpressionDxe.c +++ /dev/null @@ -1,321 +0,0 @@ -/** - @file - - 
EFI_REGULAR_EXPRESSION_PROTOCOL Implementation - - Copyright (c) 2015, Hewlett Packard Enterprise Development, L.P.
- - This program and the accompanying materials are licensed and made available - under the terms and conditions of the BSD License that accompanies this - distribution. The full text of the license may be found at - http://opensource.org/licenses/bsd-license.php. - - THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, WITHOUT - WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. -**/ - -#include "RegularExpressionDxe.h" - -STATIC -EFI_REGEX_SYNTAX_TYPE * CONST mSupportedSyntaxes[] = { - &gEfiRegexSyntaxTypePosixExtendedGuid, - &gEfiRegexSyntaxTypePerlGuid -}; - -STATIC -EFI_REGULAR_EXPRESSION_PROTOCOL mProtocolInstance = { - RegularExpressionMatch, - RegularExpressionGetInfo -}; - - - -#define CHAR16_ENCODING ONIG_ENCODING_UTF16_LE - -/** - Call the Oniguruma regex match API. - - Same parameters as RegularExpressionMatch, except SyntaxType is required. - - @retval EFI_SUCCESS Regex compilation and match completed successfully. - @retval EFI_DEVICE_ERROR Regex compilation failed. 
-**/ -STATIC -EFI_STATUS -OnigurumaMatch ( - IN CHAR16 *String, - IN CHAR16 *Pattern, - IN EFI_REGEX_SYNTAX_TYPE *SyntaxType, - OUT BOOLEAN *Result, - OUT EFI_REGEX_CAPTURE **Captures, OPTIONAL - OUT UINTN *CapturesCount - ) -{ - regex_t *OnigRegex; - OnigSyntaxType *OnigSyntax; - OnigRegion *Region; - INT32 OnigResult; - OnigErrorInfo ErrorInfo; - CHAR8 ErrorMessage[ONIG_MAX_ERROR_MESSAGE_LEN]; - UINT32 Index; - OnigUChar *Start; - - // - // Detemine the internal syntax type - // - OnigSyntax = ONIG_SYNTAX_DEFAULT; - if (CompareGuid (SyntaxType, &gEfiRegexSyntaxTypePosixExtendedGuid)) { - OnigSyntax = ONIG_SYNTAX_POSIX_EXTENDED; - } else if (CompareGuid (SyntaxType, &gEfiRegexSyntaxTypePerlGuid)) { - OnigSyntax = ONIG_SYNTAX_PERL; - } else { - DEBUG ((DEBUG_ERROR, "Unsupported regex syntax - using default\n")); - ASSERT (FALSE); - } - - // - // Compile pattern - // - Start = (OnigUChar*)Pattern; - OnigResult = onig_new ( - &OnigRegex, - Start, - Start + onigenc_str_bytelen_null (CHAR16_ENCODING, Start), - ONIG_OPTION_DEFAULT, - CHAR16_ENCODING, - OnigSyntax, - &ErrorInfo - ); - - if (OnigResult != ONIG_NORMAL) { - onig_error_code_to_str (ErrorMessage, OnigResult, &ErrorInfo); - DEBUG ((DEBUG_ERROR, "Regex compilation failed: %a\n", ErrorMessage)); - return EFI_DEVICE_ERROR; - } - - // - // Try to match - // - Start = (OnigUChar*)String; - Region = onig_region_new (); - OnigResult = onig_search ( - OnigRegex, - Start, - Start + onigenc_str_bytelen_null (CHAR16_ENCODING, Start), - Start, - Start + onigenc_str_bytelen_null (CHAR16_ENCODING, Start), - Region, - ONIG_OPTION_NONE - ); - if (OnigResult >= 0) { - *Result = TRUE; - } else { - *Result = FALSE; - if (OnigResult != ONIG_MISMATCH) { - onig_error_code_to_str (ErrorMessage, OnigResult); - DEBUG ((DEBUG_ERROR, "Regex match failed: %a\n", ErrorMessage)); - } - } - - // - // If successful, copy out the region (capture) information - // - if (*Result && Captures != NULL) { - *CapturesCount = Region->num_regs; - 
*Captures = AllocatePool (*CapturesCount * sizeof(**Captures)); - if (*Captures != NULL) { - for (Index = 0; Index < *CapturesCount; ++Index) { - // - // Region beg/end values represent bytes, not characters - // - (*Captures)[Index].CapturePtr = (CHAR16*)((UINTN)String + Region->beg[Index]); - (*Captures)[Index].Length = (Region->end[Index] - Region->beg[Index]) / sizeof(CHAR16); - } - } - } - - onig_region_free (Region, 1); - onig_free (OnigRegex); - - return EFI_SUCCESS; -} - -/** - Returns information about the regular expression syntax types supported - by the implementation. - - This A pointer to the EFI_REGULAR_EXPRESSION_PROTOCOL - instance. - - RegExSyntaxTypeListSize On input, the size in bytes of RegExSyntaxTypeList. - On output with a return code of EFI_SUCCESS, the - size in bytes of the data returned in - RegExSyntaxTypeList. On output with a return code - of EFI_BUFFER_TOO_SMALL, the size of - RegExSyntaxTypeList required to obtain the list. - - RegExSyntaxTypeList A caller-allocated memory buffer filled by the - driver with one EFI_REGEX_SYNTAX_TYPE element - for each supported Regular expression syntax - type. The list must not change across multiple - calls to the same driver. The first syntax - type in the list is the default type for the - driver. - - @retval EFI_SUCCESS The regular expression syntax types list - was returned successfully. - @retval EFI_UNSUPPORTED The service is not supported by this driver. - @retval EFI_DEVICE_ERROR The list of syntax types could not be - retrieved due to a hardware or firmware error. - @retval EFI_BUFFER_TOO_SMALL The buffer RegExSyntaxTypeList is too small - to hold the result. 
-  @retval EFI_INVALID_PARAMETER  This or RegExSyntaxTypeListSize is NULL, or
-                                 RegExSyntaxTypeList is NULL while the size
-                                 passed in is non-zero.
-
-**/
-EFI_STATUS
-EFIAPI
-RegularExpressionGetInfo (
-  IN     EFI_REGULAR_EXPRESSION_PROTOCOL *This,
-  IN OUT UINTN                           *RegExSyntaxTypeListSize,
-  OUT    EFI_REGEX_SYNTAX_TYPE           *RegExSyntaxTypeList
-  )
-{
-  UINTN       RequiredBytes;
-  UINTN       Slot;
-  EFI_STATUS  Status;
-
-  //
-  // The protocol instance and the size cell are mandatory. The list buffer
-  // may only be absent when the caller advertises a zero-byte buffer.
-  //
-  if (This == NULL ||
-      RegExSyntaxTypeListSize == NULL ||
-      (RegExSyntaxTypeList == NULL && *RegExSyntaxTypeListSize != 0)) {
-    return EFI_INVALID_PARAMETER;
-  }
-
-  //
-  // One EFI_REGEX_SYNTAX_TYPE per entry of the supported-syntax table
-  //
-  RequiredBytes = ARRAY_SIZE (mSupportedSyntaxes) * sizeof (**mSupportedSyntaxes);
-
-  if (*RegExSyntaxTypeListSize >= RequiredBytes) {
-    for (Slot = 0; Slot < ARRAY_SIZE (mSupportedSyntaxes); Slot++) {
-      CopyMem (RegExSyntaxTypeList + Slot, mSupportedSyntaxes[Slot], sizeof (**mSupportedSyntaxes));
-    }
-    Status = EFI_SUCCESS;
-  } else {
-    Status = EFI_BUFFER_TOO_SMALL;
-  }
-
-  //
-  // Success and "too small" both report the same byte count back
-  //
-  *RegExSyntaxTypeListSize = RequiredBytes;
-
-  return Status;
-}
-
-/**
-  Checks if the input string matches to the regular expression pattern.
-
-  This          A pointer to the EFI_REGULAR_EXPRESSION_PROTOCOL instance.
-                Type EFI_REGULAR_EXPRESSION_PROTOCOL is defined in Section
-                XYZ.
-
-  String        A pointer to a NULL terminated string to match against the
-                regular expression string specified by Pattern.
-
-  Pattern       A pointer to a NULL terminated string that represents the
-                regular expression.
-
-  SyntaxType    A pointer to the EFI_REGEX_SYNTAX_TYPE that identifies the
-                regular expression syntax type to use. May be NULL in which
-                case the function will use its default regular expression
-                syntax type.
-
-  Result        On return, points to TRUE if String fully matches against
-                the regular expression Pattern using the regular expression
-                SyntaxType. Otherwise, points to FALSE.
-
-  Captures      A Pointer to an array of EFI_REGEX_CAPTURE objects to receive
-                the captured groups in the event of a match. The full
-                sub-string match is put in Captures[0], and the results of N
-                capturing groups are put in Captures[1:N].
If Captures is - NULL, then this function doesn't allocate the memory for the - array and does not build up the elements. It only returns the - number of matching patterns in CapturesCount. If Captures is - not NULL, this function returns a pointer to an array and - builds up the elements in the array. CapturesCount is also - updated to the number of matching patterns found. It is the - caller's responsibility to free the memory pool in Captures - and in each CapturePtr in the array elements. - - CapturesCount On output, CapturesCount is the number of matching patterns - found in String. Zero means no matching patterns were found - in the string. - - @retval EFI_SUCCESS The regular expression string matching - completed successfully. - @retval EFI_UNSUPPORTED The regular expression syntax specified by - SyntaxType is not supported by this driver. - @retval EFI_DEVICE_ERROR The regular expression string matching - failed due to a hardware or firmware error. - @retval EFI_INVALID_PARAMETER This, String, Pattern, Result, or CapturesCount is - NULL.
-
-**/
-EFI_STATUS
-EFIAPI
-RegularExpressionMatch (
-  IN  EFI_REGULAR_EXPRESSION_PROTOCOL *This,
-  IN  CHAR16                          *String,
-  IN  CHAR16                          *Pattern,
-  IN  EFI_REGEX_SYNTAX_TYPE           *SyntaxType, OPTIONAL
-  OUT BOOLEAN                         *Result,
-  OUT EFI_REGEX_CAPTURE               **Captures, OPTIONAL
-  OUT UINTN                           *CapturesCount
-  )
-{
-  UINTN  Slot;
-
-  //
-  // Only SyntaxType and Captures are optional
-  //
-  if (This == NULL ||
-      String == NULL ||
-      Pattern == NULL ||
-      Result == NULL ||
-      CapturesCount == NULL) {
-    return EFI_INVALID_PARAMETER;
-  }
-
-  //
-  // No syntax requested: the first table entry is the default
-  //
-  if (SyntaxType == NULL) {
-    return OnigurumaMatch (String, Pattern, mSupportedSyntaxes[0], Result, Captures, CapturesCount);
-  }
-
-  //
-  // An explicit syntax must be one of the table entries
-  //
-  for (Slot = 0; Slot < ARRAY_SIZE (mSupportedSyntaxes); Slot++) {
-    if (CompareGuid (SyntaxType, mSupportedSyntaxes[Slot])) {
-      return OnigurumaMatch (String, Pattern, SyntaxType, Result, Captures, CapturesCount);
-    }
-  }
-
-  return EFI_UNSUPPORTED;
-}
-
-/**
-  Driver entry point: installs the regular expression protocol instance
-  (mProtocolInstance) on the image handle.
-
-  @param  ImageHandle  Handle the protocol interface is installed on.
-  @param  SystemTable  Not referenced by this function.
-
-  @return The status returned by InstallMultipleProtocolInterfaces().
-**/
-EFI_STATUS
-EFIAPI
-RegularExpressionDxeEntry (
-  IN  EFI_HANDLE        ImageHandle,
-  IN  EFI_SYSTEM_TABLE  *SystemTable
-  )
-{
-  return gBS->InstallMultipleProtocolInterfaces (
-                &ImageHandle,
-                &gEfiRegularExpressionProtocolGuid,
-                &mProtocolInstance,
-                NULL
-                );
-}
diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/RegularExpressionDxe.h b/MdeModulePkg/Universal/RegularExpressionDxe/RegularExpressionDxe.h
deleted file mode 100644
index 46734718c8..0000000000
--- a/MdeModulePkg/Universal/RegularExpressionDxe/RegularExpressionDxe.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/**
-  @file
-
-  EFI_REGULAR_EXPRESSION_PROTOCOL Header File.
-
-  Copyright (c) 2015, Hewlett Packard Enterprise Development, L.P.
-
-  This program and the accompanying materials are licensed and made available
-  under the terms and conditions of the BSD License that accompanies this
-  distribution. The full text of the license may be found at
-  http://opensource.org/licenses/bsd-license.php.
-
-  THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, WITHOUT
-  WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
-**/
-
-#include "Oniguruma/oniguruma.h"
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-//
-// Number of elements in a true array (not a pointer): total size divided by
-// the size of the first element.
-//
-#define ARRAY_SIZE(Array) (sizeof (Array) / sizeof ((Array)[0]))
-
-/**
-  Checks if the input string matches to the regular expression pattern.
-
-  This          A pointer to the EFI_REGULAR_EXPRESSION_PROTOCOL instance.
-                Type EFI_REGULAR_EXPRESSION_PROTOCOL is defined in Section
-                XYZ.
-
-  String        A pointer to a NULL terminated string to match against the
-                regular expression string specified by Pattern.
-
-  Pattern       A pointer to a NULL terminated string that represents the
-                regular expression.
-
-  SyntaxType    A pointer to the EFI_REGEX_SYNTAX_TYPE that identifies the
-                regular expression syntax type to use. May be NULL in which
-                case the function will use its default regular expression
-                syntax type.
-
-  Result        On return, points to TRUE if String fully matches against
-                the regular expression Pattern using the regular expression
-                SyntaxType. Otherwise, points to FALSE.
-
-  Captures      A Pointer to an array of EFI_REGEX_CAPTURE objects to receive
-                the captured groups in the event of a match. The full
-                sub-string match is put in Captures[0], and the results of N
-                capturing groups are put in Captures[1:N]. If Captures is
-                NULL, then this function doesn't allocate the memory for the
-                array and does not build up the elements. It only returns the
-                number of matching patterns in CapturesCount. If Captures is
-                not NULL, this function returns a pointer to an array and
-                builds up the elements in the array.
CapturesCount is also - updated to the number of matching patterns found. It is the - caller's responsibility to free the memory pool in Captures - and in each CapturePtr in the array elements. - - CapturesCount On output, CapturesCount is the number of matching patterns - found in String. Zero means no matching patterns were found - in the string. - - @retval EFI_SUCCESS The regular expression string matching - completed successfully. - @retval EFI_UNSUPPORTED The regular expression syntax specified by - SyntaxType is not supported by this driver. - @retval EFI_DEVICE_ERROR The regular expression string matching - failed due to a hardware or firmware error. - @retval EFI_INVALID_PARAMETER This, String, Pattern, Result, or CapturesCount is - NULL. - -**/ -EFI_STATUS -EFIAPI -RegularExpressionMatch ( - IN EFI_REGULAR_EXPRESSION_PROTOCOL *This, - IN CHAR16 *String, - IN CHAR16 *Pattern, - IN EFI_REGEX_SYNTAX_TYPE *SyntaxType, OPTIONAL - OUT BOOLEAN *Result, - OUT EFI_REGEX_CAPTURE **Captures, OPTIONAL - OUT UINTN *CapturesCount - ); - -/** - Returns information about the regular expression syntax types supported - by the implementation. - - This A pointer to the EFI_REGULAR_EXPRESSION_PROTOCOL - instance. - - RegExSyntaxTypeListSize On input, the size in bytes of RegExSyntaxTypeList. - On output with a return code of EFI_SUCCESS, the - size in bytes of the data returned in - RegExSyntaxTypeList. On output with a return code - of EFI_BUFFER_TOO_SMALL, the size of - RegExSyntaxTypeList required to obtain the list. - - RegExSyntaxTypeList A caller-allocated memory buffer filled by the - driver with one EFI_REGEX_SYNTAX_TYPE element - for each supported Regular expression syntax - type. The list must not change across multiple - calls to the same driver. The first syntax - type in the list is the default type for the - driver. - - @retval EFI_SUCCESS The regular expression syntax types list - was returned successfully.
- @retval EFI_UNSUPPORTED The service is not supported by this driver. - @retval EFI_DEVICE_ERROR The list of syntax types could not be - retrieved due to a hardware or firmware error. - @retval EFI_BUFFER_TOO_SMALL The buffer RegExSyntaxTypeList is too small - to hold the result. - @retval EFI_INVALID_PARAMETER This or RegExSyntaxTypeListSize is NULL, or - RegExSyntaxTypeList is NULL while the size - passed in is non-zero. - -**/ -EFI_STATUS -EFIAPI -RegularExpressionGetInfo ( - IN EFI_REGULAR_EXPRESSION_PROTOCOL *This, - IN OUT UINTN *RegExSyntaxTypeListSize, - OUT EFI_REGEX_SYNTAX_TYPE *RegExSyntaxTypeList - ); - - diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/RegularExpressionDxe.inf b/MdeModulePkg/Universal/RegularExpressionDxe/RegularExpressionDxe.inf deleted file mode 100644 index 7f1023eed0..0000000000 --- a/MdeModulePkg/Universal/RegularExpressionDxe/RegularExpressionDxe.inf +++ /dev/null @@ -1,98 +0,0 @@ -## -# @file -# -# EFI_REGULAR_EXPRESSION_PROTOCOL Implementation -# -# Copyright (c) 2015, Hewlett Packard Enterprise Development, L.P.
-# -# This program and the accompanying materials are licensed and made available -# under the terms and conditions of the BSD License that accompanies this -# distribution. The full text of the license may be found at -# http://opensource.org/licenses/bsd-license.php. -# -# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, WITHOUT -# WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. -## - -[Defines] - INF_VERSION = 0x00010018 - BASE_NAME = RegularExpressionDxe - FILE_GUID = 3E197E9C-D8DC-42D3-89CE-B04FA9833756 - MODULE_TYPE = UEFI_DRIVER - VERSION_STRING = 1.0 - ENTRY_POINT = RegularExpressionDxeEntry - -[Sources] - RegularExpressionDxe.c - RegularExpressionDxe.h - Oniguruma/OnigurumaUefiPort.h - Oniguruma/OnigurumaUefiPort.c - Oniguruma/OnigurumaIntrinsics.c | MSFT - -# Upstream Oniguruma code - Oniguruma/oniguruma.h - Oniguruma/regcomp.c - Oniguruma/regenc.c - Oniguruma/regenc.h - Oniguruma/regerror.c - Oniguruma/regexec.c - Oniguruma/oniggnu.h - Oniguruma/reggnu.c - Oniguruma/regint.h - Oniguruma/regparse.c - Oniguruma/regparse.h - Oniguruma/regposerr.c - Oniguruma/onigposix.h - Oniguruma/regposix.c - Oniguruma/regsyntax.c - Oniguruma/regtrav.c - Oniguruma/regversion.c - Oniguruma/st.c - Oniguruma/st.h - -# Supported Character Encodings - Oniguruma/enc/ascii.c - Oniguruma/enc/unicode.c - Oniguruma/enc/utf16_le.c - -[Packages] - MdePkg/MdePkg.dec - MdeModulePkg/MdeModulePkg.dec - -[LibraryClasses] - UefiBootServicesTableLib - UefiDriverEntryPoint - MemoryAllocationLib - BaseMemoryLib - DebugLib - -[Guids] - gEfiRegexSyntaxTypePosixExtendedGuid - gEfiRegexSyntaxTypePerlGuid - -[Protocols] - gEfiRegularExpressionProtocolGuid - -[BuildOptions] - # Override MSFT build option to remove /Oi and /GL - MSFT:DEBUG_*_IA32_CC_FLAGS == /nologo /c /WX /GS- /W4 /Gs32768 /D UNICODE /O1b2 /FIAutoGen.h /EHs-c- /GR- /GF /Gy /Zi /Gm - MSFT:RELEASE_*_IA32_CC_FLAGS == /nologo /c /WX /GS- /W4 /Gs32768 /D UNICODE /O1b2 /FIAutoGen.h /EHs-c- /GR- 
/GF - MSFT:DEBUG_*_X64_CC_FLAGS == /nologo /c /WX /GS- /W4 /Gs32768 /D UNICODE /O1b2s /FIAutoGen.h /EHs-c- /GR- /GF /Gy /Zi /Gm /X - MSFT:RELEASE_*_X64_CC_FLAGS == /nologo /c /WX /GS- /W4 /Gs32768 /D UNICODE /O1b2s /FIAutoGen.h /EHs-c- /GR- /GF /Gy /X - MSFT:DEBUG_*_IPF_CC_FLAGS == /nologo /c /WX /GS- /W4 /EHs-c- /GR- /Gy /Os /FIAutoGen.h /QIPF_fr32 /Zi /X - MSFT:RELEASE_*_IPF_CC_FLAGS == /nologo /c /WX /GS- /W4 /EHs-c- /GR- /Gy /Os /FIAutoGen.h /QIPF_fr32 /X - INTEL:*_*_*_CC_FLAGS = /Oi- - GCC:*_*_*_CC_FLAGS = -fno-builtin - - # Oniguruma: potentially uninitialized local variable used - MSFT:*_*_*_CC_FLAGS = /wd4701 - - # Oniguruma: intrinsic function not declared - MSFT:*_*_*_CC_FLAGS = /wd4164 - - # Oniguruma: old style declaration in st.c - MSFT:*_*_*_CC_FLAGS = /wd4131 - - # Oniguruma: 'type cast' : truncation from 'OnigUChar *' to 'unsigned int' - MSFT:*_*_*_CC_FLAGS = /wd4305 /wd4306 - -- 2.39.2