]> git.proxmox.com Git - mirror_edk2.git/blame - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/enc/utf16_le.c
UefiCpuPkg PiSmmCpuDxeSmm: Update SmiEntry function run the same position
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / enc / utf16_le.c
CommitLineData
14b0e578
CS
1/**********************************************************************\r
2 utf16_le.c - Oniguruma (regular expression library)\r
3**********************************************************************/\r
4/*-\r
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
6 * All rights reserved.\r
7 *\r
8 * Redistribution and use in source and binary forms, with or without\r
9 * modification, are permitted provided that the following conditions\r
10 * are met:\r
11 * 1. Redistributions of source code must retain the above copyright\r
12 * notice, this list of conditions and the following disclaimer.\r
13 * 2. Redistributions in binary form must reproduce the above copyright\r
14 * notice, this list of conditions and the following disclaimer in the\r
15 * documentation and/or other materials provided with the distribution.\r
16 *\r
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
27 * SUCH DAMAGE.\r
28 */\r
29\r
30#include "regenc.h"\r
31\r
/*
 * Encoded length in bytes of a UTF-16 character, indexed by the high-order
 * byte of its first 16-bit unit.  Bytes 0xD8..0xDB select 4; every other
 * byte value selects 2.
 */
static const int EncLen_UTF16[] = {
  /* 0x00-0x0F */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x10-0x1F */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x20-0x2F */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x30-0x3F */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x40-0x4F */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x50-0x5F */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x60-0x6F */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x70-0x7F */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x80-0x8F */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x90-0x9F */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xA0-0xAF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xB0-0xBF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xC0-0xCF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xD0-0xDF */ 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
  /* 0xE0-0xEF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xF0-0xFF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
50\r
51static int\r
52utf16le_code_to_mbclen(OnigCodePoint code)\r
53{\r
54 return (code > 0xffff ? 4 : 2);\r
55}\r
56\r
57static int\r
58utf16le_mbc_enc_len(const UChar* p)\r
59{\r
60 return EncLen_UTF16[*(p+1)];\r
61}\r
62\r
63static int\r
64utf16le_is_mbc_newline(const UChar* p, const UChar* end)\r
65{\r
66 if (p + 1 < end) {\r
67 if (*p == 0x0a && *(p+1) == 0x00)\r
68 return 1;\r
69#ifdef USE_UNICODE_ALL_LINE_TERMINATORS\r
70 if ((\r
71#ifndef USE_CRNL_AS_LINE_TERMINATOR\r
72 *p == 0x0d ||\r
73#endif\r
74 *p == 0x85) && *(p+1) == 0x00)\r
75 return 1;\r
76 if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28))\r
77 return 1;\r
78#endif\r
79 }\r
80 return 0;\r
81}\r
82\r
83static OnigCodePoint\r
84utf16le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)\r
85{\r
86 OnigCodePoint code;\r
87 UChar c0 = *p;\r
88 UChar c1 = *(p+1);\r
89\r
90 if (UTF16_IS_SURROGATE_FIRST(c1)) {\r
91 code = ((((c1 - 0xd8) << 2) + ((c0 & 0xc0) >> 6) + 1) << 16)\r
92 + ((((c0 & 0x3f) << 2) + (p[3] - 0xdc)) << 8)\r
93 + p[2];\r
94 }\r
95 else {\r
96 code = c1 * 256 + p[0];\r
97 }\r
98 return code;\r
99}\r
100\r
101static int\r
102utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)\r
103{\r
104 UChar* p = buf;\r
105\r
106 if (code > 0xffff) {\r
107 unsigned int plane, high;\r
108\r
109 plane = (code >> 16) - 1;\r
110 high = (code & 0xff00) >> 8;\r
111\r
112 *p++ = (UChar)(((plane & 0x03) << 6) + (high >> 2));\r
113 *p++ = (UChar)((plane >> 2) + 0xd8);\r
114 *p++ = (UChar )(code & 0xff);\r
115 *p = (high & 0x03) + 0xdc;\r
116 return 4;\r
117 }\r
118 else {\r
119 *p++ = (UChar )(code & 0xff);\r
120 *p++ = (UChar )((code & 0xff00) >> 8);\r
121 return 2;\r
122 }\r
123}\r
124\r
125static int\r
126utf16le_mbc_case_fold(OnigCaseFoldType flag,\r
127 const UChar** pp, const UChar* end, UChar* fold)\r
128{\r
129 const UChar* p = *pp;\r
130\r
131 if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0) {\r
132#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI\r
133 if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {\r
134 if (*p == 0x49) {\r
135 *fold++ = 0x31;\r
136 *fold = 0x01;\r
137 (*pp) += 2;\r
138 return 2;\r
139 }\r
140 }\r
141#endif\r
142\r
143 *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);\r
144 *fold = 0;\r
145 *pp += 2;\r
146 return 2;\r
147 }\r
148 else\r
149 return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_LE, flag, pp, end,\r
150 fold);\r
151}\r
152\r
#if 0
/*
 * Disabled code, kept for reference.
 *
 * Advances *pp by the encoded length of the character it points at and
 * returns TRUE when that character is treated as case-ambiguous.  Only
 * characters whose second byte is zero are examined; all others yield
 * FALSE.
 *
 * The lower-case test uses '&' so that it actually inspects the ctype bits
 * in v; OR-ing with a non-zero mask would always be true and would leave
 * the final return unreachable.
 */
static int
utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp,
                         const UChar* end)
{
  const UChar* p = *pp;

  (*pp) += EncLen_UTF16[*(p+1)];

  if (*(p+1) == 0) {
    int c, v;

    if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
      return TRUE;
    }

    c = *p;
    v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
                       (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
    if ((v & BIT_CTYPE_LOWER) != 0) {
      /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
      if (c >= 0xaa && c <= 0xba)
        return FALSE;
      else
        return TRUE;
    }
    return (v != 0 ? TRUE : FALSE);
  }

  return FALSE;
}
#endif
185\r
186static UChar*\r
187utf16le_left_adjust_char_head(const UChar* start, const UChar* s)\r
188{\r
189 if (s <= start) return (UChar* )s;\r
190\r
191 if ((s - start) % 2 == 1) {\r
192 s--;\r
193 }\r
194\r
195 if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1)\r
196 s -= 2;\r
197\r
198 return (UChar* )s;\r
199}\r
200\r
201static int\r
202utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag,\r
203 const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])\r
204{\r
205 return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_LE,\r
206 flag, p, end, items);\r
207}\r
208\r
209OnigEncodingType OnigEncodingUTF16_LE = {\r
210 utf16le_mbc_enc_len,\r
211 "UTF-16LE", /* name */\r
212 4, /* max byte length */\r
213 2, /* min byte length */\r
214 utf16le_is_mbc_newline,\r
215 utf16le_mbc_to_code,\r
216 utf16le_code_to_mbclen,\r
217 utf16le_code_to_mbc,\r
218 utf16le_mbc_case_fold,\r
219 onigenc_unicode_apply_all_case_fold,\r
220 utf16le_get_case_fold_codes_by_str,\r
221 onigenc_unicode_property_name_to_ctype,\r
222 onigenc_unicode_is_code_ctype,\r
223 onigenc_utf16_32_get_ctype_code_range,\r
224 utf16le_left_adjust_char_head,\r
225 onigenc_always_false_is_allowed_reverse_match\r
226};\r