]> git.proxmox.com Git - mirror_edk2.git/blame - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/utf16_le.c
IntelSiliconPkg: Replace BSD License with BSD+Patent License
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / utf16_le.c
CommitLineData
14b0e578
CS
/**********************************************************************
  utf16_le.c -  Oniguruma (regular expression library)
**********************************************************************/
/*-
 * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
b602265d 29#include "regint.h" /* for USE_CALLOUT */\r
14b0e578 30\r
b602265d
DG
31static int\r
32init(void)\r
33{\r
34#ifdef USE_CALLOUT\r
35\r
36 int id;\r
37 OnigEncoding enc;\r
38 char* name;\r
39 unsigned int args[4];\r
40 OnigValue opts[4];\r
41\r
42 enc = ONIG_ENCODING_UTF16_LE;\r
43\r
44 name = "F\000A\000I\000L\000\000\000"; BC0_P(name, fail);\r
45 name = "M\000I\000S\000M\000A\000T\000C\000H\000\000\000"; BC0_P(name, mismatch);\r
46\r
47 name = "M\000A\000X\000\000\000";\r
48 args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;\r
49 args[1] = ONIG_TYPE_CHAR;\r
50 opts[0].c = 'X';\r
51 BC_B_O(name, max, 2, args, 1, opts);\r
52\r
53 name = "E\000R\000R\000O\000R\000\000\000";\r
54 args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT;\r
55 BC_P_O(name, error, 1, args, 1, opts);\r
56\r
57 name = "C\000O\000U\000N\000T\000\000\000";\r
58 args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';\r
59 BC_B_O(name, count, 1, args, 1, opts);\r
60\r
61 name = "T\000O\000T\000A\000L\000_\000C\000O\000U\000N\000T\000\000\000";\r
62 args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';\r
63 BC_B_O(name, total_count, 1, args, 1, opts);\r
64\r
65 name = "C\000M\000P\000\000\000";\r
66 args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;\r
67 args[1] = ONIG_TYPE_STRING;\r
68 args[2] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;\r
69 BC_P(name, cmp, 3, args);\r
70\r
71#endif /* USE_CALLOUT */\r
72\r
73 return ONIG_NORMAL;\r
74}\r
14b0e578
CS
75\r
/*
 * Encoded length in bytes of a UTF-16 character, indexed by the high
 * (most significant) byte of its first 16-bit code unit.  Entries
 * 0xd8..0xdb are 4 (leading surrogate, so a second code unit follows);
 * every other entry is 2.
 */
static const int EncLen_UTF16[] = {
  /* 0x00 */ 2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x10 */ 2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x20 */ 2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x30 */ 2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x40 */ 2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x50 */ 2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x60 */ 2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x70 */ 2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x80 */ 2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x90 */ 2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xa0 */ 2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xb0 */ 2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2,  4, 4, 4, 4, 2, 2, 2, 2,
  /* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xf0 */ 2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2
};
94\r
95static int\r
96utf16le_code_to_mbclen(OnigCodePoint code)\r
97{\r
98 return (code > 0xffff ? 4 : 2);\r
99}\r
100\r
101static int\r
102utf16le_mbc_enc_len(const UChar* p)\r
103{\r
104 return EncLen_UTF16[*(p+1)];\r
105}\r
106\r
b602265d
DG
107static int\r
108is_valid_mbc_string(const UChar* p, const UChar* end)\r
109{\r
110 const UChar* end1 = end - 1;\r
111\r
112 while (p < end1) {\r
113 p += utf16le_mbc_enc_len(p);\r
114 }\r
115\r
116 if (p != end)\r
117 return FALSE;\r
118 else\r
119 return TRUE;\r
120}\r
121\r
14b0e578
CS
122static int\r
123utf16le_is_mbc_newline(const UChar* p, const UChar* end)\r
124{\r
125 if (p + 1 < end) {\r
126 if (*p == 0x0a && *(p+1) == 0x00)\r
127 return 1;\r
128#ifdef USE_UNICODE_ALL_LINE_TERMINATORS\r
129 if ((\r
130#ifndef USE_CRNL_AS_LINE_TERMINATOR\r
b602265d 131 *p == 0x0d ||\r
14b0e578 132#endif\r
b602265d 133 *p == 0x85) && *(p+1) == 0x00)\r
14b0e578 134 return 1;\r
b602265d 135\r
14b0e578
CS
136 if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28))\r
137 return 1;\r
138#endif\r
139 }\r
140 return 0;\r
141}\r
142\r
143static OnigCodePoint\r
144utf16le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)\r
145{\r
146 OnigCodePoint code;\r
147 UChar c0 = *p;\r
148 UChar c1 = *(p+1);\r
149\r
150 if (UTF16_IS_SURROGATE_FIRST(c1)) {\r
151 code = ((((c1 - 0xd8) << 2) + ((c0 & 0xc0) >> 6) + 1) << 16)\r
152 + ((((c0 & 0x3f) << 2) + (p[3] - 0xdc)) << 8)\r
153 + p[2];\r
154 }\r
155 else {\r
156 code = c1 * 256 + p[0];\r
157 }\r
158 return code;\r
159}\r
160\r
161static int\r
162utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)\r
163{\r
164 UChar* p = buf;\r
165\r
166 if (code > 0xffff) {\r
167 unsigned int plane, high;\r
168\r
169 plane = (code >> 16) - 1;\r
170 high = (code & 0xff00) >> 8;\r
171\r
b602265d
DG
172 *p++ = ((plane & 0x03) << 6) + (high >> 2);\r
173 *p++ = (plane >> 2) + 0xd8;\r
14b0e578
CS
174 *p++ = (UChar )(code & 0xff);\r
175 *p = (high & 0x03) + 0xdc;\r
176 return 4;\r
177 }\r
178 else {\r
179 *p++ = (UChar )(code & 0xff);\r
180 *p++ = (UChar )((code & 0xff00) >> 8);\r
181 return 2;\r
182 }\r
183}\r
184\r
185static int\r
186utf16le_mbc_case_fold(OnigCaseFoldType flag,\r
187 const UChar** pp, const UChar* end, UChar* fold)\r
188{\r
189 const UChar* p = *pp;\r
190\r
191 if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0) {\r
192#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI\r
193 if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {\r
194 if (*p == 0x49) {\r
b602265d
DG
195 *fold++ = 0x31;\r
196 *fold = 0x01;\r
197 (*pp) += 2;\r
198 return 2;\r
14b0e578
CS
199 }\r
200 }\r
201#endif\r
202\r
203 *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);\r
204 *fold = 0;\r
205 *pp += 2;\r
206 return 2;\r
207 }\r
208 else\r
209 return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_LE, flag, pp, end,\r
210 fold);\r
211}\r
212\r
#if 0
/*
 * Disabled (compiled out by `#if 0`).
 *
 * Advances *pp past the character and reports whether the character is
 * case-ambiguous.  Only characters whose high byte is zero are examined;
 * all others yield FALSE.
 */
static int
utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp,
                         const UChar* end)
{
  const UChar* p = *pp;
  int c, v;

  (*pp) += EncLen_UTF16[*(p+1)];

  if (*(p+1) != 0)
    return FALSE;

  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    return TRUE;
  }

  c = *p;
  v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
                                  (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));

  /* NOTE(review): `v | BIT_CTYPE_LOWER` is non-zero whenever
     BIT_CTYPE_LOWER is non-zero, which would make the final return
     unreachable; `&` may have been intended -- confirm before enabling. */
  if ((v | BIT_CTYPE_LOWER) != 0) {
    /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
    return (c >= 0xaa && c <= 0xba) ? FALSE : TRUE;
  }

  return (v != 0 ? TRUE : FALSE);
}
#endif
245\r
246static UChar*\r
247utf16le_left_adjust_char_head(const UChar* start, const UChar* s)\r
248{\r
249 if (s <= start) return (UChar* )s;\r
250\r
251 if ((s - start) % 2 == 1) {\r
252 s--;\r
253 }\r
254\r
255 if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1)\r
256 s -= 2;\r
257\r
258 return (UChar* )s;\r
259}\r
260\r
261static int\r
262utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag,\r
263 const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])\r
264{\r
265 return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_LE,\r
266 flag, p, end, items);\r
267}\r
268\r
269OnigEncodingType OnigEncodingUTF16_LE = {\r
270 utf16le_mbc_enc_len,\r
271 "UTF-16LE", /* name */\r
b602265d
DG
272 4, /* max enc length */\r
273 2, /* min enc length */\r
14b0e578
CS
274 utf16le_is_mbc_newline,\r
275 utf16le_mbc_to_code,\r
276 utf16le_code_to_mbclen,\r
277 utf16le_code_to_mbc,\r
278 utf16le_mbc_case_fold,\r
279 onigenc_unicode_apply_all_case_fold,\r
280 utf16le_get_case_fold_codes_by_str,\r
281 onigenc_unicode_property_name_to_ctype,\r
282 onigenc_unicode_is_code_ctype,\r
283 onigenc_utf16_32_get_ctype_code_range,\r
284 utf16le_left_adjust_char_head,\r
b602265d
DG
285 onigenc_always_false_is_allowed_reverse_match,\r
286 init,\r
287 0, /* is_initialized */\r
288 is_valid_mbc_string,\r
289 ENC_FLAG_UNICODE,\r
290 0, 0\r
14b0e578 291};\r