]> git.proxmox.com Git - mirror_edk2.git/blame - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/utf16_le.c
MdeModulePkg RegularExpressionDxe: Update Oniguruma from v6.9.0 to v6.9.3
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / utf16_le.c
CommitLineData
14b0e578
CS
1/**********************************************************************\r
2 utf16_le.c - Oniguruma (regular expression library)\r
3**********************************************************************/\r
4/*-\r
b26691c4 5 * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
14b0e578
CS
6 * All rights reserved.\r
7 *\r
8 * Redistribution and use in source and binary forms, with or without\r
9 * modification, are permitted provided that the following conditions\r
10 * are met:\r
11 * 1. Redistributions of source code must retain the above copyright\r
12 * notice, this list of conditions and the following disclaimer.\r
13 * 2. Redistributions in binary form must reproduce the above copyright\r
14 * notice, this list of conditions and the following disclaimer in the\r
15 * documentation and/or other materials provided with the distribution.\r
16 *\r
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
27 * SUCH DAMAGE.\r
28 */\r
b602265d 29#include "regint.h" /* for USE_CALLOUT */\r
14b0e578 30\r
b602265d
DG
31static int\r
32init(void)\r
33{\r
34#ifdef USE_CALLOUT\r
35\r
36 int id;\r
37 OnigEncoding enc;\r
38 char* name;\r
39 unsigned int args[4];\r
40 OnigValue opts[4];\r
41\r
42 enc = ONIG_ENCODING_UTF16_LE;\r
43\r
44 name = "F\000A\000I\000L\000\000\000"; BC0_P(name, fail);\r
45 name = "M\000I\000S\000M\000A\000T\000C\000H\000\000\000"; BC0_P(name, mismatch);\r
46\r
47 name = "M\000A\000X\000\000\000";\r
48 args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;\r
49 args[1] = ONIG_TYPE_CHAR;\r
50 opts[0].c = 'X';\r
51 BC_B_O(name, max, 2, args, 1, opts);\r
52\r
53 name = "E\000R\000R\000O\000R\000\000\000";\r
54 args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT;\r
55 BC_P_O(name, error, 1, args, 1, opts);\r
56\r
57 name = "C\000O\000U\000N\000T\000\000\000";\r
58 args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';\r
59 BC_B_O(name, count, 1, args, 1, opts);\r
60\r
61 name = "T\000O\000T\000A\000L\000_\000C\000O\000U\000N\000T\000\000\000";\r
62 args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';\r
63 BC_B_O(name, total_count, 1, args, 1, opts);\r
64\r
65 name = "C\000M\000P\000\000\000";\r
66 args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;\r
67 args[1] = ONIG_TYPE_STRING;\r
68 args[2] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;\r
69 BC_P(name, cmp, 3, args);\r
70\r
71#endif /* USE_CALLOUT */\r
72\r
73 return ONIG_NORMAL;\r
74}\r
14b0e578
CS
75\r
/*
 * Encoded length in bytes of a UTF-16 character, indexed by the high
 * byte of its first 16-bit code unit.  Only 0xd8..0xdb (the high bytes of
 * lead surrogates) start a 4-byte sequence; every other value maps to 2.
 */
static const int EncLen_UTF16[] = {
  /* 0x00 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x10 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x20 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x30 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x40 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x50 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x60 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x70 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x80 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x90 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xa0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xb0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
  /* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xf0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
94\r
95static int\r
96utf16le_code_to_mbclen(OnigCodePoint code)\r
97{\r
b26691c4
LG
98 if (code > 0xffff) {\r
99 if (code > 0x10ffff)\r
100 return ONIGERR_INVALID_CODE_POINT_VALUE;\r
101 else\r
102 return 4;\r
103 }\r
104 else {\r
105 return 2;\r
106 }\r
14b0e578
CS
107}\r
108\r
109static int\r
110utf16le_mbc_enc_len(const UChar* p)\r
111{\r
112 return EncLen_UTF16[*(p+1)];\r
113}\r
114\r
b602265d
DG
115static int\r
116is_valid_mbc_string(const UChar* p, const UChar* end)\r
117{\r
118 const UChar* end1 = end - 1;\r
119\r
120 while (p < end1) {\r
b26691c4
LG
121 int len = utf16le_mbc_enc_len(p);\r
122 if (len == 4) {\r
123 if (p + 3 < end && ! UTF16_IS_SURROGATE_SECOND(*(p + 3)))\r
124 return FALSE;\r
125 }\r
126 else\r
127 if (UTF16_IS_SURROGATE_SECOND(*(p + 1)))\r
128 return FALSE;\r
129\r
130 p += len;\r
b602265d
DG
131 }\r
132\r
133 if (p != end)\r
134 return FALSE;\r
135 else\r
136 return TRUE;\r
137}\r
138\r
14b0e578
CS
139static int\r
140utf16le_is_mbc_newline(const UChar* p, const UChar* end)\r
141{\r
142 if (p + 1 < end) {\r
143 if (*p == 0x0a && *(p+1) == 0x00)\r
144 return 1;\r
145#ifdef USE_UNICODE_ALL_LINE_TERMINATORS\r
146 if ((\r
147#ifndef USE_CRNL_AS_LINE_TERMINATOR\r
b602265d 148 *p == 0x0d ||\r
14b0e578 149#endif\r
b602265d 150 *p == 0x85) && *(p+1) == 0x00)\r
14b0e578 151 return 1;\r
b602265d 152\r
14b0e578
CS
153 if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28))\r
154 return 1;\r
155#endif\r
156 }\r
157 return 0;\r
158}\r
159\r
160static OnigCodePoint\r
161utf16le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)\r
162{\r
163 OnigCodePoint code;\r
164 UChar c0 = *p;\r
165 UChar c1 = *(p+1);\r
166\r
167 if (UTF16_IS_SURROGATE_FIRST(c1)) {\r
168 code = ((((c1 - 0xd8) << 2) + ((c0 & 0xc0) >> 6) + 1) << 16)\r
169 + ((((c0 & 0x3f) << 2) + (p[3] - 0xdc)) << 8)\r
170 + p[2];\r
171 }\r
172 else {\r
173 code = c1 * 256 + p[0];\r
174 }\r
175 return code;\r
176}\r
177\r
178static int\r
179utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)\r
180{\r
181 UChar* p = buf;\r
182\r
183 if (code > 0xffff) {\r
184 unsigned int plane, high;\r
185\r
186 plane = (code >> 16) - 1;\r
187 high = (code & 0xff00) >> 8;\r
188\r
b602265d
DG
189 *p++ = ((plane & 0x03) << 6) + (high >> 2);\r
190 *p++ = (plane >> 2) + 0xd8;\r
14b0e578
CS
191 *p++ = (UChar )(code & 0xff);\r
192 *p = (high & 0x03) + 0xdc;\r
193 return 4;\r
194 }\r
195 else {\r
196 *p++ = (UChar )(code & 0xff);\r
197 *p++ = (UChar )((code & 0xff00) >> 8);\r
198 return 2;\r
199 }\r
200}\r
201\r
202static int\r
203utf16le_mbc_case_fold(OnigCaseFoldType flag,\r
b26691c4 204 const UChar** pp, const UChar* end, UChar* fold)\r
14b0e578
CS
205{\r
206 const UChar* p = *pp;\r
207\r
208 if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0) {\r
209#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI\r
210 if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {\r
211 if (*p == 0x49) {\r
b602265d
DG
212 *fold++ = 0x31;\r
213 *fold = 0x01;\r
214 (*pp) += 2;\r
215 return 2;\r
14b0e578
CS
216 }\r
217 }\r
218#endif\r
219\r
220 *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);\r
221 *fold = 0;\r
222 *pp += 2;\r
223 return 2;\r
224 }\r
225 else\r
226 return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_LE, flag, pp, end,\r
b26691c4 227 fold);\r
14b0e578
CS
228}\r
229\r
#if 0
/*
 * Disabled code (compiled out by the surrounding #if 0).
 *
 * Advances *pp past the character at its current position and reports
 * whether that character is case-ambiguous.  Only characters whose high
 * byte is 0 are examined; all others yield FALSE.
 *
 * NOTE(review): the test `(v | BIT_CTYPE_LOWER) != 0` is always true
 * (assuming BIT_CTYPE_LOWER is non-zero), so the final `return` inside the
 * high-byte-zero branch is unreachable.  `&` looks like what was intended;
 * confirm before ever re-enabling this function.
 */
static int
utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp,
                         const UChar* end)
{
  const UChar* p = *pp;

  (*pp) += EncLen_UTF16[*(p+1)];

  if (*(p+1) == 0) {
    int c, v;

    if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
      return TRUE;
    }

    c = *p;
    v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
                                                (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
    if ((v | BIT_CTYPE_LOWER) != 0) {
      /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
      if (c >= 0xaa && c <= 0xba)
        return FALSE;
      else
        return TRUE;
    }
    return (v != 0 ? TRUE : FALSE);
  }

  return FALSE;
}
#endif
262\r
263static UChar*\r
264utf16le_left_adjust_char_head(const UChar* start, const UChar* s)\r
265{\r
266 if (s <= start) return (UChar* )s;\r
267\r
268 if ((s - start) % 2 == 1) {\r
269 s--;\r
270 }\r
271\r
b26691c4
LG
272 if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1 &&\r
273 UTF16_IS_SURROGATE_FIRST(*(s-1)))\r
14b0e578
CS
274 s -= 2;\r
275\r
276 return (UChar* )s;\r
277}\r
278\r
279static int\r
280utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag,\r
281 const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])\r
282{\r
283 return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_LE,\r
b26691c4 284 flag, p, end, items);\r
14b0e578
CS
285}\r
286\r
287OnigEncodingType OnigEncodingUTF16_LE = {\r
288 utf16le_mbc_enc_len,\r
289 "UTF-16LE", /* name */\r
b602265d
DG
290 4, /* max enc length */\r
291 2, /* min enc length */\r
14b0e578
CS
292 utf16le_is_mbc_newline,\r
293 utf16le_mbc_to_code,\r
294 utf16le_code_to_mbclen,\r
295 utf16le_code_to_mbc,\r
296 utf16le_mbc_case_fold,\r
297 onigenc_unicode_apply_all_case_fold,\r
298 utf16le_get_case_fold_codes_by_str,\r
299 onigenc_unicode_property_name_to_ctype,\r
300 onigenc_unicode_is_code_ctype,\r
301 onigenc_utf16_32_get_ctype_code_range,\r
302 utf16le_left_adjust_char_head,\r
b602265d
DG
303 onigenc_always_false_is_allowed_reverse_match,\r
304 init,\r
305 0, /* is_initialized */\r
306 is_valid_mbc_string,\r
b26691c4 307 ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1,\r
b602265d 308 0, 0\r
14b0e578 309};\r