]> git.proxmox.com Git - mirror_edk2.git/blame - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/unicode.c
MdeModulePkg RegularExpressionDxe: Update Oniguruma from v6.9.0 to v6.9.3
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / unicode.c
CommitLineData
b602265d
DG
1/**********************************************************************\r
2 unicode.c - Oniguruma (regular expression library)\r
3**********************************************************************/\r
4/*-\r
b26691c4 5 * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
b602265d
DG
6 * All rights reserved.\r
7 *\r
8 * Redistribution and use in source and binary forms, with or without\r
9 * modification, are permitted provided that the following conditions\r
10 * are met:\r
11 * 1. Redistributions of source code must retain the above copyright\r
12 * notice, this list of conditions and the following disclaimer.\r
13 * 2. Redistributions in binary form must reproduce the above copyright\r
14 * notice, this list of conditions and the following disclaimer in the\r
15 * documentation and/or other materials provided with the distribution.\r
16 *\r
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
27 * SUCH DAMAGE.\r
28 */\r
29\r
30#include "regint.h"\r
31\r
/*
 * One property-name lookup entry: a name reference paired with the ctype
 * value it maps to.  Both members are stored as short int.
 * NOTE(review): `name` is presumably an offset into a string pool defined in
 * the included property data file -- confirm there.
 */
struct PoolPropertyNameCtype {
  short int name;
  short int ctype;
};
36\r
/*
 * Non-zero when the table entry for `code` has the bit for `ctype` set.
 * `code` indexes EncUNICODE_ISO_8859_1_CtypeTable directly, so the caller
 * must guarantee code < 256.
 */
#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
  ((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
39\r
/*
 * Per-code-point ctype bit sets for code points 0x00-0xff.  Each entry is a
 * mask that ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE tests with CTYPE_TO_BIT().
 * Eight entries per row; row N starts at code point N * 8.
 */
static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = {
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
  0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
  0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
74\r
75#include "st.h"\r
76\r
77#include "unicode_fold_data.c"\r
78\r
79extern int\r
80onigenc_unicode_mbc_case_fold(OnigEncoding enc,\r
81 OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end,\r
82 UChar* fold)\r
83{\r
84 const struct ByUnfoldKey* buk;\r
85\r
86 OnigCodePoint code;\r
87 int i, len, rlen;\r
88 const UChar *p = *pp;\r
89\r
90 code = ONIGENC_MBC_TO_CODE(enc, p, end);\r
91 len = enclen(enc, p);\r
92 *pp += len;\r
93\r
94#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI\r
95 if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {\r
96 if (code == 0x0130) {\r
97 return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold);\r
98 }\r
99#if 0\r
100 if (code == 0x0049) {\r
101 return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold);\r
102 }\r
103#endif\r
104 }\r
105#endif\r
106\r
107 buk = onigenc_unicode_unfold_key(code);\r
108 if (buk != 0) {\r
109 if (buk->fold_len == 1) {\r
110 return ONIGENC_CODE_TO_MBC(enc, *FOLDS1_FOLD(buk->index), fold);\r
111 }\r
112 else {\r
113 OnigCodePoint* addr;\r
114\r
115 FOLDS_FOLD_ADDR_BUK(buk, addr);\r
116 rlen = 0;\r
117 for (i = 0; i < buk->fold_len; i++) {\r
118 OnigCodePoint c = addr[i];\r
119 len = ONIGENC_CODE_TO_MBC(enc, c, fold);\r
120 fold += len;\r
121 rlen += len;\r
122 }\r
123 return rlen;\r
124 }\r
125 }\r
126\r
127 for (i = 0; i < len; i++) {\r
128 *fold++ = *p++;\r
129 }\r
130 return len;\r
131}\r
132\r
133static int\r
134apply_case_fold1(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)\r
135{\r
136 int i, j, k, n, r;\r
137\r
138 for (i = from; i < to; ) {\r
139 OnigCodePoint fold = *FOLDS1_FOLD(i);\r
140 n = FOLDS1_UNFOLDS_NUM(i);\r
141 for (j = 0; j < n; j++) {\r
142 OnigCodePoint unfold = FOLDS1_UNFOLDS(i)[j];\r
143\r
144 r = (*f)(fold, &unfold, 1, arg);\r
145 if (r != 0) return r;\r
146 r = (*f)(unfold, &fold, 1, arg);\r
147 if (r != 0) return r;\r
148\r
149 for (k = 0; k < j; k++) {\r
150 OnigCodePoint unfold2 = FOLDS1_UNFOLDS(i)[k];\r
151 r = (*f)(unfold, &unfold2, 1, arg);\r
152 if (r != 0) return r;\r
153 r = (*f)(unfold2, &unfold, 1, arg);\r
154 if (r != 0) return r;\r
155 }\r
156 }\r
157\r
158 i = FOLDS1_NEXT_INDEX(i);\r
159 }\r
160\r
161 return 0;\r
162}\r
163\r
164static int\r
165apply_case_fold2(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)\r
166{\r
167 int i, j, k, n, r;\r
168\r
169 for (i = from; i < to; ) {\r
170 OnigCodePoint* fold = FOLDS2_FOLD(i);\r
171 n = FOLDS2_UNFOLDS_NUM(i);\r
172 for (j = 0; j < n; j++) {\r
173 OnigCodePoint unfold = FOLDS2_UNFOLDS(i)[j];\r
174\r
175 r = (*f)(unfold, fold, 2, arg);\r
176 if (r != 0) return r;\r
177\r
178 for (k = 0; k < j; k++) {\r
179 OnigCodePoint unfold2 = FOLDS2_UNFOLDS(i)[k];\r
180 r = (*f)(unfold, &unfold2, 1, arg);\r
181 if (r != 0) return r;\r
182 r = (*f)(unfold2, &unfold, 1, arg);\r
183 if (r != 0) return r;\r
184 }\r
185 }\r
186\r
187 i = FOLDS2_NEXT_INDEX(i);\r
188 }\r
189\r
190 return 0;\r
191}\r
192\r
193static int\r
194apply_case_fold3(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)\r
195{\r
196 int i, j, k, n, r;\r
197\r
198 for (i = from; i < to; ) {\r
199 OnigCodePoint* fold = FOLDS3_FOLD(i);\r
200 n = FOLDS3_UNFOLDS_NUM(i);\r
201 for (j = 0; j < n; j++) {\r
202 OnigCodePoint unfold = FOLDS3_UNFOLDS(i)[j];\r
203\r
204 r = (*f)(unfold, fold, 3, arg);\r
205 if (r != 0) return r;\r
206\r
207 for (k = 0; k < j; k++) {\r
208 OnigCodePoint unfold2 = FOLDS3_UNFOLDS(i)[k];\r
209 r = (*f)(unfold, &unfold2, 1, arg);\r
210 if (r != 0) return r;\r
211 r = (*f)(unfold2, &unfold, 1, arg);\r
212 if (r != 0) return r;\r
213 }\r
214 }\r
215\r
216 i = FOLDS3_NEXT_INDEX(i);\r
217 }\r
218\r
219 return 0;\r
220}\r
221\r
222extern int\r
223onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,\r
224 OnigApplyAllCaseFoldFunc f, void* arg)\r
225{\r
226 int r;\r
227\r
228 r = apply_case_fold1(0, FOLDS1_NORMAL_END_INDEX, f, arg);\r
229 if (r != 0) return r;\r
230\r
231#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI\r
232 if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {\r
233 code = 0x0131;\r
234 r = (*f)(0x0049, &code, 1, arg);\r
235 if (r != 0) return r;\r
236 code = 0x0049;\r
237 r = (*f)(0x0131, &code, 1, arg);\r
238 if (r != 0) return r;\r
239\r
240 code = 0x0130;\r
241 r = (*f)(0x0069, &code, 1, arg);\r
242 if (r != 0) return r;\r
243 code = 0x0069;\r
244 r = (*f)(0x0130, &code, 1, arg);\r
245 if (r != 0) return r;\r
246 }\r
247 else {\r
248#endif\r
249 r = apply_case_fold1(FOLDS1_NORMAL_END_INDEX, FOLDS1_END_INDEX, f, arg);\r
250 if (r != 0) return r;\r
251#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI\r
252 }\r
253#endif\r
254\r
255 if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0)\r
256 return 0;\r
257\r
258 r = apply_case_fold2(0, FOLDS2_NORMAL_END_INDEX, f, arg);\r
259 if (r != 0) return r;\r
260\r
261#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI\r
262 if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) {\r
263#endif\r
264 r = apply_case_fold2(FOLDS2_NORMAL_END_INDEX, FOLDS2_END_INDEX, f, arg);\r
265 if (r != 0) return r;\r
266#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI\r
267 }\r
268#endif\r
269\r
270 r = apply_case_fold3(0, FOLDS3_NORMAL_END_INDEX, f, arg);\r
271 if (r != 0) return r;\r
272\r
273 return 0;\r
274}\r
275\r
276extern int\r
277onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,\r
278 OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end,\r
279 OnigCaseFoldCodeItem items[])\r
280{\r
281 int n, m, i, j, k, len;\r
282 OnigCodePoint code, codes[3];\r
283 const struct ByUnfoldKey* buk;\r
284\r
285 n = 0;\r
286\r
287 code = ONIGENC_MBC_TO_CODE(enc, p, end);\r
288 len = enclen(enc, p);\r
289\r
290#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI\r
291 if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {\r
292 if (code == 0x0049) {\r
293 items[0].byte_len = len;\r
294 items[0].code_len = 1;\r
295 items[0].code[0] = 0x0131;\r
296 return 1;\r
297 }\r
298 else if (code == 0x0130) {\r
299 items[0].byte_len = len;\r
300 items[0].code_len = 1;\r
301 items[0].code[0] = 0x0069;\r
302 return 1;\r
303 }\r
304 else if (code == 0x0131) {\r
305 items[0].byte_len = len;\r
306 items[0].code_len = 1;\r
307 items[0].code[0] = 0x0049;\r
308 return 1;\r
309 }\r
310 else if (code == 0x0069) {\r
311 items[0].byte_len = len;\r
312 items[0].code_len = 1;\r
313 items[0].code[0] = 0x0130;\r
314 return 1;\r
315 }\r
316 }\r
317#endif\r
318\r
319 buk = onigenc_unicode_unfold_key(code);\r
320 if (buk != 0) {\r
321 if (buk->fold_len == 1) {\r
322 int un;\r
323 items[0].byte_len = len;\r
324 items[0].code_len = 1;\r
325 items[0].code[0] = *FOLDS1_FOLD(buk->index);\r
326 n++;\r
327\r
328 un = FOLDS1_UNFOLDS_NUM(buk->index);\r
329 for (i = 0; i < un; i++) {\r
330 OnigCodePoint unfold = FOLDS1_UNFOLDS(buk->index)[i];\r
331 if (unfold != code) {\r
332 items[n].byte_len = len;\r
333 items[n].code_len = 1;\r
334 items[n].code[0] = unfold;\r
335 n++;\r
336 }\r
337 }\r
338 code = items[0].code[0]; /* for multi-code to unfold search. */\r
339 }\r
340 else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {\r
341 OnigCodePoint cs[3][4];\r
342 int fn, ncs[3];\r
343\r
344 if (buk->fold_len == 2) {\r
345 m = FOLDS2_UNFOLDS_NUM(buk->index);\r
346 for (i = 0; i < m; i++) {\r
347 OnigCodePoint unfold = FOLDS2_UNFOLDS(buk->index)[i];\r
348 if (unfold == code) continue;\r
349\r
350 items[n].byte_len = len;\r
351 items[n].code_len = 1;\r
352 items[n].code[0] = unfold;\r
353 n++;\r
354 }\r
355\r
356 for (fn = 0; fn < 2; fn++) {\r
357 int index;\r
358 cs[fn][0] = FOLDS2_FOLD(buk->index)[fn];\r
359 index = onigenc_unicode_fold1_key(&cs[fn][0]);\r
360 if (index >= 0) {\r
361 int m = FOLDS1_UNFOLDS_NUM(index);\r
362 for (i = 0; i < m; i++) {\r
363 cs[fn][i+1] = FOLDS1_UNFOLDS(index)[i];\r
364 }\r
365 ncs[fn] = m + 1;\r
366 }\r
367 else\r
368 ncs[fn] = 1;\r
369 }\r
370\r
371 for (i = 0; i < ncs[0]; i++) {\r
372 for (j = 0; j < ncs[1]; j++) {\r
373 items[n].byte_len = len;\r
374 items[n].code_len = 2;\r
375 items[n].code[0] = cs[0][i];\r
376 items[n].code[1] = cs[1][j];\r
377 n++;\r
378 }\r
379 }\r
380 }\r
381 else { /* fold_len == 3 */\r
382 m = FOLDS3_UNFOLDS_NUM(buk->index);\r
383 for (i = 0; i < m; i++) {\r
384 OnigCodePoint unfold = FOLDS3_UNFOLDS(buk->index)[i];\r
385 if (unfold == code) continue;\r
386\r
387 items[n].byte_len = len;\r
388 items[n].code_len = 1;\r
389 items[n].code[0] = unfold;\r
390 n++;\r
391 }\r
392\r
393 for (fn = 0; fn < 3; fn++) {\r
394 int index;\r
395 cs[fn][0] = FOLDS3_FOLD(buk->index)[fn];\r
396 index = onigenc_unicode_fold1_key(&cs[fn][0]);\r
397 if (index >= 0) {\r
398 int m = FOLDS1_UNFOLDS_NUM(index);\r
399 for (i = 0; i < m; i++) {\r
400 cs[fn][i+1] = FOLDS1_UNFOLDS(index)[i];\r
401 }\r
402 ncs[fn] = m + 1;\r
403 }\r
404 else\r
405 ncs[fn] = 1;\r
406 }\r
407\r
408 for (i = 0; i < ncs[0]; i++) {\r
409 for (j = 0; j < ncs[1]; j++) {\r
410 for (k = 0; k < ncs[2]; k++) {\r
411 items[n].byte_len = len;\r
412 items[n].code_len = 3;\r
413 items[n].code[0] = cs[0][i];\r
414 items[n].code[1] = cs[1][j];\r
415 items[n].code[2] = cs[2][k];\r
416 n++;\r
417 }\r
418 }\r
419 }\r
420 }\r
421\r
422 /* multi char folded code is not head of another folded multi char */\r
423 return n;\r
424 }\r
425 }\r
426 else {\r
427 int index = onigenc_unicode_fold1_key(&code);\r
428 if (index >= 0) {\r
429 int m = FOLDS1_UNFOLDS_NUM(index);\r
430 for (i = 0; i < m; i++) {\r
431 items[n].byte_len = len;\r
432 items[n].code_len = 1;\r
433 items[n].code[0] = FOLDS1_UNFOLDS(index)[i];\r
434 n++;\r
435 }\r
436 }\r
437 }\r
438\r
439 if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0)\r
440 return n;\r
441\r
442 p += len;\r
443 if (p < end) {\r
444 int clen;\r
445 int index;\r
446\r
447 codes[0] = code;\r
448 code = ONIGENC_MBC_TO_CODE(enc, p, end);\r
449\r
450 buk = onigenc_unicode_unfold_key(code);\r
451 if (buk != 0 && buk->fold_len == 1) {\r
452 codes[1] = *FOLDS1_FOLD(buk->index);\r
453 }\r
454 else\r
455 codes[1] = code;\r
456\r
457 clen = enclen(enc, p);\r
458 len += clen;\r
459\r
460 index = onigenc_unicode_fold2_key(codes);\r
461 if (index >= 0) {\r
462 m = FOLDS2_UNFOLDS_NUM(index);\r
463 for (i = 0; i < m; i++) {\r
464 items[n].byte_len = len;\r
465 items[n].code_len = 1;\r
466 items[n].code[0] = FOLDS2_UNFOLDS(index)[i];\r
467 n++;\r
468 }\r
469 }\r
470\r
471 p += clen;\r
472 if (p < end) {\r
473 code = ONIGENC_MBC_TO_CODE(enc, p, end);\r
474 buk = onigenc_unicode_unfold_key(code);\r
475 if (buk != 0 && buk->fold_len == 1) {\r
476 codes[2] = *FOLDS1_FOLD(buk->index);\r
477 }\r
478 else\r
479 codes[2] = code;\r
480\r
481 clen = enclen(enc, p);\r
482 len += clen;\r
483\r
484 index = onigenc_unicode_fold3_key(codes);\r
485 if (index >= 0) {\r
486 m = FOLDS3_UNFOLDS_NUM(index);\r
487 for (i = 0; i < m; i++) {\r
488 items[n].byte_len = len;\r
489 items[n].code_len = 1;\r
490 items[n].code[0] = FOLDS3_UNFOLDS(index)[i];\r
491 n++;\r
492 }\r
493 }\r
494 }\r
495 }\r
496\r
497 return n;\r
498}\r
499\r
500#ifdef USE_UNICODE_PROPERTIES\r
501#include "unicode_property_data.c"\r
502#else\r
503#include "unicode_property_data_posix.c"\r
504#endif\r
505\r
506\r
b26691c4
LG
507#ifdef USE_UNICODE_WORD_BREAK\r
508\r
/*
 * Word-break classes assigned to code points by wb_get_type().
 * WB_Any (0) is the class of every code point not listed in WB_RANGES.
 */
enum WB_TYPE {
  WB_Any = 0,
  WB_ALetter,
  WB_CR,
  WB_Double_Quote,
  WB_Extend,
  WB_ExtendNumLet,
  WB_Format,
  WB_Hebrew_Letter,
  WB_Katakana,
  WB_LF,
  WB_MidLetter,
  WB_MidNum,
  WB_MidNumLet,
  WB_Newline,
  WB_Numeric,
  WB_Regional_Indicator,
  WB_Single_Quote,
  WB_WSegSpace,
  WB_ZWJ
};
530\r
531typedef struct {\r
532 OnigCodePoint start;\r
533 OnigCodePoint end;\r
534 enum WB_TYPE type;\r
535} WB_RANGE_TYPE;\r
536\r
537#include "unicode_wb_data.c"\r
538\r
539static enum WB_TYPE\r
540wb_get_type(OnigCodePoint code)\r
541{\r
542 OnigCodePoint low, high, x;\r
543 enum WB_TYPE type;\r
544\r
545 for (low = 0, high = (OnigCodePoint )WB_RANGE_NUM; low < high; ) {\r
546 x = (low + high) >> 1;\r
547 if (code > WB_RANGES[x].end)\r
548 low = x + 1;\r
549 else\r
550 high = x;\r
551 }\r
552\r
553 type = (low < (OnigCodePoint )WB_RANGE_NUM &&\r
554 code >= WB_RANGES[low].start) ?\r
555 WB_RANGES[low].type : WB_Any;\r
556\r
557 return type;\r
558}\r
559\r
/* Classes skipped over by rule WB4 (Extend, Format, ZWJ). */
#define IS_WB_IGNORE_TAIL(t)  ((t) == WB_Extend || (t) == WB_Format || (t) == WB_ZWJ)
/* AHLetter: ALetter or Hebrew_Letter. */
#define IS_WB_AHLetter(t)     ((t) == WB_ALetter || (t) == WB_Hebrew_Letter)
/* MidNumLetQ: MidNumLet or Single_Quote. */
#define IS_WB_MidNumLetQ(t)   ((t) == WB_MidNumLet || (t) == WB_Single_Quote)
563\r
564static int\r
565wb_get_next_main_code(OnigEncoding enc, UChar* p, const UChar* end,\r
566 OnigCodePoint* rcode, enum WB_TYPE* rtype)\r
567{\r
568 OnigCodePoint code;\r
569 enum WB_TYPE type;\r
570\r
571 while (TRUE) {\r
572 p += enclen(enc, p);\r
573 if (p >= end) break;\r
574\r
575 code = ONIGENC_MBC_TO_CODE(enc, p, end);\r
576 type = wb_get_type(code);\r
577 if (! IS_WB_IGNORE_TAIL(type)) {\r
578 *rcode = code;\r
579 *rtype = type;\r
580 return 1;\r
581 }\r
582 }\r
583\r
584 return 0;\r
585}\r
586\r
587extern int\r
588onigenc_wb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,\r
589 const UChar* start, const UChar* end)\r
590{\r
591 int r;\r
592 UChar* pp;\r
593 OnigCodePoint cfrom;\r
594 OnigCodePoint cfrom2;\r
595 OnigCodePoint cto;\r
596 OnigCodePoint cto2;\r
597 enum WB_TYPE from;\r
598 enum WB_TYPE from2;\r
599 enum WB_TYPE to;\r
600 enum WB_TYPE to2;\r
601\r
602 /* WB1: sot / Any */\r
603 if (p == start) return TRUE;\r
604 /* WB2: Any / eot */\r
605 if (p == end) return TRUE;\r
606\r
607 if (IS_NULL(prev)) {\r
608 prev = onigenc_get_prev_char_head(enc, start, p);\r
609 if (IS_NULL(prev)) return TRUE;\r
610 }\r
611\r
612 cfrom = ONIGENC_MBC_TO_CODE(enc, prev, end);\r
613 cto = ONIGENC_MBC_TO_CODE(enc, p, end);\r
614\r
615 from = wb_get_type(cfrom);\r
616 to = wb_get_type(cto);\r
617\r
618 /* short cut */\r
619 if (from == 0 && to == 0) goto WB999;\r
620\r
621 /* WB3: CR + LF */\r
622 if (from == WB_CR && to == WB_LF) return FALSE;\r
623\r
624 /* WB3a: (Newline|CR|LF) / */\r
625 if (from == WB_Newline || from == WB_CR || from == WB_LF) return TRUE;\r
626 /* WB3b: / (Newline|CR|LF) */\r
627 if (to == WB_Newline || to == WB_CR || to == WB_LF) return TRUE;\r
628\r
629 /* WB3c: ZWJ + {Extended_Pictographic} */\r
630 if (from == WB_ZWJ) {\r
631 if (onigenc_unicode_is_code_ctype(cto, PROP_INDEX_EXTENDEDPICTOGRAPHIC))\r
632 return FALSE;\r
633 }\r
634\r
635 /* WB3d: WSegSpace + WSegSpace */\r
636 if (from == WB_WSegSpace && to == WB_WSegSpace) return FALSE;\r
637\r
638 /* WB4: X (Extend|Format|ZWJ)* -> X */\r
639 if (IS_WB_IGNORE_TAIL(to)) return FALSE;\r
640 if (IS_WB_IGNORE_TAIL(from)) {\r
641 while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {\r
642 prev = pp;\r
643 cfrom = ONIGENC_MBC_TO_CODE(enc, prev, end);\r
644 from = wb_get_type(cfrom);\r
645 if (! IS_WB_IGNORE_TAIL(from))\r
646 break;\r
647 }\r
648 }\r
649\r
650 if (IS_WB_AHLetter(from)) {\r
651 /* WB5: AHLetter + AHLetter */\r
652 if (IS_WB_AHLetter(to)) return FALSE;\r
653\r
654 /* WB6: AHLetter + (MidLetter | MidNumLetQ) AHLetter */\r
655 if (to == WB_MidLetter || IS_WB_MidNumLetQ(to)) {\r
656 r = wb_get_next_main_code(enc, p, end, &cto2, &to2);\r
657 if (r == 1) {\r
658 if (IS_WB_AHLetter(to2)) return FALSE;\r
659 }\r
660 }\r
661 }\r
662\r
663 /* WB7: AHLetter (MidLetter | MidNumLetQ) + AHLetter */\r
664 if (from == WB_MidLetter || IS_WB_MidNumLetQ(from)) {\r
665 if (IS_WB_AHLetter(to)) {\r
666 from2 = WB_Any;\r
667 while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {\r
668 prev = pp;\r
669 cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);\r
670 from2 = wb_get_type(cfrom2);\r
671 if (! IS_WB_IGNORE_TAIL(from2))\r
672 break;\r
673 }\r
674\r
675 if (IS_WB_AHLetter(from2)) return FALSE;\r
676 }\r
677 }\r
678\r
679 if (from == WB_Hebrew_Letter) {\r
680 /* WB7a: Hebrew_Letter + Single_Quote */\r
681 if (to == WB_Single_Quote) return FALSE;\r
682\r
683 /* WB7b: Hebrew_Letter + Double_Quote Hebrew_Letter */\r
684 if (to == WB_Double_Quote) {\r
685 r = wb_get_next_main_code(enc, p, end, &cto2, &to2);\r
686 if (r == 1) {\r
687 if (to2 == WB_Hebrew_Letter) return FALSE;\r
688 }\r
689 }\r
690 }\r
691\r
692 /* WB7c: Hebrew_Letter Double_Quote + Hebrew_Letter */\r
693 if (from == WB_Double_Quote) {\r
694 if (to == WB_Hebrew_Letter) {\r
695 from2 = WB_Any;\r
696 while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {\r
697 prev = pp;\r
698 cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);\r
699 from2 = wb_get_type(cfrom2);\r
700 if (! IS_WB_IGNORE_TAIL(from2))\r
701 break;\r
702 }\r
703\r
704 if (from2 == WB_Hebrew_Letter) return FALSE;\r
705 }\r
706 }\r
707\r
708 if (to == WB_Numeric) {\r
709 /* WB8: Numeric + Numeric */\r
710 if (from == WB_Numeric) return FALSE;\r
711\r
712 /* WB9: AHLetter + Numeric */\r
713 if (IS_WB_AHLetter(from)) return FALSE;\r
714\r
715 /* WB11: Numeric (MidNum | MidNumLetQ) + Numeric */\r
716 if (from == WB_MidNum || IS_WB_MidNumLetQ(from)) {\r
717 from2 = WB_Any;\r
718 while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {\r
719 prev = pp;\r
720 cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);\r
721 from2 = wb_get_type(cfrom2);\r
722 if (! IS_WB_IGNORE_TAIL(from2))\r
723 break;\r
724 }\r
725\r
726 if (from2 == WB_Numeric) return FALSE;\r
727 }\r
728 }\r
729\r
730 if (from == WB_Numeric) {\r
731 /* WB10: Numeric + AHLetter */\r
732 if (IS_WB_AHLetter(to)) return FALSE;\r
733\r
734 /* WB12: Numeric + (MidNum | MidNumLetQ) Numeric */\r
735 if (to == WB_MidNum || IS_WB_MidNumLetQ(to)) {\r
736 r = wb_get_next_main_code(enc, p, end, &cto2, &to2);\r
737 if (r == 1) {\r
738 if (to2 == WB_Numeric) return FALSE;\r
739 }\r
740 }\r
741 }\r
742\r
743 /* WB13: Katakana + Katakana */\r
744 if (from == WB_Katakana && to == WB_Katakana) return FALSE;\r
745\r
746 /* WB13a: (AHLetter | Numeric | Katakana | ExtendNumLet) + ExtendNumLet */\r
747 if (IS_WB_AHLetter(from) || from == WB_Numeric || from == WB_Katakana\r
748 || from == WB_ExtendNumLet) {\r
749 if (to == WB_ExtendNumLet) return FALSE;\r
750 }\r
751\r
752 /* WB13b: ExtendNumLet + (AHLetter | Numeric | Katakana) */\r
753 if (from == WB_ExtendNumLet) {\r
754 if (IS_WB_AHLetter(to) || to == WB_Numeric || to == WB_Katakana)\r
755 return FALSE;\r
756 }\r
757\r
758\r
759 /* WB15: sot (RI RI)* RI + RI */\r
760 /* WB16: [^RI] (RI RI)* RI + RI */\r
761 if (from == WB_Regional_Indicator && to == WB_Regional_Indicator) {\r
762 int n = 0;\r
763 while ((prev = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {\r
764 cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);\r
765 from2 = wb_get_type(cfrom2);\r
766 if (from2 != WB_Regional_Indicator)\r
767 break;\r
768\r
769 n++;\r
770 }\r
771 if ((n % 2) == 0) return FALSE;\r
772 }\r
773\r
774 WB999:\r
775 /* WB999: Any / Any */\r
776 return TRUE;\r
777}\r
778\r
779#endif /* USE_UNICODE_WORD_BREAK */\r
780\r
781\r
b602265d
DG
782#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER\r
783\r
/*
 * Result of the two-code-point grapheme-cluster check.  The two UNDEF
 * values mean the answer depends on earlier text and is resolved by
 * onigenc_egcb_is_break_position().
 */
enum EGCB_BREAK_TYPE {
  EGCB_NOT_BREAK = 0,
  EGCB_BREAK     = 1,
  EGCB_BREAK_UNDEF_GB11  = 2,
  EGCB_BREAK_UNDEF_RI_RI = 3
};
790\r
/*
 * Grapheme-cluster-break classes assigned by egcb_get_type().
 * The numeric order matters: IS_CONTROL_CR_LF and IS_HANGUL test ranges of
 * these values, so CR..Control must stay contiguous and the Hangul classes
 * (L and above) must stay last.
 */
enum EGCB_TYPE {
  EGCB_Other   = 0,
  EGCB_CR      = 1,
  EGCB_LF      = 2,
  EGCB_Control = 3,
  EGCB_Extend  = 4,
  EGCB_Prepend = 5,
  EGCB_Regional_Indicator = 6,
  EGCB_SpacingMark = 7,
  EGCB_ZWJ         = 8,
#if 0
  /* obsoleted */
  EGCB_E_Base         = 9,
  EGCB_E_Base_GAZ     = 10,
  EGCB_E_Modifier     = 11,
  EGCB_Glue_After_Zwj = 12,
#endif
  EGCB_L   = 13,
  EGCB_LV  = 14,
  EGCB_LVT = 15,
  EGCB_T   = 16,
  EGCB_V   = 17
};
814\r
815typedef struct {\r
816 OnigCodePoint start;\r
817 OnigCodePoint end;\r
818 enum EGCB_TYPE type;\r
819} EGCB_RANGE_TYPE;\r
820\r
821#include "unicode_egcb_data.c"\r
822\r
823static enum EGCB_TYPE\r
824egcb_get_type(OnigCodePoint code)\r
825{\r
826 OnigCodePoint low, high, x;\r
827 enum EGCB_TYPE type;\r
828\r
829 for (low = 0, high = (OnigCodePoint )EGCB_RANGE_NUM; low < high; ) {\r
830 x = (low + high) >> 1;\r
831 if (code > EGCB_RANGES[x].end)\r
832 low = x + 1;\r
833 else\r
834 high = x;\r
835 }\r
836\r
837 type = (low < (OnigCodePoint )EGCB_RANGE_NUM &&\r
838 code >= EGCB_RANGES[low].start) ?\r
839 EGCB_RANGES[low].type : EGCB_Other;\r
840\r
841 return type;\r
842}\r
843\r
/* Both macros take an EGCB_TYPE value and rely on the enum's numeric order. */
/* True for EGCB_CR, EGCB_LF and EGCB_Control. */
#define IS_CONTROL_CR_LF(code)   ((code) <= EGCB_Control && (code) >= EGCB_CR)
/* True for the Hangul syllable classes (EGCB_L and every later value). */
#define IS_HANGUL(code)   ((code) >= EGCB_L)
846\r
847/* GB1 and GB2 are outside of this function. */\r
848static enum EGCB_BREAK_TYPE\r
849unicode_egcb_is_break_2code(OnigCodePoint from_code, OnigCodePoint to_code)\r
850{\r
851 enum EGCB_TYPE from;\r
852 enum EGCB_TYPE to;\r
853\r
854 from = egcb_get_type(from_code);\r
855 to = egcb_get_type(to_code);\r
856\r
857 /* short cut */\r
858 if (from == 0 && to == 0) goto GB999;\r
859\r
860 /* GB3 */\r
861 if (from == EGCB_CR && to == EGCB_LF) return EGCB_NOT_BREAK;\r
862 /* GB4 */\r
863 if (IS_CONTROL_CR_LF(from)) return EGCB_BREAK;\r
864 /* GB5 */\r
865 if (IS_CONTROL_CR_LF(to)) return EGCB_BREAK;\r
866\r
867 if (IS_HANGUL(from) && IS_HANGUL(to)) {\r
868 /* GB6 */\r
869 if (from == EGCB_L && to != EGCB_T) return EGCB_NOT_BREAK;\r
870 /* GB7 */\r
871 if ((from == EGCB_LV || from == EGCB_V)\r
872 && (to == EGCB_V || to == EGCB_T)) return EGCB_NOT_BREAK;\r
873\r
874 /* GB8 */\r
875 if ((to == EGCB_T) && (from == EGCB_LVT || from == EGCB_T))\r
876 return EGCB_NOT_BREAK;\r
877\r
878 goto GB999;\r
879 }\r
880\r
881 /* GB9 */\r
882 if (to == EGCB_Extend || to == EGCB_ZWJ) return EGCB_NOT_BREAK;\r
883\r
884 /* GB9a */\r
885 if (to == EGCB_SpacingMark) return EGCB_NOT_BREAK;\r
886 /* GB9b */\r
887 if (from == EGCB_Prepend) return EGCB_NOT_BREAK;\r
888\r
889 /* GB10 removed */\r
890\r
891 /* GB11 */\r
892 if (from == EGCB_ZWJ) {\r
893 if (onigenc_unicode_is_code_ctype(to_code, PROP_INDEX_EXTENDEDPICTOGRAPHIC))\r
894 return EGCB_BREAK_UNDEF_GB11;\r
895\r
896 goto GB999;\r
897 }\r
898\r
899 /* GB12, GB13 */\r
900 if (from == EGCB_Regional_Indicator && to == EGCB_Regional_Indicator) {\r
901 return EGCB_BREAK_UNDEF_RI_RI;\r
902 }\r
903\r
904 GB999:\r
905 return EGCB_BREAK;\r
906}\r
907\r
908#endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */\r
909\r
910extern int\r
911onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,\r
912 const UChar* start, const UChar* end)\r
913{\r
914 OnigCodePoint from;\r
915 OnigCodePoint to;\r
916#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER\r
917 enum EGCB_BREAK_TYPE btype;\r
918 enum EGCB_TYPE type;\r
919#endif\r
920\r
921 /* GB1 and GB2 */\r
922 if (p == start) return 1;\r
923 if (p == end) return 1;\r
924\r
925 if (IS_NULL(prev)) {\r
926 prev = onigenc_get_prev_char_head(enc, start, p);\r
927 if (IS_NULL(prev)) return 1;\r
928 }\r
929\r
930 from = ONIGENC_MBC_TO_CODE(enc, prev, end);\r
931 to = ONIGENC_MBC_TO_CODE(enc, p, end);\r
932\r
933#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER\r
934 if (! ONIGENC_IS_UNICODE_ENCODING(enc)) {\r
b26691c4 935 return from != 0x000d || to != 0x000a;\r
b602265d
DG
936 }\r
937\r
938 btype = unicode_egcb_is_break_2code(from, to);\r
939 switch (btype) {\r
940 case EGCB_NOT_BREAK:\r
941 return 0;\r
942 break;\r
943 case EGCB_BREAK:\r
944 return 1;\r
945 break;\r
946\r
947 case EGCB_BREAK_UNDEF_GB11:\r
948 while ((prev = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {\r
949 from = ONIGENC_MBC_TO_CODE(enc, prev, end);\r
950 if (onigenc_unicode_is_code_ctype(from, PROP_INDEX_EXTENDEDPICTOGRAPHIC))\r
951 return 0;\r
952\r
953 type = egcb_get_type(from);\r
954 if (type != EGCB_Extend)\r
955 break;\r
956 }\r
957 break;\r
958\r
959 case EGCB_BREAK_UNDEF_RI_RI:\r
960 {\r
961 int n = 0;\r
962 while ((prev = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {\r
963 from = ONIGENC_MBC_TO_CODE(enc, prev, end);\r
964 type = egcb_get_type(from);\r
965 if (type != EGCB_Regional_Indicator)\r
966 break;\r
967\r
968 n++;\r
969 }\r
970 if ((n % 2) == 0) return 0;\r
971 }\r
972 break;\r
973 }\r
974\r
975 return 1;\r
976\r
977#else\r
b26691c4 978 return from != 0x000d || to != 0x000a;\r
b602265d
DG
979#endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */\r
980}\r
981\r
982\r
983#define USER_DEFINED_PROPERTY_MAX_NUM 20\r
984\r
985typedef struct {\r
986 int ctype;\r
987 OnigCodePoint* ranges;\r
988} UserDefinedPropertyValue;\r
989\r
990static int UserDefinedPropertyNum;\r
991static UserDefinedPropertyValue\r
992UserDefinedPropertyRanges[USER_DEFINED_PROPERTY_MAX_NUM];\r
993static st_table* UserDefinedPropertyTable;\r
994\r
995extern int\r
996onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)\r
997{\r
998 UserDefinedPropertyValue* e;\r
999 int r;\r
1000 int i;\r
1001 int n;\r
1002 int len;\r
1003 int c;\r
1004 char* s;\r
b26691c4 1005 UChar* uname;\r
b602265d
DG
1006\r
1007 if (UserDefinedPropertyNum >= USER_DEFINED_PROPERTY_MAX_NUM)\r
1008 return ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS;\r
1009\r
1010 len = (int )strlen_s(name,MAX_STRING_SIZE);\r
1011 if (len >= PROPERTY_NAME_MAX_SIZE)\r
1012 return ONIGERR_TOO_LONG_PROPERTY_NAME;\r
1013\r
1014 s = (char* )xmalloc(len + 1);\r
1015 if (s == 0)\r
1016 return ONIGERR_MEMORY;\r
1017\r
b26691c4 1018 uname = (UChar* )name;\r
b602265d
DG
1019 n = 0;\r
1020 for (i = 0; i < len; i++) {\r
b26691c4
LG
1021 c = uname[i];\r
1022 if (c < 0x20 || c >= 0x80) {\r
b602265d
DG
1023 xfree(s);\r
1024 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;\r
1025 }\r
1026\r
1027 if (c != ' ' && c != '-' && c != '_') {\r
1028 s[n] = c;\r
1029 n++;\r
1030 }\r
1031 }\r
1032 s[n] = '\0';\r
1033\r
1034 if (UserDefinedPropertyTable == 0) {\r
1035 UserDefinedPropertyTable = onig_st_init_strend_table_with_size(10);\r
a5def177
DG
1036 if (IS_NULL(UserDefinedPropertyTable)) {\r
1037 xfree(s);\r
1038 return ONIGERR_MEMORY;\r
1039 }\r
b602265d
DG
1040 }\r
1041\r
1042 e = UserDefinedPropertyRanges + UserDefinedPropertyNum;\r
1043 e->ctype = CODE_RANGES_NUM + UserDefinedPropertyNum;\r
1044 e->ranges = ranges;\r
1045 r = onig_st_insert_strend(UserDefinedPropertyTable,\r
1046 (const UChar* )s, (const UChar* )s + n,\r
1047 (hash_data_type )((void* )e));\r
1048 if (r < 0) return r;\r
1049\r
1050 UserDefinedPropertyNum++;\r
1051 return 0;\r
1052}\r
1053\r
1054extern int\r
1055onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)\r
1056{\r
1057 if (\r
1058#ifdef USE_UNICODE_PROPERTIES\r
1059 ctype <= ONIGENC_MAX_STD_CTYPE &&\r
1060#endif\r
1061 code < 256) {\r
1062 return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);\r
1063 }\r
1064\r
1065 if (ctype >= CODE_RANGES_NUM) {\r
1066 int index = ctype - CODE_RANGES_NUM;\r
1067 if (index < UserDefinedPropertyNum)\r
1068 return onig_is_in_code_range((UChar* )UserDefinedPropertyRanges[index].ranges, code);\r
1069 else\r
1070 return ONIGERR_TYPE_BUG;\r
1071 }\r
1072\r
1073 return onig_is_in_code_range((UChar* )CodeRanges[ctype], code);\r
1074}\r
1075\r
1076\r
1077extern int\r
1078onigenc_unicode_ctype_code_range(OnigCtype ctype, const OnigCodePoint* ranges[])\r
1079{\r
1080 if (ctype >= CODE_RANGES_NUM) {\r
1081 int index = ctype - CODE_RANGES_NUM;\r
1082 if (index < UserDefinedPropertyNum) {\r
1083 *ranges = UserDefinedPropertyRanges[index].ranges;\r
1084 return 0;\r
1085 }\r
1086 else\r
1087 return ONIGERR_TYPE_BUG;\r
1088 }\r
1089\r
1090 *ranges = CodeRanges[ctype];\r
1091 return 0;\r
1092}\r
1093\r
1094extern int\r
1095onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,\r
1096 const OnigCodePoint* ranges[])\r
1097{\r
1098 *sb_out = 0x00;\r
1099 return onigenc_unicode_ctype_code_range(ctype, ranges);\r
1100}\r
1101\r
1102extern int\r
1103onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end)\r
1104{\r
1105 int len;\r
1106 UChar *p;\r
1107 OnigCodePoint code;\r
1108 const struct PoolPropertyNameCtype* pc;\r
1109 char buf[PROPERTY_NAME_MAX_SIZE];\r
1110\r
1111 p = name;\r
1112 len = 0;\r
1113 while (p < end) {\r
1114 code = ONIGENC_MBC_TO_CODE(enc, p, end);\r
1115 if (code >= 0x80)\r
1116 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;\r
1117\r
1118 if (code != ' ' && code != '-' && code != '_') {\r
1119 buf[len++] = (char )code;\r
1120 if (len >= PROPERTY_NAME_MAX_SIZE)\r
1121 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;\r
1122 }\r
1123\r
1124 p += enclen(enc, p);\r
1125 }\r
1126\r
1127 buf[len] = 0;\r
1128\r
1129 if (UserDefinedPropertyTable != 0) {\r
1130 UserDefinedPropertyValue* e;\r
1131 e = (UserDefinedPropertyValue* )NULL;\r
1132 onig_st_lookup_strend(UserDefinedPropertyTable,\r
1133 (const UChar* )buf, (const UChar* )buf + len,\r
1134 (hash_data_type* )((void* )(&e)));\r
1135 if (e != 0) {\r
1136 return e->ctype;\r
1137 }\r
1138 }\r
1139\r
1140 pc = unicode_lookup_property_name(buf, len);\r
1141 if (pc != 0) {\r
1142 /* fprintf(stderr, "LOOKUP: %s: %d\n", buf, pc->ctype); */\r
1143#ifndef USE_UNICODE_PROPERTIES\r
1144 if (pc->ctype > ONIGENC_MAX_STD_CTYPE)\r
1145 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;\r
1146#endif\r
1147\r
1148 return (int )pc->ctype;\r
1149 }\r
1150\r
1151 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;\r
1152}\r