utf16_le.c - Oniguruma (regular expression library)\r
**********************************************************************/\r
/*-\r
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
+ * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
* All rights reserved.\r
*\r
* Redistribution and use in source and binary forms, with or without\r
static int\r
utf16le_code_to_mbclen(OnigCodePoint code)\r
{\r
- return (code > 0xffff ? 4 : 2);\r
+ if (code > 0xffff) { /* above 0xffff: presumably encoded as a surrogate pair -- TODO confirm */\r
+ if (code > 0x10ffff) /* values beyond 0x10ffff get an error code instead of a length */\r
+ return ONIGERR_INVALID_CODE_POINT_VALUE;\r
+ else\r
+ return 4;\r
+ }\r
+ else {\r
+ return 2; /* code <= 0xffff */\r
+ }\r
}\r
\r
static int\r
const UChar* end1 = end - 1;\r
\r
while (p < end1) {\r
- p += utf16le_mbc_enc_len(p);\r
+ int len = utf16le_mbc_enc_len(p);\r
+ if (len == 4) {\r
+ if (p + 3 < end && ! UTF16_IS_SURROGATE_SECOND(*(p + 3)))\r
+ return FALSE;\r
+ }\r
+ else\r
+ if (UTF16_IS_SURROGATE_SECOND(*(p + 1)))\r
+ return FALSE;\r
+\r
+ p += len;\r
}\r
\r
if (p != end)\r
\r
static int\r
utf16le_mbc_case_fold(OnigCaseFoldType flag,\r
- const UChar** pp, const UChar* end, UChar* fold)\r
+ const UChar** pp, const UChar* end, UChar* fold)\r
{\r
const UChar* p = *pp;\r
\r
}\r
else\r
return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_LE, flag, pp, end,\r
- fold);\r
+ fold);\r
}\r
\r
#if 0\r
static int\r
utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp,\r
- const UChar* end)\r
+ const UChar* end)\r
{\r
const UChar* p = *pp;\r
\r
s--;\r
}\r
\r
- if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1)\r
+ if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1 &&\r
+ UTF16_IS_SURROGATE_FIRST(*(s-1)))\r
s -= 2;\r
\r
return (UChar* )s;\r
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])\r
{\r
return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_LE,\r
- flag, p, end, items);\r
+ flag, p, end, items);\r
}\r
\r
OnigEncodingType OnigEncodingUTF16_LE = {\r
init,\r
0, /* is_initialized */\r
is_valid_mbc_string,\r
- ENC_FLAG_UNICODE,\r
+ ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1,\r
0, 0\r
};\r