X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=MdeModulePkg%2FUniversal%2FRegularExpressionDxe%2FOniguruma%2Futf16_le.c;fp=MdeModulePkg%2FUniversal%2FRegularExpressionDxe%2FOniguruma%2Futf16_le.c;h=535713852f7d3a72a1c278b8118d3c7e9b4a9158;hb=b26691c47188ce255b8a4d920bf07ddf1431e2cd;hp=b139f83ff012ab7a739c1e226c35b6f6d301470f;hpb=ecc32c90ee4ad557205cb2725619a3cc2f45ebd0;p=mirror_edk2.git diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/utf16_le.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/utf16_le.c index b139f83ff0..535713852f 100644 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/utf16_le.c +++ b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/utf16_le.c @@ -2,7 +2,7 @@ utf16_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2018 K.Kosako + * Copyright (c) 2002-2019 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -95,7 +95,15 @@ static const int EncLen_UTF16[] = { static int utf16le_code_to_mbclen(OnigCodePoint code) { - return (code > 0xffff ? 4 : 2); + if (code > 0xffff) { + if (code > 0x10ffff) + return ONIGERR_INVALID_CODE_POINT_VALUE; + else + return 4; + } + else { + return 2; + } } static int @@ -110,7 +118,16 @@ is_valid_mbc_string(const UChar* p, const UChar* end) const UChar* end1 = end - 1; while (p < end1) { - p += utf16le_mbc_enc_len(p); + int len = utf16le_mbc_enc_len(p); + if (len == 4) { + if (p + 3 < end && ! UTF16_IS_SURROGATE_SECOND(*(p + 3))) + return FALSE; + } + else + if (UTF16_IS_SURROGATE_SECOND(*(p + 1))) + return FALSE; + + p += len; } if (p != end) @@ -184,7 +201,7 @@ utf16le_code_to_mbc(OnigCodePoint code, UChar *buf) static int utf16le_mbc_case_fold(OnigCaseFoldType flag, - const UChar** pp, const UChar* end, UChar* fold) + const UChar** pp, const UChar* end, UChar* fold) { const UChar* p = *pp; @@ -207,13 +224,13 @@ utf16le_mbc_case_fold(OnigCaseFoldType flag, } else return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_LE, flag, pp, end, - fold); + fold); } #if 0 static int utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, - const UChar* end) + const UChar* end) { const UChar* p = *pp; @@ -252,7 +269,8 @@ utf16le_left_adjust_char_head(const UChar* start, const UChar* s) s--; } - if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1) + if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1 && + UTF16_IS_SURROGATE_FIRST(*(s-1))) s -= 2; return (UChar* )s; @@ -263,7 +281,7 @@ utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) { return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_LE, - flag, p, end, items); + flag, p, end, items); } OnigEncodingType OnigEncodingUTF16_LE = { @@ -286,6 +304,6 @@ OnigEncodingType OnigEncodingUTF16_LE = { init, 0, /* is_initialized */ is_valid_mbc_string, - ENC_FLAG_UNICODE, + ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 };