From 5d68fc6781f0fa0887715784db5bafc34d5c766f Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Mon, 1 Jul 2019 19:46:01 +0200 Subject: [PATCH] MdePkg/BaseLib: re-specify Base64Decode(), and add temporary stub impl MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Rewrite Base64Decode() from scratch, due to reasons listed in the second reference below. As first step, redo the interface contract, and replace the current implementation with a stub that asserts FALSE, then fails. Cc: Liming Gao Cc: Marvin Häuser Cc: Michael D Kinney Cc: Philippe Mathieu-Daudé Cc: Zhichao Gao Ref: https://bugzilla.tianocore.org/show_bug.cgi?id=1891 Ref: http://mid.mail-archive.com/c495bd0b-ea4d-7206-8a4f-a7149760d19a@redhat.com Signed-off-by: Laszlo Ersek Reviewed-by: Philippe Mathieu-Daude Reviewed-by: Liming Gao --- MdePkg/Include/Library/BaseLib.h | 107 +++++++++--- MdePkg/Library/BaseLib/String.c | 285 ++++++++++--------------------- 2 files changed, 172 insertions(+), 220 deletions(-) diff --git a/MdePkg/Include/Library/BaseLib.h b/MdePkg/Include/Library/BaseLib.h index a22bfc9fad..2a75bc023f 100644 --- a/MdePkg/Include/Library/BaseLib.h +++ b/MdePkg/Include/Library/BaseLib.h @@ -2785,31 +2785,94 @@ Base64Encode ( ); /** - Convert Base64 ascii string to binary data based on RFC4648. - - Produce Null-terminated binary data in the output buffer specified by Destination and DestinationSize. - The binary data is produced by converting the Base64 ascii string specified by Source and SourceLength. - - @param Source Input ASCII characters - @param SourceLength Number of ASCII characters - @param Destination Pointer to output buffer - @param DestinationSize Caller is responsible for passing in buffer of at least DestinationSize. - Set 0 to get the size needed. Set to bytes stored on return. - - @retval RETURN_SUCCESS When binary buffer is filled in. - @retval RETURN_INVALID_PARAMETER If Source is NULL or DestinationSize is NULL. - @retval RETURN_INVALID_PARAMETER If SourceLength or DestinationSize is bigger than (MAX_ADDRESS -(UINTN)Destination ). - @retval RETURN_INVALID_PARAMETER If there is any invalid character in input stream. - @retval RETURN_BUFFER_TOO_SMALL If buffer length is smaller than required buffer size. - - **/ + Decode Base64 ASCII encoded data to 8-bit binary representation, based on + RFC4648. + + Decoding occurs according to "Table 1: The Base 64 Alphabet" in RFC4648. + + Whitespace is ignored at all positions: + - 0x09 ('\t') horizontal tab + - 0x0A ('\n') new line + - 0x0B ('\v') vertical tab + - 0x0C ('\f') form feed + - 0x0D ('\r') carriage return + - 0x20 (' ') space + + The minimum amount of required padding (with ASCII 0x3D, '=') is tolerated + and enforced at the end of the Base64 ASCII encoded data, and only there. + + Other characters outside of the encoding alphabet cause the function to + reject the Base64 ASCII encoded data. + + @param[in] Source Array of CHAR8 elements containing the Base64 + ASCII encoding. May be NULL if SourceSize is + zero. + + @param[in] SourceSize Number of CHAR8 elements in Source. + + @param[out] Destination Array of UINT8 elements receiving the decoded + 8-bit binary representation. Allocated by the + caller. May be NULL if DestinationSize is + zero on input. If NULL, decoding is + performed, but the 8-bit binary + representation is not stored. If non-NULL and + the function returns an error, the contents + of Destination are indeterminate. + + @param[in,out] DestinationSize On input, the number of UINT8 elements that + the caller allocated for Destination. On + output, if the function returns + RETURN_SUCCESS or RETURN_BUFFER_TOO_SMALL, + the number of UINT8 elements that are + required for decoding the Base64 ASCII + representation. If the function returns a + value different from both RETURN_SUCCESS and + RETURN_BUFFER_TOO_SMALL, then DestinationSize + is indeterminate on output. + + @retval RETURN_SUCCESS SourceSize CHAR8 elements at Source have + been decoded to on-output DestinationSize + UINT8 elements at Destination. Note that + RETURN_SUCCESS covers the case when + DestinationSize is zero on input, and + Source decodes to zero bytes (due to + containing at most ignored whitespace). + + @retval RETURN_BUFFER_TOO_SMALL The input value of DestinationSize is not + large enough for decoding SourceSize CHAR8 + elements at Source. The required number of + UINT8 elements has been stored to + DestinationSize. + + @retval RETURN_INVALID_PARAMETER DestinationSize is NULL. + + @retval RETURN_INVALID_PARAMETER Source is NULL, but SourceSize is not zero. + + @retval RETURN_INVALID_PARAMETER Destination is NULL, but DestinationSize is + not zero on input. + + @retval RETURN_INVALID_PARAMETER Source is non-NULL, and (Source + + SourceSize) would wrap around MAX_ADDRESS. + + @retval RETURN_INVALID_PARAMETER Destination is non-NULL, and (Destination + + DestinationSize) would wrap around + MAX_ADDRESS, as specified on input. + + @retval RETURN_INVALID_PARAMETER None of Source and Destination are NULL, + and CHAR8[SourceSize] at Source overlaps + UINT8[DestinationSize] at Destination, as + specified on input. + + @retval RETURN_INVALID_PARAMETER Invalid CHAR8 element encountered in + Source. +**/ RETURN_STATUS EFIAPI Base64Decode ( - IN CONST CHAR8 *Source, - IN UINTN SourceLength, - OUT UINT8 *Destination OPTIONAL, - IN OUT UINTN *DestinationSize + IN CONST CHAR8 *Source OPTIONAL, + IN UINTN SourceSize, + OUT UINT8 *Destination OPTIONAL, + IN OUT UINTN *DestinationSize ); /** diff --git a/MdePkg/Library/BaseLib/String.c b/MdePkg/Library/BaseLib/String.c index 32e189791c..f8397035c3 100644 --- a/MdePkg/Library/BaseLib/String.c +++ b/MdePkg/Library/BaseLib/String.c @@ -1757,45 +1757,10 @@ AsciiStrToUnicodeStr ( #endif -// -// The basis for Base64 encoding is RFC 4686 https://tools.ietf.org/html/rfc4648 -// -// RFC 4686 has a number of MAY and SHOULD cases. This implementation chooses -// the more restrictive versions for security concerns (see RFC 4686 section 3.3). -// -// A invalid character, if encountered during the decode operation, causes the data -// to be rejected. In addition, the '=' padding character is only allowed at the end -// of the Base64 encoded string. -// -#define BAD_V 99 - STATIC CHAR8 EncodingTable[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" "0123456789+/"; -STATIC UINT8 DecodingTable[] = { - // - // Valid characters ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/ - // Also, set '=' as a zero for decoding - // 0 , 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f - BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, // 0 - BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, // 10 - BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, 62, BAD_V, BAD_V, BAD_V, 63, // 20 - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, BAD_V, BAD_V, BAD_V, 0, BAD_V, BAD_V, // 30 - BAD_V, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40 - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, // 50 - BAD_V, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60 - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, // 70 - BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, // 80 - BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, // 90 - BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, // a0 - BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, // b0 - BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, // c0 - BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, // d0 - BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, // d0 - BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V, BAD_V // f0 -}; - /** Convert binary data to a Base64 encoded ascii string based on RFC4648. @@ -1918,174 +1883,98 @@ Base64Encode ( } /** - Convert Base64 ascii string to binary data based on RFC4648. - - Produce Null-terminated binary data in the output buffer specified by Destination and DestinationSize. - The binary data is produced by converting the Base64 ascii string specified by Source and SourceLength. - - @param Source Input ASCII characters - @param SourceLength Number of ASCII characters - @param Destination Pointer to output buffer - @param DestinationSize Caller is responsible for passing in buffer of at least DestinationSize. - Set 0 to get the size needed. Set to bytes stored on return. - - @retval RETURN_SUCCESS When binary buffer is filled in. - @retval RETURN_INVALID_PARAMETER If Source is NULL or DestinationSize is NULL. - @retval RETURN_INVALID_PARAMETER If SourceLength or DestinationSize is bigger than (MAX_ADDRESS -(UINTN)Destination ). - @retval RETURN_INVALID_PARAMETER If there is any invalid character in input stream. - @retval RETURN_BUFFER_TOO_SMALL If buffer length is smaller than required buffer size. - **/ + Decode Base64 ASCII encoded data to 8-bit binary representation, based on + RFC4648. + + Decoding occurs according to "Table 1: The Base 64 Alphabet" in RFC4648. + + Whitespace is ignored at all positions: + - 0x09 ('\t') horizontal tab + - 0x0A ('\n') new line + - 0x0B ('\v') vertical tab + - 0x0C ('\f') form feed + - 0x0D ('\r') carriage return + - 0x20 (' ') space + + The minimum amount of required padding (with ASCII 0x3D, '=') is tolerated + and enforced at the end of the Base64 ASCII encoded data, and only there. + + Other characters outside of the encoding alphabet cause the function to + reject the Base64 ASCII encoded data. + + @param[in] Source Array of CHAR8 elements containing the Base64 + ASCII encoding. May be NULL if SourceSize is + zero. + + @param[in] SourceSize Number of CHAR8 elements in Source. + + @param[out] Destination Array of UINT8 elements receiving the decoded + 8-bit binary representation. Allocated by the + caller. May be NULL if DestinationSize is + zero on input. If NULL, decoding is + performed, but the 8-bit binary + representation is not stored. If non-NULL and + the function returns an error, the contents + of Destination are indeterminate. + + @param[in,out] DestinationSize On input, the number of UINT8 elements that + the caller allocated for Destination. On + output, if the function returns + RETURN_SUCCESS or RETURN_BUFFER_TOO_SMALL, + the number of UINT8 elements that are + required for decoding the Base64 ASCII + representation. If the function returns a + value different from both RETURN_SUCCESS and + RETURN_BUFFER_TOO_SMALL, then DestinationSize + is indeterminate on output. + + @retval RETURN_SUCCESS SourceSize CHAR8 elements at Source have + been decoded to on-output DestinationSize + UINT8 elements at Destination. Note that + RETURN_SUCCESS covers the case when + DestinationSize is zero on input, and + Source decodes to zero bytes (due to + containing at most ignored whitespace). + + @retval RETURN_BUFFER_TOO_SMALL The input value of DestinationSize is not + large enough for decoding SourceSize CHAR8 + elements at Source. The required number of + UINT8 elements has been stored to + DestinationSize. + + @retval RETURN_INVALID_PARAMETER DestinationSize is NULL. + + @retval RETURN_INVALID_PARAMETER Source is NULL, but SourceSize is not zero. + + @retval RETURN_INVALID_PARAMETER Destination is NULL, but DestinationSize is + not zero on input. + + @retval RETURN_INVALID_PARAMETER Source is non-NULL, and (Source + + SourceSize) would wrap around MAX_ADDRESS. + + @retval RETURN_INVALID_PARAMETER Destination is non-NULL, and (Destination + + DestinationSize) would wrap around + MAX_ADDRESS, as specified on input. + + @retval RETURN_INVALID_PARAMETER None of Source and Destination are NULL, + and CHAR8[SourceSize] at Source overlaps + UINT8[DestinationSize] at Destination, as + specified on input. + + @retval RETURN_INVALID_PARAMETER Invalid CHAR8 element encountered in + Source. +**/ RETURN_STATUS EFIAPI Base64Decode ( - IN CONST CHAR8 *Source, - IN UINTN SourceLength, - OUT UINT8 *Destination OPTIONAL, - IN OUT UINTN *DestinationSize + IN CONST CHAR8 *Source OPTIONAL, + IN UINTN SourceSize, + OUT UINT8 *Destination OPTIONAL, + IN OUT UINTN *DestinationSize ) { - - UINT32 Value; - CHAR8 Chr; - INTN BufferSize; - UINTN SourceIndex; - UINTN DestinationIndex; - UINTN Index; - UINTN ActualSourceLength; - - // - // Check pointers are not NULL - // - if ((Source == NULL) || (DestinationSize == NULL)) { - return RETURN_INVALID_PARAMETER; - } - - // - // Check if SourceLength or DestinationSize is valid - // - if ((SourceLength >= (MAX_ADDRESS - (UINTN)Source)) || (*DestinationSize >= (MAX_ADDRESS - (UINTN)Destination))){ - return RETURN_INVALID_PARAMETER; - } - - ActualSourceLength = 0; - BufferSize = 0; - - // - // Determine the actual number of valid characters in the string. - // All invalid characters except selected white space characters, - // will cause the Base64 string to be rejected. White space to allow - // properly formatted XML will be ignored. - // - // See section 3.3 of RFC 4648. - // - for (SourceIndex = 0; SourceIndex < SourceLength; SourceIndex++) { - - // - // '=' is part of the quantum - // - if (Source[SourceIndex] == '=') { - ActualSourceLength++; - BufferSize--; - - // - // Only two '=' characters can be valid. - // - if (BufferSize < -2) { - return RETURN_INVALID_PARAMETER; - } - } - else { - Chr = Source[SourceIndex]; - if (BAD_V != DecodingTable[(UINT8) Chr]) { - - // - // The '=' characters are only valid at the end, so any - // valid character after an '=', will be flagged as an error. - // - if (BufferSize < 0) { - return RETURN_INVALID_PARAMETER; - } - ActualSourceLength++; - } - else { - - // - // The reset of the decoder will ignore all invalid characters allowed here. - // Ignoring selected white space is useful. In this case, the decoder will - // ignore ' ', '\t', '\n', and '\r'. - // - if ((Chr != ' ') &&(Chr != '\t') &&(Chr != '\n') &&(Chr != '\r')) { - return RETURN_INVALID_PARAMETER; - } - } - } - } - - // - // The Base64 character string must be a multiple of 4 character quantums. - // - if (ActualSourceLength % 4 != 0) { - return RETURN_INVALID_PARAMETER; - } - - BufferSize += ActualSourceLength / 4 * 3; - if (BufferSize < 0) { - return RETURN_INVALID_PARAMETER; - } - - // - // BufferSize is >= 0 - // - if ((Destination == NULL) || (*DestinationSize < (UINTN) BufferSize)) { - *DestinationSize = BufferSize; - return RETURN_BUFFER_TOO_SMALL; - } - - // - // If no decodable characters, return a size of zero. RFC 4686 test vector 1. - // - if (ActualSourceLength == 0) { - *DestinationSize = 0; - return RETURN_SUCCESS; - } - - // - // Input data is verified to be a multiple of 4 valid charcters. Process four - // characters at a time. Uncounted (ie. invalid) characters will be ignored. - // - for (SourceIndex = 0, DestinationIndex = 0; (SourceIndex < SourceLength) && (DestinationIndex < *DestinationSize); ) { - Value = 0; - - // - // Get 24 bits of data from 4 input characters, each character representing 6 bits - // - for (Index = 0; Index < 4; Index++) { - do { - Chr = DecodingTable[(UINT8) Source[SourceIndex++]]; - } while (Chr == BAD_V); - Value <<= 6; - Value |= (UINT32)Chr; - } - - // - // Store 3 bytes of binary data (24 bits) - // - *Destination++ = (UINT8) (Value >> 16); - DestinationIndex++; - - // - // Due to the '=' special cases for the two bytes at the end, - // we have to check the length and not store the padding data - // - if (DestinationIndex++ < *DestinationSize) { - *Destination++ = (UINT8) (Value >> 8); - } - if (DestinationIndex++ < *DestinationSize) { - *Destination++ = (UINT8) Value; - } - } - - return RETURN_SUCCESS; + ASSERT (FALSE); + return RETURN_INVALID_PARAMETER; } /** -- 2.39.2