+++ /dev/null
-/* Copyright 2013 Google Inc. All Rights Reserved.\r
-\r
- Distributed under MIT license.\r
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT\r
-*/\r
-\r
-/* Heuristics for deciding about the UTF8-ness of strings. */\r
-\r
-#include "./utf8_util.h"\r
-\r
-#include <brotli/types.h>\r
-\r
-#if defined(__cplusplus) || defined(c_plusplus)\r
-extern "C" {\r
-#endif\r
-\r
-static size_t BrotliParseAsUTF8(\r
- int* symbol, const uint8_t* input, size_t size) {\r
- /* ASCII */\r
- if ((input[0] & 0x80) == 0) {\r
- *symbol = input[0];\r
- if (*symbol > 0) {\r
- return 1;\r
- }\r
- }\r
- /* 2-byte UTF8 */\r
- if (size > 1u &&\r
- (input[0] & 0xE0) == 0xC0 &&\r
- (input[1] & 0xC0) == 0x80) {\r
- *symbol = (((input[0] & 0x1F) << 6) |\r
- (input[1] & 0x3F));\r
- if (*symbol > 0x7F) {\r
- return 2;\r
- }\r
- }\r
- /* 3-byte UFT8 */\r
- if (size > 2u &&\r
- (input[0] & 0xF0) == 0xE0 &&\r
- (input[1] & 0xC0) == 0x80 &&\r
- (input[2] & 0xC0) == 0x80) {\r
- *symbol = (((input[0] & 0x0F) << 12) |\r
- ((input[1] & 0x3F) << 6) |\r
- (input[2] & 0x3F));\r
- if (*symbol > 0x7FF) {\r
- return 3;\r
- }\r
- }\r
- /* 4-byte UFT8 */\r
- if (size > 3u &&\r
- (input[0] & 0xF8) == 0xF0 &&\r
- (input[1] & 0xC0) == 0x80 &&\r
- (input[2] & 0xC0) == 0x80 &&\r
- (input[3] & 0xC0) == 0x80) {\r
- *symbol = (((input[0] & 0x07) << 18) |\r
- ((input[1] & 0x3F) << 12) |\r
- ((input[2] & 0x3F) << 6) |\r
- (input[3] & 0x3F));\r
- if (*symbol > 0xFFFF && *symbol <= 0x10FFFF) {\r
- return 4;\r
- }\r
- }\r
- /* Not UTF8, emit a special symbol above the UTF8-code space */\r
- *symbol = 0x110000 | input[0];\r
- return 1;\r
-}\r
-\r
-/* Returns 1 if at least min_fraction of the data is UTF8-encoded.*/\r
-BROTLI_BOOL BrotliIsMostlyUTF8(\r
- const uint8_t* data, const size_t pos, const size_t mask,\r
- const size_t length, const double min_fraction) {\r
- size_t size_utf8 = 0;\r
- size_t i = 0;\r
- while (i < length) {\r
- int symbol;\r
- size_t bytes_read =\r
- BrotliParseAsUTF8(&symbol, &data[(pos + i) & mask], length - i);\r
- i += bytes_read;\r
- if (symbol < 0x110000) size_utf8 += bytes_read;\r
- }\r
- return TO_BROTLI_BOOL(size_utf8 > min_fraction * (double)length);\r
-}\r
-\r
-#if defined(__cplusplus) || defined(c_plusplus)\r
-} /* extern "C" */\r
-#endif\r