]> git.proxmox.com Git - mirror_edk2.git/blame - BaseTools/Source/C/BrotliCompress/enc/utf8_util.c
MdeModulePkg/BrotliCustomDecompressLib: Make brotli a submodule
[mirror_edk2.git] / BaseTools / Source / C / BrotliCompress / enc / utf8_util.c
CommitLineData
11b7501a
SB
/* Copyright 2013 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/* Heuristics for deciding about the UTF8-ness of strings. */
8\r
9#include "./utf8_util.h"\r
10\r
dd4f667e 11#include <brotli/types.h>\r
11b7501a
SB
12\r
13#if defined(__cplusplus) || defined(c_plusplus)\r
14extern "C" {\r
15#endif\r
16\r
/* Decodes a single symbol from the start of |input|.

   symbol: receives the decoded value. For an accepted UTF-8 sequence this is
           the code point; otherwise it is 0x110000 | input[0], i.e. a value
           above the UTF-8 code space that marks the byte as "not UTF-8".
   input:  bytes to decode; input[0] is read unconditionally, so the caller
           must pass size >= 1.
   size:   number of bytes available at |input|; a multi-byte sequence is
           only considered when all of its bytes fit in |size|.

   Returns the number of bytes consumed: 1..4 for an accepted sequence,
   1 for a rejected byte.

   Overlong encodings are rejected by the minimum-value check in each branch,
   and 4-byte values above 0x10FFFF are rejected as well. A NUL byte fails
   the "> 0" check and is therefore reported through the fallback symbol.
   Surrogate code points (0xD800..0xDFFF) in 3-byte form are not filtered. */
static size_t BrotliParseAsUTF8(
    int* symbol, const uint8_t* input, size_t size) {
  /* ASCII */
  if ((input[0] & 0x80) == 0) {
    *symbol = input[0];
    if (*symbol > 0) {
      return 1;
    }
  }
  /* 2-byte UTF8 */
  if (size > 1u &&
      (input[0] & 0xE0) == 0xC0 &&
      (input[1] & 0xC0) == 0x80) {
    *symbol = (((input[0] & 0x1F) << 6) |
               (input[1] & 0x3F));
    if (*symbol > 0x7F) {
      return 2;
    }
  }
  /* 3-byte UTF8 */
  if (size > 2u &&
      (input[0] & 0xF0) == 0xE0 &&
      (input[1] & 0xC0) == 0x80 &&
      (input[2] & 0xC0) == 0x80) {
    *symbol = (((input[0] & 0x0F) << 12) |
               ((input[1] & 0x3F) << 6) |
               (input[2] & 0x3F));
    if (*symbol > 0x7FF) {
      return 3;
    }
  }
  /* 4-byte UTF8 */
  if (size > 3u &&
      (input[0] & 0xF8) == 0xF0 &&
      (input[1] & 0xC0) == 0x80 &&
      (input[2] & 0xC0) == 0x80 &&
      (input[3] & 0xC0) == 0x80) {
    *symbol = (((input[0] & 0x07) << 18) |
               ((input[1] & 0x3F) << 12) |
               ((input[2] & 0x3F) << 6) |
               (input[3] & 0x3F));
    if (*symbol > 0xFFFF && *symbol <= 0x10FFFF) {
      return 4;
    }
  }
  /* Not UTF8, emit a special symbol above the UTF8-code space */
  *symbol = 0x110000 | input[0];
  return 1;
}
66\r
67/* Returns 1 if at least min_fraction of the data is UTF8-encoded.*/\r
68BROTLI_BOOL BrotliIsMostlyUTF8(\r
69 const uint8_t* data, const size_t pos, const size_t mask,\r
70 const size_t length, const double min_fraction) {\r
71 size_t size_utf8 = 0;\r
72 size_t i = 0;\r
73 while (i < length) {\r
74 int symbol;\r
75 size_t bytes_read =\r
76 BrotliParseAsUTF8(&symbol, &data[(pos + i) & mask], length - i);\r
77 i += bytes_read;\r
78 if (symbol < 0x110000) size_utf8 += bytes_read;\r
79 }\r
80 return TO_BROTLI_BOOL(size_utf8 > min_fraction * (double)length);\r
81}\r
82\r
83#if defined(__cplusplus) || defined(c_plusplus)\r
84} /* extern "C" */\r
85#endif\r