]>
git.proxmox.com Git - mirror_edk2.git/blob - BaseTools/Source/C/BrotliCompress/enc/utf8_util.c
1 /* Copyright 2013 Google Inc. All Rights Reserved.
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
7 /* Heuristics for deciding about the UTF8-ness of strings. */
9 #include "./utf8_util.h"
11 #include <brotli/types.h>
13 #if defined(__cplusplus) || defined(c_plusplus)
17 static size_t BrotliParseAsUTF8(
18 int* symbol
, const uint8_t* input
, size_t size
) {
20 if ((input
[0] & 0x80) == 0) {
28 (input
[0] & 0xE0) == 0xC0 &&
29 (input
[1] & 0xC0) == 0x80) {
30 *symbol
= (((input
[0] & 0x1F) << 6) |
38 (input
[0] & 0xF0) == 0xE0 &&
39 (input
[1] & 0xC0) == 0x80 &&
40 (input
[2] & 0xC0) == 0x80) {
41 *symbol
= (((input
[0] & 0x0F) << 12) |
42 ((input
[1] & 0x3F) << 6) |
44 if (*symbol
> 0x7FF) {
50 (input
[0] & 0xF8) == 0xF0 &&
51 (input
[1] & 0xC0) == 0x80 &&
52 (input
[2] & 0xC0) == 0x80 &&
53 (input
[3] & 0xC0) == 0x80) {
54 *symbol
= (((input
[0] & 0x07) << 18) |
55 ((input
[1] & 0x3F) << 12) |
56 ((input
[2] & 0x3F) << 6) |
58 if (*symbol
> 0xFFFF && *symbol
<= 0x10FFFF) {
62 /* Not UTF8, emit a special symbol above the UTF8-code space */
63 *symbol
= 0x110000 | input
[0];
67 /* Returns 1 if at least min_fraction of the data is UTF8-encoded.*/
68 BROTLI_BOOL
BrotliIsMostlyUTF8(
69 const uint8_t* data
, const size_t pos
, const size_t mask
,
70 const size_t length
, const double min_fraction
) {
76 BrotliParseAsUTF8(&symbol
, &data
[(pos
+ i
) & mask
], length
- i
);
78 if (symbol
< 0x110000) size_utf8
+= bytes_read
;
80 return TO_BROTLI_BOOL(size_utf8
> min_fraction
* (double)length
);
83 #if defined(__cplusplus) || defined(c_plusplus)