]>
Commit | Line | Data |
---|---|---|
11b7501a SB |
1 | /* Copyright 2013 Google Inc. All Rights Reserved.\r |
2 | \r | |
3 | Distributed under MIT license.\r | |
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT\r | |
5 | */\r | |
6 | \r | |
7 | /* Functions to map previous bytes into a context id. */\r | |
8 | \r | |
9 | #ifndef BROTLI_ENC_CONTEXT_H_\r | |
10 | #define BROTLI_ENC_CONTEXT_H_\r | |
11 | \r | |
12 | #include "../common/types.h"\r | |
13 | #include "../common/port.h"\r | |
14 | \r | |
15 | #if defined(__cplusplus) || defined(c_plusplus)\r | |
16 | extern "C" {\r | |
17 | #endif\r | |
18 | \r | |
19 | /* Second-order context lookup table for UTF8 byte streams.\r | |
20 | \r | |
21 | If p1 and p2 are the previous two bytes, we calculate the context as\r | |
22 | \r | |
23 | context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].\r | |
24 | \r | |
25 | If the previous two bytes are ASCII characters (i.e. < 128), this will be\r | |
26 | equivalent to\r | |
27 | \r | |
28 | context = 4 * context1(p1) + context2(p2),\r | |
29 | \r | |
30 | where context1 is based on the previous byte in the following way:\r | |
31 | \r | |
32 | 0 : non-ASCII control\r | |
33 | 1 : \t, \n, \r\r | |
34 | 2 : space\r | |
35 | 3 : other punctuation\r | |
36 | 4 : " '\r | |
37 | 5 : %\r | |
38 | 6 : ( < [ {\r | |
39 | 7 : ) > ] }\r | |
40 | 8 : , ; :\r | |
41 | 9 : .\r | |
42 | 10 : =\r | |
43 | 11 : number\r | |
44 | 12 : upper-case vowel\r | |
45 | 13 : upper-case consonant\r | |
46 | 14 : lower-case vowel\r | |
47 | 15 : lower-case consonant\r | |
48 | \r | |
49 | and context2 is based on the second last byte:\r | |
50 | \r | |
51 | 0 : control, space\r | |
52 | 1 : punctuation\r | |
53 | 2 : upper-case letter, number\r | |
54 | 3 : lower-case letter\r | |
55 | \r | |
56 | If the last byte is ASCII, and the second last byte is not (in a valid UTF8\r | |
57 | stream it will be a continuation byte, value between 128 and 191), the\r | |
58 | context is the same as if the second last byte was an ASCII control or space.\r | |
59 | \r | |
60 | If the last byte is a UTF8 lead byte (value >= 192), then the next byte will\r | |
61 | be a continuation byte and the context id is 2 or 3 depending on the LSB of\r | |
62 | the last byte and to a lesser extent on the second last byte if it is ASCII.\r | |
63 | \r | |
64 | If the last byte is a UTF8 continuation byte, the second last byte can be:\r | |
65 | - continuation byte: the next byte is probably ASCII or lead byte (assuming\r | |
66 | 4-byte UTF8 characters are rare) and the context id is 0 or 1.\r | |
67 | - lead byte (192 - 223): next byte is ASCII or lead byte, context is 0 or 1\r | |
68 | - lead byte (224 - 255): next byte is continuation byte, context is 2 or 3\r | |
69 | \r | |
70 | The possible value combinations of the previous two bytes, the range of\r | |
71 | context ids and the type of the next byte is summarized in the table below:\r | |
72 | \r | |
73 | |--------\-----------------------------------------------------------------|\r | |
74 | | \ Last byte |\r | |
75 | | Second \---------------------------------------------------------------|\r | |
76 | | last byte \ ASCII | cont. byte | lead byte |\r | |
77 | | \ (0-127) | (128-191) | (192-) |\r | |
78 | |=============|===================|=====================|==================|\r | |
79 | | ASCII | next: ASCII/lead | not valid | next: cont. |\r | |
80 | | (0-127) | context: 4 - 63 | | context: 2 - 3 |\r | |
81 | |-------------|-------------------|---------------------|------------------|\r | |
82 | | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |\r | |
83 | | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |\r | |
84 | |-------------|-------------------|---------------------|------------------|\r | |
85 | | lead byte | not valid | next: ASCII/lead | not valid |\r | |
86 | | (192-223) | | context: 0 - 1 | |\r | |
87 | |-------------|-------------------|---------------------|------------------|\r | |
88 | | lead byte | not valid | next: cont. | not valid |\r | |
89 | | (224-) | | context: 2 - 3 | |\r | |
90 | |-------------|-------------------|---------------------|------------------|\r | |
91 | */\r | |
92 | static const uint8_t kUTF8ContextLookup[512] = { /* Two 256-entry halves; Context() ORs one entry from each half in CONTEXT_UTF8 mode. */\r | |
93 | /* Last byte: entries 0..255, indexed directly by the last byte (p1). */\r | |
94 | /* ASCII entries in this half are multiples of 4, so the low two bits stay free for the second-last-byte entry. */\r | |
95 | /* ASCII range (0-127). */\r | |
96 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,\r | |
97 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
98 | 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,\r | |
99 | 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,\r | |
100 | 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,\r | |
101 | 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,\r | |
102 | 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,\r | |
103 | 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,\r | |
104 | /* UTF8 continuation byte range (128-191): alternates 0, 1 with the low bit of the byte. */\r | |
105 | 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,\r | |
106 | 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,\r | |
107 | 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,\r | |
108 | 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,\r | |
109 | /* UTF8 lead byte range (192-255): alternates 2, 3 with the low bit of the byte. */\r | |
110 | 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,\r | |
111 | 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,\r | |
112 | 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,\r | |
113 | 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,\r | |
114 | /* Second last byte: entries 256..511, indexed by the second last byte (p2) plus 256. */\r | |
115 | /* Every entry in this half is between 0 and 3. */\r | |
116 | /* ASCII range (0-127). */\r | |
117 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
118 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
119 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r | |
120 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,\r | |
121 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\r | |
122 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,\r | |
123 | 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\r | |
124 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,\r | |
125 | /* UTF8 continuation byte range (128-191): the next four rows. */\r | |
126 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
127 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
128 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
129 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
130 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bytes 192-207: this fifth all-zero row already belongs to the UTF8 lead byte range. */\r | |
131 | /* UTF8 lead byte range, continued (208-255): 208-223 map to 0, 224-255 map to 2. */\r | |
132 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
133 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\r | |
134 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\r | |
135 | };\r | |
136 | \r | |
137 | /* Context lookup table for small signed integers: maps each of the 256 byte values to a 3-bit bucket id (0..7). Context() combines two lookups in CONTEXT_SIGNED mode. */\r | |
138 | static const uint8_t kSigned3BitContextLookup[] = {\r | |
139 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* byte 0 maps to 0; bytes 1-15 map to 1 */\r | |
140 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* bytes 16-63 map to 2 (three rows) */\r | |
141 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\r | |
142 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\r | |
143 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* bytes 64-127 map to 3 (four rows) */\r | |
144 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\r | |
145 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\r | |
146 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\r | |
147 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, /* bytes 128-191 map to 4 (four rows) */\r | |
148 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,\r | |
149 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,\r | |
150 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,\r | |
151 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, /* bytes 192-239 map to 5 (three rows) */\r | |
152 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\r | |
153 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\r | |
154 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, /* bytes 240-254 map to 6; byte 255 maps to 7 */\r | |
155 | };\r | |
156 | \r | |
157 | typedef enum ContextType { /* Selects how Context() turns the previous two bytes into a context id. */\r | |
158 | CONTEXT_LSB6 = 0, /* low six bits of the last byte */\r | |
159 | CONTEXT_MSB6 = 1, /* high six bits of the last byte */\r | |
160 | CONTEXT_UTF8 = 2, /* kUTF8ContextLookup entries for the last and second last byte, ORed together */\r | |
161 | CONTEXT_SIGNED = 3 /* kSigned3BitContextLookup buckets of both bytes packed as 8 * bucket(last) + bucket(second last) */\r | |
162 | } ContextType;\r | |
163 | \r | |
164 | static BROTLI_INLINE uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) { /* Computes the context id from the last byte p1 and the second last byte p2 according to mode; every branch yields a value in 0..63. */\r | |
165 | switch (mode) {\r | |
166 | case CONTEXT_LSB6:\r | |
167 | return p1 & 0x3f; /* keep the low six bits of p1; p2 is unused */\r | |
168 | case CONTEXT_MSB6:\r | |
169 | return (uint8_t)(p1 >> 2); /* keep the high six bits of p1; p2 is unused */\r | |
170 | case CONTEXT_UTF8:\r | |
171 | return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256]; /* first table half is indexed by p1, second half (offset 256) by p2 */\r | |
172 | case CONTEXT_SIGNED:\r | |
173 | return (uint8_t)((kSigned3BitContextLookup[p1] << 3) + /* bucket of p1 goes into bits 3..5 */\r | |
174 | kSigned3BitContextLookup[p2]); /* bucket of p2 goes into bits 0..2 */\r | |
175 | default:\r | |
176 | return 0; /* any mode value not listed above yields context 0 */\r | |
177 | }\r | |
178 | }\r | |
179 | \r | |
180 | #if defined(__cplusplus) || defined(c_plusplus)\r | |
181 | } /* extern "C" */\r | |
182 | #endif\r | |
183 | \r | |
184 | #endif /* BROTLI_ENC_CONTEXT_H_ */\r |