]>
Commit | Line | Data |
---|---|---|
11b7501a SB |
1 | /* Copyright 2013 Google Inc. All Rights Reserved.\r |
2 | \r | |
3 | Distributed under MIT license.\r | |
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT\r | |
5 | */\r | |
6 | \r | |
7 | /* Implementation of Brotli compressor. */\r | |
8 | \r | |
9 | #include "./encode.h"\r | |
10 | \r | |
11 | #include <stdlib.h> /* free, malloc */\r | |
12 | #include <string.h> /* memcpy, memset */\r | |
13 | \r | |
14 | #include "./backward_references.h"\r | |
15 | #include "./bit_cost.h"\r | |
16 | #include "./brotli_bit_stream.h"\r | |
17 | #include "./compress_fragment.h"\r | |
18 | #include "./compress_fragment_two_pass.h"\r | |
19 | #include "./context.h"\r | |
20 | #include "./entropy_encode.h"\r | |
21 | #include "./fast_log.h"\r | |
22 | #include "./hash.h"\r | |
23 | #include "./histogram.h"\r | |
24 | #include "./memory.h"\r | |
25 | #include "./metablock.h"\r | |
26 | #include "./port.h"\r | |
27 | #include "./prefix.h"\r | |
28 | #include "./quality.h"\r | |
29 | #include "./ringbuffer.h"\r | |
30 | #include "./utf8_util.h"\r | |
31 | #include "./write_bits.h"\r | |
32 | \r | |
33 | #if defined(__cplusplus) || defined(c_plusplus)\r | |
34 | extern "C" {\r | |
35 | #endif\r | |
36 | \r | |
37 | #define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));\r | |
38 | \r | |
39 | typedef enum BrotliEncoderStreamState {\r | |
40 | /* Default state. */\r | |
41 | BROTLI_STREAM_PROCESSING = 0,\r | |
42 | /* Intermediate state; after next block is emitted, byte-padding should be\r | |
43 | performed before getting back to default state. */\r | |
44 | BROTLI_STREAM_FLUSH_REQUESTED = 1,\r | |
45 | /* Last metablock was produced; no more input is acceptable. */\r | |
46 | BROTLI_STREAM_FINISHED = 2\r | |
47 | } BrotliEncoderStreamState;\r | |
48 | \r | |
49 | typedef struct BrotliEncoderStateStruct {\r | |
50 | BrotliEncoderParams params;\r | |
51 | \r | |
52 | MemoryManager memory_manager_;\r | |
53 | \r | |
54 | Hashers hashers_;\r | |
55 | uint64_t input_pos_;\r | |
56 | RingBuffer ringbuffer_;\r | |
57 | size_t cmd_alloc_size_;\r | |
58 | Command* commands_;\r | |
59 | size_t num_commands_;\r | |
60 | size_t num_literals_;\r | |
61 | size_t last_insert_len_;\r | |
62 | uint64_t last_flush_pos_;\r | |
63 | uint64_t last_processed_pos_;\r | |
64 | int dist_cache_[4];\r | |
65 | int saved_dist_cache_[4];\r | |
66 | uint8_t last_byte_;\r | |
67 | uint8_t last_byte_bits_;\r | |
68 | uint8_t prev_byte_;\r | |
69 | uint8_t prev_byte2_;\r | |
70 | size_t storage_size_;\r | |
71 | uint8_t* storage_;\r | |
72 | /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */\r | |
73 | int small_table_[1 << 10]; /* 4KiB */\r | |
74 | int* large_table_; /* Allocated only when needed */\r | |
75 | size_t large_table_size_;\r | |
76 | /* Command and distance prefix codes (each 64 symbols, stored back-to-back)\r | |
77 | used for the next block in FAST_ONE_PASS_COMPRESSION_QUALITY. The command\r | |
78 | prefix code is over a smaller alphabet with the following 64 symbols:\r | |
79 | 0 - 15: insert length code 0, copy length code 0 - 15, same distance\r | |
80 | 16 - 39: insert length code 0, copy length code 0 - 23\r | |
81 | 40 - 63: insert length code 0 - 23, copy length code 0\r | |
82 | Note that symbols 16 and 40 represent the same code in the full alphabet,\r | |
83 | but we do not use either of them in FAST_ONE_PASS_COMPRESSION_QUALITY. */\r | |
84 | uint8_t cmd_depths_[128];\r | |
85 | uint16_t cmd_bits_[128];\r | |
86 | /* The compressed form of the command and distance prefix codes for the next\r | |
87 | block in FAST_ONE_PASS_COMPRESSION_QUALITY. */\r | |
88 | uint8_t cmd_code_[512];\r | |
89 | size_t cmd_code_numbits_;\r | |
90 | /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */\r | |
91 | uint32_t* command_buf_;\r | |
92 | uint8_t* literal_buf_;\r | |
93 | \r | |
94 | uint8_t* next_out_;\r | |
95 | size_t available_out_;\r | |
96 | size_t total_out_;\r | |
97 | uint8_t flush_buf_[2];\r | |
98 | BrotliEncoderStreamState stream_state_;\r | |
99 | \r | |
100 | BROTLI_BOOL is_last_block_emitted_;\r | |
101 | BROTLI_BOOL is_initialized_;\r | |
102 | } BrotliEncoderStateStruct;\r | |
103 | \r | |
104 | static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s);\r | |
105 | \r | |
106 | size_t BrotliEncoderInputBlockSize(BrotliEncoderState* s) {\r | |
107 | if (!EnsureInitialized(s)) return 0;\r | |
108 | return (size_t)1 << s->params.lgblock;\r | |
109 | }\r | |
110 | \r | |
111 | static uint64_t UnprocessedInputSize(BrotliEncoderState* s) {\r | |
112 | return s->input_pos_ - s->last_processed_pos_;\r | |
113 | }\r | |
114 | \r | |
115 | static size_t RemainingInputBlockSize(BrotliEncoderState* s) {\r | |
116 | const uint64_t delta = UnprocessedInputSize(s);\r | |
117 | size_t block_size = BrotliEncoderInputBlockSize(s);\r | |
118 | if (delta >= block_size) return 0;\r | |
119 | return block_size - (size_t)delta;\r | |
120 | }\r | |
121 | \r | |
122 | BROTLI_BOOL BrotliEncoderSetParameter(\r | |
123 | BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value) {\r | |
124 | /* Changing parameters on the fly is not implemented yet. */\r | |
125 | if (state->is_initialized_) return BROTLI_FALSE;\r | |
126 | /* TODO: Validate/clamp params here. */\r | |
127 | switch (p) {\r | |
128 | case BROTLI_PARAM_MODE:\r | |
129 | state->params.mode = (BrotliEncoderMode)value;\r | |
130 | return BROTLI_TRUE;\r | |
131 | \r | |
132 | case BROTLI_PARAM_QUALITY:\r | |
133 | state->params.quality = (int)value;\r | |
134 | return BROTLI_TRUE;\r | |
135 | \r | |
136 | case BROTLI_PARAM_LGWIN:\r | |
137 | state->params.lgwin = (int)value;\r | |
138 | return BROTLI_TRUE;\r | |
139 | \r | |
140 | case BROTLI_PARAM_LGBLOCK:\r | |
141 | state->params.lgblock = (int)value;\r | |
142 | return BROTLI_TRUE;\r | |
143 | \r | |
144 | default: return BROTLI_FALSE;\r | |
145 | }\r | |
146 | }\r | |
147 | \r | |
148 | static void RecomputeDistancePrefixes(Command* cmds,\r | |
149 | size_t num_commands,\r | |
150 | uint32_t num_direct_distance_codes,\r | |
151 | uint32_t distance_postfix_bits) {\r | |
152 | size_t i;\r | |
153 | if (num_direct_distance_codes == 0 && distance_postfix_bits == 0) {\r | |
154 | return;\r | |
155 | }\r | |
156 | for (i = 0; i < num_commands; ++i) {\r | |
157 | Command* cmd = &cmds[i];\r | |
158 | if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {\r | |
159 | PrefixEncodeCopyDistance(CommandDistanceCode(cmd),\r | |
160 | num_direct_distance_codes,\r | |
161 | distance_postfix_bits,\r | |
162 | &cmd->dist_prefix_,\r | |
163 | &cmd->dist_extra_);\r | |
164 | }\r | |
165 | }\r | |
166 | }\r | |
167 | \r | |
168 | /* Wraps 64-bit input position to 32-bit ringbuffer position preserving\r | |
169 | "not-a-first-lap" feature. */\r | |
170 | static uint32_t WrapPosition(uint64_t position) {\r | |
171 | uint32_t result = (uint32_t)position;\r | |
172 | uint64_t gb = position >> 30;\r | |
173 | if (gb > 2) {\r | |
174 | /* Wrap every 2GiB; The first 3GB are continous. */\r | |
175 | result = (result & ((1u << 30) - 1)) | ((uint32_t)((gb - 1) & 1) + 1) << 30;\r | |
176 | }\r | |
177 | return result;\r | |
178 | }\r | |
179 | \r | |
180 | static uint8_t* GetBrotliStorage(BrotliEncoderState* s, size_t size) {\r | |
181 | MemoryManager* m = &s->memory_manager_;\r | |
182 | if (s->storage_size_ < size) {\r | |
183 | BROTLI_FREE(m, s->storage_);\r | |
184 | s->storage_ = BROTLI_ALLOC(m, uint8_t, size);\r | |
185 | if (BROTLI_IS_OOM(m)) return NULL;\r | |
186 | s->storage_size_ = size;\r | |
187 | }\r | |
188 | return s->storage_;\r | |
189 | }\r | |
190 | \r | |
191 | static size_t HashTableSize(size_t max_table_size, size_t input_size) {\r | |
192 | size_t htsize = 256;\r | |
193 | while (htsize < max_table_size && htsize < input_size) {\r | |
194 | htsize <<= 1;\r | |
195 | }\r | |
196 | return htsize;\r | |
197 | }\r | |
198 | \r | |
199 | static int* GetHashTable(BrotliEncoderState* s, int quality,\r | |
200 | size_t input_size, size_t* table_size) {\r | |
201 | /* Use smaller hash table when input.size() is smaller, since we\r | |
202 | fill the table, incurring O(hash table size) overhead for\r | |
203 | compression, and if the input is short, we won't need that\r | |
204 | many hash table entries anyway. */\r | |
205 | MemoryManager* m = &s->memory_manager_;\r | |
206 | const size_t max_table_size = MaxHashTableSize(quality);\r | |
207 | size_t htsize = HashTableSize(max_table_size, input_size);\r | |
208 | int* table;\r | |
209 | assert(max_table_size >= 256);\r | |
210 | \r | |
211 | if (htsize <= sizeof(s->small_table_) / sizeof(s->small_table_[0])) {\r | |
212 | table = s->small_table_;\r | |
213 | } else {\r | |
214 | if (htsize > s->large_table_size_) {\r | |
215 | s->large_table_size_ = htsize;\r | |
216 | BROTLI_FREE(m, s->large_table_);\r | |
217 | s->large_table_ = BROTLI_ALLOC(m, int, htsize);\r | |
218 | if (BROTLI_IS_OOM(m)) return 0;\r | |
219 | }\r | |
220 | table = s->large_table_;\r | |
221 | }\r | |
222 | \r | |
223 | *table_size = htsize;\r | |
224 | memset(table, 0, htsize * sizeof(*table));\r | |
225 | return table;\r | |
226 | }\r | |
227 | \r | |
228 | static void EncodeWindowBits(int lgwin, uint8_t* last_byte,\r | |
229 | uint8_t* last_byte_bits) {\r | |
230 | if (lgwin == 16) {\r | |
231 | *last_byte = 0;\r | |
232 | *last_byte_bits = 1;\r | |
233 | } else if (lgwin == 17) {\r | |
234 | *last_byte = 1;\r | |
235 | *last_byte_bits = 7;\r | |
236 | } else if (lgwin > 17) {\r | |
237 | *last_byte = (uint8_t)(((lgwin - 17) << 1) | 1);\r | |
238 | *last_byte_bits = 4;\r | |
239 | } else {\r | |
240 | *last_byte = (uint8_t)(((lgwin - 8) << 4) | 1);\r | |
241 | *last_byte_bits = 7;\r | |
242 | }\r | |
243 | }\r | |
244 | \r | |
245 | /* Initializes the command and distance prefix codes for the first block. */\r | |
246 | static void InitCommandPrefixCodes(uint8_t cmd_depths[128],\r | |
247 | uint16_t cmd_bits[128],\r | |
248 | uint8_t cmd_code[512],\r | |
249 | size_t* cmd_code_numbits) {\r | |
250 | static const uint8_t kDefaultCommandDepths[128] = {\r | |
251 | 0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,\r | |
252 | 0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7,\r | |
253 | 7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6,\r | |
254 | 7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,\r | |
255 | 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
256 | 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4,\r | |
257 | 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10,\r | |
258 | 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,\r | |
259 | };\r | |
260 | static const uint16_t kDefaultCommandBits[128] = {\r | |
261 | 0, 0, 8, 9, 3, 35, 7, 71,\r | |
262 | 39, 103, 23, 47, 175, 111, 239, 31,\r | |
263 | 0, 0, 0, 4, 12, 2, 10, 6,\r | |
264 | 13, 29, 11, 43, 27, 59, 87, 55,\r | |
265 | 15, 79, 319, 831, 191, 703, 447, 959,\r | |
266 | 0, 14, 1, 25, 5, 21, 19, 51,\r | |
267 | 119, 159, 95, 223, 479, 991, 63, 575,\r | |
268 | 127, 639, 383, 895, 255, 767, 511, 1023,\r | |
269 | 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
270 | 27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12,\r | |
271 | 2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255,\r | |
272 | 767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095,\r | |
273 | };\r | |
274 | static const uint8_t kDefaultCommandCode[] = {\r | |
275 | 0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,\r | |
276 | 0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,\r | |
277 | 0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa,\r | |
278 | 0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94,\r | |
279 | 0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00,\r | |
280 | };\r | |
281 | static const size_t kDefaultCommandCodeNumBits = 448;\r | |
282 | COPY_ARRAY(cmd_depths, kDefaultCommandDepths);\r | |
283 | COPY_ARRAY(cmd_bits, kDefaultCommandBits);\r | |
284 | \r | |
285 | /* Initialize the pre-compressed form of the command and distance prefix\r | |
286 | codes. */\r | |
287 | COPY_ARRAY(cmd_code, kDefaultCommandCode);\r | |
288 | *cmd_code_numbits = kDefaultCommandCodeNumBits;\r | |
289 | }\r | |
290 | \r | |
291 | /* Decide about the context map based on the ability of the prediction\r | |
292 | ability of the previous byte UTF8-prefix on the next byte. The\r | |
293 | prediction ability is calculated as shannon entropy. Here we need\r | |
294 | shannon entropy instead of 'BitsEntropy' since the prefix will be\r | |
295 | encoded with the remaining 6 bits of the following byte, and\r | |
296 | BitsEntropy will assume that symbol to be stored alone using Huffman\r | |
297 | coding. */\r | |
298 | static void ChooseContextMap(int quality,\r | |
299 | uint32_t* bigram_histo,\r | |
300 | size_t* num_literal_contexts,\r | |
301 | const uint32_t** literal_context_map) {\r | |
302 | static const uint32_t kStaticContextMapContinuation[64] = {\r | |
303 | 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
304 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
305 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
306 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
307 | };\r | |
308 | static const uint32_t kStaticContextMapSimpleUTF8[64] = {\r | |
309 | 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
310 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
311 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
312 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
313 | };\r | |
314 | \r | |
315 | uint32_t monogram_histo[3] = { 0 };\r | |
316 | uint32_t two_prefix_histo[6] = { 0 };\r | |
317 | size_t total = 0;\r | |
318 | size_t i;\r | |
319 | size_t dummy;\r | |
320 | double entropy[4];\r | |
321 | for (i = 0; i < 9; ++i) {\r | |
322 | size_t j = i;\r | |
323 | total += bigram_histo[i];\r | |
324 | monogram_histo[i % 3] += bigram_histo[i];\r | |
325 | if (j >= 6) {\r | |
326 | j -= 6;\r | |
327 | }\r | |
328 | two_prefix_histo[j] += bigram_histo[i];\r | |
329 | }\r | |
330 | entropy[1] = ShannonEntropy(monogram_histo, 3, &dummy);\r | |
331 | entropy[2] = (ShannonEntropy(two_prefix_histo, 3, &dummy) +\r | |
332 | ShannonEntropy(two_prefix_histo + 3, 3, &dummy));\r | |
333 | entropy[3] = 0;\r | |
334 | for (i = 0; i < 3; ++i) {\r | |
335 | entropy[3] += ShannonEntropy(bigram_histo + 3 * i, 3, &dummy);\r | |
336 | }\r | |
337 | \r | |
338 | assert(total != 0);\r | |
339 | entropy[0] = 1.0 / (double)total;\r | |
340 | entropy[1] *= entropy[0];\r | |
341 | entropy[2] *= entropy[0];\r | |
342 | entropy[3] *= entropy[0];\r | |
343 | \r | |
344 | if (quality < MIN_QUALITY_FOR_HQ_CONTEXT_MODELING) {\r | |
345 | /* 3 context models is a bit slower, don't use it at lower qualities. */\r | |
346 | entropy[3] = entropy[1] * 10;\r | |
347 | }\r | |
348 | /* If expected savings by symbol are less than 0.2 bits, skip the\r | |
349 | context modeling -- in exchange for faster decoding speed. */\r | |
350 | if (entropy[1] - entropy[2] < 0.2 &&\r | |
351 | entropy[1] - entropy[3] < 0.2) {\r | |
352 | *num_literal_contexts = 1;\r | |
353 | } else if (entropy[2] - entropy[3] < 0.02) {\r | |
354 | *num_literal_contexts = 2;\r | |
355 | *literal_context_map = kStaticContextMapSimpleUTF8;\r | |
356 | } else {\r | |
357 | *num_literal_contexts = 3;\r | |
358 | *literal_context_map = kStaticContextMapContinuation;\r | |
359 | }\r | |
360 | }\r | |
361 | \r | |
362 | static void DecideOverLiteralContextModeling(const uint8_t* input,\r | |
363 | size_t start_pos, size_t length, size_t mask, int quality,\r | |
364 | ContextType* literal_context_mode, size_t* num_literal_contexts,\r | |
365 | const uint32_t** literal_context_map) {\r | |
366 | if (quality < MIN_QUALITY_FOR_CONTEXT_MODELING || length < 64) {\r | |
367 | return;\r | |
368 | } else {\r | |
369 | /* Gather bigram data of the UTF8 byte prefixes. To make the analysis of\r | |
370 | UTF8 data faster we only examine 64 byte long strides at every 4kB\r | |
371 | intervals. */\r | |
372 | const size_t end_pos = start_pos + length;\r | |
373 | uint32_t bigram_prefix_histo[9] = { 0 };\r | |
374 | for (; start_pos + 64 <= end_pos; start_pos += 4096) {\r | |
375 | static const int lut[4] = { 0, 0, 1, 2 };\r | |
376 | const size_t stride_end_pos = start_pos + 64;\r | |
377 | int prev = lut[input[start_pos & mask] >> 6] * 3;\r | |
378 | size_t pos;\r | |
379 | for (pos = start_pos + 1; pos < stride_end_pos; ++pos) {\r | |
380 | const uint8_t literal = input[pos & mask];\r | |
381 | ++bigram_prefix_histo[prev + lut[literal >> 6]];\r | |
382 | prev = lut[literal >> 6] * 3;\r | |
383 | }\r | |
384 | }\r | |
385 | *literal_context_mode = CONTEXT_UTF8;\r | |
386 | ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,\r | |
387 | literal_context_map);\r | |
388 | }\r | |
389 | }\r | |
390 | \r | |
391 | static BROTLI_BOOL ShouldCompress(\r | |
392 | const uint8_t* data, const size_t mask, const uint64_t last_flush_pos,\r | |
393 | const size_t bytes, const size_t num_literals, const size_t num_commands) {\r | |
394 | if (num_commands < (bytes >> 8) + 2) {\r | |
395 | if (num_literals > 0.99 * (double)bytes) {\r | |
396 | uint32_t literal_histo[256] = { 0 };\r | |
397 | static const uint32_t kSampleRate = 13;\r | |
398 | static const double kMinEntropy = 7.92;\r | |
399 | const double bit_cost_threshold =\r | |
400 | (double)bytes * kMinEntropy / kSampleRate;\r | |
401 | size_t t = (bytes + kSampleRate - 1) / kSampleRate;\r | |
402 | uint32_t pos = (uint32_t)last_flush_pos;\r | |
403 | size_t i;\r | |
404 | for (i = 0; i < t; i++) {\r | |
405 | ++literal_histo[data[pos & mask]];\r | |
406 | pos += kSampleRate;\r | |
407 | }\r | |
408 | if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {\r | |
409 | return BROTLI_FALSE;\r | |
410 | }\r | |
411 | }\r | |
412 | }\r | |
413 | return BROTLI_TRUE;\r | |
414 | }\r | |
415 | \r | |
416 | static void WriteMetaBlockInternal(MemoryManager* m,\r | |
417 | const uint8_t* data,\r | |
418 | const size_t mask,\r | |
419 | const uint64_t last_flush_pos,\r | |
420 | const size_t bytes,\r | |
421 | const BROTLI_BOOL is_last,\r | |
422 | const BrotliEncoderParams* params,\r | |
423 | const uint8_t prev_byte,\r | |
424 | const uint8_t prev_byte2,\r | |
425 | const size_t num_literals,\r | |
426 | const size_t num_commands,\r | |
427 | Command* commands,\r | |
428 | const int* saved_dist_cache,\r | |
429 | int* dist_cache,\r | |
430 | size_t* storage_ix,\r | |
431 | uint8_t* storage) {\r | |
432 | const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos);\r | |
433 | uint8_t last_byte;\r | |
434 | uint8_t last_byte_bits;\r | |
435 | uint32_t num_direct_distance_codes = 0;\r | |
436 | uint32_t distance_postfix_bits = 0;\r | |
437 | \r | |
438 | if (bytes == 0) {\r | |
439 | /* Write the ISLAST and ISEMPTY bits. */\r | |
440 | BrotliWriteBits(2, 3, storage_ix, storage);\r | |
441 | *storage_ix = (*storage_ix + 7u) & ~7u;\r | |
442 | return;\r | |
443 | }\r | |
444 | \r | |
445 | if (!ShouldCompress(data, mask, last_flush_pos, bytes,\r | |
446 | num_literals, num_commands)) {\r | |
447 | /* Restore the distance cache, as its last update by\r | |
448 | CreateBackwardReferences is now unused. */\r | |
449 | memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));\r | |
450 | BrotliStoreUncompressedMetaBlock(is_last, data,\r | |
451 | wrapped_last_flush_pos, mask, bytes,\r | |
452 | storage_ix, storage);\r | |
453 | return;\r | |
454 | }\r | |
455 | \r | |
456 | last_byte = storage[0];\r | |
457 | last_byte_bits = (uint8_t)(*storage_ix & 0xff);\r | |
458 | if (params->quality >= MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES &&\r | |
459 | params->mode == BROTLI_MODE_FONT) {\r | |
460 | num_direct_distance_codes = 12;\r | |
461 | distance_postfix_bits = 1;\r | |
462 | RecomputeDistancePrefixes(commands,\r | |
463 | num_commands,\r | |
464 | num_direct_distance_codes,\r | |
465 | distance_postfix_bits);\r | |
466 | }\r | |
467 | if (params->quality <= MAX_QUALITY_FOR_STATIC_ENRTOPY_CODES) {\r | |
468 | BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos,\r | |
469 | bytes, mask, is_last,\r | |
470 | commands, num_commands,\r | |
471 | storage_ix, storage);\r | |
472 | if (BROTLI_IS_OOM(m)) return;\r | |
473 | } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {\r | |
474 | BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos,\r | |
475 | bytes, mask, is_last,\r | |
476 | commands, num_commands,\r | |
477 | storage_ix, storage);\r | |
478 | if (BROTLI_IS_OOM(m)) return;\r | |
479 | } else {\r | |
480 | ContextType literal_context_mode = CONTEXT_UTF8;\r | |
481 | MetaBlockSplit mb;\r | |
482 | InitMetaBlockSplit(&mb);\r | |
483 | if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {\r | |
484 | size_t num_literal_contexts = 1;\r | |
485 | const uint32_t* literal_context_map = NULL;\r | |
486 | DecideOverLiteralContextModeling(data, wrapped_last_flush_pos,\r | |
487 | bytes, mask,\r | |
488 | params->quality,\r | |
489 | &literal_context_mode,\r | |
490 | &num_literal_contexts,\r | |
491 | &literal_context_map);\r | |
492 | if (literal_context_map == NULL) {\r | |
493 | BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask,\r | |
494 | commands, num_commands, &mb);\r | |
495 | if (BROTLI_IS_OOM(m)) return;\r | |
496 | } else {\r | |
497 | BrotliBuildMetaBlockGreedyWithContexts(m, data,\r | |
498 | wrapped_last_flush_pos,\r | |
499 | mask,\r | |
500 | prev_byte, prev_byte2,\r | |
501 | literal_context_mode,\r | |
502 | num_literal_contexts,\r | |
503 | literal_context_map,\r | |
504 | commands, num_commands,\r | |
505 | &mb);\r | |
506 | if (BROTLI_IS_OOM(m)) return;\r | |
507 | }\r | |
508 | } else {\r | |
509 | if (!BrotliIsMostlyUTF8(data, wrapped_last_flush_pos, mask, bytes,\r | |
510 | kMinUTF8Ratio)) {\r | |
511 | literal_context_mode = CONTEXT_SIGNED;\r | |
512 | }\r | |
513 | BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, params,\r | |
514 | prev_byte, prev_byte2,\r | |
515 | commands, num_commands,\r | |
516 | literal_context_mode,\r | |
517 | &mb);\r | |
518 | if (BROTLI_IS_OOM(m)) return;\r | |
519 | }\r | |
520 | if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) {\r | |
521 | BrotliOptimizeHistograms(num_direct_distance_codes,\r | |
522 | distance_postfix_bits,\r | |
523 | &mb);\r | |
524 | }\r | |
525 | BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask,\r | |
526 | prev_byte, prev_byte2,\r | |
527 | is_last,\r | |
528 | num_direct_distance_codes,\r | |
529 | distance_postfix_bits,\r | |
530 | literal_context_mode,\r | |
531 | commands, num_commands,\r | |
532 | &mb,\r | |
533 | storage_ix, storage);\r | |
534 | if (BROTLI_IS_OOM(m)) return;\r | |
535 | DestroyMetaBlockSplit(m, &mb);\r | |
536 | }\r | |
537 | if (bytes + 4 < (*storage_ix >> 3)) {\r | |
538 | /* Restore the distance cache and last byte. */\r | |
539 | memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));\r | |
540 | storage[0] = last_byte;\r | |
541 | *storage_ix = last_byte_bits;\r | |
542 | BrotliStoreUncompressedMetaBlock(is_last, data,\r | |
543 | wrapped_last_flush_pos, mask,\r | |
544 | bytes, storage_ix, storage);\r | |
545 | }\r | |
546 | }\r | |
547 | \r | |
548 | static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {\r | |
549 | if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE;\r | |
550 | if (s->is_initialized_) return BROTLI_TRUE;\r | |
551 | \r | |
552 | SanitizeParams(&s->params);\r | |
553 | s->params.lgblock = ComputeLgBlock(&s->params);\r | |
554 | \r | |
555 | RingBufferSetup(&s->params, &s->ringbuffer_);\r | |
556 | \r | |
557 | /* Initialize last byte with stream header. */\r | |
558 | EncodeWindowBits(s->params.lgwin, &s->last_byte_, &s->last_byte_bits_);\r | |
559 | \r | |
560 | if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {\r | |
561 | InitCommandPrefixCodes(s->cmd_depths_, s->cmd_bits_,\r | |
562 | s->cmd_code_, &s->cmd_code_numbits_);\r | |
563 | }\r | |
564 | \r | |
565 | /* Initialize hashers. */\r | |
566 | HashersSetup(&s->memory_manager_, &s->hashers_, ChooseHasher(&s->params));\r | |
567 | if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE;\r | |
568 | \r | |
569 | s->is_initialized_ = BROTLI_TRUE;\r | |
570 | return BROTLI_TRUE;\r | |
571 | }\r | |
572 | \r | |
573 | static void BrotliEncoderInitState(BrotliEncoderState* s) {\r | |
574 | s->params.mode = BROTLI_DEFAULT_MODE;\r | |
575 | s->params.quality = BROTLI_DEFAULT_QUALITY;\r | |
576 | s->params.lgwin = BROTLI_DEFAULT_WINDOW;\r | |
577 | s->params.lgblock = 0;\r | |
578 | \r | |
579 | s->input_pos_ = 0;\r | |
580 | s->num_commands_ = 0;\r | |
581 | s->num_literals_ = 0;\r | |
582 | s->last_insert_len_ = 0;\r | |
583 | s->last_flush_pos_ = 0;\r | |
584 | s->last_processed_pos_ = 0;\r | |
585 | s->prev_byte_ = 0;\r | |
586 | s->prev_byte2_ = 0;\r | |
587 | s->storage_size_ = 0;\r | |
588 | s->storage_ = 0;\r | |
589 | s->large_table_ = NULL;\r | |
590 | s->large_table_size_ = 0;\r | |
591 | s->cmd_code_numbits_ = 0;\r | |
592 | s->command_buf_ = NULL;\r | |
593 | s->literal_buf_ = NULL;\r | |
594 | s->next_out_ = NULL;\r | |
595 | s->available_out_ = 0;\r | |
596 | s->total_out_ = 0;\r | |
597 | s->stream_state_ = BROTLI_STREAM_PROCESSING;\r | |
598 | s->is_last_block_emitted_ = BROTLI_FALSE;\r | |
599 | s->is_initialized_ = BROTLI_FALSE;\r | |
600 | \r | |
601 | InitHashers(&s->hashers_);\r | |
602 | \r | |
603 | RingBufferInit(&s->ringbuffer_);\r | |
604 | \r | |
605 | s->commands_ = 0;\r | |
606 | s->cmd_alloc_size_ = 0;\r | |
607 | \r | |
608 | /* Initialize distance cache. */\r | |
609 | s->dist_cache_[0] = 4;\r | |
610 | s->dist_cache_[1] = 11;\r | |
611 | s->dist_cache_[2] = 15;\r | |
612 | s->dist_cache_[3] = 16;\r | |
613 | /* Save the state of the distance cache in case we need to restore it for\r | |
614 | emitting an uncompressed block. */\r | |
615 | memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->dist_cache_));\r | |
616 | }\r | |
617 | \r | |
618 | BrotliEncoderState* BrotliEncoderCreateInstance(brotli_alloc_func alloc_func,\r | |
619 | brotli_free_func free_func,\r | |
620 | void* opaque) {\r | |
621 | BrotliEncoderState* state = 0;\r | |
622 | if (!alloc_func && !free_func) {\r | |
623 | state = (BrotliEncoderState*)malloc(sizeof(BrotliEncoderState));\r | |
624 | } else if (alloc_func && free_func) {\r | |
625 | state = (BrotliEncoderState*)alloc_func(opaque, sizeof(BrotliEncoderState));\r | |
626 | }\r | |
627 | if (state == 0) {\r | |
628 | /* BROTLI_DUMP(); */\r | |
629 | return 0;\r | |
630 | }\r | |
631 | BrotliInitMemoryManager(\r | |
632 | &state->memory_manager_, alloc_func, free_func, opaque);\r | |
633 | BrotliEncoderInitState(state);\r | |
634 | return state;\r | |
635 | }\r | |
636 | \r | |
637 | static void BrotliEncoderCleanupState(BrotliEncoderState* s) {\r | |
638 | MemoryManager* m = &s->memory_manager_;\r | |
639 | if (BROTLI_IS_OOM(m)) {\r | |
640 | BrotliWipeOutMemoryManager(m);\r | |
641 | return;\r | |
642 | }\r | |
643 | BROTLI_FREE(m, s->storage_);\r | |
644 | BROTLI_FREE(m, s->commands_);\r | |
645 | RingBufferFree(m, &s->ringbuffer_);\r | |
646 | DestroyHashers(m, &s->hashers_);\r | |
647 | BROTLI_FREE(m, s->large_table_);\r | |
648 | BROTLI_FREE(m, s->command_buf_);\r | |
649 | BROTLI_FREE(m, s->literal_buf_);\r | |
650 | }\r | |
651 | \r | |
652 | /* Deinitializes and frees BrotliEncoderState instance. */\r | |
653 | void BrotliEncoderDestroyInstance(BrotliEncoderState* state) {\r | |
654 | if (!state) {\r | |
655 | return;\r | |
656 | } else {\r | |
657 | MemoryManager* m = &state->memory_manager_;\r | |
658 | brotli_free_func free_func = m->free_func;\r | |
659 | void* opaque = m->opaque;\r | |
660 | BrotliEncoderCleanupState(state);\r | |
661 | free_func(opaque, state);\r | |
662 | }\r | |
663 | }\r | |
664 | \r | |
665 | void BrotliEncoderCopyInputToRingBuffer(BrotliEncoderState* s,\r | |
666 | const size_t input_size,\r | |
667 | const uint8_t* input_buffer) {\r | |
668 | RingBuffer* ringbuffer_ = &s->ringbuffer_;\r | |
669 | MemoryManager* m = &s->memory_manager_;\r | |
670 | if (!EnsureInitialized(s)) return;\r | |
671 | RingBufferWrite(m, input_buffer, input_size, ringbuffer_);\r | |
672 | if (BROTLI_IS_OOM(m)) return;\r | |
673 | s->input_pos_ += input_size;\r | |
674 | \r | |
675 | /* TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the\r | |
676 | hashing not depend on uninitialized data. This makes compression\r | |
677 | deterministic and it prevents uninitialized memory warnings in Valgrind.\r | |
678 | Even without erasing, the output would be valid (but nondeterministic).\r | |
679 | \r | |
680 | Background information: The compressor stores short (at most 8 bytes)\r | |
681 | substrings of the input already read in a hash table, and detects\r | |
682 | repetitions by looking up such substrings in the hash table. If it\r | |
683 | can find a substring, it checks whether the substring is really there\r | |
684 | in the ring buffer (or it's just a hash collision). Should the hash\r | |
685 | table become corrupt, this check makes sure that the output is\r | |
686 | still valid, albeit the compression ratio would be bad.\r | |
687 | \r | |
688 | The compressor populates the hash table from the ring buffer as it's\r | |
689 | reading new bytes from the input. However, at the last few indexes of\r | |
690 | the ring buffer, there are not enough bytes to build full-length\r | |
691 | substrings from. Since the hash table always contains full-length\r | |
692 | substrings, we erase with dummy 0s here to make sure that those\r | |
693 | substrings will contain 0s at the end instead of uninitialized\r | |
694 | data.\r | |
695 | \r | |
696 | Please note that erasing is not necessary (because the\r | |
697 | memory region is already initialized since he ring buffer\r | |
698 | has a `tail' that holds a copy of the beginning,) so we\r | |
699 | skip erasing if we have already gone around at least once in\r | |
700 | the ring buffer.\r | |
701 | \r | |
702 | Only clear during the first round of ringbuffer writes. On\r | |
703 | subsequent rounds data in the ringbuffer would be affected. */\r | |
704 | if (ringbuffer_->pos_ <= ringbuffer_->mask_) {\r | |
705 | /* This is the first time when the ring buffer is being written.\r | |
706 | We clear 7 bytes just after the bytes that have been copied from\r | |
707 | the input buffer.\r | |
708 | \r | |
709 | The ringbuffer has a "tail" that holds a copy of the beginning,\r | |
710 | but only once the ring buffer has been fully written once, i.e.,\r | |
711 | pos <= mask. For the first time, we need to write values\r | |
712 | in this tail (where index may be larger than mask), so that\r | |
713 | we have exactly defined behavior and don't read un-initialized\r | |
714 | memory. Due to performance reasons, hashing reads data using a\r | |
715 | LOAD64, which can go 7 bytes beyond the bytes written in the\r | |
716 | ringbuffer. */\r | |
717 | memset(ringbuffer_->buffer_ + ringbuffer_->pos_, 0, 7);\r | |
718 | }\r | |
719 | }\r | |
720 | \r | |
721 | void BrotliEncoderSetCustomDictionary(BrotliEncoderState* s, size_t size,\r | |
722 | const uint8_t* dict) {\r | |
723 | size_t max_dict_size = MaxBackwardLimit(s->params.lgwin);\r | |
724 | size_t dict_size = size;\r | |
725 | MemoryManager* m = &s->memory_manager_;\r | |
726 | \r | |
727 | if (!EnsureInitialized(s)) return;\r | |
728 | \r | |
729 | if (dict_size == 0 ||\r | |
730 | s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||\r | |
731 | s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {\r | |
732 | return;\r | |
733 | }\r | |
734 | if (size > max_dict_size) {\r | |
735 | dict += size - max_dict_size;\r | |
736 | dict_size = max_dict_size;\r | |
737 | }\r | |
738 | BrotliEncoderCopyInputToRingBuffer(s, dict_size, dict);\r | |
739 | s->last_flush_pos_ = dict_size;\r | |
740 | s->last_processed_pos_ = dict_size;\r | |
741 | if (dict_size > 0) {\r | |
742 | s->prev_byte_ = dict[dict_size - 1];\r | |
743 | }\r | |
744 | if (dict_size > 1) {\r | |
745 | s->prev_byte2_ = dict[dict_size - 2];\r | |
746 | }\r | |
747 | HashersPrependCustomDictionary(m, &s->hashers_, &s->params, dict_size, dict);\r | |
748 | if (BROTLI_IS_OOM(m)) return;\r | |
749 | }\r | |
750 | \r | |
751 | /* Marks all input as processed.\r | |
752 | Returns true if position wrapping occurs. */\r | |
753 | static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) {\r | |
754 | uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);\r | |
755 | uint32_t wrapped_input_pos = WrapPosition(s->input_pos_);\r | |
756 | s->last_processed_pos_ = s->input_pos_;\r | |
757 | return TO_BROTLI_BOOL(wrapped_input_pos < wrapped_last_processed_pos);\r | |
758 | }\r | |
759 | \r | |
760 | BROTLI_BOOL BrotliEncoderWriteData(\r | |
761 | BrotliEncoderState* s, const BROTLI_BOOL is_last,\r | |
762 | const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) {\r | |
763 | const uint64_t delta = UnprocessedInputSize(s);\r | |
764 | const uint32_t bytes = (uint32_t)delta;\r | |
765 | const uint32_t wrapped_last_processed_pos =\r | |
766 | WrapPosition(s->last_processed_pos_);\r | |
767 | uint8_t* data;\r | |
768 | uint32_t mask;\r | |
769 | MemoryManager* m = &s->memory_manager_;\r | |
770 | \r | |
771 | if (!EnsureInitialized(s)) return BROTLI_FALSE;\r | |
772 | data = s->ringbuffer_.buffer_;\r | |
773 | mask = s->ringbuffer_.mask_;\r | |
774 | \r | |
775 | /* Adding more blocks after "last" block is forbidden. */\r | |
776 | if (s->is_last_block_emitted_) return BROTLI_FALSE;\r | |
777 | if (is_last) s->is_last_block_emitted_ = BROTLI_TRUE;\r | |
778 | \r | |
779 | if (delta > BrotliEncoderInputBlockSize(s)) {\r | |
780 | return BROTLI_FALSE;\r | |
781 | }\r | |
782 | if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY &&\r | |
783 | !s->command_buf_) {\r | |
784 | s->command_buf_ =\r | |
785 | BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);\r | |
786 | s->literal_buf_ =\r | |
787 | BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);\r | |
788 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r | |
789 | }\r | |
790 | \r | |
791 | if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||\r | |
792 | s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {\r | |
793 | uint8_t* storage;\r | |
794 | size_t storage_ix = s->last_byte_bits_;\r | |
795 | size_t table_size;\r | |
796 | int* table;\r | |
797 | \r | |
798 | if (delta == 0 && !is_last) {\r | |
799 | /* We have no new input data and we don't have to finish the stream, so\r | |
800 | nothing to do. */\r | |
801 | *out_size = 0;\r | |
802 | return BROTLI_TRUE;\r | |
803 | }\r | |
804 | storage = GetBrotliStorage(s, 2 * bytes + 500);\r | |
805 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r | |
806 | storage[0] = s->last_byte_;\r | |
807 | table = GetHashTable(s, s->params.quality, bytes, &table_size);\r | |
808 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r | |
809 | if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {\r | |
810 | BrotliCompressFragmentFast(\r | |
811 | m, &data[wrapped_last_processed_pos & mask],\r | |
812 | bytes, is_last,\r | |
813 | table, table_size,\r | |
814 | s->cmd_depths_, s->cmd_bits_,\r | |
815 | &s->cmd_code_numbits_, s->cmd_code_,\r | |
816 | &storage_ix, storage);\r | |
817 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r | |
818 | } else {\r | |
819 | BrotliCompressFragmentTwoPass(\r | |
820 | m, &data[wrapped_last_processed_pos & mask],\r | |
821 | bytes, is_last,\r | |
822 | s->command_buf_, s->literal_buf_,\r | |
823 | table, table_size,\r | |
824 | &storage_ix, storage);\r | |
825 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r | |
826 | }\r | |
827 | s->last_byte_ = storage[storage_ix >> 3];\r | |
828 | s->last_byte_bits_ = storage_ix & 7u;\r | |
829 | UpdateLastProcessedPos(s);\r | |
830 | *output = &storage[0];\r | |
831 | *out_size = storage_ix >> 3;\r | |
832 | return BROTLI_TRUE;\r | |
833 | }\r | |
834 | \r | |
835 | {\r | |
836 | /* Theoretical max number of commands is 1 per 2 bytes. */\r | |
837 | size_t newsize = s->num_commands_ + bytes / 2 + 1;\r | |
838 | if (newsize > s->cmd_alloc_size_) {\r | |
839 | Command* new_commands;\r | |
840 | /* Reserve a bit more memory to allow merging with a next block\r | |
841 | without realloc: that would impact speed. */\r | |
842 | newsize += (bytes / 4) + 16;\r | |
843 | s->cmd_alloc_size_ = newsize;\r | |
844 | new_commands = BROTLI_ALLOC(m, Command, newsize);\r | |
845 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r | |
846 | if (s->commands_) {\r | |
847 | memcpy(new_commands, s->commands_, sizeof(Command) * s->num_commands_);\r | |
848 | BROTLI_FREE(m, s->commands_);\r | |
849 | }\r | |
850 | s->commands_ = new_commands;\r | |
851 | }\r | |
852 | }\r | |
853 | \r | |
854 | BrotliCreateBackwardReferences(m, bytes, wrapped_last_processed_pos,\r | |
855 | is_last, data, mask,\r | |
856 | &s->params,\r | |
857 | &s->hashers_,\r | |
858 | s->dist_cache_,\r | |
859 | &s->last_insert_len_,\r | |
860 | &s->commands_[s->num_commands_],\r | |
861 | &s->num_commands_,\r | |
862 | &s->num_literals_);\r | |
863 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r | |
864 | \r | |
865 | {\r | |
866 | const size_t max_length = MaxMetablockSize(&s->params);\r | |
867 | const size_t max_literals = max_length / 8;\r | |
868 | const size_t max_commands = max_length / 8;\r | |
869 | const size_t processed_bytes = (size_t)(s->input_pos_ - s->last_flush_pos_);\r | |
870 | /* If maximal possible additional block doesn't fit metablock, flush now. */\r | |
871 | /* TODO: Postpone decision until next block arrives? */\r | |
872 | const BROTLI_BOOL next_input_fits_metablock = TO_BROTLI_BOOL(\r | |
873 | processed_bytes + BrotliEncoderInputBlockSize(s) <= max_length);\r | |
874 | /* If block splitting is not used, then flush as soon as there is some\r | |
875 | amount of commands / literals produced. */\r | |
876 | const BROTLI_BOOL should_flush = TO_BROTLI_BOOL(\r | |
877 | s->params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT &&\r | |
878 | s->num_literals_ + s->num_commands_ >= MAX_NUM_DELAYED_SYMBOLS);\r | |
879 | if (!is_last && !force_flush && !should_flush &&\r | |
880 | next_input_fits_metablock &&\r | |
881 | s->num_literals_ < max_literals &&\r | |
882 | s->num_commands_ < max_commands) {\r | |
883 | /* Merge with next input block. Everything will happen later. */\r | |
884 | if (UpdateLastProcessedPos(s)) {\r | |
885 | HashersReset(&s->hashers_, ChooseHasher(&s->params));\r | |
886 | }\r | |
887 | *out_size = 0;\r | |
888 | return BROTLI_TRUE;\r | |
889 | }\r | |
890 | }\r | |
891 | \r | |
892 | /* Create the last insert-only command. */\r | |
893 | if (s->last_insert_len_ > 0) {\r | |
894 | InitInsertCommand(&s->commands_[s->num_commands_++], s->last_insert_len_);\r | |
895 | s->num_literals_ += s->last_insert_len_;\r | |
896 | s->last_insert_len_ = 0;\r | |
897 | }\r | |
898 | \r | |
899 | if (!is_last && s->input_pos_ == s->last_flush_pos_) {\r | |
900 | /* We have no new input data and we don't have to finish the stream, so\r | |
901 | nothing to do. */\r | |
902 | *out_size = 0;\r | |
903 | return BROTLI_TRUE;\r | |
904 | }\r | |
905 | assert(s->input_pos_ >= s->last_flush_pos_);\r | |
906 | assert(s->input_pos_ > s->last_flush_pos_ || is_last);\r | |
907 | assert(s->input_pos_ - s->last_flush_pos_ <= 1u << 24);\r | |
908 | {\r | |
909 | const uint32_t metablock_size =\r | |
910 | (uint32_t)(s->input_pos_ - s->last_flush_pos_);\r | |
911 | uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 500);\r | |
912 | size_t storage_ix = s->last_byte_bits_;\r | |
913 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r | |
914 | storage[0] = s->last_byte_;\r | |
915 | WriteMetaBlockInternal(\r | |
916 | m, data, mask, s->last_flush_pos_, metablock_size, is_last,\r | |
917 | &s->params, s->prev_byte_, s->prev_byte2_,\r | |
918 | s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_,\r | |
919 | s->dist_cache_, &storage_ix, storage);\r | |
920 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r | |
921 | s->last_byte_ = storage[storage_ix >> 3];\r | |
922 | s->last_byte_bits_ = storage_ix & 7u;\r | |
923 | s->last_flush_pos_ = s->input_pos_;\r | |
924 | if (UpdateLastProcessedPos(s)) {\r | |
925 | HashersReset(&s->hashers_, ChooseHasher(&s->params));\r | |
926 | }\r | |
927 | if (s->last_flush_pos_ > 0) {\r | |
928 | s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask];\r | |
929 | }\r | |
930 | if (s->last_flush_pos_ > 1) {\r | |
931 | s->prev_byte2_ = data[(uint32_t)(s->last_flush_pos_ - 2) & mask];\r | |
932 | }\r | |
933 | s->num_commands_ = 0;\r | |
934 | s->num_literals_ = 0;\r | |
935 | /* Save the state of the distance cache in case we need to restore it for\r | |
936 | emitting an uncompressed block. */\r | |
937 | memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->dist_cache_));\r | |
938 | *output = &storage[0];\r | |
939 | *out_size = storage_ix >> 3;\r | |
940 | return BROTLI_TRUE;\r | |
941 | }\r | |
942 | }\r | |
943 | \r | |
944 | BROTLI_BOOL BrotliEncoderWriteMetaBlock(\r | |
945 | BrotliEncoderState* s, const size_t input_size, const uint8_t* input_buffer,\r | |
946 | const BROTLI_BOOL is_last, size_t* encoded_size, uint8_t* encoded_buffer) {\r | |
947 | size_t out_size = 0;\r | |
948 | uint8_t* output;\r | |
949 | int result;\r | |
950 | if (!EnsureInitialized(s)) return BROTLI_FALSE;\r | |
951 | BrotliEncoderCopyInputToRingBuffer(s, input_size, input_buffer);\r | |
952 | result = BrotliEncoderWriteData(\r | |
953 | s, is_last, /* force_flush */ BROTLI_TRUE, &out_size, &output);\r | |
954 | if (!result || out_size > *encoded_size) {\r | |
955 | return BROTLI_FALSE;\r | |
956 | }\r | |
957 | if (out_size > 0) {\r | |
958 | memcpy(encoded_buffer, output, out_size);\r | |
959 | }\r | |
960 | *encoded_size = out_size;\r | |
961 | return BROTLI_TRUE;\r | |
962 | }\r | |
963 | \r | |
964 | BROTLI_BOOL BrotliEncoderWriteMetadata(\r | |
965 | BrotliEncoderState* s, const size_t input_size, const uint8_t* input_buffer,\r | |
966 | const BROTLI_BOOL is_last, size_t* encoded_size, uint8_t* encoded_buffer) {\r | |
967 | uint64_t hdr_buffer_data[2];\r | |
968 | uint8_t* hdr_buffer = (uint8_t*)&hdr_buffer_data[0];\r | |
969 | size_t storage_ix;\r | |
970 | if (!EnsureInitialized(s)) return BROTLI_FALSE;\r | |
971 | if (input_size > (1 << 24) || input_size + 6 > *encoded_size) {\r | |
972 | return BROTLI_FALSE;\r | |
973 | }\r | |
974 | storage_ix = s->last_byte_bits_;\r | |
975 | hdr_buffer[0] = s->last_byte_;\r | |
976 | BrotliWriteBits(1, 0, &storage_ix, hdr_buffer);\r | |
977 | BrotliWriteBits(2, 3, &storage_ix, hdr_buffer);\r | |
978 | BrotliWriteBits(1, 0, &storage_ix, hdr_buffer);\r | |
979 | if (input_size == 0) {\r | |
980 | BrotliWriteBits(2, 0, &storage_ix, hdr_buffer);\r | |
981 | *encoded_size = (storage_ix + 7u) >> 3;\r | |
982 | memcpy(encoded_buffer, hdr_buffer, *encoded_size);\r | |
983 | } else {\r | |
984 | uint32_t nbits = (input_size == 1) ? 0 :\r | |
985 | (Log2FloorNonZero((uint32_t)input_size - 1) + 1);\r | |
986 | uint32_t nbytes = (nbits + 7) / 8;\r | |
987 | size_t hdr_size;\r | |
988 | BrotliWriteBits(2, nbytes, &storage_ix, hdr_buffer);\r | |
989 | BrotliWriteBits(8 * nbytes, input_size - 1, &storage_ix, hdr_buffer);\r | |
990 | hdr_size = (storage_ix + 7u) >> 3;\r | |
991 | memcpy(encoded_buffer, hdr_buffer, hdr_size);\r | |
992 | memcpy(&encoded_buffer[hdr_size], input_buffer, input_size);\r | |
993 | *encoded_size = hdr_size + input_size;\r | |
994 | }\r | |
995 | if (is_last) {\r | |
996 | encoded_buffer[(*encoded_size)++] = 3;\r | |
997 | }\r | |
998 | s->last_byte_ = 0;\r | |
999 | s->last_byte_bits_ = 0;\r | |
1000 | return BROTLI_TRUE;\r | |
1001 | }\r | |
1002 | \r | |
1003 | BROTLI_BOOL BrotliEncoderFinishStream(\r | |
1004 | BrotliEncoderState* s, size_t* encoded_size, uint8_t* encoded_buffer) {\r | |
1005 | if (!EnsureInitialized(s)) return BROTLI_FALSE;\r | |
1006 | return BrotliEncoderWriteMetaBlock(\r | |
1007 | s, 0, NULL, 1, encoded_size, encoded_buffer);\r | |
1008 | }\r | |
1009 | \r | |
1010 | static BROTLI_BOOL BrotliCompressBufferQuality10(\r | |
1011 | int lgwin, size_t input_size, const uint8_t* input_buffer,\r | |
1012 | size_t* encoded_size, uint8_t* encoded_buffer) {\r | |
1013 | MemoryManager memory_manager;\r | |
1014 | MemoryManager* m = &memory_manager;\r | |
1015 | \r | |
1016 | const size_t mask = BROTLI_SIZE_MAX >> 1;\r | |
1017 | const size_t max_backward_limit = MaxBackwardLimit(lgwin);\r | |
1018 | int dist_cache[4] = { 4, 11, 15, 16 };\r | |
1019 | int saved_dist_cache[4] = { 4, 11, 15, 16 };\r | |
1020 | BROTLI_BOOL ok = BROTLI_TRUE;\r | |
1021 | const size_t max_out_size = *encoded_size;\r | |
1022 | size_t total_out_size = 0;\r | |
1023 | uint8_t last_byte;\r | |
1024 | uint8_t last_byte_bits;\r | |
1025 | H10* hasher;\r | |
1026 | \r | |
1027 | const size_t hasher_eff_size =\r | |
1028 | BROTLI_MIN(size_t, input_size, max_backward_limit + 16);\r | |
1029 | \r | |
1030 | BrotliEncoderParams params;\r | |
1031 | \r | |
1032 | const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1);\r | |
1033 | size_t max_block_size;\r | |
1034 | const size_t max_metablock_size = (size_t)1 << lgmetablock;\r | |
1035 | const size_t max_literals_per_metablock = max_metablock_size / 8;\r | |
1036 | const size_t max_commands_per_metablock = max_metablock_size / 8;\r | |
1037 | size_t metablock_start = 0;\r | |
1038 | uint8_t prev_byte = 0;\r | |
1039 | uint8_t prev_byte2 = 0;\r | |
1040 | \r | |
1041 | params.mode = BROTLI_DEFAULT_MODE;\r | |
1042 | params.quality = 10;\r | |
1043 | params.lgwin = lgwin;\r | |
1044 | params.lgblock = 0;\r | |
1045 | SanitizeParams(¶ms);\r | |
1046 | params.lgblock = ComputeLgBlock(¶ms);\r | |
1047 | max_block_size = (size_t)1 << params.lgblock;\r | |
1048 | \r | |
1049 | BrotliInitMemoryManager(m, 0, 0, 0);\r | |
1050 | \r | |
1051 | assert(input_size <= mask + 1);\r | |
1052 | EncodeWindowBits(lgwin, &last_byte, &last_byte_bits);\r | |
1053 | hasher = BROTLI_ALLOC(m, H10, 1);\r | |
1054 | if (BROTLI_IS_OOM(m)) goto oom;\r | |
1055 | InitializeH10(hasher);\r | |
1056 | InitH10(m, hasher, input_buffer, ¶ms, 0, hasher_eff_size, 1);\r | |
1057 | if (BROTLI_IS_OOM(m)) goto oom;\r | |
1058 | \r | |
1059 | while (ok && metablock_start < input_size) {\r | |
1060 | const size_t metablock_end =\r | |
1061 | BROTLI_MIN(size_t, input_size, metablock_start + max_metablock_size);\r | |
1062 | const size_t expected_num_commands =\r | |
1063 | (metablock_end - metablock_start) / 12 + 16;\r | |
1064 | Command* commands = 0;\r | |
1065 | size_t num_commands = 0;\r | |
1066 | size_t last_insert_len = 0;\r | |
1067 | size_t num_literals = 0;\r | |
1068 | size_t metablock_size = 0;\r | |
1069 | size_t cmd_alloc_size = 0;\r | |
1070 | BROTLI_BOOL is_last;\r | |
1071 | uint8_t* storage;\r | |
1072 | size_t storage_ix;\r | |
1073 | \r | |
1074 | size_t block_start;\r | |
1075 | for (block_start = metablock_start; block_start < metablock_end; ) {\r | |
1076 | size_t block_size =\r | |
1077 | BROTLI_MIN(size_t, metablock_end - block_start, max_block_size);\r | |
1078 | ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, block_size + 1);\r | |
1079 | size_t path_size;\r | |
1080 | size_t new_cmd_alloc_size;\r | |
1081 | if (BROTLI_IS_OOM(m)) goto oom;\r | |
1082 | BrotliInitZopfliNodes(nodes, block_size + 1);\r | |
1083 | StitchToPreviousBlockH10(hasher, block_size, block_start,\r | |
1084 | input_buffer, mask);\r | |
1085 | path_size = BrotliZopfliComputeShortestPath(\r | |
1086 | m, block_size, block_start, input_buffer, mask, ¶ms,\r | |
1087 | max_backward_limit, dist_cache, hasher, nodes);\r | |
1088 | if (BROTLI_IS_OOM(m)) goto oom;\r | |
1089 | /* We allocate a command buffer in the first iteration of this loop that\r | |
1090 | will be likely big enough for the whole metablock, so that for most\r | |
1091 | inputs we will not have to reallocate in later iterations. We do the\r | |
1092 | allocation here and not before the loop, because if the input is small,\r | |
1093 | this will be allocated after the zopfli cost model is freed, so this\r | |
1094 | will not increase peak memory usage.\r | |
1095 | TODO: If the first allocation is too small, increase command\r | |
1096 | buffer size exponentially. */\r | |
1097 | new_cmd_alloc_size = BROTLI_MAX(size_t, expected_num_commands,\r | |
1098 | num_commands + path_size + 1);\r | |
1099 | if (cmd_alloc_size != new_cmd_alloc_size) {\r | |
1100 | Command* new_commands = BROTLI_ALLOC(m, Command, new_cmd_alloc_size);\r | |
1101 | if (BROTLI_IS_OOM(m)) goto oom;\r | |
1102 | cmd_alloc_size = new_cmd_alloc_size;\r | |
1103 | if (commands) {\r | |
1104 | memcpy(new_commands, commands, sizeof(Command) * num_commands);\r | |
1105 | BROTLI_FREE(m, commands);\r | |
1106 | }\r | |
1107 | commands = new_commands;\r | |
1108 | }\r | |
1109 | BrotliZopfliCreateCommands(block_size, block_start, max_backward_limit,\r | |
1110 | &nodes[0], dist_cache, &last_insert_len,\r | |
1111 | &commands[num_commands], &num_literals);\r | |
1112 | num_commands += path_size;\r | |
1113 | block_start += block_size;\r | |
1114 | metablock_size += block_size;\r | |
1115 | BROTLI_FREE(m, nodes);\r | |
1116 | if (num_literals > max_literals_per_metablock ||\r | |
1117 | num_commands > max_commands_per_metablock) {\r | |
1118 | break;\r | |
1119 | }\r | |
1120 | }\r | |
1121 | \r | |
1122 | if (last_insert_len > 0) {\r | |
1123 | InitInsertCommand(&commands[num_commands++], last_insert_len);\r | |
1124 | num_literals += last_insert_len;\r | |
1125 | }\r | |
1126 | \r | |
1127 | is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size);\r | |
1128 | storage = NULL;\r | |
1129 | storage_ix = last_byte_bits;\r | |
1130 | \r | |
1131 | if (metablock_size == 0) {\r | |
1132 | /* Write the ISLAST and ISEMPTY bits. */\r | |
1133 | storage = BROTLI_ALLOC(m, uint8_t, 16);\r | |
1134 | if (BROTLI_IS_OOM(m)) goto oom;\r | |
1135 | storage[0] = last_byte;\r | |
1136 | BrotliWriteBits(2, 3, &storage_ix, storage);\r | |
1137 | storage_ix = (storage_ix + 7u) & ~7u;\r | |
1138 | } else if (!ShouldCompress(input_buffer, mask, metablock_start,\r | |
1139 | metablock_size, num_literals, num_commands)) {\r | |
1140 | /* Restore the distance cache, as its last update by\r | |
1141 | CreateBackwardReferences is now unused. */\r | |
1142 | memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));\r | |
1143 | storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16);\r | |
1144 | if (BROTLI_IS_OOM(m)) goto oom;\r | |
1145 | storage[0] = last_byte;\r | |
1146 | BrotliStoreUncompressedMetaBlock(is_last, input_buffer,\r | |
1147 | metablock_start, mask, metablock_size,\r | |
1148 | &storage_ix, storage);\r | |
1149 | } else {\r | |
1150 | uint32_t num_direct_distance_codes = 0;\r | |
1151 | uint32_t distance_postfix_bits = 0;\r | |
1152 | ContextType literal_context_mode = CONTEXT_UTF8;\r | |
1153 | MetaBlockSplit mb;\r | |
1154 | InitMetaBlockSplit(&mb);\r | |
1155 | if (!BrotliIsMostlyUTF8(input_buffer, metablock_start, mask,\r | |
1156 | metablock_size, kMinUTF8Ratio)) {\r | |
1157 | literal_context_mode = CONTEXT_SIGNED;\r | |
1158 | }\r | |
1159 | BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask, ¶ms,\r | |
1160 | prev_byte, prev_byte2,\r | |
1161 | commands, num_commands,\r | |
1162 | literal_context_mode,\r | |
1163 | &mb);\r | |
1164 | if (BROTLI_IS_OOM(m)) goto oom;\r | |
1165 | BrotliOptimizeHistograms(num_direct_distance_codes,\r | |
1166 | distance_postfix_bits,\r | |
1167 | &mb);\r | |
1168 | storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 500);\r | |
1169 | if (BROTLI_IS_OOM(m)) goto oom;\r | |
1170 | storage[0] = last_byte;\r | |
1171 | BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size,\r | |
1172 | mask, prev_byte, prev_byte2,\r | |
1173 | is_last,\r | |
1174 | num_direct_distance_codes,\r | |
1175 | distance_postfix_bits,\r | |
1176 | literal_context_mode,\r | |
1177 | commands, num_commands,\r | |
1178 | &mb,\r | |
1179 | &storage_ix, storage);\r | |
1180 | if (BROTLI_IS_OOM(m)) goto oom;\r | |
1181 | if (metablock_size + 4 < (storage_ix >> 3)) {\r | |
1182 | /* Restore the distance cache and last byte. */\r | |
1183 | memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));\r | |
1184 | storage[0] = last_byte;\r | |
1185 | storage_ix = last_byte_bits;\r | |
1186 | BrotliStoreUncompressedMetaBlock(is_last, input_buffer,\r | |
1187 | metablock_start, mask,\r | |
1188 | metablock_size, &storage_ix, storage);\r | |
1189 | }\r | |
1190 | DestroyMetaBlockSplit(m, &mb);\r | |
1191 | }\r | |
1192 | last_byte = storage[storage_ix >> 3];\r | |
1193 | last_byte_bits = storage_ix & 7u;\r | |
1194 | metablock_start += metablock_size;\r | |
1195 | prev_byte = input_buffer[metablock_start - 1];\r | |
1196 | prev_byte2 = input_buffer[metablock_start - 2];\r | |
1197 | /* Save the state of the distance cache in case we need to restore it for\r | |
1198 | emitting an uncompressed block. */\r | |
1199 | memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));\r | |
1200 | \r | |
1201 | {\r | |
1202 | const size_t out_size = storage_ix >> 3;\r | |
1203 | total_out_size += out_size;\r | |
1204 | if (total_out_size <= max_out_size) {\r | |
1205 | memcpy(encoded_buffer, storage, out_size);\r | |
1206 | encoded_buffer += out_size;\r | |
1207 | } else {\r | |
1208 | ok = BROTLI_FALSE;\r | |
1209 | }\r | |
1210 | }\r | |
1211 | BROTLI_FREE(m, storage);\r | |
1212 | BROTLI_FREE(m, commands);\r | |
1213 | }\r | |
1214 | \r | |
1215 | *encoded_size = total_out_size;\r | |
1216 | CleanupH10(m, hasher);\r | |
1217 | BROTLI_FREE(m, hasher);\r | |
1218 | return ok;\r | |
1219 | \r | |
1220 | oom:\r | |
1221 | BrotliWipeOutMemoryManager(m);\r | |
1222 | return BROTLI_FALSE;\r | |
1223 | }\r | |
1224 | \r | |
1225 | size_t BrotliEncoderMaxCompressedSize(size_t input_size) {\r | |
1226 | /* [window bits / empty metadata] + N * [uncompressed] + [last empty] */\r | |
1227 | size_t num_large_blocks = input_size >> 24;\r | |
1228 | size_t tail = input_size - (num_large_blocks << 24);\r | |
1229 | size_t tail_overhead = (tail > (1 << 20)) ? 4 : 3;\r | |
1230 | size_t overhead = 2 + (4 * num_large_blocks) + tail_overhead + 1;\r | |
1231 | size_t result = input_size + overhead;\r | |
1232 | if (input_size == 0) return 1;\r | |
1233 | return (result < input_size) ? 0 : result;\r | |
1234 | }\r | |
1235 | \r | |
1236 | /* Wraps data to uncompressed brotli stream with minimal window size.\r | |
1237 | |output| should point at region with at least BrotliEncoderMaxCompressedSize\r | |
1238 | addressable bytes.\r | |
1239 | Returns the length of stream. */\r | |
1240 | static size_t MakeUncompressedStream(\r | |
1241 | const uint8_t* input, size_t input_size, uint8_t* output) {\r | |
1242 | size_t size = input_size;\r | |
1243 | size_t result = 0;\r | |
1244 | size_t offset = 0;\r | |
1245 | if (input_size == 0) {\r | |
1246 | output[0] = 6;\r | |
1247 | return 1;\r | |
1248 | }\r | |
1249 | output[result++] = 0x21; /* window bits = 10, is_last = false */\r | |
1250 | output[result++] = 0x03; /* empty metadata, padding */\r | |
1251 | while (size > 0) {\r | |
1252 | uint32_t nibbles = 0;\r | |
1253 | uint32_t chunk_size;\r | |
1254 | uint32_t bits;\r | |
1255 | chunk_size = (size > (1u << 24)) ? (1u << 24) : (uint32_t)size;\r | |
1256 | if (chunk_size > (1u << 16)) nibbles = (chunk_size > (1u << 20)) ? 2 : 1;\r | |
1257 | bits =\r | |
1258 | (nibbles << 1) | ((chunk_size - 1) << 3) | (1u << (19 + 4 * nibbles));\r | |
1259 | output[result++] = (uint8_t)bits;\r | |
1260 | output[result++] = (uint8_t)(bits >> 8);\r | |
1261 | output[result++] = (uint8_t)(bits >> 16);\r | |
1262 | if (nibbles == 2) output[result++] = (uint8_t)(bits >> 24);\r | |
1263 | memcpy(&output[result], &input[offset], chunk_size);\r | |
1264 | result += chunk_size;\r | |
1265 | offset += chunk_size;\r | |
1266 | size -= chunk_size;\r | |
1267 | }\r | |
1268 | output[result++] = 3;\r | |
1269 | return result;\r | |
1270 | }\r | |
1271 | \r | |
1272 | BROTLI_BOOL BrotliEncoderCompress(\r | |
1273 | int quality, int lgwin, BrotliEncoderMode mode, size_t input_size,\r | |
1274 | const uint8_t* input_buffer, size_t* encoded_size,\r | |
1275 | uint8_t* encoded_buffer) {\r | |
1276 | BrotliEncoderState* s;\r | |
1277 | size_t out_size = *encoded_size;\r | |
1278 | const uint8_t* input_start = input_buffer;\r | |
1279 | uint8_t* output_start = encoded_buffer;\r | |
1280 | size_t max_out_size = BrotliEncoderMaxCompressedSize(input_size);\r | |
1281 | if (out_size == 0) {\r | |
1282 | /* Output buffer needs at least one byte. */\r | |
1283 | return BROTLI_FALSE;\r | |
1284 | }\r | |
1285 | if (input_size == 0) {\r | |
1286 | /* Handle the special case of empty input. */\r | |
1287 | *encoded_size = 1;\r | |
1288 | *encoded_buffer = 6;\r | |
1289 | return BROTLI_TRUE;\r | |
1290 | }\r | |
1291 | if (quality == 10) {\r | |
1292 | /* TODO: Implement this direct path for all quality levels. */\r | |
1293 | const int lg_win = BROTLI_MIN(int, 24, BROTLI_MAX(int, 16, lgwin));\r | |
1294 | int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer,\r | |
1295 | encoded_size, encoded_buffer);\r | |
1296 | if (!ok || (max_out_size && *encoded_size > max_out_size)) {\r | |
1297 | goto fallback;\r | |
1298 | }\r | |
1299 | return BROTLI_TRUE;\r | |
1300 | }\r | |
1301 | \r | |
1302 | s = BrotliEncoderCreateInstance(0, 0, 0);\r | |
1303 | if (!s) {\r | |
1304 | return BROTLI_FALSE;\r | |
1305 | } else {\r | |
1306 | size_t available_in = input_size;\r | |
1307 | const uint8_t* next_in = input_buffer;\r | |
1308 | size_t available_out = *encoded_size;\r | |
1309 | uint8_t* next_out = encoded_buffer;\r | |
1310 | size_t total_out = 0;\r | |
1311 | BROTLI_BOOL result = BROTLI_FALSE;\r | |
1312 | BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality);\r | |
1313 | BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);\r | |
1314 | BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode);\r | |
1315 | result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,\r | |
1316 | &available_in, &next_in, &available_out, &next_out, &total_out);\r | |
1317 | if (!BrotliEncoderIsFinished(s)) result = 0;\r | |
1318 | *encoded_size = total_out;\r | |
1319 | BrotliEncoderDestroyInstance(s);\r | |
1320 | if (!result || (max_out_size && *encoded_size > max_out_size)) {\r | |
1321 | goto fallback;\r | |
1322 | }\r | |
1323 | return BROTLI_TRUE;\r | |
1324 | }\r | |
1325 | fallback:\r | |
1326 | *encoded_size = 0;\r | |
1327 | if (!max_out_size) return BROTLI_FALSE;\r | |
1328 | if (out_size >= max_out_size) {\r | |
1329 | *encoded_size =\r | |
1330 | MakeUncompressedStream(input_start, input_size, output_start);\r | |
1331 | return BROTLI_TRUE;\r | |
1332 | }\r | |
1333 | return BROTLI_FALSE;\r | |
1334 | }\r | |
1335 | \r | |
1336 | static void InjectBytePaddingBlock(BrotliEncoderState* s) {\r | |
1337 | uint32_t seal = s->last_byte_;\r | |
1338 | size_t seal_bits = s->last_byte_bits_;\r | |
1339 | s->last_byte_ = 0;\r | |
1340 | s->last_byte_bits_ = 0;\r | |
1341 | /* is_last = 0, data_nibbles = 11, reseved = 0, meta_nibbles = 00 */\r | |
1342 | seal |= 0x6u << seal_bits;\r | |
1343 | seal_bits += 6;\r | |
1344 | s->flush_buf_[0] = (uint8_t)seal;\r | |
1345 | if (seal_bits > 8) s->flush_buf_[1] = (uint8_t)(seal >> 8);\r | |
1346 | s->next_out_ = s->flush_buf_;\r | |
1347 | s->available_out_ = (seal_bits + 7) >> 3;\r | |
1348 | }\r | |
1349 | \r | |
1350 | static BROTLI_BOOL BrotliEncoderCompressStreamFast(\r | |
1351 | BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,\r | |
1352 | const uint8_t** next_in, size_t* available_out, uint8_t** next_out,\r | |
1353 | size_t* total_out) {\r | |
1354 | const size_t block_size_limit = (size_t)1 << s->params.lgwin;\r | |
1355 | const size_t buf_size = BROTLI_MIN(size_t, kCompressFragmentTwoPassBlockSize,\r | |
1356 | BROTLI_MIN(size_t, *available_in, block_size_limit));\r | |
1357 | uint32_t* tmp_command_buf = NULL;\r | |
1358 | uint32_t* command_buf = NULL;\r | |
1359 | uint8_t* tmp_literal_buf = NULL;\r | |
1360 | uint8_t* literal_buf = NULL;\r | |
1361 | MemoryManager* m = &s->memory_manager_;\r | |
1362 | if (s->params.quality != FAST_ONE_PASS_COMPRESSION_QUALITY &&\r | |
1363 | s->params.quality != FAST_TWO_PASS_COMPRESSION_QUALITY) {\r | |
1364 | return BROTLI_FALSE;\r | |
1365 | }\r | |
1366 | if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {\r | |
1367 | if (!s->command_buf_ && buf_size == kCompressFragmentTwoPassBlockSize) {\r | |
1368 | s->command_buf_ =\r | |
1369 | BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);\r | |
1370 | s->literal_buf_ =\r | |
1371 | BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);\r | |
1372 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r | |
1373 | }\r | |
1374 | if (s->command_buf_) {\r | |
1375 | command_buf = s->command_buf_;\r | |
1376 | literal_buf = s->literal_buf_;\r | |
1377 | } else {\r | |
1378 | tmp_command_buf = BROTLI_ALLOC(m, uint32_t, buf_size);\r | |
1379 | tmp_literal_buf = BROTLI_ALLOC(m, uint8_t, buf_size);\r | |
1380 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r | |
1381 | command_buf = tmp_command_buf;\r | |
1382 | literal_buf = tmp_literal_buf;\r | |
1383 | }\r | |
1384 | }\r | |
1385 | \r | |
1386 | while (BROTLI_TRUE) {\r | |
1387 | if (s->available_out_ == 0 &&\r | |
1388 | s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED) {\r | |
1389 | s->stream_state_ = BROTLI_STREAM_PROCESSING;\r | |
1390 | if (s->last_byte_bits_ == 0) break;\r | |
1391 | InjectBytePaddingBlock(s);\r | |
1392 | continue;\r | |
1393 | }\r | |
1394 | \r | |
1395 | if (s->available_out_ != 0 && *available_out != 0) {\r | |
1396 | size_t copy_output_size =\r | |
1397 | BROTLI_MIN(size_t, s->available_out_, *available_out);\r | |
1398 | memcpy(*next_out, s->next_out_, copy_output_size);\r | |
1399 | *next_out += copy_output_size;\r | |
1400 | *available_out -= copy_output_size;\r | |
1401 | s->next_out_ += copy_output_size;\r | |
1402 | s->available_out_ -= copy_output_size;\r | |
1403 | s->total_out_ += copy_output_size;\r | |
1404 | if (total_out) *total_out = s->total_out_;\r | |
1405 | continue;\r | |
1406 | }\r | |
1407 | \r | |
1408 | /* Compress block only when internal output buffer is empty, stream is not\r | |
1409 | finished, there is no pending flush request, and there is either\r | |
1410 | additional input or pending operation. */\r | |
1411 | if (s->available_out_ == 0 &&\r | |
1412 | s->stream_state_ == BROTLI_STREAM_PROCESSING &&\r | |
1413 | (*available_in != 0 || op != BROTLI_OPERATION_PROCESS)) {\r | |
1414 | size_t block_size = BROTLI_MIN(size_t, block_size_limit, *available_in);\r | |
1415 | BROTLI_BOOL is_last =\r | |
1416 | (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH);\r | |
1417 | BROTLI_BOOL force_flush =\r | |
1418 | (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH);\r | |
1419 | size_t max_out_size = 2 * block_size + 500;\r | |
1420 | BROTLI_BOOL inplace = BROTLI_TRUE;\r | |
1421 | uint8_t* storage = NULL;\r | |
1422 | size_t storage_ix = s->last_byte_bits_;\r | |
1423 | size_t table_size;\r | |
1424 | int* table;\r | |
1425 | \r | |
1426 | if (force_flush && block_size == 0) {\r | |
1427 | s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;\r | |
1428 | continue;\r | |
1429 | }\r | |
1430 | if (max_out_size <= *available_out) {\r | |
1431 | storage = *next_out;\r | |
1432 | } else {\r | |
1433 | inplace = 0;\r | |
1434 | storage = GetBrotliStorage(s, max_out_size);\r | |
1435 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r | |
1436 | }\r | |
1437 | storage[0] = s->last_byte_;\r | |
1438 | table = GetHashTable(s, s->params.quality, block_size, &table_size);\r | |
1439 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r | |
1440 | \r | |
1441 | if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {\r | |
1442 | BrotliCompressFragmentFast(m, *next_in, block_size, is_last, table,\r | |
1443 | table_size, s->cmd_depths_, s->cmd_bits_, &s->cmd_code_numbits_,\r | |
1444 | s->cmd_code_, &storage_ix, storage);\r | |
1445 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r | |
1446 | } else {\r | |
1447 | BrotliCompressFragmentTwoPass(m, *next_in, block_size, is_last,\r | |
1448 | command_buf, literal_buf, table, table_size,\r | |
1449 | &storage_ix, storage);\r | |
1450 | if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r | |
1451 | }\r | |
1452 | *next_in += block_size;\r | |
1453 | *available_in -= block_size;\r | |
1454 | if (inplace) {\r | |
1455 | size_t out_bytes = storage_ix >> 3;\r | |
1456 | assert(out_bytes <= *available_out);\r | |
1457 | assert((storage_ix & 7) == 0 || out_bytes < *available_out);\r | |
1458 | *next_out += out_bytes;\r | |
1459 | *available_out -= out_bytes;\r | |
1460 | s->total_out_ += out_bytes;\r | |
1461 | if (total_out) *total_out = s->total_out_;\r | |
1462 | } else {\r | |
1463 | size_t out_bytes = storage_ix >> 3;\r | |
1464 | s->next_out_ = storage;\r | |
1465 | s->available_out_ = out_bytes;\r | |
1466 | }\r | |
1467 | s->last_byte_ = storage[storage_ix >> 3];\r | |
1468 | s->last_byte_bits_ = storage_ix & 7u;\r | |
1469 | \r | |
1470 | if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;\r | |
1471 | if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;\r | |
1472 | continue;\r | |
1473 | }\r | |
1474 | break;\r | |
1475 | }\r | |
1476 | BROTLI_FREE(m, tmp_command_buf);\r | |
1477 | BROTLI_FREE(m, tmp_literal_buf);\r | |
1478 | return BROTLI_TRUE;\r | |
1479 | }\r | |
1480 | \r | |
1481 | BROTLI_BOOL BrotliEncoderCompressStream(\r | |
1482 | BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,\r | |
1483 | const uint8_t** next_in, size_t* available_out,uint8_t** next_out,\r | |
1484 | size_t* total_out) {\r | |
1485 | if (!EnsureInitialized(s)) return BROTLI_FALSE;\r | |
1486 | \r | |
1487 | if (s->stream_state_ != BROTLI_STREAM_PROCESSING && *available_in != 0) {\r | |
1488 | return BROTLI_FALSE;\r | |
1489 | }\r | |
1490 | if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||\r | |
1491 | s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {\r | |
1492 | return BrotliEncoderCompressStreamFast(s, op, available_in, next_in,\r | |
1493 | available_out, next_out, total_out);\r | |
1494 | }\r | |
1495 | while (BROTLI_TRUE) {\r | |
1496 | size_t remaining_block_size = RemainingInputBlockSize(s);\r | |
1497 | \r | |
1498 | if (remaining_block_size != 0 && *available_in != 0) {\r | |
1499 | size_t copy_input_size =\r | |
1500 | BROTLI_MIN(size_t, remaining_block_size, *available_in);\r | |
1501 | BrotliEncoderCopyInputToRingBuffer(s, copy_input_size, *next_in);\r | |
1502 | *next_in += copy_input_size;\r | |
1503 | *available_in -= copy_input_size;\r | |
1504 | continue;\r | |
1505 | }\r | |
1506 | \r | |
1507 | if (s->available_out_ == 0 &&\r | |
1508 | s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED) {\r | |
1509 | s->stream_state_ = BROTLI_STREAM_PROCESSING;\r | |
1510 | if (s->last_byte_bits_ == 0) break;\r | |
1511 | InjectBytePaddingBlock(s);\r | |
1512 | continue;\r | |
1513 | }\r | |
1514 | \r | |
1515 | if (s->available_out_ != 0 && *available_out != 0) {\r | |
1516 | size_t copy_output_size =\r | |
1517 | BROTLI_MIN(size_t, s->available_out_, *available_out);\r | |
1518 | memcpy(*next_out, s->next_out_, copy_output_size);\r | |
1519 | *next_out += copy_output_size;\r | |
1520 | *available_out -= copy_output_size;\r | |
1521 | s->next_out_ += copy_output_size;\r | |
1522 | s->available_out_ -= copy_output_size;\r | |
1523 | s->total_out_ += copy_output_size;\r | |
1524 | if (total_out) *total_out = s->total_out_;\r | |
1525 | continue;\r | |
1526 | }\r | |
1527 | \r | |
1528 | /* Compress data only when internal outpuf buffer is empty, stream is not\r | |
1529 | finished and there is no pending flush request. */\r | |
1530 | if (s->available_out_ == 0 &&\r | |
1531 | s->stream_state_ == BROTLI_STREAM_PROCESSING) {\r | |
1532 | if (remaining_block_size == 0 || op != BROTLI_OPERATION_PROCESS) {\r | |
1533 | BROTLI_BOOL is_last = TO_BROTLI_BOOL(\r | |
1534 | (*available_in == 0) && op == BROTLI_OPERATION_FINISH);\r | |
1535 | BROTLI_BOOL force_flush = TO_BROTLI_BOOL(\r | |
1536 | (*available_in == 0) && op == BROTLI_OPERATION_FLUSH);\r | |
1537 | BROTLI_BOOL result = BrotliEncoderWriteData(s, is_last, force_flush,\r | |
1538 | &s->available_out_, &s->next_out_);\r | |
1539 | if (!result) return BROTLI_FALSE;\r | |
1540 | if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;\r | |
1541 | if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;\r | |
1542 | continue;\r | |
1543 | }\r | |
1544 | }\r | |
1545 | break;\r | |
1546 | }\r | |
1547 | return BROTLI_TRUE;\r | |
1548 | }\r | |
1549 | \r | |
1550 | BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* s) {\r | |
1551 | return TO_BROTLI_BOOL(s->stream_state_ == BROTLI_STREAM_FINISHED &&\r | |
1552 | !BrotliEncoderHasMoreOutput(s));\r | |
1553 | }\r | |
1554 | \r | |
1555 | BROTLI_BOOL BrotliEncoderHasMoreOutput(BrotliEncoderState* s) {\r | |
1556 | return TO_BROTLI_BOOL(s->available_out_ != 0);\r | |
1557 | }\r | |
1558 | \r | |
1559 | \r | |
1560 | #if defined(__cplusplus) || defined(c_plusplus)\r | |
1561 | } /* extern "C" */\r | |
1562 | #endif\r |