+++ /dev/null
-/* Copyright 2013 Google Inc. All Rights Reserved.\r
-\r
- Distributed under MIT license.\r
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT\r
-*/\r
-\r
-/* Implementation of Brotli compressor. */\r
-\r
-#include <brotli/encode.h>\r
-\r
-#include <stdlib.h> /* free, malloc */\r
-#include <string.h> /* memcpy, memset */\r
-\r
-#include "../common/constants.h"\r
-#include "../common/context.h"\r
-#include "../common/platform.h"\r
-#include "../common/version.h"\r
-#include "./backward_references.h"\r
-#include "./backward_references_hq.h"\r
-#include "./bit_cost.h"\r
-#include "./brotli_bit_stream.h"\r
-#include "./compress_fragment.h"\r
-#include "./compress_fragment_two_pass.h"\r
-#include "./encoder_dict.h"\r
-#include "./entropy_encode.h"\r
-#include "./fast_log.h"\r
-#include "./hash.h"\r
-#include "./histogram.h"\r
-#include "./memory.h"\r
-#include "./metablock.h"\r
-#include "./prefix.h"\r
-#include "./quality.h"\r
-#include "./ringbuffer.h"\r
-#include "./utf8_util.h"\r
-#include "./write_bits.h"\r
-\r
-#if defined(__cplusplus) || defined(c_plusplus)\r
-extern "C" {\r
-#endif\r
-\r
-#define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));\r
-\r
-typedef enum BrotliEncoderStreamState {\r
- /* Default state. */\r
- BROTLI_STREAM_PROCESSING = 0,\r
- /* Intermediate state; after next block is emitted, byte-padding should be\r
- performed before getting back to default state. */\r
- BROTLI_STREAM_FLUSH_REQUESTED = 1,\r
- /* Last metablock was produced; no more input is acceptable. */\r
- BROTLI_STREAM_FINISHED = 2,\r
- /* Flushing compressed block and writing meta-data block header. */\r
- BROTLI_STREAM_METADATA_HEAD = 3,\r
- /* Writing metadata block body. */\r
- BROTLI_STREAM_METADATA_BODY = 4\r
-} BrotliEncoderStreamState;\r
-\r
-typedef struct BrotliEncoderStateStruct {\r
- BrotliEncoderParams params;\r
-\r
- MemoryManager memory_manager_;\r
-\r
- HasherHandle hasher_;\r
- uint64_t input_pos_;\r
- RingBuffer ringbuffer_;\r
- size_t cmd_alloc_size_;\r
- Command* commands_;\r
- size_t num_commands_;\r
- size_t num_literals_;\r
- size_t last_insert_len_;\r
- uint64_t last_flush_pos_;\r
- uint64_t last_processed_pos_;\r
- int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES];\r
- int saved_dist_cache_[4];\r
- uint16_t last_bytes_;\r
- uint8_t last_bytes_bits_;\r
- uint8_t prev_byte_;\r
- uint8_t prev_byte2_;\r
- size_t storage_size_;\r
- uint8_t* storage_;\r
- /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */\r
- int small_table_[1 << 10]; /* 4KiB */\r
- int* large_table_; /* Allocated only when needed */\r
- size_t large_table_size_;\r
- /* Command and distance prefix codes (each 64 symbols, stored back-to-back)\r
- used for the next block in FAST_ONE_PASS_COMPRESSION_QUALITY. The command\r
- prefix code is over a smaller alphabet with the following 64 symbols:\r
- 0 - 15: insert length code 0, copy length code 0 - 15, same distance\r
- 16 - 39: insert length code 0, copy length code 0 - 23\r
- 40 - 63: insert length code 0 - 23, copy length code 0\r
- Note that symbols 16 and 40 represent the same code in the full alphabet,\r
- but we do not use either of them in FAST_ONE_PASS_COMPRESSION_QUALITY. */\r
- uint8_t cmd_depths_[128];\r
- uint16_t cmd_bits_[128];\r
- /* The compressed form of the command and distance prefix codes for the next\r
- block in FAST_ONE_PASS_COMPRESSION_QUALITY. */\r
- uint8_t cmd_code_[512];\r
- size_t cmd_code_numbits_;\r
- /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */\r
- uint32_t* command_buf_;\r
- uint8_t* literal_buf_;\r
-\r
- uint8_t* next_out_;\r
- size_t available_out_;\r
- size_t total_out_;\r
- /* Temporary buffer for padding flush bits or metadata block header / body. */\r
- union {\r
- uint64_t u64[2];\r
- uint8_t u8[16];\r
- } tiny_buf_;\r
- uint32_t remaining_metadata_bytes_;\r
- BrotliEncoderStreamState stream_state_;\r
-\r
- BROTLI_BOOL is_last_block_emitted_;\r
- BROTLI_BOOL is_initialized_;\r
-} BrotliEncoderStateStruct;\r
-\r
-static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s);\r
-\r
-static size_t InputBlockSize(BrotliEncoderState* s) {\r
- return (size_t)1 << s->params.lgblock;\r
-}\r
-\r
-static uint64_t UnprocessedInputSize(BrotliEncoderState* s) {\r
- return s->input_pos_ - s->last_processed_pos_;\r
-}\r
-\r
-static size_t RemainingInputBlockSize(BrotliEncoderState* s) {\r
- const uint64_t delta = UnprocessedInputSize(s);\r
- size_t block_size = InputBlockSize(s);\r
- if (delta >= block_size) return 0;\r
- return block_size - (size_t)delta;\r
-}\r
-\r
-BROTLI_BOOL BrotliEncoderSetParameter(\r
- BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value) {\r
- /* Changing parameters on the fly is not implemented yet. */\r
- if (state->is_initialized_) return BROTLI_FALSE;\r
- /* TODO: Validate/clamp parameters here. */\r
- switch (p) {\r
- case BROTLI_PARAM_MODE:\r
- state->params.mode = (BrotliEncoderMode)value;\r
- return BROTLI_TRUE;\r
-\r
- case BROTLI_PARAM_QUALITY:\r
- state->params.quality = (int)value;\r
- return BROTLI_TRUE;\r
-\r
- case BROTLI_PARAM_LGWIN:\r
- state->params.lgwin = (int)value;\r
- return BROTLI_TRUE;\r
-\r
- case BROTLI_PARAM_LGBLOCK:\r
- state->params.lgblock = (int)value;\r
- return BROTLI_TRUE;\r
-\r
- case BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING:\r
- if ((value != 0) && (value != 1)) return BROTLI_FALSE;\r
- state->params.disable_literal_context_modeling = TO_BROTLI_BOOL(!!value);\r
- return BROTLI_TRUE;\r
-\r
- case BROTLI_PARAM_SIZE_HINT:\r
- state->params.size_hint = value;\r
- return BROTLI_TRUE;\r
-\r
- case BROTLI_PARAM_LARGE_WINDOW:\r
- state->params.large_window = TO_BROTLI_BOOL(!!value);\r
- return BROTLI_TRUE;\r
-\r
- case BROTLI_PARAM_NPOSTFIX:\r
- state->params.dist.distance_postfix_bits = value;\r
- return BROTLI_TRUE;\r
-\r
- case BROTLI_PARAM_NDIRECT:\r
- state->params.dist.num_direct_distance_codes = value;\r
- return BROTLI_TRUE;\r
-\r
- default: return BROTLI_FALSE;\r
- }\r
-}\r
-\r
-/* Wraps 64-bit input position to 32-bit ring-buffer position preserving\r
- "not-a-first-lap" feature. */\r
-static uint32_t WrapPosition(uint64_t position) {\r
- uint32_t result = (uint32_t)position;\r
- uint64_t gb = position >> 30;\r
- if (gb > 2) {\r
- /* Wrap every 2GiB; The first 3GB are continuous. */\r
- result = (result & ((1u << 30) - 1)) | ((uint32_t)((gb - 1) & 1) + 1) << 30;\r
- }\r
- return result;\r
-}\r
-\r
-static uint8_t* GetBrotliStorage(BrotliEncoderState* s, size_t size) {\r
- MemoryManager* m = &s->memory_manager_;\r
- if (s->storage_size_ < size) {\r
- BROTLI_FREE(m, s->storage_);\r
- s->storage_ = BROTLI_ALLOC(m, uint8_t, size);\r
- if (BROTLI_IS_OOM(m)) return NULL;\r
- s->storage_size_ = size;\r
- }\r
- return s->storage_;\r
-}\r
-\r
-static size_t HashTableSize(size_t max_table_size, size_t input_size) {\r
- size_t htsize = 256;\r
- while (htsize < max_table_size && htsize < input_size) {\r
- htsize <<= 1;\r
- }\r
- return htsize;\r
-}\r
-\r
-static int* GetHashTable(BrotliEncoderState* s, int quality,\r
- size_t input_size, size_t* table_size) {\r
- /* Use smaller hash table when input.size() is smaller, since we\r
- fill the table, incurring O(hash table size) overhead for\r
- compression, and if the input is short, we won't need that\r
- many hash table entries anyway. */\r
- MemoryManager* m = &s->memory_manager_;\r
- const size_t max_table_size = MaxHashTableSize(quality);\r
- size_t htsize = HashTableSize(max_table_size, input_size);\r
- int* table;\r
- BROTLI_DCHECK(max_table_size >= 256);\r
- if (quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {\r
- /* Only odd shifts are supported by fast-one-pass. */\r
- if ((htsize & 0xAAAAA) == 0) {\r
- htsize <<= 1;\r
- }\r
- }\r
-\r
- if (htsize <= sizeof(s->small_table_) / sizeof(s->small_table_[0])) {\r
- table = s->small_table_;\r
- } else {\r
- if (htsize > s->large_table_size_) {\r
- s->large_table_size_ = htsize;\r
- BROTLI_FREE(m, s->large_table_);\r
- s->large_table_ = BROTLI_ALLOC(m, int, htsize);\r
- if (BROTLI_IS_OOM(m)) return 0;\r
- }\r
- table = s->large_table_;\r
- }\r
-\r
- *table_size = htsize;\r
- memset(table, 0, htsize * sizeof(*table));\r
- return table;\r
-}\r
-\r
-static void EncodeWindowBits(int lgwin, BROTLI_BOOL large_window,\r
- uint16_t* last_bytes, uint8_t* last_bytes_bits) {\r
- if (large_window) {\r
- *last_bytes = (uint16_t)(((lgwin & 0x3F) << 8) | 0x11);\r
- *last_bytes_bits = 14;\r
- } else {\r
- if (lgwin == 16) {\r
- *last_bytes = 0;\r
- *last_bytes_bits = 1;\r
- } else if (lgwin == 17) {\r
- *last_bytes = 1;\r
- *last_bytes_bits = 7;\r
- } else if (lgwin > 17) {\r
- *last_bytes = (uint16_t)(((lgwin - 17) << 1) | 0x01);\r
- *last_bytes_bits = 4;\r
- } else {\r
- *last_bytes = (uint16_t)(((lgwin - 8) << 4) | 0x01);\r
- *last_bytes_bits = 7;\r
- }\r
- }\r
-}\r
-\r
-/* Initializes the command and distance prefix codes for the first block. */\r
-static void InitCommandPrefixCodes(uint8_t cmd_depths[128],\r
- uint16_t cmd_bits[128],\r
- uint8_t cmd_code[512],\r
- size_t* cmd_code_numbits) {\r
- static const uint8_t kDefaultCommandDepths[128] = {\r
- 0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,\r
- 0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7,\r
- 7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6,\r
- 7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,\r
- 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4,\r
- 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10,\r
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,\r
- };\r
- static const uint16_t kDefaultCommandBits[128] = {\r
- 0, 0, 8, 9, 3, 35, 7, 71,\r
- 39, 103, 23, 47, 175, 111, 239, 31,\r
- 0, 0, 0, 4, 12, 2, 10, 6,\r
- 13, 29, 11, 43, 27, 59, 87, 55,\r
- 15, 79, 319, 831, 191, 703, 447, 959,\r
- 0, 14, 1, 25, 5, 21, 19, 51,\r
- 119, 159, 95, 223, 479, 991, 63, 575,\r
- 127, 639, 383, 895, 255, 767, 511, 1023,\r
- 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12,\r
- 2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255,\r
- 767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095,\r
- };\r
- static const uint8_t kDefaultCommandCode[] = {\r
- 0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,\r
- 0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,\r
- 0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa,\r
- 0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94,\r
- 0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00,\r
- };\r
- static const size_t kDefaultCommandCodeNumBits = 448;\r
- COPY_ARRAY(cmd_depths, kDefaultCommandDepths);\r
- COPY_ARRAY(cmd_bits, kDefaultCommandBits);\r
-\r
- /* Initialize the pre-compressed form of the command and distance prefix\r
- codes. */\r
- COPY_ARRAY(cmd_code, kDefaultCommandCode);\r
- *cmd_code_numbits = kDefaultCommandCodeNumBits;\r
-}\r
-\r
-/* Decide about the context map based on the ability of the prediction\r
- ability of the previous byte UTF8-prefix on the next byte. The\r
- prediction ability is calculated as Shannon entropy. Here we need\r
- Shannon entropy instead of 'BitsEntropy' since the prefix will be\r
- encoded with the remaining 6 bits of the following byte, and\r
- BitsEntropy will assume that symbol to be stored alone using Huffman\r
- coding. */\r
-static void ChooseContextMap(int quality,\r
- uint32_t* bigram_histo,\r
- size_t* num_literal_contexts,\r
- const uint32_t** literal_context_map) {\r
- static const uint32_t kStaticContextMapContinuation[64] = {\r
- 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- };\r
- static const uint32_t kStaticContextMapSimpleUTF8[64] = {\r
- 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- };\r
-\r
- uint32_t monogram_histo[3] = { 0 };\r
- uint32_t two_prefix_histo[6] = { 0 };\r
- size_t total;\r
- size_t i;\r
- size_t dummy;\r
- double entropy[4];\r
- for (i = 0; i < 9; ++i) {\r
- monogram_histo[i % 3] += bigram_histo[i];\r
- two_prefix_histo[i % 6] += bigram_histo[i];\r
- }\r
- entropy[1] = ShannonEntropy(monogram_histo, 3, &dummy);\r
- entropy[2] = (ShannonEntropy(two_prefix_histo, 3, &dummy) +\r
- ShannonEntropy(two_prefix_histo + 3, 3, &dummy));\r
- entropy[3] = 0;\r
- for (i = 0; i < 3; ++i) {\r
- entropy[3] += ShannonEntropy(bigram_histo + 3 * i, 3, &dummy);\r
- }\r
-\r
- total = monogram_histo[0] + monogram_histo[1] + monogram_histo[2];\r
- BROTLI_DCHECK(total != 0);\r
- entropy[0] = 1.0 / (double)total;\r
- entropy[1] *= entropy[0];\r
- entropy[2] *= entropy[0];\r
- entropy[3] *= entropy[0];\r
-\r
- if (quality < MIN_QUALITY_FOR_HQ_CONTEXT_MODELING) {\r
- /* 3 context models is a bit slower, don't use it at lower qualities. */\r
- entropy[3] = entropy[1] * 10;\r
- }\r
- /* If expected savings by symbol are less than 0.2 bits, skip the\r
- context modeling -- in exchange for faster decoding speed. */\r
- if (entropy[1] - entropy[2] < 0.2 &&\r
- entropy[1] - entropy[3] < 0.2) {\r
- *num_literal_contexts = 1;\r
- } else if (entropy[2] - entropy[3] < 0.02) {\r
- *num_literal_contexts = 2;\r
- *literal_context_map = kStaticContextMapSimpleUTF8;\r
- } else {\r
- *num_literal_contexts = 3;\r
- *literal_context_map = kStaticContextMapContinuation;\r
- }\r
-}\r
-\r
-/* Decide if we want to use a more complex static context map containing 13\r
- context values, based on the entropy reduction of histograms over the\r
- first 5 bits of literals. */\r
-static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input,\r
- size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,\r
- size_t* num_literal_contexts, const uint32_t** literal_context_map) {\r
- static const uint32_t kStaticContextMapComplexUTF8[64] = {\r
- 11, 11, 12, 12, /* 0 special */\r
- 0, 0, 0, 0, /* 4 lf */\r
- 1, 1, 9, 9, /* 8 space */\r
- 2, 2, 2, 2, /* !, first after space/lf and after something else. */\r
- 1, 1, 1, 1, /* " */\r
- 8, 3, 3, 3, /* % */\r
- 1, 1, 1, 1, /* ({[ */\r
- 2, 2, 2, 2, /* }]) */\r
- 8, 4, 4, 4, /* :; */\r
- 8, 7, 4, 4, /* . */\r
- 8, 0, 0, 0, /* > */\r
- 3, 3, 3, 3, /* [0..9] */\r
- 5, 5, 10, 5, /* [A-Z] */\r
- 5, 5, 10, 5,\r
- 6, 6, 6, 6, /* [a-z] */\r
- 6, 6, 6, 6,\r
- };\r
- BROTLI_UNUSED(quality);\r
- /* Try the more complex static context map only for long data. */\r
- if (size_hint < (1 << 20)) {\r
- return BROTLI_FALSE;\r
- } else {\r
- const size_t end_pos = start_pos + length;\r
- /* To make entropy calculations faster and to fit on the stack, we collect\r
- histograms over the 5 most significant bits of literals. One histogram\r
- without context and 13 additional histograms for each context value. */\r
- uint32_t combined_histo[32] = { 0 };\r
- uint32_t context_histo[13][32] = { { 0 } };\r
- uint32_t total = 0;\r
- double entropy[3];\r
- size_t dummy;\r
- size_t i;\r
- ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8);\r
- for (; start_pos + 64 <= end_pos; start_pos += 4096) {\r
- const size_t stride_end_pos = start_pos + 64;\r
- uint8_t prev2 = input[start_pos & mask];\r
- uint8_t prev1 = input[(start_pos + 1) & mask];\r
- size_t pos;\r
- /* To make the analysis of the data faster we only examine 64 byte long\r
- strides at every 4kB intervals. */\r
- for (pos = start_pos + 2; pos < stride_end_pos; ++pos) {\r
- const uint8_t literal = input[pos & mask];\r
- const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[\r
- BROTLI_CONTEXT(prev1, prev2, utf8_lut)];\r
- ++total;\r
- ++combined_histo[literal >> 3];\r
- ++context_histo[context][literal >> 3];\r
- prev2 = prev1;\r
- prev1 = literal;\r
- }\r
- }\r
- entropy[1] = ShannonEntropy(combined_histo, 32, &dummy);\r
- entropy[2] = 0;\r
- for (i = 0; i < 13; ++i) {\r
- entropy[2] += ShannonEntropy(&context_histo[i][0], 32, &dummy);\r
- }\r
- entropy[0] = 1.0 / (double)total;\r
- entropy[1] *= entropy[0];\r
- entropy[2] *= entropy[0];\r
- /* The triggering heuristics below were tuned by compressing the individual\r
- files of the silesia corpus. If we skip this kind of context modeling\r
- for not very well compressible input (i.e. entropy using context modeling\r
- is 60% of maximal entropy) or if expected savings by symbol are less\r
- than 0.2 bits, then in every case when it triggers, the final compression\r
- ratio is improved. Note however that this heuristics might be too strict\r
- for some cases and could be tuned further. */\r
- if (entropy[2] > 3.0 || entropy[1] - entropy[2] < 0.2) {\r
- return BROTLI_FALSE;\r
- } else {\r
- *num_literal_contexts = 13;\r
- *literal_context_map = kStaticContextMapComplexUTF8;\r
- return BROTLI_TRUE;\r
- }\r
- }\r
-}\r
-\r
-static void DecideOverLiteralContextModeling(const uint8_t* input,\r
- size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,\r
- size_t* num_literal_contexts, const uint32_t** literal_context_map) {\r
- if (quality < MIN_QUALITY_FOR_CONTEXT_MODELING || length < 64) {\r
- return;\r
- } else if (ShouldUseComplexStaticContextMap(\r
- input, start_pos, length, mask, quality, size_hint,\r
- num_literal_contexts, literal_context_map)) {\r
- /* Context map was already set, nothing else to do. */\r
- } else {\r
- /* Gather bi-gram data of the UTF8 byte prefixes. To make the analysis of\r
- UTF8 data faster we only examine 64 byte long strides at every 4kB\r
- intervals. */\r
- const size_t end_pos = start_pos + length;\r
- uint32_t bigram_prefix_histo[9] = { 0 };\r
- for (; start_pos + 64 <= end_pos; start_pos += 4096) {\r
- static const int lut[4] = { 0, 0, 1, 2 };\r
- const size_t stride_end_pos = start_pos + 64;\r
- int prev = lut[input[start_pos & mask] >> 6] * 3;\r
- size_t pos;\r
- for (pos = start_pos + 1; pos < stride_end_pos; ++pos) {\r
- const uint8_t literal = input[pos & mask];\r
- ++bigram_prefix_histo[prev + lut[literal >> 6]];\r
- prev = lut[literal >> 6] * 3;\r
- }\r
- }\r
- ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,\r
- literal_context_map);\r
- }\r
-}\r
-\r
-static BROTLI_BOOL ShouldCompress(\r
- const uint8_t* data, const size_t mask, const uint64_t last_flush_pos,\r
- const size_t bytes, const size_t num_literals, const size_t num_commands) {\r
- if (num_commands < (bytes >> 8) + 2) {\r
- if (num_literals > 0.99 * (double)bytes) {\r
- uint32_t literal_histo[256] = { 0 };\r
- static const uint32_t kSampleRate = 13;\r
- static const double kMinEntropy = 7.92;\r
- const double bit_cost_threshold =\r
- (double)bytes * kMinEntropy / kSampleRate;\r
- size_t t = (bytes + kSampleRate - 1) / kSampleRate;\r
- uint32_t pos = (uint32_t)last_flush_pos;\r
- size_t i;\r
- for (i = 0; i < t; i++) {\r
- ++literal_histo[data[pos & mask]];\r
- pos += kSampleRate;\r
- }\r
- if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {\r
- return BROTLI_FALSE;\r
- }\r
- }\r
- }\r
- return BROTLI_TRUE;\r
-}\r
-\r
-/* Chooses the literal context mode for a metablock */\r
-static ContextType ChooseContextMode(const BrotliEncoderParams* params,\r
- const uint8_t* data, const size_t pos, const size_t mask,\r
- const size_t length) {\r
- /* We only do the computation for the option of something else than\r
- CONTEXT_UTF8 for the highest qualities */\r
- if (params->quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING &&\r
- !BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio)) {\r
- return CONTEXT_SIGNED;\r
- }\r
- return CONTEXT_UTF8;\r
-}\r
-\r
-static void WriteMetaBlockInternal(MemoryManager* m,\r
- const uint8_t* data,\r
- const size_t mask,\r
- const uint64_t last_flush_pos,\r
- const size_t bytes,\r
- const BROTLI_BOOL is_last,\r
- ContextType literal_context_mode,\r
- const BrotliEncoderParams* params,\r
- const uint8_t prev_byte,\r
- const uint8_t prev_byte2,\r
- const size_t num_literals,\r
- const size_t num_commands,\r
- Command* commands,\r
- const int* saved_dist_cache,\r
- int* dist_cache,\r
- size_t* storage_ix,\r
- uint8_t* storage) {\r
- const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos);\r
- uint16_t last_bytes;\r
- uint8_t last_bytes_bits;\r
- ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);\r
- BrotliEncoderParams block_params = *params;\r
-\r
- if (bytes == 0) {\r
- /* Write the ISLAST and ISEMPTY bits. */\r
- BrotliWriteBits(2, 3, storage_ix, storage);\r
- *storage_ix = (*storage_ix + 7u) & ~7u;\r
- return;\r
- }\r
-\r
- if (!ShouldCompress(data, mask, last_flush_pos, bytes,\r
- num_literals, num_commands)) {\r
- /* Restore the distance cache, as its last update by\r
- CreateBackwardReferences is now unused. */\r
- memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));\r
- BrotliStoreUncompressedMetaBlock(is_last, data,\r
- wrapped_last_flush_pos, mask, bytes,\r
- storage_ix, storage);\r
- return;\r
- }\r
-\r
- BROTLI_DCHECK(*storage_ix <= 14);\r
- last_bytes = (uint16_t)((storage[1] << 8) | storage[0]);\r
- last_bytes_bits = (uint8_t)(*storage_ix);\r
- if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {\r
- BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos,\r
- bytes, mask, is_last, params,\r
- commands, num_commands,\r
- storage_ix, storage);\r
- if (BROTLI_IS_OOM(m)) return;\r
- } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {\r
- BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos,\r
- bytes, mask, is_last, params,\r
- commands, num_commands,\r
- storage_ix, storage);\r
- if (BROTLI_IS_OOM(m)) return;\r
- } else {\r
- MetaBlockSplit mb;\r
- InitMetaBlockSplit(&mb);\r
- if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {\r
- size_t num_literal_contexts = 1;\r
- const uint32_t* literal_context_map = NULL;\r
- if (!params->disable_literal_context_modeling) {\r
- DecideOverLiteralContextModeling(\r
- data, wrapped_last_flush_pos, bytes, mask, params->quality,\r
- params->size_hint, &num_literal_contexts,\r
- &literal_context_map);\r
- }\r
- BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask,\r
- prev_byte, prev_byte2, literal_context_lut, num_literal_contexts,\r
- literal_context_map, commands, num_commands, &mb);\r
- if (BROTLI_IS_OOM(m)) return;\r
- } else {\r
- BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, &block_params,\r
- prev_byte, prev_byte2,\r
- commands, num_commands,\r
- literal_context_mode,\r
- &mb);\r
- if (BROTLI_IS_OOM(m)) return;\r
- }\r
- if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) {\r
- /* The number of distance symbols effectively used for distance\r
- histograms. It might be less than distance alphabet size\r
- for "Large Window Brotli" (32-bit). */\r
- uint32_t num_effective_dist_codes = block_params.dist.alphabet_size;\r
- if (num_effective_dist_codes > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) {\r
- num_effective_dist_codes = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS;\r
- }\r
- BrotliOptimizeHistograms(num_effective_dist_codes, &mb);\r
- }\r
- BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask,\r
- prev_byte, prev_byte2,\r
- is_last,\r
- &block_params,\r
- literal_context_mode,\r
- commands, num_commands,\r
- &mb,\r
- storage_ix, storage);\r
- if (BROTLI_IS_OOM(m)) return;\r
- DestroyMetaBlockSplit(m, &mb);\r
- }\r
- if (bytes + 4 < (*storage_ix >> 3)) {\r
- /* Restore the distance cache and last byte. */\r
- memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));\r
- storage[0] = (uint8_t)last_bytes;\r
- storage[1] = (uint8_t)(last_bytes >> 8);\r
- *storage_ix = last_bytes_bits;\r
- BrotliStoreUncompressedMetaBlock(is_last, data,\r
- wrapped_last_flush_pos, mask,\r
- bytes, storage_ix, storage);\r
- }\r
-}\r
-\r
-static void ChooseDistanceParams(BrotliEncoderParams* params) {\r
- uint32_t distance_postfix_bits = 0;\r
- uint32_t num_direct_distance_codes = 0;\r
-\r
- if (params->quality >= MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS) {\r
- uint32_t ndirect_msb;\r
- if (params->mode == BROTLI_MODE_FONT) {\r
- distance_postfix_bits = 1;\r
- num_direct_distance_codes = 12;\r
- } else {\r
- distance_postfix_bits = params->dist.distance_postfix_bits;\r
- num_direct_distance_codes = params->dist.num_direct_distance_codes;\r
- }\r
- ndirect_msb = (num_direct_distance_codes >> distance_postfix_bits) & 0x0F;\r
- if (distance_postfix_bits > BROTLI_MAX_NPOSTFIX ||\r
- num_direct_distance_codes > BROTLI_MAX_NDIRECT ||\r
- (ndirect_msb << distance_postfix_bits) != num_direct_distance_codes) {\r
- distance_postfix_bits = 0;\r
- num_direct_distance_codes = 0;\r
- }\r
- }\r
-\r
- BrotliInitDistanceParams(\r
- params, distance_postfix_bits, num_direct_distance_codes);\r
-}\r
-\r
-static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {\r
- if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE;\r
- if (s->is_initialized_) return BROTLI_TRUE;\r
-\r
- SanitizeParams(&s->params);\r
- s->params.lgblock = ComputeLgBlock(&s->params);\r
- ChooseDistanceParams(&s->params);\r
-\r
- s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;\r
-\r
- RingBufferSetup(&s->params, &s->ringbuffer_);\r
-\r
- /* Initialize last byte with stream header. */\r
- {\r
- int lgwin = s->params.lgwin;\r
- if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||\r
- s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {\r
- lgwin = BROTLI_MAX(int, lgwin, 18);\r
- }\r
- EncodeWindowBits(lgwin, s->params.large_window,\r
- &s->last_bytes_, &s->last_bytes_bits_);\r
- }\r
-\r
- if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {\r
- InitCommandPrefixCodes(s->cmd_depths_, s->cmd_bits_,\r
- s->cmd_code_, &s->cmd_code_numbits_);\r
- }\r
-\r
- s->is_initialized_ = BROTLI_TRUE;\r
- return BROTLI_TRUE;\r
-}\r
-\r
-static void BrotliEncoderInitParams(BrotliEncoderParams* params) {\r
- params->mode = BROTLI_DEFAULT_MODE;\r
- params->large_window = BROTLI_FALSE;\r
- params->quality = BROTLI_DEFAULT_QUALITY;\r
- params->lgwin = BROTLI_DEFAULT_WINDOW;\r
- params->lgblock = 0;\r
- params->size_hint = 0;\r
- params->disable_literal_context_modeling = BROTLI_FALSE;\r
- BrotliInitEncoderDictionary(¶ms->dictionary);\r
- params->dist.distance_postfix_bits = 0;\r
- params->dist.num_direct_distance_codes = 0;\r
- params->dist.alphabet_size =\r
- BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS);\r
- params->dist.max_distance = BROTLI_MAX_DISTANCE;\r
-}\r
-\r
-static void BrotliEncoderInitState(BrotliEncoderState* s) {\r
- BrotliEncoderInitParams(&s->params);\r
- s->input_pos_ = 0;\r
- s->num_commands_ = 0;\r
- s->num_literals_ = 0;\r
- s->last_insert_len_ = 0;\r
- s->last_flush_pos_ = 0;\r
- s->last_processed_pos_ = 0;\r
- s->prev_byte_ = 0;\r
- s->prev_byte2_ = 0;\r
- s->storage_size_ = 0;\r
- s->storage_ = 0;\r
- s->hasher_ = NULL;\r
- s->large_table_ = NULL;\r
- s->large_table_size_ = 0;\r
- s->cmd_code_numbits_ = 0;\r
- s->command_buf_ = NULL;\r
- s->literal_buf_ = NULL;\r
- s->next_out_ = NULL;\r
- s->available_out_ = 0;\r
- s->total_out_ = 0;\r
- s->stream_state_ = BROTLI_STREAM_PROCESSING;\r
- s->is_last_block_emitted_ = BROTLI_FALSE;\r
- s->is_initialized_ = BROTLI_FALSE;\r
-\r
- RingBufferInit(&s->ringbuffer_);\r
-\r
- s->commands_ = 0;\r
- s->cmd_alloc_size_ = 0;\r
-\r
- /* Initialize distance cache. */\r
- s->dist_cache_[0] = 4;\r
- s->dist_cache_[1] = 11;\r
- s->dist_cache_[2] = 15;\r
- s->dist_cache_[3] = 16;\r
- /* Save the state of the distance cache in case we need to restore it for\r
- emitting an uncompressed block. */\r
- memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));\r
-}\r
-\r
-BrotliEncoderState* BrotliEncoderCreateInstance(\r
- brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {\r
- BrotliEncoderState* state = 0;\r
- if (!alloc_func && !free_func) {\r
- state = (BrotliEncoderState*)malloc(sizeof(BrotliEncoderState));\r
- } else if (alloc_func && free_func) {\r
- state = (BrotliEncoderState*)alloc_func(opaque, sizeof(BrotliEncoderState));\r
- }\r
- if (state == 0) {\r
- /* BROTLI_DUMP(); */\r
- return 0;\r
- }\r
- BrotliInitMemoryManager(\r
- &state->memory_manager_, alloc_func, free_func, opaque);\r
- BrotliEncoderInitState(state);\r
- return state;\r
-}\r
-\r
-static void BrotliEncoderCleanupState(BrotliEncoderState* s) {\r
- MemoryManager* m = &s->memory_manager_;\r
- if (BROTLI_IS_OOM(m)) {\r
- BrotliWipeOutMemoryManager(m);\r
- return;\r
- }\r
- BROTLI_FREE(m, s->storage_);\r
- BROTLI_FREE(m, s->commands_);\r
- RingBufferFree(m, &s->ringbuffer_);\r
- DestroyHasher(m, &s->hasher_);\r
- BROTLI_FREE(m, s->large_table_);\r
- BROTLI_FREE(m, s->command_buf_);\r
- BROTLI_FREE(m, s->literal_buf_);\r
-}\r
-\r
-/* Deinitializes and frees BrotliEncoderState instance. */\r
-void BrotliEncoderDestroyInstance(BrotliEncoderState* state) {\r
- if (!state) {\r
- return;\r
- } else {\r
- MemoryManager* m = &state->memory_manager_;\r
- brotli_free_func free_func = m->free_func;\r
- void* opaque = m->opaque;\r
- BrotliEncoderCleanupState(state);\r
- free_func(opaque, state);\r
- }\r
-}\r
-\r
-/*\r
- Copies the given input data to the internal ring buffer of the compressor.\r
- No processing of the data occurs at this time and this function can be\r
- called multiple times before calling WriteBrotliData() to process the\r
- accumulated input. At most input_block_size() bytes of input data can be\r
- copied to the ring buffer, otherwise the next WriteBrotliData() will fail.\r
- */\r
-static void CopyInputToRingBuffer(BrotliEncoderState* s,\r
- const size_t input_size,\r
- const uint8_t* input_buffer) {\r
- RingBuffer* ringbuffer_ = &s->ringbuffer_;\r
- MemoryManager* m = &s->memory_manager_;\r
- RingBufferWrite(m, input_buffer, input_size, ringbuffer_);\r
- if (BROTLI_IS_OOM(m)) return;\r
- s->input_pos_ += input_size;\r
-\r
- /* TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the\r
- hashing not depend on uninitialized data. This makes compression\r
- deterministic and it prevents uninitialized memory warnings in Valgrind.\r
- Even without erasing, the output would be valid (but nondeterministic).\r
-\r
- Background information: The compressor stores short (at most 8 bytes)\r
- substrings of the input already read in a hash table, and detects\r
- repetitions by looking up such substrings in the hash table. If it\r
- can find a substring, it checks whether the substring is really there\r
- in the ring buffer (or it's just a hash collision). Should the hash\r
- table become corrupt, this check makes sure that the output is\r
- still valid, albeit the compression ratio would be bad.\r
-\r
- The compressor populates the hash table from the ring buffer as it's\r
- reading new bytes from the input. However, at the last few indexes of\r
- the ring buffer, there are not enough bytes to build full-length\r
- substrings from. Since the hash table always contains full-length\r
- substrings, we erase with dummy zeros here to make sure that those\r
- substrings will contain zeros at the end instead of uninitialized\r
- data.\r
-\r
- Please note that erasing is not necessary (because the\r
- memory region is already initialized since he ring buffer\r
- has a `tail' that holds a copy of the beginning,) so we\r
- skip erasing if we have already gone around at least once in\r
- the ring buffer.\r
-\r
- Only clear during the first round of ring-buffer writes. On\r
- subsequent rounds data in the ring-buffer would be affected. */\r
- if (ringbuffer_->pos_ <= ringbuffer_->mask_) {\r
- /* This is the first time when the ring buffer is being written.\r
- We clear 7 bytes just after the bytes that have been copied from\r
- the input buffer.\r
-\r
- The ring-buffer has a "tail" that holds a copy of the beginning,\r
- but only once the ring buffer has been fully written once, i.e.,\r
- pos <= mask. For the first time, we need to write values\r
- in this tail (where index may be larger than mask), so that\r
- we have exactly defined behavior and don't read uninitialized\r
- memory. Due to performance reasons, hashing reads data using a\r
- LOAD64, which can go 7 bytes beyond the bytes written in the\r
- ring-buffer. */\r
- memset(ringbuffer_->buffer_ + ringbuffer_->pos_, 0, 7);\r
- }\r
-}\r
-\r
-/* Marks all input as processed.\r
- Returns true if position wrapping occurs. */\r
-static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) {\r
- uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);\r
- uint32_t wrapped_input_pos = WrapPosition(s->input_pos_);\r
- s->last_processed_pos_ = s->input_pos_;\r
- return TO_BROTLI_BOOL(wrapped_input_pos < wrapped_last_processed_pos);\r
-}\r
-\r
-static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,\r
- uint32_t* wrapped_last_processed_pos) {\r
- Command* last_command = &s->commands_[s->num_commands_ - 1];\r
- const uint8_t* data = s->ringbuffer_.buffer_;\r
- const uint32_t mask = s->ringbuffer_.mask_;\r
- uint64_t max_backward_distance =\r
- (((uint64_t)1) << s->params.lgwin) - BROTLI_WINDOW_GAP;\r
- uint64_t last_copy_len = last_command->copy_len_ & 0x1FFFFFF;\r
- uint64_t last_processed_pos = s->last_processed_pos_ - last_copy_len;\r
- uint64_t max_distance = last_processed_pos < max_backward_distance ?\r
- last_processed_pos : max_backward_distance;\r
- uint64_t cmd_dist = (uint64_t)s->dist_cache_[0];\r
- uint32_t distance_code = CommandRestoreDistanceCode(last_command,\r
- &s->params.dist);\r
- if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES ||\r
- distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) == cmd_dist) {\r
- if (cmd_dist <= max_distance) {\r
- while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==\r
- data[(*wrapped_last_processed_pos - cmd_dist) & mask]) {\r
- last_command->copy_len_++;\r
- (*bytes)--;\r
- (*wrapped_last_processed_pos)++;\r
- }\r
- }\r
- /* The copy length is at most the metablock size, and thus expressible. */\r
- GetLengthCode(last_command->insert_len_,\r
- (size_t)((int)(last_command->copy_len_ & 0x1FFFFFF) +\r
- (int)(last_command->copy_len_ >> 25)),\r
- TO_BROTLI_BOOL((last_command->dist_prefix_ & 0x3FF) == 0),\r
- &last_command->cmd_prefix_);\r
- }\r
-}\r
-\r
-/*\r
- Processes the accumulated input data and sets |*out_size| to the length of\r
- the new output meta-block, or to zero if no new output meta-block has been\r
- created (in this case the processed input data is buffered internally).\r
- If |*out_size| is positive, |*output| points to the start of the output\r
- data. If |is_last| or |force_flush| is BROTLI_TRUE, an output meta-block is\r
- always created. However, until |is_last| is BROTLI_TRUE encoder may retain up\r
- to 7 bits of the last byte of output. To force encoder to dump the remaining\r
- bits use WriteMetadata() to append an empty meta-data block.\r
- Returns BROTLI_FALSE if the size of the input data is larger than\r
- input_block_size().\r
- */\r
-static BROTLI_BOOL EncodeData(\r
- BrotliEncoderState* s, const BROTLI_BOOL is_last,\r
- const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) {\r
- const uint64_t delta = UnprocessedInputSize(s);\r
- uint32_t bytes = (uint32_t)delta;\r
- uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);\r
- uint8_t* data;\r
- uint32_t mask;\r
- MemoryManager* m = &s->memory_manager_;\r
- ContextType literal_context_mode;\r
-\r
- data = s->ringbuffer_.buffer_;\r
- mask = s->ringbuffer_.mask_;\r
-\r
- /* Adding more blocks after "last" block is forbidden. */\r
- if (s->is_last_block_emitted_) return BROTLI_FALSE;\r
- if (is_last) s->is_last_block_emitted_ = BROTLI_TRUE;\r
-\r
- if (delta > InputBlockSize(s)) {\r
- return BROTLI_FALSE;\r
- }\r
- if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY &&\r
- !s->command_buf_) {\r
- s->command_buf_ =\r
- BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);\r
- s->literal_buf_ =\r
- BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);\r
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r
- }\r
-\r
- if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||\r
- s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {\r
- uint8_t* storage;\r
- size_t storage_ix = s->last_bytes_bits_;\r
- size_t table_size;\r
- int* table;\r
-\r
- if (delta == 0 && !is_last) {\r
- /* We have no new input data and we don't have to finish the stream, so\r
- nothing to do. */\r
- *out_size = 0;\r
- return BROTLI_TRUE;\r
- }\r
- storage = GetBrotliStorage(s, 2 * bytes + 503);\r
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r
- storage[0] = (uint8_t)s->last_bytes_;\r
- storage[1] = (uint8_t)(s->last_bytes_ >> 8);\r
- table = GetHashTable(s, s->params.quality, bytes, &table_size);\r
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r
- if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {\r
- BrotliCompressFragmentFast(\r
- m, &data[wrapped_last_processed_pos & mask],\r
- bytes, is_last,\r
- table, table_size,\r
- s->cmd_depths_, s->cmd_bits_,\r
- &s->cmd_code_numbits_, s->cmd_code_,\r
- &storage_ix, storage);\r
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r
- } else {\r
- BrotliCompressFragmentTwoPass(\r
- m, &data[wrapped_last_processed_pos & mask],\r
- bytes, is_last,\r
- s->command_buf_, s->literal_buf_,\r
- table, table_size,\r
- &storage_ix, storage);\r
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r
- }\r
- s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);\r
- s->last_bytes_bits_ = storage_ix & 7u;\r
- UpdateLastProcessedPos(s);\r
- *output = &storage[0];\r
- *out_size = storage_ix >> 3;\r
- return BROTLI_TRUE;\r
- }\r
-\r
- {\r
- /* Theoretical max number of commands is 1 per 2 bytes. */\r
- size_t newsize = s->num_commands_ + bytes / 2 + 1;\r
- if (newsize > s->cmd_alloc_size_) {\r
- Command* new_commands;\r
- /* Reserve a bit more memory to allow merging with a next block\r
- without reallocation: that would impact speed. */\r
- newsize += (bytes / 4) + 16;\r
- s->cmd_alloc_size_ = newsize;\r
- new_commands = BROTLI_ALLOC(m, Command, newsize);\r
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r
- if (s->commands_) {\r
- memcpy(new_commands, s->commands_, sizeof(Command) * s->num_commands_);\r
- BROTLI_FREE(m, s->commands_);\r
- }\r
- s->commands_ = new_commands;\r
- }\r
- }\r
-\r
- InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params,\r
- wrapped_last_processed_pos, bytes, is_last);\r
-\r
- literal_context_mode = ChooseContextMode(\r
- &s->params, data, WrapPosition(s->last_flush_pos_),\r
- mask, (size_t)(s->input_pos_ - s->last_flush_pos_));\r
-\r
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r
-\r
- if (s->num_commands_ && s->last_insert_len_ == 0) {\r
- ExtendLastCommand(s, &bytes, &wrapped_last_processed_pos);\r
- }\r
-\r
- if (s->params.quality == ZOPFLIFICATION_QUALITY) {\r
- BROTLI_DCHECK(s->params.hasher.type == 10);\r
- BrotliCreateZopfliBackwardReferences(m,\r
- bytes, wrapped_last_processed_pos,\r
- data, mask, &s->params, s->hasher_, s->dist_cache_,\r
- &s->last_insert_len_, &s->commands_[s->num_commands_],\r
- &s->num_commands_, &s->num_literals_);\r
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r
- } else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) {\r
- BROTLI_DCHECK(s->params.hasher.type == 10);\r
- BrotliCreateHqZopfliBackwardReferences(m,\r
- bytes, wrapped_last_processed_pos,\r
- data, mask, &s->params, s->hasher_, s->dist_cache_,\r
- &s->last_insert_len_, &s->commands_[s->num_commands_],\r
- &s->num_commands_, &s->num_literals_);\r
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r
- } else {\r
- BrotliCreateBackwardReferences(\r
- bytes, wrapped_last_processed_pos,\r
- data, mask, &s->params, s->hasher_, s->dist_cache_,\r
- &s->last_insert_len_, &s->commands_[s->num_commands_],\r
- &s->num_commands_, &s->num_literals_);\r
- }\r
-\r
- {\r
- const size_t max_length = MaxMetablockSize(&s->params);\r
- const size_t max_literals = max_length / 8;\r
- const size_t max_commands = max_length / 8;\r
- const size_t processed_bytes = (size_t)(s->input_pos_ - s->last_flush_pos_);\r
- /* If maximal possible additional block doesn't fit metablock, flush now. */\r
- /* TODO: Postpone decision until next block arrives? */\r
- const BROTLI_BOOL next_input_fits_metablock = TO_BROTLI_BOOL(\r
- processed_bytes + InputBlockSize(s) <= max_length);\r
- /* If block splitting is not used, then flush as soon as there is some\r
- amount of commands / literals produced. */\r
- const BROTLI_BOOL should_flush = TO_BROTLI_BOOL(\r
- s->params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT &&\r
- s->num_literals_ + s->num_commands_ >= MAX_NUM_DELAYED_SYMBOLS);\r
- if (!is_last && !force_flush && !should_flush &&\r
- next_input_fits_metablock &&\r
- s->num_literals_ < max_literals &&\r
- s->num_commands_ < max_commands) {\r
- /* Merge with next input block. Everything will happen later. */\r
- if (UpdateLastProcessedPos(s)) {\r
- HasherReset(s->hasher_);\r
- }\r
- *out_size = 0;\r
- return BROTLI_TRUE;\r
- }\r
- }\r
-\r
- /* Create the last insert-only command. */\r
- if (s->last_insert_len_ > 0) {\r
- InitInsertCommand(&s->commands_[s->num_commands_++], s->last_insert_len_);\r
- s->num_literals_ += s->last_insert_len_;\r
- s->last_insert_len_ = 0;\r
- }\r
-\r
- if (!is_last && s->input_pos_ == s->last_flush_pos_) {\r
- /* We have no new input data and we don't have to finish the stream, so\r
- nothing to do. */\r
- *out_size = 0;\r
- return BROTLI_TRUE;\r
- }\r
- BROTLI_DCHECK(s->input_pos_ >= s->last_flush_pos_);\r
- BROTLI_DCHECK(s->input_pos_ > s->last_flush_pos_ || is_last);\r
- BROTLI_DCHECK(s->input_pos_ - s->last_flush_pos_ <= 1u << 24);\r
- {\r
- const uint32_t metablock_size =\r
- (uint32_t)(s->input_pos_ - s->last_flush_pos_);\r
- uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 503);\r
- size_t storage_ix = s->last_bytes_bits_;\r
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r
- storage[0] = (uint8_t)s->last_bytes_;\r
- storage[1] = (uint8_t)(s->last_bytes_ >> 8);\r
- WriteMetaBlockInternal(\r
- m, data, mask, s->last_flush_pos_, metablock_size, is_last,\r
- literal_context_mode, &s->params, s->prev_byte_, s->prev_byte2_,\r
- s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_,\r
- s->dist_cache_, &storage_ix, storage);\r
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r
- s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);\r
- s->last_bytes_bits_ = storage_ix & 7u;\r
- s->last_flush_pos_ = s->input_pos_;\r
- if (UpdateLastProcessedPos(s)) {\r
- HasherReset(s->hasher_);\r
- }\r
- if (s->last_flush_pos_ > 0) {\r
- s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask];\r
- }\r
- if (s->last_flush_pos_ > 1) {\r
- s->prev_byte2_ = data[(uint32_t)(s->last_flush_pos_ - 2) & mask];\r
- }\r
- s->num_commands_ = 0;\r
- s->num_literals_ = 0;\r
- /* Save the state of the distance cache in case we need to restore it for\r
- emitting an uncompressed block. */\r
- memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));\r
- *output = &storage[0];\r
- *out_size = storage_ix >> 3;\r
- return BROTLI_TRUE;\r
- }\r
-}\r
-\r
-/* Dumps remaining output bits and metadata header to |header|.\r
- Returns number of produced bytes.\r
- REQUIRED: |header| should be 8-byte aligned and at least 16 bytes long.\r
- REQUIRED: |block_size| <= (1 << 24). */\r
-static size_t WriteMetadataHeader(\r
- BrotliEncoderState* s, const size_t block_size, uint8_t* header) {\r
- size_t storage_ix;\r
- storage_ix = s->last_bytes_bits_;\r
- header[0] = (uint8_t)s->last_bytes_;\r
- header[1] = (uint8_t)(s->last_bytes_ >> 8);\r
- s->last_bytes_ = 0;\r
- s->last_bytes_bits_ = 0;\r
-\r
- BrotliWriteBits(1, 0, &storage_ix, header);\r
- BrotliWriteBits(2, 3, &storage_ix, header);\r
- BrotliWriteBits(1, 0, &storage_ix, header);\r
- if (block_size == 0) {\r
- BrotliWriteBits(2, 0, &storage_ix, header);\r
- } else {\r
- uint32_t nbits = (block_size == 1) ? 0 :\r
- (Log2FloorNonZero((uint32_t)block_size - 1) + 1);\r
- uint32_t nbytes = (nbits + 7) / 8;\r
- BrotliWriteBits(2, nbytes, &storage_ix, header);\r
- BrotliWriteBits(8 * nbytes, block_size - 1, &storage_ix, header);\r
- }\r
- return (storage_ix + 7u) >> 3;\r
-}\r
-\r
-static BROTLI_BOOL BrotliCompressBufferQuality10(\r
- int lgwin, size_t input_size, const uint8_t* input_buffer,\r
- size_t* encoded_size, uint8_t* encoded_buffer) {\r
- MemoryManager memory_manager;\r
- MemoryManager* m = &memory_manager;\r
-\r
- const size_t mask = BROTLI_SIZE_MAX >> 1;\r
- const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(lgwin);\r
- int dist_cache[4] = { 4, 11, 15, 16 };\r
- int saved_dist_cache[4] = { 4, 11, 15, 16 };\r
- BROTLI_BOOL ok = BROTLI_TRUE;\r
- const size_t max_out_size = *encoded_size;\r
- size_t total_out_size = 0;\r
- uint16_t last_bytes;\r
- uint8_t last_bytes_bits;\r
- HasherHandle hasher = NULL;\r
-\r
- const size_t hasher_eff_size =\r
- BROTLI_MIN(size_t, input_size, max_backward_limit + BROTLI_WINDOW_GAP);\r
-\r
- BrotliEncoderParams params;\r
-\r
- const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1);\r
- size_t max_block_size;\r
- const size_t max_metablock_size = (size_t)1 << lgmetablock;\r
- const size_t max_literals_per_metablock = max_metablock_size / 8;\r
- const size_t max_commands_per_metablock = max_metablock_size / 8;\r
- size_t metablock_start = 0;\r
- uint8_t prev_byte = 0;\r
- uint8_t prev_byte2 = 0;\r
-\r
- BrotliEncoderInitParams(¶ms);\r
- params.quality = 10;\r
- params.lgwin = lgwin;\r
- if (lgwin > BROTLI_MAX_WINDOW_BITS) {\r
- params.large_window = BROTLI_TRUE;\r
- }\r
- SanitizeParams(¶ms);\r
- params.lgblock = ComputeLgBlock(¶ms);\r
- ChooseDistanceParams(¶ms);\r
- max_block_size = (size_t)1 << params.lgblock;\r
-\r
- BrotliInitMemoryManager(m, 0, 0, 0);\r
-\r
- BROTLI_DCHECK(input_size <= mask + 1);\r
- EncodeWindowBits(lgwin, params.large_window, &last_bytes, &last_bytes_bits);\r
- InitOrStitchToPreviousBlock(m, &hasher, input_buffer, mask, ¶ms,\r
- 0, hasher_eff_size, BROTLI_TRUE);\r
- if (BROTLI_IS_OOM(m)) goto oom;\r
-\r
- while (ok && metablock_start < input_size) {\r
- const size_t metablock_end =\r
- BROTLI_MIN(size_t, input_size, metablock_start + max_metablock_size);\r
- const size_t expected_num_commands =\r
- (metablock_end - metablock_start) / 12 + 16;\r
- Command* commands = 0;\r
- size_t num_commands = 0;\r
- size_t last_insert_len = 0;\r
- size_t num_literals = 0;\r
- size_t metablock_size = 0;\r
- size_t cmd_alloc_size = 0;\r
- BROTLI_BOOL is_last;\r
- uint8_t* storage;\r
- size_t storage_ix;\r
-\r
- ContextType literal_context_mode = ChooseContextMode(¶ms,\r
- input_buffer, metablock_start, mask, metablock_end - metablock_start);\r
-\r
- size_t block_start;\r
- for (block_start = metablock_start; block_start < metablock_end; ) {\r
- size_t block_size =\r
- BROTLI_MIN(size_t, metablock_end - block_start, max_block_size);\r
- ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, block_size + 1);\r
- size_t path_size;\r
- size_t new_cmd_alloc_size;\r
- if (BROTLI_IS_OOM(m)) goto oom;\r
- BrotliInitZopfliNodes(nodes, block_size + 1);\r
- StitchToPreviousBlockH10(hasher, block_size, block_start,\r
- input_buffer, mask);\r
- path_size = BrotliZopfliComputeShortestPath(m,\r
- block_size, block_start, input_buffer, mask, ¶ms,\r
- max_backward_limit, dist_cache, hasher, nodes);\r
- if (BROTLI_IS_OOM(m)) goto oom;\r
- /* We allocate a command buffer in the first iteration of this loop that\r
- will be likely big enough for the whole metablock, so that for most\r
- inputs we will not have to reallocate in later iterations. We do the\r
- allocation here and not before the loop, because if the input is small,\r
- this will be allocated after the Zopfli cost model is freed, so this\r
- will not increase peak memory usage.\r
- TODO: If the first allocation is too small, increase command\r
- buffer size exponentially. */\r
- new_cmd_alloc_size = BROTLI_MAX(size_t, expected_num_commands,\r
- num_commands + path_size + 1);\r
- if (cmd_alloc_size != new_cmd_alloc_size) {\r
- Command* new_commands = BROTLI_ALLOC(m, Command, new_cmd_alloc_size);\r
- if (BROTLI_IS_OOM(m)) goto oom;\r
- cmd_alloc_size = new_cmd_alloc_size;\r
- if (commands) {\r
- memcpy(new_commands, commands, sizeof(Command) * num_commands);\r
- BROTLI_FREE(m, commands);\r
- }\r
- commands = new_commands;\r
- }\r
- BrotliZopfliCreateCommands(block_size, block_start, max_backward_limit,\r
- &nodes[0], dist_cache, &last_insert_len,\r
- ¶ms, &commands[num_commands],\r
- &num_literals);\r
- num_commands += path_size;\r
- block_start += block_size;\r
- metablock_size += block_size;\r
- BROTLI_FREE(m, nodes);\r
- if (num_literals > max_literals_per_metablock ||\r
- num_commands > max_commands_per_metablock) {\r
- break;\r
- }\r
- }\r
-\r
- if (last_insert_len > 0) {\r
- InitInsertCommand(&commands[num_commands++], last_insert_len);\r
- num_literals += last_insert_len;\r
- }\r
-\r
- is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size);\r
- storage = NULL;\r
- storage_ix = last_bytes_bits;\r
-\r
- if (metablock_size == 0) {\r
- /* Write the ISLAST and ISEMPTY bits. */\r
- storage = BROTLI_ALLOC(m, uint8_t, 16);\r
- if (BROTLI_IS_OOM(m)) goto oom;\r
- storage[0] = (uint8_t)last_bytes;\r
- storage[1] = (uint8_t)(last_bytes >> 8);\r
- BrotliWriteBits(2, 3, &storage_ix, storage);\r
- storage_ix = (storage_ix + 7u) & ~7u;\r
- } else if (!ShouldCompress(input_buffer, mask, metablock_start,\r
- metablock_size, num_literals, num_commands)) {\r
- /* Restore the distance cache, as its last update by\r
- CreateBackwardReferences is now unused. */\r
- memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));\r
- storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16);\r
- if (BROTLI_IS_OOM(m)) goto oom;\r
- storage[0] = (uint8_t)last_bytes;\r
- storage[1] = (uint8_t)(last_bytes >> 8);\r
- BrotliStoreUncompressedMetaBlock(is_last, input_buffer,\r
- metablock_start, mask, metablock_size,\r
- &storage_ix, storage);\r
- } else {\r
- MetaBlockSplit mb;\r
- BrotliEncoderParams block_params = params;\r
- InitMetaBlockSplit(&mb);\r
- BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask,\r
- &block_params,\r
- prev_byte, prev_byte2,\r
- commands, num_commands,\r
- literal_context_mode,\r
- &mb);\r
- if (BROTLI_IS_OOM(m)) goto oom;\r
- {\r
- /* The number of distance symbols effectively used for distance\r
- histograms. It might be less than distance alphabet size\r
- for "Large Window Brotli" (32-bit). */\r
- uint32_t num_effective_dist_codes = block_params.dist.alphabet_size;\r
- if (num_effective_dist_codes > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) {\r
- num_effective_dist_codes = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS;\r
- }\r
- BrotliOptimizeHistograms(num_effective_dist_codes, &mb);\r
- }\r
- storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 503);\r
- if (BROTLI_IS_OOM(m)) goto oom;\r
- storage[0] = (uint8_t)last_bytes;\r
- storage[1] = (uint8_t)(last_bytes >> 8);\r
- BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size,\r
- mask, prev_byte, prev_byte2,\r
- is_last,\r
- &block_params,\r
- literal_context_mode,\r
- commands, num_commands,\r
- &mb,\r
- &storage_ix, storage);\r
- if (BROTLI_IS_OOM(m)) goto oom;\r
- if (metablock_size + 4 < (storage_ix >> 3)) {\r
- /* Restore the distance cache and last byte. */\r
- memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));\r
- storage[0] = (uint8_t)last_bytes;\r
- storage[1] = (uint8_t)(last_bytes >> 8);\r
- storage_ix = last_bytes_bits;\r
- BrotliStoreUncompressedMetaBlock(is_last, input_buffer,\r
- metablock_start, mask,\r
- metablock_size, &storage_ix, storage);\r
- }\r
- DestroyMetaBlockSplit(m, &mb);\r
- }\r
- last_bytes = (uint16_t)(storage[storage_ix >> 3]);\r
- last_bytes_bits = storage_ix & 7u;\r
- metablock_start += metablock_size;\r
- if (metablock_start < input_size) {\r
- prev_byte = input_buffer[metablock_start - 1];\r
- prev_byte2 = input_buffer[metablock_start - 2];\r
- }\r
- /* Save the state of the distance cache in case we need to restore it for\r
- emitting an uncompressed block. */\r
- memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));\r
-\r
- {\r
- const size_t out_size = storage_ix >> 3;\r
- total_out_size += out_size;\r
- if (total_out_size <= max_out_size) {\r
- memcpy(encoded_buffer, storage, out_size);\r
- encoded_buffer += out_size;\r
- } else {\r
- ok = BROTLI_FALSE;\r
- }\r
- }\r
- BROTLI_FREE(m, storage);\r
- BROTLI_FREE(m, commands);\r
- }\r
-\r
- *encoded_size = total_out_size;\r
- DestroyHasher(m, &hasher);\r
- return ok;\r
-\r
-oom:\r
- BrotliWipeOutMemoryManager(m);\r
- return BROTLI_FALSE;\r
-}\r
-\r
-size_t BrotliEncoderMaxCompressedSize(size_t input_size) {\r
- /* [window bits / empty metadata] + N * [uncompressed] + [last empty] */\r
- size_t num_large_blocks = input_size >> 14;\r
- size_t overhead = 2 + (4 * num_large_blocks) + 3 + 1;\r
- size_t result = input_size + overhead;\r
- if (input_size == 0) return 2;\r
- return (result < input_size) ? 0 : result;\r
-}\r
-\r
-/* Wraps data to uncompressed brotli stream with minimal window size.\r
- |output| should point at region with at least BrotliEncoderMaxCompressedSize\r
- addressable bytes.\r
- Returns the length of stream. */\r
-static size_t MakeUncompressedStream(\r
- const uint8_t* input, size_t input_size, uint8_t* output) {\r
- size_t size = input_size;\r
- size_t result = 0;\r
- size_t offset = 0;\r
- if (input_size == 0) {\r
- output[0] = 6;\r
- return 1;\r
- }\r
- output[result++] = 0x21; /* window bits = 10, is_last = false */\r
- output[result++] = 0x03; /* empty metadata, padding */\r
- while (size > 0) {\r
- uint32_t nibbles = 0;\r
- uint32_t chunk_size;\r
- uint32_t bits;\r
- chunk_size = (size > (1u << 24)) ? (1u << 24) : (uint32_t)size;\r
- if (chunk_size > (1u << 16)) nibbles = (chunk_size > (1u << 20)) ? 2 : 1;\r
- bits =\r
- (nibbles << 1) | ((chunk_size - 1) << 3) | (1u << (19 + 4 * nibbles));\r
- output[result++] = (uint8_t)bits;\r
- output[result++] = (uint8_t)(bits >> 8);\r
- output[result++] = (uint8_t)(bits >> 16);\r
- if (nibbles == 2) output[result++] = (uint8_t)(bits >> 24);\r
- memcpy(&output[result], &input[offset], chunk_size);\r
- result += chunk_size;\r
- offset += chunk_size;\r
- size -= chunk_size;\r
- }\r
- output[result++] = 3;\r
- return result;\r
-}\r
-\r
-BROTLI_BOOL BrotliEncoderCompress(\r
- int quality, int lgwin, BrotliEncoderMode mode, size_t input_size,\r
- const uint8_t* input_buffer, size_t* encoded_size,\r
- uint8_t* encoded_buffer) {\r
- BrotliEncoderState* s;\r
- size_t out_size = *encoded_size;\r
- const uint8_t* input_start = input_buffer;\r
- uint8_t* output_start = encoded_buffer;\r
- size_t max_out_size = BrotliEncoderMaxCompressedSize(input_size);\r
- if (out_size == 0) {\r
- /* Output buffer needs at least one byte. */\r
- return BROTLI_FALSE;\r
- }\r
- if (input_size == 0) {\r
- /* Handle the special case of empty input. */\r
- *encoded_size = 1;\r
- *encoded_buffer = 6;\r
- return BROTLI_TRUE;\r
- }\r
- if (quality == 10) {\r
- /* TODO: Implement this direct path for all quality levels. */\r
- const int lg_win = BROTLI_MIN(int, BROTLI_LARGE_MAX_WINDOW_BITS,\r
- BROTLI_MAX(int, 16, lgwin));\r
- int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer,\r
- encoded_size, encoded_buffer);\r
- if (!ok || (max_out_size && *encoded_size > max_out_size)) {\r
- goto fallback;\r
- }\r
- return BROTLI_TRUE;\r
- }\r
-\r
- s = BrotliEncoderCreateInstance(0, 0, 0);\r
- if (!s) {\r
- return BROTLI_FALSE;\r
- } else {\r
- size_t available_in = input_size;\r
- const uint8_t* next_in = input_buffer;\r
- size_t available_out = *encoded_size;\r
- uint8_t* next_out = encoded_buffer;\r
- size_t total_out = 0;\r
- BROTLI_BOOL result = BROTLI_FALSE;\r
- BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality);\r
- BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);\r
- BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode);\r
- BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, (uint32_t)input_size);\r
- if (lgwin > BROTLI_MAX_WINDOW_BITS) {\r
- BrotliEncoderSetParameter(s, BROTLI_PARAM_LARGE_WINDOW, BROTLI_TRUE);\r
- }\r
- result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,\r
- &available_in, &next_in, &available_out, &next_out, &total_out);\r
- if (!BrotliEncoderIsFinished(s)) result = 0;\r
- *encoded_size = total_out;\r
- BrotliEncoderDestroyInstance(s);\r
- if (!result || (max_out_size && *encoded_size > max_out_size)) {\r
- goto fallback;\r
- }\r
- return BROTLI_TRUE;\r
- }\r
-fallback:\r
- *encoded_size = 0;\r
- if (!max_out_size) return BROTLI_FALSE;\r
- if (out_size >= max_out_size) {\r
- *encoded_size =\r
- MakeUncompressedStream(input_start, input_size, output_start);\r
- return BROTLI_TRUE;\r
- }\r
- return BROTLI_FALSE;\r
-}\r
-\r
-static void InjectBytePaddingBlock(BrotliEncoderState* s) {\r
- uint32_t seal = s->last_bytes_;\r
- size_t seal_bits = s->last_bytes_bits_;\r
- uint8_t* destination;\r
- s->last_bytes_ = 0;\r
- s->last_bytes_bits_ = 0;\r
- /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */\r
- seal |= 0x6u << seal_bits;\r
- seal_bits += 6;\r
- /* If we have already created storage, then append to it.\r
- Storage is valid until next block is being compressed. */\r
- if (s->next_out_) {\r
- destination = s->next_out_ + s->available_out_;\r
- } else {\r
- destination = s->tiny_buf_.u8;\r
- s->next_out_ = destination;\r
- }\r
- destination[0] = (uint8_t)seal;\r
- if (seal_bits > 8) destination[1] = (uint8_t)(seal >> 8);\r
- if (seal_bits > 16) destination[2] = (uint8_t)(seal >> 16);\r
- s->available_out_ += (seal_bits + 7) >> 3;\r
-}\r
-\r
-/* Injects padding bits or pushes compressed data to output.\r
- Returns false if nothing is done. */\r
-static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s,\r
- size_t* available_out, uint8_t** next_out, size_t* total_out) {\r
- if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&\r
- s->last_bytes_bits_ != 0) {\r
- InjectBytePaddingBlock(s);\r
- return BROTLI_TRUE;\r
- }\r
-\r
- if (s->available_out_ != 0 && *available_out != 0) {\r
- size_t copy_output_size =\r
- BROTLI_MIN(size_t, s->available_out_, *available_out);\r
- memcpy(*next_out, s->next_out_, copy_output_size);\r
- *next_out += copy_output_size;\r
- *available_out -= copy_output_size;\r
- s->next_out_ += copy_output_size;\r
- s->available_out_ -= copy_output_size;\r
- s->total_out_ += copy_output_size;\r
- if (total_out) *total_out = s->total_out_;\r
- return BROTLI_TRUE;\r
- }\r
-\r
- return BROTLI_FALSE;\r
-}\r
-\r
-static void CheckFlushComplete(BrotliEncoderState* s) {\r
- if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&\r
- s->available_out_ == 0) {\r
- s->stream_state_ = BROTLI_STREAM_PROCESSING;\r
- s->next_out_ = 0;\r
- }\r
-}\r
-\r
-static BROTLI_BOOL BrotliEncoderCompressStreamFast(\r
- BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,\r
- const uint8_t** next_in, size_t* available_out, uint8_t** next_out,\r
- size_t* total_out) {\r
- const size_t block_size_limit = (size_t)1 << s->params.lgwin;\r
- const size_t buf_size = BROTLI_MIN(size_t, kCompressFragmentTwoPassBlockSize,\r
- BROTLI_MIN(size_t, *available_in, block_size_limit));\r
- uint32_t* tmp_command_buf = NULL;\r
- uint32_t* command_buf = NULL;\r
- uint8_t* tmp_literal_buf = NULL;\r
- uint8_t* literal_buf = NULL;\r
- MemoryManager* m = &s->memory_manager_;\r
- if (s->params.quality != FAST_ONE_PASS_COMPRESSION_QUALITY &&\r
- s->params.quality != FAST_TWO_PASS_COMPRESSION_QUALITY) {\r
- return BROTLI_FALSE;\r
- }\r
- if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {\r
- if (!s->command_buf_ && buf_size == kCompressFragmentTwoPassBlockSize) {\r
- s->command_buf_ =\r
- BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);\r
- s->literal_buf_ =\r
- BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);\r
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r
- }\r
- if (s->command_buf_) {\r
- command_buf = s->command_buf_;\r
- literal_buf = s->literal_buf_;\r
- } else {\r
- tmp_command_buf = BROTLI_ALLOC(m, uint32_t, buf_size);\r
- tmp_literal_buf = BROTLI_ALLOC(m, uint8_t, buf_size);\r
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r
- command_buf = tmp_command_buf;\r
- literal_buf = tmp_literal_buf;\r
- }\r
- }\r
-\r
- while (BROTLI_TRUE) {\r
- if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {\r
- continue;\r
- }\r
-\r
- /* Compress block only when internal output buffer is empty, stream is not\r
- finished, there is no pending flush request, and there is either\r
- additional input or pending operation. */\r
- if (s->available_out_ == 0 &&\r
- s->stream_state_ == BROTLI_STREAM_PROCESSING &&\r
- (*available_in != 0 || op != BROTLI_OPERATION_PROCESS)) {\r
- size_t block_size = BROTLI_MIN(size_t, block_size_limit, *available_in);\r
- BROTLI_BOOL is_last =\r
- (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH);\r
- BROTLI_BOOL force_flush =\r
- (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH);\r
- size_t max_out_size = 2 * block_size + 503;\r
- BROTLI_BOOL inplace = BROTLI_TRUE;\r
- uint8_t* storage = NULL;\r
- size_t storage_ix = s->last_bytes_bits_;\r
- size_t table_size;\r
- int* table;\r
-\r
- if (force_flush && block_size == 0) {\r
- s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;\r
- continue;\r
- }\r
- if (max_out_size <= *available_out) {\r
- storage = *next_out;\r
- } else {\r
- inplace = BROTLI_FALSE;\r
- storage = GetBrotliStorage(s, max_out_size);\r
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r
- }\r
- storage[0] = (uint8_t)s->last_bytes_;\r
- storage[1] = (uint8_t)(s->last_bytes_ >> 8);\r
- table = GetHashTable(s, s->params.quality, block_size, &table_size);\r
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r
-\r
- if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {\r
- BrotliCompressFragmentFast(m, *next_in, block_size, is_last, table,\r
- table_size, s->cmd_depths_, s->cmd_bits_, &s->cmd_code_numbits_,\r
- s->cmd_code_, &storage_ix, storage);\r
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r
- } else {\r
- BrotliCompressFragmentTwoPass(m, *next_in, block_size, is_last,\r
- command_buf, literal_buf, table, table_size,\r
- &storage_ix, storage);\r
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;\r
- }\r
- *next_in += block_size;\r
- *available_in -= block_size;\r
- if (inplace) {\r
- size_t out_bytes = storage_ix >> 3;\r
- BROTLI_DCHECK(out_bytes <= *available_out);\r
- BROTLI_DCHECK((storage_ix & 7) == 0 || out_bytes < *available_out);\r
- *next_out += out_bytes;\r
- *available_out -= out_bytes;\r
- s->total_out_ += out_bytes;\r
- if (total_out) *total_out = s->total_out_;\r
- } else {\r
- size_t out_bytes = storage_ix >> 3;\r
- s->next_out_ = storage;\r
- s->available_out_ = out_bytes;\r
- }\r
- s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);\r
- s->last_bytes_bits_ = storage_ix & 7u;\r
-\r
- if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;\r
- if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;\r
- continue;\r
- }\r
- break;\r
- }\r
- BROTLI_FREE(m, tmp_command_buf);\r
- BROTLI_FREE(m, tmp_literal_buf);\r
- CheckFlushComplete(s);\r
- return BROTLI_TRUE;\r
-}\r
-\r
-static BROTLI_BOOL ProcessMetadata(\r
- BrotliEncoderState* s, size_t* available_in, const uint8_t** next_in,\r
- size_t* available_out, uint8_t** next_out, size_t* total_out) {\r
- if (*available_in > (1u << 24)) return BROTLI_FALSE;\r
- /* Switch to metadata block workflow, if required. */\r
- if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {\r
- s->remaining_metadata_bytes_ = (uint32_t)*available_in;\r
- s->stream_state_ = BROTLI_STREAM_METADATA_HEAD;\r
- }\r
- if (s->stream_state_ != BROTLI_STREAM_METADATA_HEAD &&\r
- s->stream_state_ != BROTLI_STREAM_METADATA_BODY) {\r
- return BROTLI_FALSE;\r
- }\r
-\r
- while (BROTLI_TRUE) {\r
- if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {\r
- continue;\r
- }\r
- if (s->available_out_ != 0) break;\r
-\r
- if (s->input_pos_ != s->last_flush_pos_) {\r
- BROTLI_BOOL result = EncodeData(s, BROTLI_FALSE, BROTLI_TRUE,\r
- &s->available_out_, &s->next_out_);\r
- if (!result) return BROTLI_FALSE;\r
- continue;\r
- }\r
-\r
- if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD) {\r
- s->next_out_ = s->tiny_buf_.u8;\r
- s->available_out_ =\r
- WriteMetadataHeader(s, s->remaining_metadata_bytes_, s->next_out_);\r
- s->stream_state_ = BROTLI_STREAM_METADATA_BODY;\r
- continue;\r
- } else {\r
- /* Exit workflow only when there is no more input and no more output.\r
- Otherwise client may continue producing empty metadata blocks. */\r
- if (s->remaining_metadata_bytes_ == 0) {\r
- s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;\r
- s->stream_state_ = BROTLI_STREAM_PROCESSING;\r
- break;\r
- }\r
- if (*available_out) {\r
- /* Directly copy input to output. */\r
- uint32_t copy = (uint32_t)BROTLI_MIN(\r
- size_t, s->remaining_metadata_bytes_, *available_out);\r
- memcpy(*next_out, *next_in, copy);\r
- *next_in += copy;\r
- *available_in -= copy;\r
- s->remaining_metadata_bytes_ -= copy;\r
- *next_out += copy;\r
- *available_out -= copy;\r
- } else {\r
- /* This guarantees progress in "TakeOutput" workflow. */\r
- uint32_t copy = BROTLI_MIN(uint32_t, s->remaining_metadata_bytes_, 16);\r
- s->next_out_ = s->tiny_buf_.u8;\r
- memcpy(s->next_out_, *next_in, copy);\r
- *next_in += copy;\r
- *available_in -= copy;\r
- s->remaining_metadata_bytes_ -= copy;\r
- s->available_out_ = copy;\r
- }\r
- continue;\r
- }\r
- }\r
-\r
- return BROTLI_TRUE;\r
-}\r
-\r
-static void UpdateSizeHint(BrotliEncoderState* s, size_t available_in) {\r
- if (s->params.size_hint == 0) {\r
- uint64_t delta = UnprocessedInputSize(s);\r
- uint64_t tail = available_in;\r
- uint32_t limit = 1u << 30;\r
- uint32_t total;\r
- if ((delta >= limit) || (tail >= limit) || ((delta + tail) >= limit)) {\r
- total = limit;\r
- } else {\r
- total = (uint32_t)(delta + tail);\r
- }\r
- s->params.size_hint = total;\r
- }\r
-}\r
-\r
-BROTLI_BOOL BrotliEncoderCompressStream(\r
- BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,\r
- const uint8_t** next_in, size_t* available_out,uint8_t** next_out,\r
- size_t* total_out) {\r
- if (!EnsureInitialized(s)) return BROTLI_FALSE;\r
-\r
- /* Unfinished metadata block; check requirements. */\r
- if (s->remaining_metadata_bytes_ != BROTLI_UINT32_MAX) {\r
- if (*available_in != s->remaining_metadata_bytes_) return BROTLI_FALSE;\r
- if (op != BROTLI_OPERATION_EMIT_METADATA) return BROTLI_FALSE;\r
- }\r
-\r
- if (op == BROTLI_OPERATION_EMIT_METADATA) {\r
- UpdateSizeHint(s, 0); /* First data metablock might be emitted here. */\r
- return ProcessMetadata(\r
- s, available_in, next_in, available_out, next_out, total_out);\r
- }\r
-\r
- if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD ||\r
- s->stream_state_ == BROTLI_STREAM_METADATA_BODY) {\r
- return BROTLI_FALSE;\r
- }\r
-\r
- if (s->stream_state_ != BROTLI_STREAM_PROCESSING && *available_in != 0) {\r
- return BROTLI_FALSE;\r
- }\r
- if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||\r
- s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {\r
- return BrotliEncoderCompressStreamFast(s, op, available_in, next_in,\r
- available_out, next_out, total_out);\r
- }\r
- while (BROTLI_TRUE) {\r
- size_t remaining_block_size = RemainingInputBlockSize(s);\r
-\r
- if (remaining_block_size != 0 && *available_in != 0) {\r
- size_t copy_input_size =\r
- BROTLI_MIN(size_t, remaining_block_size, *available_in);\r
- CopyInputToRingBuffer(s, copy_input_size, *next_in);\r
- *next_in += copy_input_size;\r
- *available_in -= copy_input_size;\r
- continue;\r
- }\r
-\r
- if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {\r
- continue;\r
- }\r
-\r
- /* Compress data only when internal output buffer is empty, stream is not\r
- finished and there is no pending flush request. */\r
- if (s->available_out_ == 0 &&\r
- s->stream_state_ == BROTLI_STREAM_PROCESSING) {\r
- if (remaining_block_size == 0 || op != BROTLI_OPERATION_PROCESS) {\r
- BROTLI_BOOL is_last = TO_BROTLI_BOOL(\r
- (*available_in == 0) && op == BROTLI_OPERATION_FINISH);\r
- BROTLI_BOOL force_flush = TO_BROTLI_BOOL(\r
- (*available_in == 0) && op == BROTLI_OPERATION_FLUSH);\r
- BROTLI_BOOL result;\r
- UpdateSizeHint(s, *available_in);\r
- result = EncodeData(s, is_last, force_flush,\r
- &s->available_out_, &s->next_out_);\r
- if (!result) return BROTLI_FALSE;\r
- if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;\r
- if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;\r
- continue;\r
- }\r
- }\r
- break;\r
- }\r
- CheckFlushComplete(s);\r
- return BROTLI_TRUE;\r
-}\r
-\r
-BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* s) {\r
- return TO_BROTLI_BOOL(s->stream_state_ == BROTLI_STREAM_FINISHED &&\r
- !BrotliEncoderHasMoreOutput(s));\r
-}\r
-\r
-BROTLI_BOOL BrotliEncoderHasMoreOutput(BrotliEncoderState* s) {\r
- return TO_BROTLI_BOOL(s->available_out_ != 0);\r
-}\r
-\r
-const uint8_t* BrotliEncoderTakeOutput(BrotliEncoderState* s, size_t* size) {\r
- size_t consumed_size = s->available_out_;\r
- uint8_t* result = s->next_out_;\r
- if (*size) {\r
- consumed_size = BROTLI_MIN(size_t, *size, s->available_out_);\r
- }\r
- if (consumed_size) {\r
- s->next_out_ += consumed_size;\r
- s->available_out_ -= consumed_size;\r
- s->total_out_ += consumed_size;\r
- CheckFlushComplete(s);\r
- *size = consumed_size;\r
- } else {\r
- *size = 0;\r
- result = 0;\r
- }\r
- return result;\r
-}\r
-\r
-uint32_t BrotliEncoderVersion(void) {\r
- return BROTLI_VERSION;\r
-}\r
-\r
-#if defined(__cplusplus) || defined(c_plusplus)\r
-} /* extern "C" */\r
-#endif\r