+++ /dev/null
-/* Copyright 2010 Google Inc. All Rights Reserved.\r
-\r
- Distributed under MIT license.\r
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT\r
-*/\r
-\r
-/* A (forgetful) hash table to the data seen by the compressor, to\r
- help create backward references to previous data. */\r
-\r
-#ifndef BROTLI_ENC_HASH_H_\r
-#define BROTLI_ENC_HASH_H_\r
-\r
-#include <string.h> /* memcmp, memset */\r
-\r
-#include "../common/constants.h"\r
-#include "../common/dictionary.h"\r
-#include "../common/platform.h"\r
-#include <brotli/types.h>\r
-#include "./encoder_dict.h"\r
-#include "./fast_log.h"\r
-#include "./find_match_length.h"\r
-#include "./memory.h"\r
-#include "./quality.h"\r
-#include "./static_dict.h"\r
-\r
-#if defined(__cplusplus) || defined(c_plusplus)\r
-extern "C" {\r
-#endif\r
-\r
-/* Pointer to hasher data.\r
- *\r
- * Excluding initialization and destruction, hasher can be passed as\r
- * HasherHandle by value.\r
- *\r
- * Typically hasher data consists of 3 sections:\r
- * * HasherCommon structure\r
- * * private structured hasher data, depending on hasher type\r
- * * private dynamic hasher data, depending on hasher type and parameters\r
- *\r
- * Using "define" instead of "typedef", because on MSVC __restrict does not work\r
- * on typedef pointer types. */\r
-#define HasherHandle uint8_t*\r
-\r
-typedef struct {\r
- BrotliHasherParams params;\r
-\r
- /* False if hasher needs to be "prepared" before use. */\r
- BROTLI_BOOL is_prepared_;\r
-\r
- size_t dict_num_lookups;\r
- size_t dict_num_matches;\r
-} HasherCommon;\r
-\r
-static BROTLI_INLINE HasherCommon* GetHasherCommon(HasherHandle handle) {\r
- return (HasherCommon*)handle;\r
-}\r
-\r
-#define score_t size_t\r
-\r
-static const uint32_t kCutoffTransformsCount = 10;\r
-/* 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 */\r
-/* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */\r
-static const uint64_t kCutoffTransforms =\r
- BROTLI_MAKE_UINT64_T(0x071B520A, 0xDA2D3200);\r
-\r
-typedef struct HasherSearchResult {\r
- size_t len;\r
- size_t distance;\r
- score_t score;\r
- int len_code_delta; /* == len_code - len */\r
-} HasherSearchResult;\r
-\r
-/* kHashMul32 multiplier has these properties:\r
- * The multiplier must be odd. Otherwise we may lose the highest bit.\r
- * No long streaks of ones or zeros.\r
- * There is no effort to ensure that it is a prime, the oddity is enough\r
- for this use.\r
- * The number has been tuned heuristically against compression benchmarks. */\r
-static const uint32_t kHashMul32 = 0x1E35A7BD;\r
-static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD);\r
-static const uint64_t kHashMul64Long =\r
- BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);\r
-\r
-static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {\r
- uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;\r
- /* The higher bits contain more mixture from the multiplication,\r
- so we take our results from there. */\r
- return h >> (32 - 14);\r
-}\r
-\r
-static BROTLI_INLINE void PrepareDistanceCache(\r
- int* BROTLI_RESTRICT distance_cache, const int num_distances) {\r
- if (num_distances > 4) {\r
- int last_distance = distance_cache[0];\r
- distance_cache[4] = last_distance - 1;\r
- distance_cache[5] = last_distance + 1;\r
- distance_cache[6] = last_distance - 2;\r
- distance_cache[7] = last_distance + 2;\r
- distance_cache[8] = last_distance - 3;\r
- distance_cache[9] = last_distance + 3;\r
- if (num_distances > 10) {\r
- int next_last_distance = distance_cache[1];\r
- distance_cache[10] = next_last_distance - 1;\r
- distance_cache[11] = next_last_distance + 1;\r
- distance_cache[12] = next_last_distance - 2;\r
- distance_cache[13] = next_last_distance + 2;\r
- distance_cache[14] = next_last_distance - 3;\r
- distance_cache[15] = next_last_distance + 3;\r
- }\r
- }\r
-}\r
-\r
-#define BROTLI_LITERAL_BYTE_SCORE 135\r
-#define BROTLI_DISTANCE_BIT_PENALTY 30\r
-/* Score must be positive after applying maximal penalty. */\r
-#define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))\r
-\r
-/* Usually, we always choose the longest backward reference. This function\r
- allows for the exception of that rule.\r
-\r
- If we choose a backward reference that is further away, it will\r
- usually be coded with more bits. We approximate this by assuming\r
- log2(distance). If the distance can be expressed in terms of the\r
- last four distances, we use some heuristic constants to estimate\r
- the bits cost. For the first up to four literals we use the bit\r
- cost of the literals from the literal cost model, after that we\r
- use the average bit cost of the cost model.\r
-\r
- This function is used to sometimes discard a longer backward reference\r
- when it is not much longer and the bit cost for encoding it is more\r
- than the saved literals.\r
-\r
- backward_reference_offset MUST be positive. */\r
-static BROTLI_INLINE score_t BackwardReferenceScore(\r
- size_t copy_length, size_t backward_reference_offset) {\r
- return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length -\r
- BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset);\r
-}\r
-\r
-static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(\r
- size_t copy_length) {\r
- return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length +\r
- BROTLI_SCORE_BASE + 15;\r
-}\r
-\r
-static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance(\r
- size_t distance_short_code) {\r
- return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE);\r
-}\r
-\r
-static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(\r
- const BrotliEncoderDictionary* dictionary, size_t item, const uint8_t* data,\r
- size_t max_length, size_t max_backward, size_t max_distance,\r
- HasherSearchResult* out) {\r
- size_t len;\r
- size_t word_idx;\r
- size_t offset;\r
- size_t matchlen;\r
- size_t backward;\r
- score_t score;\r
- len = item & 0x1F;\r
- word_idx = item >> 5;\r
- offset = dictionary->words->offsets_by_length[len] + len * word_idx;\r
- if (len > max_length) {\r
- return BROTLI_FALSE;\r
- }\r
-\r
- matchlen =\r
- FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len);\r
- if (matchlen + dictionary->cutoffTransformsCount <= len || matchlen == 0) {\r
- return BROTLI_FALSE;\r
- }\r
- {\r
- size_t cut = len - matchlen;\r
- size_t transform_id = (cut << 2) +\r
- (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);\r
- backward = max_backward + 1 + word_idx +\r
- (transform_id << dictionary->words->size_bits_by_length[len]);\r
- }\r
- if (backward > max_distance) {\r
- return BROTLI_FALSE;\r
- }\r
- score = BackwardReferenceScore(matchlen, backward);\r
- if (score < out->score) {\r
- return BROTLI_FALSE;\r
- }\r
- out->len = matchlen;\r
- out->len_code_delta = (int)len - (int)matchlen;\r
- out->distance = backward;\r
- out->score = score;\r
- return BROTLI_TRUE;\r
-}\r
-\r
-static BROTLI_INLINE void SearchInStaticDictionary(\r
- const BrotliEncoderDictionary* dictionary,\r
- HasherHandle handle, const uint8_t* data, size_t max_length,\r
- size_t max_backward, size_t max_distance,\r
- HasherSearchResult* out, BROTLI_BOOL shallow) {\r
- size_t key;\r
- size_t i;\r
- HasherCommon* self = GetHasherCommon(handle);\r
- if (self->dict_num_matches < (self->dict_num_lookups >> 7)) {\r
- return;\r
- }\r
- key = Hash14(data) << 1;\r
- for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) {\r
- size_t item = dictionary->hash_table[key];\r
- self->dict_num_lookups++;\r
- if (item != 0) {\r
- BROTLI_BOOL item_matches = TestStaticDictionaryItem(\r
- dictionary, item, data, max_length, max_backward, max_distance, out);\r
- if (item_matches) {\r
- self->dict_num_matches++;\r
- }\r
- }\r
- }\r
-}\r
-\r
-typedef struct BackwardMatch {\r
- uint32_t distance;\r
- uint32_t length_and_code;\r
-} BackwardMatch;\r
-\r
-static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self,\r
- size_t dist, size_t len) {\r
- self->distance = (uint32_t)dist;\r
- self->length_and_code = (uint32_t)(len << 5);\r
-}\r
-\r
-static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self,\r
- size_t dist, size_t len, size_t len_code) {\r
- self->distance = (uint32_t)dist;\r
- self->length_and_code =\r
- (uint32_t)((len << 5) | (len == len_code ? 0 : len_code));\r
-}\r
-\r
-static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) {\r
- return self->length_and_code >> 5;\r
-}\r
-\r
-static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {\r
- size_t code = self->length_and_code & 31;\r
- return code ? code : BackwardMatchLength(self);\r
-}\r
-\r
-#define EXPAND_CAT(a, b) CAT(a, b)\r
-#define CAT(a, b) a ## b\r
-#define FN(X) EXPAND_CAT(X, HASHER())\r
-\r
-#define HASHER() H10\r
-#define BUCKET_BITS 17\r
-#define MAX_TREE_SEARCH_DEPTH 64\r
-#define MAX_TREE_COMP_LENGTH 128\r
-#include "./hash_to_binary_tree_inc.h" /* NOLINT(build/include) */\r
-#undef MAX_TREE_SEARCH_DEPTH\r
-#undef MAX_TREE_COMP_LENGTH\r
-#undef BUCKET_BITS\r
-#undef HASHER\r
-/* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */\r
-#define MAX_NUM_MATCHES_H10 128\r
-\r
-/* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression\r
- a little faster (0.5% - 1%) and it compresses 0.15% better on small text\r
- and HTML inputs. */\r
-\r
-#define HASHER() H2\r
-#define BUCKET_BITS 16\r
-#define BUCKET_SWEEP 1\r
-#define HASH_LEN 5\r
-#define USE_DICTIONARY 1\r
-#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */\r
-#undef BUCKET_SWEEP\r
-#undef USE_DICTIONARY\r
-#undef HASHER\r
-\r
-#define HASHER() H3\r
-#define BUCKET_SWEEP 2\r
-#define USE_DICTIONARY 0\r
-#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */\r
-#undef USE_DICTIONARY\r
-#undef BUCKET_SWEEP\r
-#undef BUCKET_BITS\r
-#undef HASHER\r
-\r
-#define HASHER() H4\r
-#define BUCKET_BITS 17\r
-#define BUCKET_SWEEP 4\r
-#define USE_DICTIONARY 1\r
-#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */\r
-#undef USE_DICTIONARY\r
-#undef HASH_LEN\r
-#undef BUCKET_SWEEP\r
-#undef BUCKET_BITS\r
-#undef HASHER\r
-\r
-#define HASHER() H5\r
-#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */\r
-#undef HASHER\r
-\r
-#define HASHER() H6\r
-#include "./hash_longest_match64_inc.h" /* NOLINT(build/include) */\r
-#undef HASHER\r
-\r
-#define BUCKET_BITS 15\r
-\r
-#define NUM_LAST_DISTANCES_TO_CHECK 4\r
-#define NUM_BANKS 1\r
-#define BANK_BITS 16\r
-#define HASHER() H40\r
-#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */\r
-#undef HASHER\r
-#undef NUM_LAST_DISTANCES_TO_CHECK\r
-\r
-#define NUM_LAST_DISTANCES_TO_CHECK 10\r
-#define HASHER() H41\r
-#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */\r
-#undef HASHER\r
-#undef NUM_LAST_DISTANCES_TO_CHECK\r
-#undef NUM_BANKS\r
-#undef BANK_BITS\r
-\r
-#define NUM_LAST_DISTANCES_TO_CHECK 16\r
-#define NUM_BANKS 512\r
-#define BANK_BITS 9\r
-#define HASHER() H42\r
-#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */\r
-#undef HASHER\r
-#undef NUM_LAST_DISTANCES_TO_CHECK\r
-#undef NUM_BANKS\r
-#undef BANK_BITS\r
-\r
-#undef BUCKET_BITS\r
-\r
-#define HASHER() H54\r
-#define BUCKET_BITS 20\r
-#define BUCKET_SWEEP 4\r
-#define HASH_LEN 7\r
-#define USE_DICTIONARY 0\r
-#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */\r
-#undef USE_DICTIONARY\r
-#undef HASH_LEN\r
-#undef BUCKET_SWEEP\r
-#undef BUCKET_BITS\r
-#undef HASHER\r
-\r
-/* fast large window hashers */\r
-\r
-#define HASHER() HROLLING_FAST\r
-#define CHUNKLEN 32\r
-#define JUMP 4\r
-#define NUMBUCKETS 16777216\r
-#define MASK ((NUMBUCKETS * 64) - 1)\r
-#include "./hash_rolling_inc.h" /* NOLINT(build/include) */\r
-#undef JUMP\r
-#undef HASHER\r
-\r
-\r
-#define HASHER() HROLLING\r
-#define JUMP 1\r
-#include "./hash_rolling_inc.h" /* NOLINT(build/include) */\r
-#undef MASK\r
-#undef NUMBUCKETS\r
-#undef JUMP\r
-#undef CHUNKLEN\r
-#undef HASHER\r
-\r
-#define HASHER() H35\r
-#define HASHER_A H3\r
-#define HASHER_B HROLLING_FAST\r
-#include "./hash_composite_inc.h" /* NOLINT(build/include) */\r
-#undef HASHER_A\r
-#undef HASHER_B\r
-#undef HASHER\r
-\r
-#define HASHER() H55\r
-#define HASHER_A H54\r
-#define HASHER_B HROLLING_FAST\r
-#include "./hash_composite_inc.h" /* NOLINT(build/include) */\r
-#undef HASHER_A\r
-#undef HASHER_B\r
-#undef HASHER\r
-\r
-#define HASHER() H65\r
-#define HASHER_A H6\r
-#define HASHER_B HROLLING\r
-#include "./hash_composite_inc.h" /* NOLINT(build/include) */\r
-#undef HASHER_A\r
-#undef HASHER_B\r
-#undef HASHER\r
-\r
-#undef FN\r
-#undef CAT\r
-#undef EXPAND_CAT\r
-\r
-#define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)\\r
- H(35) H(55) H(65)\r
-#define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)\r
-\r
-static BROTLI_INLINE void DestroyHasher(\r
- MemoryManager* m, HasherHandle* handle) {\r
- if (*handle == NULL) return;\r
- BROTLI_FREE(m, *handle);\r
-}\r
-\r
-static BROTLI_INLINE void HasherReset(HasherHandle handle) {\r
- if (handle == NULL) return;\r
- GetHasherCommon(handle)->is_prepared_ = BROTLI_FALSE;\r
-}\r
-\r
-static BROTLI_INLINE size_t HasherSize(const BrotliEncoderParams* params,\r
- BROTLI_BOOL one_shot, const size_t input_size) {\r
- size_t result = sizeof(HasherCommon);\r
- switch (params->hasher.type) {\r
-#define SIZE_(N) \\r
- case N: \\r
- result += HashMemAllocInBytesH ## N(params, one_shot, input_size); \\r
- break;\r
- FOR_ALL_HASHERS(SIZE_)\r
-#undef SIZE_\r
- default:\r
- break;\r
- }\r
- return result;\r
-}\r
-\r
-static BROTLI_INLINE void HasherSetup(MemoryManager* m, HasherHandle* handle,\r
- BrotliEncoderParams* params, const uint8_t* data, size_t position,\r
- size_t input_size, BROTLI_BOOL is_last) {\r
- HasherHandle self = NULL;\r
- HasherCommon* common = NULL;\r
- BROTLI_BOOL one_shot = (position == 0 && is_last);\r
- if (*handle == NULL) {\r
- size_t alloc_size;\r
- ChooseHasher(params, ¶ms->hasher);\r
- alloc_size = HasherSize(params, one_shot, input_size);\r
- self = BROTLI_ALLOC(m, uint8_t, alloc_size);\r
- if (BROTLI_IS_OOM(m)) return;\r
- *handle = self;\r
- common = GetHasherCommon(self);\r
- common->params = params->hasher;\r
- switch (common->params.type) {\r
-#define INITIALIZE_(N) \\r
- case N: \\r
- InitializeH ## N(*handle, params); \\r
- break;\r
- FOR_ALL_HASHERS(INITIALIZE_);\r
-#undef INITIALIZE_\r
- default:\r
- break;\r
- }\r
- HasherReset(*handle);\r
- }\r
-\r
- self = *handle;\r
- common = GetHasherCommon(self);\r
- if (!common->is_prepared_) {\r
- switch (common->params.type) {\r
-#define PREPARE_(N) \\r
- case N: \\r
- PrepareH ## N(self, one_shot, input_size, data); \\r
- break;\r
- FOR_ALL_HASHERS(PREPARE_)\r
-#undef PREPARE_\r
- default: break;\r
- }\r
- if (position == 0) {\r
- common->dict_num_lookups = 0;\r
- common->dict_num_matches = 0;\r
- }\r
- common->is_prepared_ = BROTLI_TRUE;\r
- }\r
-}\r
-\r
-static BROTLI_INLINE void InitOrStitchToPreviousBlock(\r
- MemoryManager* m, HasherHandle* handle, const uint8_t* data, size_t mask,\r
- BrotliEncoderParams* params, size_t position, size_t input_size,\r
- BROTLI_BOOL is_last) {\r
- HasherHandle self;\r
- HasherSetup(m, handle, params, data, position, input_size, is_last);\r
- if (BROTLI_IS_OOM(m)) return;\r
- self = *handle;\r
- switch (GetHasherCommon(self)->params.type) {\r
-#define INIT_(N) \\r
- case N: \\r
- StitchToPreviousBlockH ## N(self, input_size, position, data, mask); \\r
- break;\r
- FOR_ALL_HASHERS(INIT_)\r
-#undef INIT_\r
- default: break;\r
- }\r
-}\r
-\r
-#if defined(__cplusplus) || defined(c_plusplus)\r
-} /* extern "C" */\r
-#endif\r
-\r
-#endif /* BROTLI_ENC_HASH_H_ */\r