+++ /dev/null
-/* NOLINT(build/header_guard) */\r
-/* Copyright 2013 Google Inc. All Rights Reserved.\r
-\r
- Distributed under MIT license.\r
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT\r
-*/\r
-\r
-/* template parameters: FN, CODE\r
-\r
- FN(...) wraps the histogram type name (see HistogramType below), the\r
- histogram helpers called in this file (HistogramAddHistogram, HistogramClear,\r
- BrotliPopulationCost) and the name of every function defined here.\r
- CODE(...) wraps every function body in this file.\r
- NOTE(review): both macros are defined outside this file -- presumably by the\r
- includer, once per histogram kind; confirm there. */\r
-\r
-#define HistogramType FN(Histogram)\r
-\r
-/* Computes the bit cost reduction by combining out[idx1] and out[idx2] and if\r
- it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue.\r
-\r
- out:           histograms indexed by cluster id; bit_cost_ and total_count_\r
-                of out[idx1] and out[idx2] are read, nothing is written.\r
- cluster_size:  per-cluster sizes, passed to ClusterCostDiff.\r
- idx1, idx2:    cluster ids to evaluate; equal ids are ignored, and the pair\r
-                is stored with the smaller id first.\r
- max_num_pairs: maximum number of entries kept in pairs[].\r
- pairs:         queue whose element 0 is the best pair seen so far; the other\r
-                elements are kept in no particular order.\r
- num_pairs:     in/out count of valid entries in pairs[].\r
-\r
- The stored cost_diff is\r
-   cost_combo + 0.5 * ClusterCostDiff(...) - bit_cost_(idx1) - bit_cost_(idx2),\r
- i.e. the cost of the merged histogram minus the cost of the two separate\r
- ones, plus a cluster-size term. */\r
-BROTLI_INTERNAL void FN(BrotliCompareAndPushToQueue)(\r
- const HistogramType* out, const uint32_t* cluster_size, uint32_t idx1,\r
- uint32_t idx2, size_t max_num_pairs, HistogramPair* pairs,\r
- size_t* num_pairs) CODE({\r
- BROTLI_BOOL is_good_pair = BROTLI_FALSE;\r
- HistogramPair p;\r
- p.idx1 = p.idx2 = 0;\r
- p.cost_diff = p.cost_combo = 0;\r
- if (idx1 == idx2) { /* A cluster is never paired with itself. */\r
- return;\r
- }\r
- if (idx2 < idx1) { /* Normalize the pair so that idx1 < idx2. */\r
- uint32_t t = idx2;\r
- idx2 = idx1;\r
- idx1 = t;\r
- }\r
- p.idx1 = idx1;\r
- p.idx2 = idx2;\r
- p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);\r
- p.cost_diff -= out[idx1].bit_cost_;\r
- p.cost_diff -= out[idx2].bit_cost_;\r
-\r
- if (out[idx1].total_count_ == 0) { /* Empty: reuse the other one's cost. */\r
- p.cost_combo = out[idx2].bit_cost_;\r
- is_good_pair = BROTLI_TRUE;\r
- } else if (out[idx2].total_count_ == 0) { /* Same, with the roles swapped. */\r
- p.cost_combo = out[idx1].bit_cost_;\r
- is_good_pair = BROTLI_TRUE;\r
- } else { /* Both non-empty: the combined cost has to be computed. An empty\r
-   queue accepts anything (1e99); otherwise the pair has to beat\r
-   max(0.0, cost_diff of the current front). */\r
- double threshold = *num_pairs == 0 ? 1e99 :\r
- BROTLI_MAX(double, 0.0, pairs[0].cost_diff);\r
- HistogramType combo = out[idx1];\r
- double cost_combo;\r
- FN(HistogramAddHistogram)(&combo, &out[idx2]);\r
- cost_combo = FN(BrotliPopulationCost)(&combo);\r
- if (cost_combo < threshold - p.cost_diff) { /* final cost_diff < threshold */\r
- p.cost_combo = cost_combo;\r
- is_good_pair = BROTLI_TRUE;\r
- }\r
- }\r
- if (is_good_pair) {\r
- p.cost_diff += p.cost_combo; /* cost_diff is final from here on. */\r
- if (*num_pairs > 0 && HistogramPairIsLess(&pairs[0], &p)) {\r
- /* Replace the top of the queue if needed. */\r
- if (*num_pairs < max_num_pairs) { /* Room left: keep the old front at the\r
-   end; otherwise it is overwritten and lost. */\r
- pairs[*num_pairs] = pairs[0];\r
- ++(*num_pairs);\r
- }\r
- pairs[0] = p;\r
- } else if (*num_pairs < max_num_pairs) { /* Not better than the front:\r
-   append while there is room, otherwise p is dropped. */\r
- pairs[*num_pairs] = p;\r
- ++(*num_pairs);\r
- }\r
- }\r
-})\r
-\r
-BROTLI_INTERNAL size_t FN(BrotliHistogramCombine)(HistogramType* out,\r
- uint32_t* cluster_size,\r
- uint32_t* symbols,\r
- uint32_t* clusters,\r
- HistogramPair* pairs,\r
- size_t num_clusters,\r
- size_t symbols_size,\r
- size_t max_clusters,\r
- size_t max_num_pairs) CODE({ /* Greedily merges histogram clusters.\r
-\r
-   out:           histograms indexed by cluster id; merged in place.\r
-   cluster_size:  per-cluster sizes; a merged cluster gets the sum.\r
-   symbols:       symbols_size entries; every entry equal to an absorbed\r
-                  cluster id is rewritten to the id it was merged into.\r
-   clusters:      num_clusters live cluster ids; an absorbed id is removed\r
-                  and the tail is shifted down.\r
-   pairs:         scratch queue of candidate merges, best candidate first;\r
-                  at most max_num_pairs entries are kept.\r
-   max_clusters:  cluster count to get down to once no cost-reducing merge\r
-                  is left (see the loop below).\r
-   Returns the number of ids left in clusters[]. */\r
- double cost_diff_threshold = 0.0; /* First: only merges with cost_diff < 0. */\r
- size_t min_cluster_size = 1; /* First: may merge down to a single cluster. */\r
- size_t num_pairs = 0;\r
-\r
- {\r
- /* We maintain a vector of histogram pairs, with the property that the pair\r
- with the maximum bit cost reduction is the first. */\r
- size_t idx1;\r
- for (idx1 = 0; idx1 < num_clusters; ++idx1) {\r
- size_t idx2;\r
- for (idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {\r
- FN(BrotliCompareAndPushToQueue)(out, cluster_size, clusters[idx1],\r
- clusters[idx2], max_num_pairs, &pairs[0], &num_pairs);\r
- }\r
- }\r
- }\r
-\r
- while (num_clusters > min_cluster_size) {\r
- uint32_t best_idx1;\r
- uint32_t best_idx2;\r
- size_t i;\r
- if (pairs[0].cost_diff >= cost_diff_threshold) { /* The best candidate does\r
-   not pass the current threshold: raise the threshold to 1e99 and continue\r
-   only while more than max_clusters clusters remain.\r
-   NOTE(review): pairs[0] is read without checking num_pairs -- this appears\r
-   to rely on the queue being non-empty whenever the loop runs; confirm. */\r
- cost_diff_threshold = 1e99;\r
- min_cluster_size = max_clusters;\r
- continue;\r
- }\r
- /* Take the best pair from the top of heap. */\r
- best_idx1 = pairs[0].idx1;\r
- best_idx2 = pairs[0].idx2;\r
- FN(HistogramAddHistogram)(&out[best_idx1], &out[best_idx2]);\r
- out[best_idx1].bit_cost_ = pairs[0].cost_combo; /* Cached with the pair. */\r
- cluster_size[best_idx1] += cluster_size[best_idx2];\r
- for (i = 0; i < symbols_size; ++i) { /* Redirect the absorbed cluster id. */\r
- if (symbols[i] == best_idx2) {\r
- symbols[i] = best_idx1;\r
- }\r
- }\r
- for (i = 0; i < num_clusters; ++i) { /* Drop best_idx2 from the live list. */\r
- if (clusters[i] == best_idx2) {\r
- memmove(&clusters[i], &clusters[i + 1],\r
- (num_clusters - i - 1) * sizeof(clusters[0]));\r
- break;\r
- }\r
- }\r
- --num_clusters;\r
- {\r
- /* Remove pairs intersecting the just combined best pair. */\r
- size_t copy_to_idx = 0; /* Next free slot of the compacted queue. */\r
- for (i = 0; i < num_pairs; ++i) {\r
- HistogramPair* p = &pairs[i];\r
- if (p->idx1 == best_idx1 || p->idx2 == best_idx1 ||\r
- p->idx1 == best_idx2 || p->idx2 == best_idx2) {\r
- /* Remove invalid pair from the queue. */\r
- continue;\r
- }\r
- if (HistogramPairIsLess(&pairs[0], p)) {\r
- /* Replace the top of the queue if needed. */\r
- HistogramPair front = pairs[0];\r
- pairs[0] = *p;\r
- pairs[copy_to_idx] = front;\r
- } else {\r
- pairs[copy_to_idx] = *p;\r
- }\r
- ++copy_to_idx;\r
- }\r
- num_pairs = copy_to_idx;\r
- }\r
-\r
- /* Push new pairs formed with the combined histogram to the heap. */\r
- for (i = 0; i < num_clusters; ++i) { /* The clusters[i] == best_idx1 call\r
-   does nothing: equal ids are ignored by BrotliCompareAndPushToQueue. */\r
- FN(BrotliCompareAndPushToQueue)(out, cluster_size, best_idx1, clusters[i],\r
- max_num_pairs, &pairs[0], &num_pairs);\r
- }\r
- }\r
- return num_clusters;\r
-})\r
-\r
-/* What is the bit cost of moving histogram from cur_symbol to candidate.\r
-\r
- Returns 0.0 when histogram is empty (total_count_ == 0); otherwise the\r
- population cost of (histogram + candidate) minus candidate->bit_cost_, i.e.\r
- by how much candidate's cost would grow if histogram were added to it.\r
- candidate->bit_cost_ is read as stored, so it has to be current. Neither\r
- argument is modified: the sum is built in a local copy. */\r
-BROTLI_INTERNAL double FN(BrotliHistogramBitCostDistance)(\r
- const HistogramType* histogram, const HistogramType* candidate) CODE({\r
- if (histogram->total_count_ == 0) {\r
- return 0.0;\r
- } else {\r
- HistogramType tmp = *histogram; /* Work on a copy of the input. */\r
- FN(HistogramAddHistogram)(&tmp, candidate);\r
- return FN(BrotliPopulationCost)(&tmp) - candidate->bit_cost_;\r
- }\r
-})\r
-\r
-/* Find the best 'out' histogram for each of the 'in' histograms.\r
- When called, clusters[0..num_clusters) contains the unique values from\r
- symbols[0..in_size), but this property is not preserved in this function.\r
- Note: we assume that out[]->bit_cost_ is already up-to-date.\r
-\r
- For every in[i] the candidates are the cluster just chosen for in[i - 1]\r
- (or the current symbols[0] when i == 0) followed by all of clusters[]; the\r
- first candidate with the smallest BrotliHistogramBitCostDistance wins and\r
- is written to symbols[i]. Afterwards every out[clusters[j]] is cleared and\r
- rebuilt as the sum of the in[] histograms mapped to it.\r
- NOTE(review): no population cost is recomputed after the rebuild; whether\r
- bit_cost_ is still meaningful then depends on HistogramClear and\r
- HistogramAddHistogram, which are not visible here. */\r
-BROTLI_INTERNAL void FN(BrotliHistogramRemap)(const HistogramType* in,\r
- size_t in_size, const uint32_t* clusters, size_t num_clusters,\r
- HistogramType* out, uint32_t* symbols) CODE({\r
- size_t i;\r
- for (i = 0; i < in_size; ++i) {\r
- uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1]; /* symbols[i - 1]\r
-   was already rewritten by the previous iteration. */\r
- double best_bits =\r
- FN(BrotliHistogramBitCostDistance)(&in[i], &out[best_out]);\r
- size_t j;\r
- for (j = 0; j < num_clusters; ++j) {\r
- const double cur_bits =\r
- FN(BrotliHistogramBitCostDistance)(&in[i], &out[clusters[j]]);\r
- if (cur_bits < best_bits) { /* Strict: ties keep the earlier candidate. */\r
- best_bits = cur_bits;\r
- best_out = clusters[j];\r
- }\r
- }\r
- symbols[i] = best_out;\r
- }\r
-\r
- /* Recompute each out based on raw and symbols. */\r
- for (i = 0; i < num_clusters; ++i) {\r
- FN(HistogramClear)(&out[clusters[i]]);\r
- }\r
- for (i = 0; i < in_size; ++i) {\r
- FN(HistogramAddHistogram)(&out[symbols[i]], &in[i]);\r
- }\r
-})\r
-\r
-/* Reorders elements of the out[0..length) array and changes values in\r
- symbols[0..length) array in the following way:\r
- * when called, symbols[] contains indexes into out[], and has N unique\r
- values (possibly N < length)\r
- * on return, symbols'[i] = f(symbols[i]) and\r
- out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,\r
- where f is a bijection between the range of symbols[] and [0..N), and\r
- the first occurrences of values in symbols'[i] come in consecutive\r
- increasing order.\r
- Returns N, the number of unique values in symbols[].\r
-\r
- Every symbols[i] has to be below length, because it is used as an index\r
- into a temporary table of length entries. Two temporary buffers are\r
- allocated through m; if BROTLI_IS_OOM(m) is set after either allocation the\r
- function returns 0 without touching out[] or symbols[]. */\r
-BROTLI_INTERNAL size_t FN(BrotliHistogramReindex)(MemoryManager* m,\r
- HistogramType* out, uint32_t* symbols, size_t length) CODE({\r
- static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX; /* "Not seen yet". */\r
- uint32_t* new_index = BROTLI_ALLOC(m, uint32_t, length); /* old id -> new. */\r
- uint32_t next_index;\r
- HistogramType* tmp;\r
- size_t i;\r
- if (BROTLI_IS_OOM(m)) return 0;\r
- for (i = 0; i < length; ++i) {\r
- new_index[i] = kInvalidIndex;\r
- }\r
- next_index = 0;\r
- for (i = 0; i < length; ++i) { /* Number values by first appearance. */\r
- if (new_index[symbols[i]] == kInvalidIndex) {\r
- new_index[symbols[i]] = next_index;\r
- ++next_index;\r
- }\r
- }\r
- /* TODO: by using idea of "cycle-sort" we can avoid allocation of\r
- tmp and reduce the number of copying by the factor of 2. */\r
- tmp = BROTLI_ALLOC(m, HistogramType, next_index);\r
- if (BROTLI_IS_OOM(m)) return 0; /* NOTE(review): new_index is not freed on\r
-   this path -- presumably reclaimed through the MemoryManager; confirm. */\r
- next_index = 0;\r
- for (i = 0; i < length; ++i) {\r
- if (new_index[symbols[i]] == next_index) { /* First use of this new id:\r
-   stash the histogram at its new position. */\r
- tmp[next_index] = out[symbols[i]];\r
- ++next_index;\r
- }\r
- symbols[i] = new_index[symbols[i]];\r
- }\r
- BROTLI_FREE(m, new_index);\r
- for (i = 0; i < next_index; ++i) { /* Copy the reordered set back. */\r
- out[i] = tmp[i];\r
- }\r
- BROTLI_FREE(m, tmp);\r
- return next_index;\r
-})\r
-\r
-BROTLI_INTERNAL void FN(BrotliClusterHistograms)(\r
- MemoryManager* m, const HistogramType* in, const size_t in_size,\r
- size_t max_histograms, HistogramType* out, size_t* out_size,\r
- uint32_t* histogram_symbols) CODE({ /* Clusters in[0..in_size) into\r
-   histograms written to out[] and maps every input to its cluster.\r
-\r
-   in, in_size:       input histograms (not modified).\r
-   max_histograms:    forwarded to BrotliHistogramCombine as max_clusters.\r
-   out:               needs room for in_size entries: it first receives a\r
-                      copy of every input; on return the first *out_size\r
-                      entries are the final histograms.\r
-   out_size:          set to the value returned by BrotliHistogramReindex;\r
-                      left untouched by the two early OOM returns.\r
-   histogram_symbols: in_size entries; on return histogram_symbols[i] is the\r
-                      index into out[] chosen for in[i]. */\r
- uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, in_size);\r
- uint32_t* clusters = BROTLI_ALLOC(m, uint32_t, in_size);\r
- size_t num_clusters = 0;\r
- const size_t max_input_histograms = 64; /* Batch size of the first pass. */\r
- size_t pairs_capacity = max_input_histograms * max_input_histograms / 2; /*\r
-   64 * 64 / 2 = 2048, which covers the 64 * 63 / 2 pairs of a full batch. */\r
- /* For the first pass of clustering, we allow all pairs. */\r
- HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity + 1);\r
- size_t i;\r
-\r
- if (BROTLI_IS_OOM(m)) return;\r
-\r
- for (i = 0; i < in_size; ++i) { /* Every input starts as its own cluster. */\r
- cluster_size[i] = 1;\r
- }\r
-\r
- for (i = 0; i < in_size; ++i) {\r
- out[i] = in[i];\r
- out[i].bit_cost_ = FN(BrotliPopulationCost)(&in[i]);\r
- histogram_symbols[i] = (uint32_t)i;\r
- }\r
-\r
- for (i = 0; i < in_size; i += max_input_histograms) { /* First pass: each run\r
-   of up to 64 consecutive inputs is clustered on its own. */\r
- size_t num_to_combine =\r
- BROTLI_MIN(size_t, in_size - i, max_input_histograms);\r
- size_t num_new_clusters;\r
- size_t j;\r
- for (j = 0; j < num_to_combine; ++j) {\r
- clusters[num_clusters + j] = (uint32_t)(i + j);\r
- }\r
- num_new_clusters =\r
- FN(BrotliHistogramCombine)(out, cluster_size,\r
- &histogram_symbols[i],\r
- &clusters[num_clusters], pairs,\r
- num_to_combine, num_to_combine,\r
- max_histograms, pairs_capacity);\r
- num_clusters += num_new_clusters; /* Survivors stay packed in clusters[]. */\r
- }\r
-\r
- {\r
- /* For the second pass, we limit the total number of histogram pairs.\r
- After this limit is reached, we only keep searching for the best pair. */\r
- size_t max_num_pairs = BROTLI_MIN(size_t,\r
- 64 * num_clusters, (num_clusters / 2) * num_clusters);\r
- BROTLI_ENSURE_CAPACITY(\r
- m, HistogramPair, pairs, pairs_capacity, max_num_pairs + 1);\r
- if (BROTLI_IS_OOM(m)) return;\r
-\r
- /* Collapse similar histograms. */\r
- num_clusters = FN(BrotliHistogramCombine)(out, cluster_size,\r
- histogram_symbols, clusters,\r
- pairs, num_clusters, in_size,\r
- max_histograms, max_num_pairs);\r
- }\r
- BROTLI_FREE(m, pairs);\r
- BROTLI_FREE(m, cluster_size);\r
- /* Find the optimal map from original histograms to the final ones. */\r
- FN(BrotliHistogramRemap)(in, in_size, clusters, num_clusters,\r
- out, histogram_symbols);\r
- BROTLI_FREE(m, clusters);\r
- /* Convert the context map to a canonical form. */\r
- *out_size = FN(BrotliHistogramReindex)(m, out, histogram_symbols, in_size);\r
- if (BROTLI_IS_OOM(m)) return;\r
-})\r
-\r
-#undef HistogramType\r