]> git.proxmox.com Git - mirror_edk2.git/blob - BaseTools/Source/C/BrotliCompress/enc/cluster_inc.h
BaseTools: Copy Brotli algorithm 3rd party source code for tool
[mirror_edk2.git] / BaseTools / Source / C / BrotliCompress / enc / cluster_inc.h
1 /* NOLINT(build/header_guard) */
2 /* Copyright 2013 Google Inc. All Rights Reserved.
3
4 Distributed under MIT license.
5 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6 */
7
8 /* template parameters: FN, CODE */
9
10 #define HistogramType FN(Histogram)
11
12 /* Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
13 it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue. */
14 BROTLI_INTERNAL void FN(BrotliCompareAndPushToQueue)(
15 const HistogramType* out, const uint32_t* cluster_size, uint32_t idx1,
16 uint32_t idx2, size_t max_num_pairs, HistogramPair* pairs,
17 size_t* num_pairs) CODE({
18 BROTLI_BOOL is_good_pair = BROTLI_FALSE;
19 HistogramPair p;
20 if (idx1 == idx2) {
21 return;
22 }
23 if (idx2 < idx1) {
24 uint32_t t = idx2;
25 idx2 = idx1;
26 idx1 = t;
27 }
28 p.idx1 = idx1;
29 p.idx2 = idx2;
30 p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
31 p.cost_diff -= out[idx1].bit_cost_;
32 p.cost_diff -= out[idx2].bit_cost_;
33
34 if (out[idx1].total_count_ == 0) {
35 p.cost_combo = out[idx2].bit_cost_;
36 is_good_pair = BROTLI_TRUE;
37 } else if (out[idx2].total_count_ == 0) {
38 p.cost_combo = out[idx1].bit_cost_;
39 is_good_pair = BROTLI_TRUE;
40 } else {
41 double threshold = *num_pairs == 0 ? 1e99 :
42 BROTLI_MAX(double, 0.0, pairs[0].cost_diff);
43 HistogramType combo = out[idx1];
44 double cost_combo;
45 FN(HistogramAddHistogram)(&combo, &out[idx2]);
46 cost_combo = FN(BrotliPopulationCost)(&combo);
47 if (cost_combo < threshold - p.cost_diff) {
48 p.cost_combo = cost_combo;
49 is_good_pair = BROTLI_TRUE;
50 }
51 }
52 if (is_good_pair) {
53 p.cost_diff += p.cost_combo;
54 if (*num_pairs > 0 && HistogramPairIsLess(&pairs[0], &p)) {
55 /* Replace the top of the queue if needed. */
56 if (*num_pairs < max_num_pairs) {
57 pairs[*num_pairs] = pairs[0];
58 ++(*num_pairs);
59 }
60 pairs[0] = p;
61 } else if (*num_pairs < max_num_pairs) {
62 pairs[*num_pairs] = p;
63 ++(*num_pairs);
64 }
65 }
66 })
67
68 BROTLI_INTERNAL size_t FN(BrotliHistogramCombine)(HistogramType* out,
69 uint32_t* cluster_size,
70 uint32_t* symbols,
71 uint32_t* clusters,
72 HistogramPair* pairs,
73 size_t num_clusters,
74 size_t symbols_size,
75 size_t max_clusters,
76 size_t max_num_pairs) CODE({
77 double cost_diff_threshold = 0.0;
78 size_t min_cluster_size = 1;
79 size_t num_pairs = 0;
80
81 {
82 /* We maintain a vector of histogram pairs, with the property that the pair
83 with the maximum bit cost reduction is the first. */
84 size_t idx1;
85 for (idx1 = 0; idx1 < num_clusters; ++idx1) {
86 size_t idx2;
87 for (idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
88 FN(BrotliCompareAndPushToQueue)(out, cluster_size, clusters[idx1],
89 clusters[idx2], max_num_pairs, &pairs[0], &num_pairs);
90 }
91 }
92 }
93
94 while (num_clusters > min_cluster_size) {
95 uint32_t best_idx1;
96 uint32_t best_idx2;
97 size_t i;
98 if (pairs[0].cost_diff >= cost_diff_threshold) {
99 cost_diff_threshold = 1e99;
100 min_cluster_size = max_clusters;
101 continue;
102 }
103 /* Take the best pair from the top of heap. */
104 best_idx1 = pairs[0].idx1;
105 best_idx2 = pairs[0].idx2;
106 FN(HistogramAddHistogram)(&out[best_idx1], &out[best_idx2]);
107 out[best_idx1].bit_cost_ = pairs[0].cost_combo;
108 cluster_size[best_idx1] += cluster_size[best_idx2];
109 for (i = 0; i < symbols_size; ++i) {
110 if (symbols[i] == best_idx2) {
111 symbols[i] = best_idx1;
112 }
113 }
114 for (i = 0; i < num_clusters; ++i) {
115 if (clusters[i] == best_idx2) {
116 memmove(&clusters[i], &clusters[i + 1],
117 (num_clusters - i - 1) * sizeof(clusters[0]));
118 break;
119 }
120 }
121 --num_clusters;
122 {
123 /* Remove pairs intersecting the just combined best pair. */
124 size_t copy_to_idx = 0;
125 for (i = 0; i < num_pairs; ++i) {
126 HistogramPair* p = &pairs[i];
127 if (p->idx1 == best_idx1 || p->idx2 == best_idx1 ||
128 p->idx1 == best_idx2 || p->idx2 == best_idx2) {
129 /* Remove invalid pair from the queue. */
130 continue;
131 }
132 if (HistogramPairIsLess(&pairs[0], p)) {
133 /* Replace the top of the queue if needed. */
134 HistogramPair front = pairs[0];
135 pairs[0] = *p;
136 pairs[copy_to_idx] = front;
137 } else {
138 pairs[copy_to_idx] = *p;
139 }
140 ++copy_to_idx;
141 }
142 num_pairs = copy_to_idx;
143 }
144
145 /* Push new pairs formed with the combined histogram to the heap. */
146 for (i = 0; i < num_clusters; ++i) {
147 FN(BrotliCompareAndPushToQueue)(out, cluster_size, best_idx1, clusters[i],
148 max_num_pairs, &pairs[0], &num_pairs);
149 }
150 }
151 return num_clusters;
152 })
153
154 /* What is the bit cost of moving histogram from cur_symbol to candidate. */
155 BROTLI_INTERNAL double FN(BrotliHistogramBitCostDistance)(
156 const HistogramType* histogram, const HistogramType* candidate) CODE({
157 if (histogram->total_count_ == 0) {
158 return 0.0;
159 } else {
160 HistogramType tmp = *histogram;
161 FN(HistogramAddHistogram)(&tmp, candidate);
162 return FN(BrotliPopulationCost)(&tmp) - candidate->bit_cost_;
163 }
164 })
165
166 /* Find the best 'out' histogram for each of the 'in' histograms.
167 When called, clusters[0..num_clusters) contains the unique values from
168 symbols[0..in_size), but this property is not preserved in this function.
169 Note: we assume that out[]->bit_cost_ is already up-to-date. */
170 BROTLI_INTERNAL void FN(BrotliHistogramRemap)(const HistogramType* in,
171 size_t in_size, const uint32_t* clusters, size_t num_clusters,
172 HistogramType* out, uint32_t* symbols) CODE({
173 size_t i;
174 for (i = 0; i < in_size; ++i) {
175 uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];
176 double best_bits =
177 FN(BrotliHistogramBitCostDistance)(&in[i], &out[best_out]);
178 size_t j;
179 for (j = 0; j < num_clusters; ++j) {
180 const double cur_bits =
181 FN(BrotliHistogramBitCostDistance)(&in[i], &out[clusters[j]]);
182 if (cur_bits < best_bits) {
183 best_bits = cur_bits;
184 best_out = clusters[j];
185 }
186 }
187 symbols[i] = best_out;
188 }
189
190 /* Recompute each out based on raw and symbols. */
191 for (i = 0; i < num_clusters; ++i) {
192 FN(HistogramClear)(&out[clusters[i]]);
193 }
194 for (i = 0; i < in_size; ++i) {
195 FN(HistogramAddHistogram)(&out[symbols[i]], &in[i]);
196 }
197 })
198
199 /* Reorders elements of the out[0..length) array and changes values in
200 symbols[0..length) array in the following way:
201 * when called, symbols[] contains indexes into out[], and has N unique
202 values (possibly N < length)
203 * on return, symbols'[i] = f(symbols[i]) and
204 out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,
205 where f is a bijection between the range of symbols[] and [0..N), and
206 the first occurrences of values in symbols'[i] come in consecutive
207 increasing order.
208 Returns N, the number of unique values in symbols[]. */
209 BROTLI_INTERNAL size_t FN(BrotliHistogramReindex)(MemoryManager* m,
210 HistogramType* out, uint32_t* symbols, size_t length) CODE({
211 static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
212 uint32_t* new_index = BROTLI_ALLOC(m, uint32_t, length);
213 uint32_t next_index;
214 HistogramType* tmp;
215 size_t i;
216 if (BROTLI_IS_OOM(m)) return 0;
217 for (i = 0; i < length; ++i) {
218 new_index[i] = kInvalidIndex;
219 }
220 next_index = 0;
221 for (i = 0; i < length; ++i) {
222 if (new_index[symbols[i]] == kInvalidIndex) {
223 new_index[symbols[i]] = next_index;
224 ++next_index;
225 }
226 }
227 /* TODO: by using idea of "cycle-sort" we can avoid allocation of
228 tmp and reduce the number of copying by the factor of 2. */
229 tmp = BROTLI_ALLOC(m, HistogramType, next_index);
230 if (BROTLI_IS_OOM(m)) return 0;
231 next_index = 0;
232 for (i = 0; i < length; ++i) {
233 if (new_index[symbols[i]] == next_index) {
234 tmp[next_index] = out[symbols[i]];
235 ++next_index;
236 }
237 symbols[i] = new_index[symbols[i]];
238 }
239 BROTLI_FREE(m, new_index);
240 for (i = 0; i < next_index; ++i) {
241 out[i] = tmp[i];
242 }
243 BROTLI_FREE(m, tmp);
244 return next_index;
245 })
246
247 BROTLI_INTERNAL void FN(BrotliClusterHistograms)(
248 MemoryManager* m, const HistogramType* in, const size_t in_size,
249 size_t max_histograms, HistogramType* out, size_t* out_size,
250 uint32_t* histogram_symbols) CODE({
251 uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, in_size);
252 uint32_t* clusters = BROTLI_ALLOC(m, uint32_t, in_size);
253 size_t num_clusters = 0;
254 const size_t max_input_histograms = 64;
255 size_t pairs_capacity = max_input_histograms * max_input_histograms / 2;
256 /* For the first pass of clustering, we allow all pairs. */
257 HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity + 1);
258 size_t i;
259
260 if (BROTLI_IS_OOM(m)) return;
261
262 for (i = 0; i < in_size; ++i) {
263 cluster_size[i] = 1;
264 }
265
266 for (i = 0; i < in_size; ++i) {
267 out[i] = in[i];
268 out[i].bit_cost_ = FN(BrotliPopulationCost)(&in[i]);
269 histogram_symbols[i] = (uint32_t)i;
270 }
271
272 for (i = 0; i < in_size; i += max_input_histograms) {
273 size_t num_to_combine =
274 BROTLI_MIN(size_t, in_size - i, max_input_histograms);
275 size_t num_new_clusters;
276 size_t j;
277 for (j = 0; j < num_to_combine; ++j) {
278 clusters[num_clusters + j] = (uint32_t)(i + j);
279 }
280 num_new_clusters =
281 FN(BrotliHistogramCombine)(out, cluster_size,
282 &histogram_symbols[i],
283 &clusters[num_clusters], pairs,
284 num_to_combine, num_to_combine,
285 max_histograms, pairs_capacity);
286 num_clusters += num_new_clusters;
287 }
288
289 {
290 /* For the second pass, we limit the total number of histogram pairs.
291 After this limit is reached, we only keep searching for the best pair. */
292 size_t max_num_pairs = BROTLI_MIN(size_t,
293 64 * num_clusters, (num_clusters / 2) * num_clusters);
294 BROTLI_ENSURE_CAPACITY(
295 m, HistogramPair, pairs, pairs_capacity, max_num_pairs + 1);
296 if (BROTLI_IS_OOM(m)) return;
297
298 /* Collapse similar histograms. */
299 num_clusters = FN(BrotliHistogramCombine)(out, cluster_size,
300 histogram_symbols, clusters,
301 pairs, num_clusters, in_size,
302 max_histograms, max_num_pairs);
303 }
304 BROTLI_FREE(m, pairs);
305 BROTLI_FREE(m, cluster_size);
306 /* Find the optimal map from original histograms to the final ones. */
307 FN(BrotliHistogramRemap)(in, in_size, clusters, num_clusters,
308 out, histogram_symbols);
309 BROTLI_FREE(m, clusters);
310 /* Convert the context map to a canonical form. */
311 *out_size = FN(BrotliHistogramReindex)(m, out, histogram_symbols, in_size);
312 if (BROTLI_IS_OOM(m)) return;
313 })
314
315 #undef HistogramType