1 /* NOLINT(build/header_guard) */
2 /* Copyright 2013 Google Inc. All Rights Reserved.
4 Distributed under MIT license.
5 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
8 /* template parameters: FN, CODE */
10 #define HistogramType FN(Histogram)
12 /* Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
13 it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue. */
14 BROTLI_INTERNAL
void FN(BrotliCompareAndPushToQueue
)(
15 const HistogramType
* out
, const uint32_t* cluster_size
, uint32_t idx1
,
16 uint32_t idx2
, size_t max_num_pairs
, HistogramPair
* pairs
,
17 size_t* num_pairs
) CODE({
18 BROTLI_BOOL is_good_pair
= BROTLI_FALSE
;
30 p
.cost_diff
= 0.5 * ClusterCostDiff(cluster_size
[idx1
], cluster_size
[idx2
]);
31 p
.cost_diff
-= out
[idx1
].bit_cost_
;
32 p
.cost_diff
-= out
[idx2
].bit_cost_
;
34 if (out
[idx1
].total_count_
== 0) {
35 p
.cost_combo
= out
[idx2
].bit_cost_
;
36 is_good_pair
= BROTLI_TRUE
;
37 } else if (out
[idx2
].total_count_
== 0) {
38 p
.cost_combo
= out
[idx1
].bit_cost_
;
39 is_good_pair
= BROTLI_TRUE
;
41 double threshold
= *num_pairs
== 0 ? 1e99
:
42 BROTLI_MAX(double, 0.0, pairs
[0].cost_diff
);
43 HistogramType combo
= out
[idx1
];
45 FN(HistogramAddHistogram
)(&combo
, &out
[idx2
]);
46 cost_combo
= FN(BrotliPopulationCost
)(&combo
);
47 if (cost_combo
< threshold
- p
.cost_diff
) {
48 p
.cost_combo
= cost_combo
;
49 is_good_pair
= BROTLI_TRUE
;
53 p
.cost_diff
+= p
.cost_combo
;
54 if (*num_pairs
> 0 && HistogramPairIsLess(&pairs
[0], &p
)) {
55 /* Replace the top of the queue if needed. */
56 if (*num_pairs
< max_num_pairs
) {
57 pairs
[*num_pairs
] = pairs
[0];
61 } else if (*num_pairs
< max_num_pairs
) {
62 pairs
[*num_pairs
] = p
;
68 BROTLI_INTERNAL
size_t FN(BrotliHistogramCombine
)(HistogramType
* out
,
69 uint32_t* cluster_size
,
76 size_t max_num_pairs
) CODE({
77 double cost_diff_threshold
= 0.0;
78 size_t min_cluster_size
= 1;
82 /* We maintain a vector of histogram pairs, with the property that the pair
83 with the maximum bit cost reduction is the first. */
85 for (idx1
= 0; idx1
< num_clusters
; ++idx1
) {
87 for (idx2
= idx1
+ 1; idx2
< num_clusters
; ++idx2
) {
88 FN(BrotliCompareAndPushToQueue
)(out
, cluster_size
, clusters
[idx1
],
89 clusters
[idx2
], max_num_pairs
, &pairs
[0], &num_pairs
);
94 while (num_clusters
> min_cluster_size
) {
98 if (pairs
[0].cost_diff
>= cost_diff_threshold
) {
99 cost_diff_threshold
= 1e99
;
100 min_cluster_size
= max_clusters
;
103 /* Take the best pair from the top of heap. */
104 best_idx1
= pairs
[0].idx1
;
105 best_idx2
= pairs
[0].idx2
;
106 FN(HistogramAddHistogram
)(&out
[best_idx1
], &out
[best_idx2
]);
107 out
[best_idx1
].bit_cost_
= pairs
[0].cost_combo
;
108 cluster_size
[best_idx1
] += cluster_size
[best_idx2
];
109 for (i
= 0; i
< symbols_size
; ++i
) {
110 if (symbols
[i
] == best_idx2
) {
111 symbols
[i
] = best_idx1
;
114 for (i
= 0; i
< num_clusters
; ++i
) {
115 if (clusters
[i
] == best_idx2
) {
116 memmove(&clusters
[i
], &clusters
[i
+ 1],
117 (num_clusters
- i
- 1) * sizeof(clusters
[0]));
123 /* Remove pairs intersecting the just combined best pair. */
124 size_t copy_to_idx
= 0;
125 for (i
= 0; i
< num_pairs
; ++i
) {
126 HistogramPair
* p
= &pairs
[i
];
127 if (p
->idx1
== best_idx1
|| p
->idx2
== best_idx1
||
128 p
->idx1
== best_idx2
|| p
->idx2
== best_idx2
) {
129 /* Remove invalid pair from the queue. */
132 if (HistogramPairIsLess(&pairs
[0], p
)) {
133 /* Replace the top of the queue if needed. */
134 HistogramPair front
= pairs
[0];
136 pairs
[copy_to_idx
] = front
;
138 pairs
[copy_to_idx
] = *p
;
142 num_pairs
= copy_to_idx
;
145 /* Push new pairs formed with the combined histogram to the heap. */
146 for (i
= 0; i
< num_clusters
; ++i
) {
147 FN(BrotliCompareAndPushToQueue
)(out
, cluster_size
, best_idx1
, clusters
[i
],
148 max_num_pairs
, &pairs
[0], &num_pairs
);
154 /* What is the bit cost of moving histogram from cur_symbol to candidate. */
155 BROTLI_INTERNAL
double FN(BrotliHistogramBitCostDistance
)(
156 const HistogramType
* histogram
, const HistogramType
* candidate
) CODE({
157 if (histogram
->total_count_
== 0) {
160 HistogramType tmp
= *histogram
;
161 FN(HistogramAddHistogram
)(&tmp
, candidate
);
162 return FN(BrotliPopulationCost
)(&tmp
) - candidate
->bit_cost_
;
166 /* Find the best 'out' histogram for each of the 'in' histograms.
167 When called, clusters[0..num_clusters) contains the unique values from
168 symbols[0..in_size), but this property is not preserved in this function.
169 Note: we assume that out[]->bit_cost_ is already up-to-date. */
170 BROTLI_INTERNAL
void FN(BrotliHistogramRemap
)(const HistogramType
* in
,
171 size_t in_size
, const uint32_t* clusters
, size_t num_clusters
,
172 HistogramType
* out
, uint32_t* symbols
) CODE({
174 for (i
= 0; i
< in_size
; ++i
) {
175 uint32_t best_out
= i
== 0 ? symbols
[0] : symbols
[i
- 1];
177 FN(BrotliHistogramBitCostDistance
)(&in
[i
], &out
[best_out
]);
179 for (j
= 0; j
< num_clusters
; ++j
) {
180 const double cur_bits
=
181 FN(BrotliHistogramBitCostDistance
)(&in
[i
], &out
[clusters
[j
]]);
182 if (cur_bits
< best_bits
) {
183 best_bits
= cur_bits
;
184 best_out
= clusters
[j
];
187 symbols
[i
] = best_out
;
190 /* Recompute each out based on raw and symbols. */
191 for (i
= 0; i
< num_clusters
; ++i
) {
192 FN(HistogramClear
)(&out
[clusters
[i
]]);
194 for (i
= 0; i
< in_size
; ++i
) {
195 FN(HistogramAddHistogram
)(&out
[symbols
[i
]], &in
[i
]);
199 /* Reorders elements of the out[0..length) array and changes values in
200 symbols[0..length) array in the following way:
201 * when called, symbols[] contains indexes into out[], and has N unique
202 values (possibly N < length)
203 * on return, symbols'[i] = f(symbols[i]) and
204 out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,
205 where f is a bijection between the range of symbols[] and [0..N), and
206 the first occurrences of values in symbols'[i] come in consecutive
208 Returns N, the number of unique values in symbols[]. */
209 BROTLI_INTERNAL
size_t FN(BrotliHistogramReindex
)(MemoryManager
* m
,
210 HistogramType
* out
, uint32_t* symbols
, size_t length
) CODE({
211 static const uint32_t kInvalidIndex
= BROTLI_UINT32_MAX
;
212 uint32_t* new_index
= BROTLI_ALLOC(m
, uint32_t, length
);
216 if (BROTLI_IS_OOM(m
)) return 0;
217 for (i
= 0; i
< length
; ++i
) {
218 new_index
[i
] = kInvalidIndex
;
221 for (i
= 0; i
< length
; ++i
) {
222 if (new_index
[symbols
[i
]] == kInvalidIndex
) {
223 new_index
[symbols
[i
]] = next_index
;
227 /* TODO: by using idea of "cycle-sort" we can avoid allocation of
228 tmp and reduce the number of copying by the factor of 2. */
229 tmp
= BROTLI_ALLOC(m
, HistogramType
, next_index
);
230 if (BROTLI_IS_OOM(m
)) return 0;
232 for (i
= 0; i
< length
; ++i
) {
233 if (new_index
[symbols
[i
]] == next_index
) {
234 tmp
[next_index
] = out
[symbols
[i
]];
237 symbols
[i
] = new_index
[symbols
[i
]];
239 BROTLI_FREE(m
, new_index
);
240 for (i
= 0; i
< next_index
; ++i
) {
247 BROTLI_INTERNAL
void FN(BrotliClusterHistograms
)(
248 MemoryManager
* m
, const HistogramType
* in
, const size_t in_size
,
249 size_t max_histograms
, HistogramType
* out
, size_t* out_size
,
250 uint32_t* histogram_symbols
) CODE({
251 uint32_t* cluster_size
= BROTLI_ALLOC(m
, uint32_t, in_size
);
252 uint32_t* clusters
= BROTLI_ALLOC(m
, uint32_t, in_size
);
253 size_t num_clusters
= 0;
254 const size_t max_input_histograms
= 64;
255 size_t pairs_capacity
= max_input_histograms
* max_input_histograms
/ 2;
256 /* For the first pass of clustering, we allow all pairs. */
257 HistogramPair
* pairs
= BROTLI_ALLOC(m
, HistogramPair
, pairs_capacity
+ 1);
260 if (BROTLI_IS_OOM(m
)) return;
262 for (i
= 0; i
< in_size
; ++i
) {
266 for (i
= 0; i
< in_size
; ++i
) {
268 out
[i
].bit_cost_
= FN(BrotliPopulationCost
)(&in
[i
]);
269 histogram_symbols
[i
] = (uint32_t)i
;
272 for (i
= 0; i
< in_size
; i
+= max_input_histograms
) {
273 size_t num_to_combine
=
274 BROTLI_MIN(size_t, in_size
- i
, max_input_histograms
);
275 size_t num_new_clusters
;
277 for (j
= 0; j
< num_to_combine
; ++j
) {
278 clusters
[num_clusters
+ j
] = (uint32_t)(i
+ j
);
281 FN(BrotliHistogramCombine
)(out
, cluster_size
,
282 &histogram_symbols
[i
],
283 &clusters
[num_clusters
], pairs
,
284 num_to_combine
, num_to_combine
,
285 max_histograms
, pairs_capacity
);
286 num_clusters
+= num_new_clusters
;
290 /* For the second pass, we limit the total number of histogram pairs.
291 After this limit is reached, we only keep searching for the best pair. */
292 size_t max_num_pairs
= BROTLI_MIN(size_t,
293 64 * num_clusters
, (num_clusters
/ 2) * num_clusters
);
294 BROTLI_ENSURE_CAPACITY(
295 m
, HistogramPair
, pairs
, pairs_capacity
, max_num_pairs
+ 1);
296 if (BROTLI_IS_OOM(m
)) return;
298 /* Collapse similar histograms. */
299 num_clusters
= FN(BrotliHistogramCombine
)(out
, cluster_size
,
300 histogram_symbols
, clusters
,
301 pairs
, num_clusters
, in_size
,
302 max_histograms
, max_num_pairs
);
304 BROTLI_FREE(m
, pairs
);
305 BROTLI_FREE(m
, cluster_size
);
306 /* Find the optimal map from original histograms to the final ones. */
307 FN(BrotliHistogramRemap
)(in
, in_size
, clusters
, num_clusters
,
308 out
, histogram_symbols
);
309 BROTLI_FREE(m
, clusters
);
310 /* Convert the context map to a canonical form. */
311 *out_size
= FN(BrotliHistogramReindex
)(m
, out
, histogram_symbols
, in_size
);
312 if (BROTLI_IS_OOM(m
)) return;