1 /* Copyright 2015 Google Inc. All Rights Reserved.
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
7 /* Algorithms for distributing the literals and commands of a metablock between
8 block types and contexts. */
10 #include "./metablock.h"
12 #include "../common/constants.h"
13 #include "../common/types.h"
14 #include "./bit_cost.h"
15 #include "./block_splitter.h"
16 #include "./cluster.h"
17 #include "./context.h"
18 #include "./entropy_encode.h"
19 #include "./histogram.h"
22 #include "./quality.h"
24 #if defined(__cplusplus) || defined(c_plusplus)
28 void BrotliBuildMetaBlock(MemoryManager
* m
,
29 const uint8_t* ringbuffer
,
32 const BrotliEncoderParams
* params
,
37 ContextType literal_context_mode
,
39 /* Histogram ids need to fit in one byte. */
40 static const size_t kMaxNumberOfHistograms
= 256;
41 HistogramDistance
* distance_histograms
;
42 HistogramLiteral
* literal_histograms
;
43 ContextType
* literal_context_modes
;
44 size_t num_literal_contexts
;
45 size_t num_distance_contexts
;
48 BrotliSplitBlock(m
, cmds
, num_commands
,
49 ringbuffer
, pos
, mask
, params
,
53 if (BROTLI_IS_OOM(m
)) return;
55 literal_context_modes
=
56 BROTLI_ALLOC(m
, ContextType
, mb
->literal_split
.num_types
);
57 if (BROTLI_IS_OOM(m
)) return;
58 for (i
= 0; i
< mb
->literal_split
.num_types
; ++i
) {
59 literal_context_modes
[i
] = literal_context_mode
;
62 num_literal_contexts
=
63 mb
->literal_split
.num_types
<< BROTLI_LITERAL_CONTEXT_BITS
;
64 num_distance_contexts
=
65 mb
->distance_split
.num_types
<< BROTLI_DISTANCE_CONTEXT_BITS
;
66 literal_histograms
= BROTLI_ALLOC(m
, HistogramLiteral
, num_literal_contexts
);
67 if (BROTLI_IS_OOM(m
)) return;
68 ClearHistogramsLiteral(literal_histograms
, num_literal_contexts
);
70 assert(mb
->command_histograms
== 0);
71 mb
->command_histograms_size
= mb
->command_split
.num_types
;
72 mb
->command_histograms
=
73 BROTLI_ALLOC(m
, HistogramCommand
, mb
->command_histograms_size
);
74 if (BROTLI_IS_OOM(m
)) return;
75 ClearHistogramsCommand(mb
->command_histograms
, mb
->command_histograms_size
);
77 BROTLI_ALLOC(m
, HistogramDistance
, num_distance_contexts
);
78 if (BROTLI_IS_OOM(m
)) return;
79 ClearHistogramsDistance(distance_histograms
, num_distance_contexts
);
80 BrotliBuildHistogramsWithContext(cmds
, num_commands
,
81 &mb
->literal_split
, &mb
->command_split
, &mb
->distance_split
,
82 ringbuffer
, pos
, mask
, prev_byte
, prev_byte2
, literal_context_modes
,
83 literal_histograms
, mb
->command_histograms
, distance_histograms
);
84 BROTLI_FREE(m
, literal_context_modes
);
86 assert(mb
->literal_context_map
== 0);
87 mb
->literal_context_map_size
=
88 mb
->literal_split
.num_types
<< BROTLI_LITERAL_CONTEXT_BITS
;
89 mb
->literal_context_map
=
90 BROTLI_ALLOC(m
, uint32_t, mb
->literal_context_map_size
);
91 if (BROTLI_IS_OOM(m
)) return;
92 assert(mb
->literal_histograms
== 0);
93 mb
->literal_histograms_size
= mb
->literal_context_map_size
;
94 mb
->literal_histograms
=
95 BROTLI_ALLOC(m
, HistogramLiteral
, mb
->literal_histograms_size
);
96 if (BROTLI_IS_OOM(m
)) return;
97 BrotliClusterHistogramsLiteral(m
, literal_histograms
,
98 mb
->literal_context_map_size
,
99 kMaxNumberOfHistograms
,
100 mb
->literal_histograms
,
101 &mb
->literal_histograms_size
,
102 mb
->literal_context_map
);
103 if (BROTLI_IS_OOM(m
)) return;
104 BROTLI_FREE(m
, literal_histograms
);
106 assert(mb
->distance_context_map
== 0);
107 mb
->distance_context_map_size
=
108 mb
->distance_split
.num_types
<< BROTLI_DISTANCE_CONTEXT_BITS
;
109 mb
->distance_context_map
=
110 BROTLI_ALLOC(m
, uint32_t, mb
->distance_context_map_size
);
111 if (BROTLI_IS_OOM(m
)) return;
112 assert(mb
->distance_histograms
== 0);
113 mb
->distance_histograms_size
= mb
->distance_context_map_size
;
114 mb
->distance_histograms
=
115 BROTLI_ALLOC(m
, HistogramDistance
, mb
->distance_histograms_size
);
116 if (BROTLI_IS_OOM(m
)) return;
117 BrotliClusterHistogramsDistance(m
, distance_histograms
,
118 mb
->distance_context_map_size
,
119 kMaxNumberOfHistograms
,
120 mb
->distance_histograms
,
121 &mb
->distance_histograms_size
,
122 mb
->distance_context_map
);
123 if (BROTLI_IS_OOM(m
)) return;
124 BROTLI_FREE(m
, distance_histograms
);
127 #define FN(X) X ## Literal
128 #include "./metablock_inc.h" /* NOLINT(build/include) */
131 #define FN(X) X ## Command
132 #include "./metablock_inc.h" /* NOLINT(build/include) */
135 #define FN(X) X ## Distance
136 #include "./metablock_inc.h" /* NOLINT(build/include) */
139 void BrotliBuildMetaBlockGreedy(MemoryManager
* m
,
140 const uint8_t* ringbuffer
,
143 const Command
*commands
,
145 MetaBlockSplit
* mb
) {
146 BlockSplitterLiteral lit_blocks
;
147 BlockSplitterCommand cmd_blocks
;
148 BlockSplitterDistance dist_blocks
;
149 size_t num_literals
= 0;
151 for (i
= 0; i
< n_commands
; ++i
) {
152 num_literals
+= commands
[i
].insert_len_
;
155 InitBlockSplitterLiteral(m
, &lit_blocks
, 256, 512, 400.0, num_literals
,
156 &mb
->literal_split
, &mb
->literal_histograms
,
157 &mb
->literal_histograms_size
);
158 if (BROTLI_IS_OOM(m
)) return;
159 InitBlockSplitterCommand(m
, &cmd_blocks
, BROTLI_NUM_COMMAND_SYMBOLS
, 1024,
160 500.0, n_commands
, &mb
->command_split
, &mb
->command_histograms
,
161 &mb
->command_histograms_size
);
162 if (BROTLI_IS_OOM(m
)) return;
163 InitBlockSplitterDistance(m
, &dist_blocks
, 64, 512, 100.0, n_commands
,
164 &mb
->distance_split
, &mb
->distance_histograms
,
165 &mb
->distance_histograms_size
);
166 if (BROTLI_IS_OOM(m
)) return;
168 for (i
= 0; i
< n_commands
; ++i
) {
169 const Command cmd
= commands
[i
];
171 BlockSplitterAddSymbolCommand(&cmd_blocks
, cmd
.cmd_prefix_
);
172 for (j
= cmd
.insert_len_
; j
!= 0; --j
) {
173 BlockSplitterAddSymbolLiteral(&lit_blocks
, ringbuffer
[pos
& mask
]);
176 pos
+= CommandCopyLen(&cmd
);
177 if (CommandCopyLen(&cmd
) && cmd
.cmd_prefix_
>= 128) {
178 BlockSplitterAddSymbolDistance(&dist_blocks
, cmd
.dist_prefix_
);
182 BlockSplitterFinishBlockLiteral(&lit_blocks
, /* is_final = */ BROTLI_TRUE
);
183 BlockSplitterFinishBlockCommand(&cmd_blocks
, /* is_final = */ BROTLI_TRUE
);
184 BlockSplitterFinishBlockDistance(&dist_blocks
, /* is_final = */ BROTLI_TRUE
);
187 /* Greedy block splitter for one block category (literal, command or distance).
188 Gathers histograms for all context buckets. */
189 typedef struct ContextBlockSplitter
{
190 /* Alphabet size of particular block category. */
191 size_t alphabet_size_
;
192 size_t num_contexts_
;
193 size_t max_block_types_
;
194 /* We collect at least this many symbols for each block. */
195 size_t min_block_size_
;
196 /* We merge histograms A and B if
197 entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
198 where A is the current histogram and B is the histogram of the last or the
199 second last block type. */
200 double split_threshold_
;
203 BlockSplit
* split_
; /* not owned */
204 HistogramLiteral
* histograms_
; /* not owned */
205 size_t* histograms_size_
; /* not owned */
207 /* The number of symbols that we want to collect before deciding on whether
208 or not to merge the block with a previous one or emit a new block. */
209 size_t target_block_size_
;
210 /* The number of symbols in the current histogram. */
212 /* Offset of the current histogram. */
213 size_t curr_histogram_ix_
;
214 /* Offset of the histograms of the previous two block types. */
215 size_t last_histogram_ix_
[2];
216 /* Entropy of the previous two block types. */
217 double* last_entropy_
;
218 /* The number of times we merged the current block with the last one. */
219 size_t merge_last_count_
;
220 } ContextBlockSplitter
;
222 static void InitContextBlockSplitter(
223 MemoryManager
* m
, ContextBlockSplitter
* self
, size_t alphabet_size
,
224 size_t num_contexts
, size_t min_block_size
, double split_threshold
,
225 size_t num_symbols
, BlockSplit
* split
, HistogramLiteral
** histograms
,
226 size_t* histograms_size
) {
227 size_t max_num_blocks
= num_symbols
/ min_block_size
+ 1;
228 size_t max_num_types
;
230 self
->alphabet_size_
= alphabet_size
;
231 self
->num_contexts_
= num_contexts
;
232 self
->max_block_types_
= BROTLI_MAX_NUMBER_OF_BLOCK_TYPES
/ num_contexts
;
233 self
->min_block_size_
= min_block_size
;
234 self
->split_threshold_
= split_threshold
;
235 self
->num_blocks_
= 0;
236 self
->split_
= split
;
237 self
->histograms_size_
= histograms_size
;
238 self
->target_block_size_
= min_block_size
;
239 self
->block_size_
= 0;
240 self
->curr_histogram_ix_
= 0;
241 self
->merge_last_count_
= 0;
243 /* We have to allocate one more histogram than the maximum number of block
244 types for the current histogram when the meta-block is too big. */
246 BROTLI_MIN(size_t, max_num_blocks
, self
->max_block_types_
+ 1);
247 BROTLI_ENSURE_CAPACITY(m
, uint8_t,
248 split
->types
, split
->types_alloc_size
, max_num_blocks
);
249 BROTLI_ENSURE_CAPACITY(m
, uint32_t,
250 split
->lengths
, split
->lengths_alloc_size
, max_num_blocks
);
251 if (BROTLI_IS_OOM(m
)) return;
252 split
->num_blocks
= max_num_blocks
;
253 self
->last_entropy_
= BROTLI_ALLOC(m
, double, 2 * num_contexts
);
254 if (BROTLI_IS_OOM(m
)) return;
255 assert(*histograms
== 0);
256 *histograms_size
= max_num_types
* num_contexts
;
257 *histograms
= BROTLI_ALLOC(m
, HistogramLiteral
, *histograms_size
);
258 self
->histograms_
= *histograms
;
259 if (BROTLI_IS_OOM(m
)) return;
260 /* Clear only current historgram. */
261 ClearHistogramsLiteral(&self
->histograms_
[0], num_contexts
);
262 self
->last_histogram_ix_
[0] = self
->last_histogram_ix_
[1] = 0;
265 static void CleanupContextBlockSplitter(
266 MemoryManager
* m
, ContextBlockSplitter
* self
) {
267 BROTLI_FREE(m
, self
->last_entropy_
);
270 /* Does either of three things:
271 (1) emits the current block with a new block type;
272 (2) emits the current block with the type of the second last block;
273 (3) merges the current block with the last block. */
274 static void ContextBlockSplitterFinishBlock(
275 MemoryManager
* m
, ContextBlockSplitter
* self
, BROTLI_BOOL is_final
) {
276 BlockSplit
* split
= self
->split_
;
277 const size_t num_contexts
= self
->num_contexts_
;
278 double* last_entropy
= self
->last_entropy_
;
279 HistogramLiteral
* histograms
= self
->histograms_
;
281 if (self
->block_size_
< self
->min_block_size_
) {
282 self
->block_size_
= self
->min_block_size_
;
284 if (self
->num_blocks_
== 0) {
286 /* Create first block. */
287 split
->lengths
[0] = (uint32_t)self
->block_size_
;
290 for (i
= 0; i
< num_contexts
; ++i
) {
292 BitsEntropy(histograms
[i
].data_
, self
->alphabet_size_
);
293 last_entropy
[num_contexts
+ i
] = last_entropy
[i
];
297 self
->curr_histogram_ix_
+= num_contexts
;
298 if (self
->curr_histogram_ix_
< *self
->histograms_size_
) {
299 ClearHistogramsLiteral(
300 &self
->histograms_
[self
->curr_histogram_ix_
], self
->num_contexts_
);
302 self
->block_size_
= 0;
303 } else if (self
->block_size_
> 0) {
304 /* Try merging the set of histograms for the current block type with the
305 respective set of histograms for the last and second last block types.
306 Decide over the split based on the total reduction of entropy across
308 double* entropy
= BROTLI_ALLOC(m
, double, num_contexts
);
309 HistogramLiteral
* combined_histo
=
310 BROTLI_ALLOC(m
, HistogramLiteral
, 2 * num_contexts
);
311 double* combined_entropy
= BROTLI_ALLOC(m
, double, 2 * num_contexts
);
312 double diff
[2] = { 0.0 };
314 if (BROTLI_IS_OOM(m
)) return;
315 for (i
= 0; i
< num_contexts
; ++i
) {
316 size_t curr_histo_ix
= self
->curr_histogram_ix_
+ i
;
318 entropy
[i
] = BitsEntropy(histograms
[curr_histo_ix
].data_
,
319 self
->alphabet_size_
);
320 for (j
= 0; j
< 2; ++j
) {
321 size_t jx
= j
* num_contexts
+ i
;
322 size_t last_histogram_ix
= self
->last_histogram_ix_
[j
] + i
;
323 combined_histo
[jx
] = histograms
[curr_histo_ix
];
324 HistogramAddHistogramLiteral(&combined_histo
[jx
],
325 &histograms
[last_histogram_ix
]);
326 combined_entropy
[jx
] = BitsEntropy(
327 &combined_histo
[jx
].data_
[0], self
->alphabet_size_
);
328 diff
[j
] += combined_entropy
[jx
] - entropy
[i
] - last_entropy
[jx
];
332 if (split
->num_types
< self
->max_block_types_
&&
333 diff
[0] > self
->split_threshold_
&&
334 diff
[1] > self
->split_threshold_
) {
335 /* Create new block. */
336 split
->lengths
[self
->num_blocks_
] = (uint32_t)self
->block_size_
;
337 split
->types
[self
->num_blocks_
] = (uint8_t)split
->num_types
;
338 self
->last_histogram_ix_
[1] = self
->last_histogram_ix_
[0];
339 self
->last_histogram_ix_
[0] = split
->num_types
* num_contexts
;
340 for (i
= 0; i
< num_contexts
; ++i
) {
341 last_entropy
[num_contexts
+ i
] = last_entropy
[i
];
342 last_entropy
[i
] = entropy
[i
];
346 self
->curr_histogram_ix_
+= num_contexts
;
347 if (self
->curr_histogram_ix_
< *self
->histograms_size_
) {
348 ClearHistogramsLiteral(
349 &self
->histograms_
[self
->curr_histogram_ix_
], self
->num_contexts_
);
351 self
->block_size_
= 0;
352 self
->merge_last_count_
= 0;
353 self
->target_block_size_
= self
->min_block_size_
;
354 } else if (diff
[1] < diff
[0] - 20.0) {
355 /* Combine this block with second last block. */
356 split
->lengths
[self
->num_blocks_
] = (uint32_t)self
->block_size_
;
357 split
->types
[self
->num_blocks_
] = split
->types
[self
->num_blocks_
- 2];
358 BROTLI_SWAP(size_t, self
->last_histogram_ix_
, 0, 1);
359 for (i
= 0; i
< num_contexts
; ++i
) {
360 histograms
[self
->last_histogram_ix_
[0] + i
] =
361 combined_histo
[num_contexts
+ i
];
362 last_entropy
[num_contexts
+ i
] = last_entropy
[i
];
363 last_entropy
[i
] = combined_entropy
[num_contexts
+ i
];
364 HistogramClearLiteral(&histograms
[self
->curr_histogram_ix_
+ i
]);
367 self
->block_size_
= 0;
368 self
->merge_last_count_
= 0;
369 self
->target_block_size_
= self
->min_block_size_
;
371 /* Combine this block with last block. */
372 split
->lengths
[self
->num_blocks_
- 1] += (uint32_t)self
->block_size_
;
373 for (i
= 0; i
< num_contexts
; ++i
) {
374 histograms
[self
->last_histogram_ix_
[0] + i
] = combined_histo
[i
];
375 last_entropy
[i
] = combined_entropy
[i
];
376 if (split
->num_types
== 1) {
377 last_entropy
[num_contexts
+ i
] = last_entropy
[i
];
379 HistogramClearLiteral(&histograms
[self
->curr_histogram_ix_
+ i
]);
381 self
->block_size_
= 0;
382 if (++self
->merge_last_count_
> 1) {
383 self
->target_block_size_
+= self
->min_block_size_
;
386 BROTLI_FREE(m
, combined_entropy
);
387 BROTLI_FREE(m
, combined_histo
);
388 BROTLI_FREE(m
, entropy
);
391 *self
->histograms_size_
= split
->num_types
* num_contexts
;
392 split
->num_blocks
= self
->num_blocks_
;
396 /* Adds the next symbol to the current block type and context. When the
397 current block reaches the target size, decides on merging the block. */
398 static void ContextBlockSplitterAddSymbol(MemoryManager
* m
,
399 ContextBlockSplitter
* self
, size_t symbol
, size_t context
) {
400 HistogramAddLiteral(&self
->histograms_
[self
->curr_histogram_ix_
+ context
],
403 if (self
->block_size_
== self
->target_block_size_
) {
404 ContextBlockSplitterFinishBlock(m
, self
, /* is_final = */ BROTLI_FALSE
);
405 if (BROTLI_IS_OOM(m
)) return;
409 void BrotliBuildMetaBlockGreedyWithContexts(MemoryManager
* m
,
410 const uint8_t* ringbuffer
,
415 ContextType literal_context_mode
,
417 const uint32_t* static_context_map
,
418 const Command
*commands
,
420 MetaBlockSplit
* mb
) {
421 ContextBlockSplitter lit_blocks
;
422 BlockSplitterCommand cmd_blocks
;
423 BlockSplitterDistance dist_blocks
;
424 size_t num_literals
= 0;
426 for (i
= 0; i
< n_commands
; ++i
) {
427 num_literals
+= commands
[i
].insert_len_
;
430 InitContextBlockSplitter(m
, &lit_blocks
, 256, num_contexts
, 512, 400.0,
431 num_literals
, &mb
->literal_split
, &mb
->literal_histograms
,
432 &mb
->literal_histograms_size
);
433 if (BROTLI_IS_OOM(m
)) return;
434 InitBlockSplitterCommand(m
, &cmd_blocks
, BROTLI_NUM_COMMAND_SYMBOLS
, 1024,
435 500.0, n_commands
, &mb
->command_split
, &mb
->command_histograms
,
436 &mb
->command_histograms_size
);
437 if (BROTLI_IS_OOM(m
)) return;
438 InitBlockSplitterDistance(m
, &dist_blocks
, 64, 512, 100.0, n_commands
,
439 &mb
->distance_split
, &mb
->distance_histograms
,
440 &mb
->distance_histograms_size
);
441 if (BROTLI_IS_OOM(m
)) return;
443 for (i
= 0; i
< n_commands
; ++i
) {
444 const Command cmd
= commands
[i
];
446 BlockSplitterAddSymbolCommand(&cmd_blocks
, cmd
.cmd_prefix_
);
447 for (j
= cmd
.insert_len_
; j
!= 0; --j
) {
448 size_t context
= Context(prev_byte
, prev_byte2
, literal_context_mode
);
449 uint8_t literal
= ringbuffer
[pos
& mask
];
450 ContextBlockSplitterAddSymbol(
451 m
, &lit_blocks
, literal
, static_context_map
[context
]);
452 prev_byte2
= prev_byte
;
453 if (BROTLI_IS_OOM(m
)) return;
457 pos
+= CommandCopyLen(&cmd
);
458 if (CommandCopyLen(&cmd
)) {
459 prev_byte2
= ringbuffer
[(pos
- 2) & mask
];
460 prev_byte
= ringbuffer
[(pos
- 1) & mask
];
461 if (cmd
.cmd_prefix_
>= 128) {
462 BlockSplitterAddSymbolDistance(&dist_blocks
, cmd
.dist_prefix_
);
467 ContextBlockSplitterFinishBlock(m
, &lit_blocks
, /* is_final = */ BROTLI_TRUE
);
468 if (BROTLI_IS_OOM(m
)) return;
469 CleanupContextBlockSplitter(m
, &lit_blocks
);
470 BlockSplitterFinishBlockCommand(&cmd_blocks
, /* is_final = */ BROTLI_TRUE
);
471 BlockSplitterFinishBlockDistance(&dist_blocks
, /* is_final = */ BROTLI_TRUE
);
473 assert(mb
->literal_context_map
== 0);
474 mb
->literal_context_map_size
=
475 mb
->literal_split
.num_types
<< BROTLI_LITERAL_CONTEXT_BITS
;
476 mb
->literal_context_map
=
477 BROTLI_ALLOC(m
, uint32_t, mb
->literal_context_map_size
);
478 if (BROTLI_IS_OOM(m
)) return;
480 for (i
= 0; i
< mb
->literal_split
.num_types
; ++i
) {
482 for (j
= 0; j
< (1u << BROTLI_LITERAL_CONTEXT_BITS
); ++j
) {
483 mb
->literal_context_map
[(i
<< BROTLI_LITERAL_CONTEXT_BITS
) + j
] =
484 (uint32_t)(i
* num_contexts
) + static_context_map
[j
];
489 void BrotliOptimizeHistograms(size_t num_direct_distance_codes
,
490 size_t distance_postfix_bits
,
491 MetaBlockSplit
* mb
) {
492 uint8_t good_for_rle
[BROTLI_NUM_COMMAND_SYMBOLS
];
493 size_t num_distance_codes
;
495 for (i
= 0; i
< mb
->literal_histograms_size
; ++i
) {
496 BrotliOptimizeHuffmanCountsForRle(256, mb
->literal_histograms
[i
].data_
,
499 for (i
= 0; i
< mb
->command_histograms_size
; ++i
) {
500 BrotliOptimizeHuffmanCountsForRle(BROTLI_NUM_COMMAND_SYMBOLS
,
501 mb
->command_histograms
[i
].data_
,
504 num_distance_codes
= BROTLI_NUM_DISTANCE_SHORT_CODES
+
505 num_direct_distance_codes
+ (48u << distance_postfix_bits
);
506 for (i
= 0; i
< mb
->distance_histograms_size
; ++i
) {
507 BrotliOptimizeHuffmanCountsForRle(num_distance_codes
,
508 mb
->distance_histograms
[i
].data_
,
513 #if defined(__cplusplus) || defined(c_plusplus)