1 /* NOLINT(build/header_guard) */
2 /* Copyright 2015 Google Inc. All Rights Reserved.
4 Distributed under MIT license.
5 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
8 /* template parameters: FN */
10 #define HistogramType FN(Histogram)
12 /* Greedy block splitter for one block category (literal, command or distance).
14 typedef struct FN(BlockSplitter
) {
15 /* Alphabet size of particular block category. */
16 size_t alphabet_size_
;
17 /* We collect at least this many symbols for each block. */
18 size_t min_block_size_
;
19 /* We merge histograms A and B if
20 entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
21 where A is the current histogram and B is the histogram of the last or the
22 second last block type. */
23 double split_threshold_
;
26 BlockSplit
* split_
; /* not owned */
27 HistogramType
* histograms_
; /* not owned */
28 size_t* histograms_size_
; /* not owned */
30 /* The number of symbols that we want to collect before deciding on whether
31 or not to merge the block with a previous one or emit a new block. */
32 size_t target_block_size_
;
33 /* The number of symbols in the current histogram. */
35 /* Offset of the current histogram. */
36 size_t curr_histogram_ix_
;
37 /* Offset of the histograms of the previous two block types. */
38 size_t last_histogram_ix_
[2];
39 /* Entropy of the previous two block types. */
40 double last_entropy_
[2];
41 /* The number of times we merged the current block with the last one. */
42 size_t merge_last_count_
;
45 static void FN(InitBlockSplitter
)(
46 MemoryManager
* m
, FN(BlockSplitter
)* self
, size_t alphabet_size
,
47 size_t min_block_size
, double split_threshold
, size_t num_symbols
,
48 BlockSplit
* split
, HistogramType
** histograms
, size_t* histograms_size
) {
49 size_t max_num_blocks
= num_symbols
/ min_block_size
+ 1;
50 /* We have to allocate one more histogram than the maximum number of block
51 types for the current histogram when the meta-block is too big. */
52 size_t max_num_types
=
53 BROTLI_MIN(size_t, max_num_blocks
, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES
+ 1);
54 self
->alphabet_size_
= alphabet_size
;
55 self
->min_block_size_
= min_block_size
;
56 self
->split_threshold_
= split_threshold
;
57 self
->num_blocks_
= 0;
59 self
->histograms_size_
= histograms_size
;
60 self
->target_block_size_
= min_block_size
;
61 self
->block_size_
= 0;
62 self
->curr_histogram_ix_
= 0;
63 self
->merge_last_count_
= 0;
64 BROTLI_ENSURE_CAPACITY(m
, uint8_t,
65 split
->types
, split
->types_alloc_size
, max_num_blocks
);
66 BROTLI_ENSURE_CAPACITY(m
, uint32_t,
67 split
->lengths
, split
->lengths_alloc_size
, max_num_blocks
);
68 if (BROTLI_IS_OOM(m
)) return;
69 self
->split_
->num_blocks
= max_num_blocks
;
70 assert(*histograms
== 0);
71 *histograms_size
= max_num_types
;
72 *histograms
= BROTLI_ALLOC(m
, HistogramType
, *histograms_size
);
73 self
->histograms_
= *histograms
;
74 if (BROTLI_IS_OOM(m
)) return;
75 /* Clear only current histogram. */
76 FN(HistogramClear
)(&self
->histograms_
[0]);
77 self
->last_histogram_ix_
[0] = self
->last_histogram_ix_
[1] = 0;
80 /* Does either of three things:
81 (1) emits the current block with a new block type;
82 (2) emits the current block with the type of the second last block;
83 (3) merges the current block with the last block. */
84 static void FN(BlockSplitterFinishBlock
)(
85 FN(BlockSplitter
)* self
, BROTLI_BOOL is_final
) {
86 BlockSplit
* split
= self
->split_
;
87 double* last_entropy
= self
->last_entropy_
;
88 HistogramType
* histograms
= self
->histograms_
;
90 BROTLI_MAX(size_t, self
->block_size_
, self
->min_block_size_
);
91 if (self
->num_blocks_
== 0) {
92 /* Create first block. */
93 split
->lengths
[0] = (uint32_t)self
->block_size_
;
96 BitsEntropy(histograms
[0].data_
, self
->alphabet_size_
);
97 last_entropy
[1] = last_entropy
[0];
100 ++self
->curr_histogram_ix_
;
101 if (self
->curr_histogram_ix_
< *self
->histograms_size_
)
102 FN(HistogramClear
)(&histograms
[self
->curr_histogram_ix_
]);
103 self
->block_size_
= 0;
104 } else if (self
->block_size_
> 0) {
105 double entropy
= BitsEntropy(histograms
[self
->curr_histogram_ix_
].data_
,
106 self
->alphabet_size_
);
107 HistogramType combined_histo
[2];
108 double combined_entropy
[2];
111 for (j
= 0; j
< 2; ++j
) {
112 size_t last_histogram_ix
= self
->last_histogram_ix_
[j
];
113 combined_histo
[j
] = histograms
[self
->curr_histogram_ix_
];
114 FN(HistogramAddHistogram
)(&combined_histo
[j
],
115 &histograms
[last_histogram_ix
]);
116 combined_entropy
[j
] = BitsEntropy(
117 &combined_histo
[j
].data_
[0], self
->alphabet_size_
);
118 diff
[j
] = combined_entropy
[j
] - entropy
- last_entropy
[j
];
121 if (split
->num_types
< BROTLI_MAX_NUMBER_OF_BLOCK_TYPES
&&
122 diff
[0] > self
->split_threshold_
&&
123 diff
[1] > self
->split_threshold_
) {
124 /* Create new block. */
125 split
->lengths
[self
->num_blocks_
] = (uint32_t)self
->block_size_
;
126 split
->types
[self
->num_blocks_
] = (uint8_t)split
->num_types
;
127 self
->last_histogram_ix_
[1] = self
->last_histogram_ix_
[0];
128 self
->last_histogram_ix_
[0] = (uint8_t)split
->num_types
;
129 last_entropy
[1] = last_entropy
[0];
130 last_entropy
[0] = entropy
;
133 ++self
->curr_histogram_ix_
;
134 if (self
->curr_histogram_ix_
< *self
->histograms_size_
)
135 FN(HistogramClear
)(&histograms
[self
->curr_histogram_ix_
]);
136 self
->block_size_
= 0;
137 self
->merge_last_count_
= 0;
138 self
->target_block_size_
= self
->min_block_size_
;
139 } else if (diff
[1] < diff
[0] - 20.0) {
140 /* Combine this block with second last block. */
141 split
->lengths
[self
->num_blocks_
] = (uint32_t)self
->block_size_
;
142 split
->types
[self
->num_blocks_
] = split
->types
[self
->num_blocks_
- 2];
143 BROTLI_SWAP(size_t, self
->last_histogram_ix_
, 0, 1);
144 histograms
[self
->last_histogram_ix_
[0]] = combined_histo
[1];
145 last_entropy
[1] = last_entropy
[0];
146 last_entropy
[0] = combined_entropy
[1];
148 self
->block_size_
= 0;
149 FN(HistogramClear
)(&histograms
[self
->curr_histogram_ix_
]);
150 self
->merge_last_count_
= 0;
151 self
->target_block_size_
= self
->min_block_size_
;
153 /* Combine this block with last block. */
154 split
->lengths
[self
->num_blocks_
- 1] += (uint32_t)self
->block_size_
;
155 histograms
[self
->last_histogram_ix_
[0]] = combined_histo
[0];
156 last_entropy
[0] = combined_entropy
[0];
157 if (split
->num_types
== 1) {
158 last_entropy
[1] = last_entropy
[0];
160 self
->block_size_
= 0;
161 FN(HistogramClear
)(&histograms
[self
->curr_histogram_ix_
]);
162 if (++self
->merge_last_count_
> 1) {
163 self
->target_block_size_
+= self
->min_block_size_
;
168 *self
->histograms_size_
= split
->num_types
;
169 split
->num_blocks
= self
->num_blocks_
;
173 /* Adds the next symbol to the current histogram. When the current histogram
174 reaches the target size, decides on merging the block. */
175 static void FN(BlockSplitterAddSymbol
)(FN(BlockSplitter
)* self
, size_t symbol
) {
176 FN(HistogramAdd
)(&self
->histograms_
[self
->curr_histogram_ix_
], symbol
);
178 if (self
->block_size_
== self
->target_block_size_
) {
179 FN(BlockSplitterFinishBlock
)(self
, /* is_final = */ BROTLI_FALSE
);