]>
Commit | Line | Data |
---|---|---|
11b7501a SB |
1 | /* NOLINT(build/header_guard) */\r |
2 | /* Copyright 2015 Google Inc. All Rights Reserved.\r | |
3 | \r | |
4 | Distributed under MIT license.\r | |
5 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT\r | |
6 | */\r | |
7 | \r | |
8 | /* template parameters: FN */\r | |
9 | \r | |
10 | #define HistogramType FN(Histogram)\r | |
11 | \r | |
12 | /* Greedy block splitter for one block category (literal, command or distance).\r | |
13 | */\r | |
14 | typedef struct FN(BlockSplitter) {\r | |
15 | /* Alphabet size of particular block category. */\r | |
16 | size_t alphabet_size_;\r | |
17 | /* We collect at least this many symbols for each block. */\r | |
18 | size_t min_block_size_;\r | |
19 | /* We merge histograms A and B if\r | |
20 | entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,\r | |
21 | where A is the current histogram and B is the histogram of the last or the\r | |
22 | second last block type. */\r | |
23 | double split_threshold_;\r | |
24 | \r | |
25 | size_t num_blocks_;\r | |
26 | BlockSplit* split_; /* not owned */\r | |
27 | HistogramType* histograms_; /* not owned */\r | |
28 | size_t* histograms_size_; /* not owned */\r | |
29 | \r | |
30 | /* The number of symbols that we want to collect before deciding on whether\r | |
31 | or not to merge the block with a previous one or emit a new block. */\r | |
32 | size_t target_block_size_;\r | |
33 | /* The number of symbols in the current histogram. */\r | |
34 | size_t block_size_;\r | |
35 | /* Offset of the current histogram. */\r | |
36 | size_t curr_histogram_ix_;\r | |
37 | /* Offset of the histograms of the previous two block types. */\r | |
38 | size_t last_histogram_ix_[2];\r | |
39 | /* Entropy of the previous two block types. */\r | |
40 | double last_entropy_[2];\r | |
41 | /* The number of times we merged the current block with the last one. */\r | |
42 | size_t merge_last_count_;\r | |
43 | } FN(BlockSplitter);\r | |
44 | \r | |
45 | static void FN(InitBlockSplitter)(\r | |
46 | MemoryManager* m, FN(BlockSplitter)* self, size_t alphabet_size,\r | |
47 | size_t min_block_size, double split_threshold, size_t num_symbols,\r | |
48 | BlockSplit* split, HistogramType** histograms, size_t* histograms_size) {\r | |
49 | size_t max_num_blocks = num_symbols / min_block_size + 1;\r | |
50 | /* We have to allocate one more histogram than the maximum number of block\r | |
51 | types for the current histogram when the meta-block is too big. */\r | |
52 | size_t max_num_types =\r | |
53 | BROTLI_MIN(size_t, max_num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1);\r | |
54 | self->alphabet_size_ = alphabet_size;\r | |
55 | self->min_block_size_ = min_block_size;\r | |
56 | self->split_threshold_ = split_threshold;\r | |
57 | self->num_blocks_ = 0;\r | |
58 | self->split_ = split;\r | |
59 | self->histograms_size_ = histograms_size;\r | |
60 | self->target_block_size_ = min_block_size;\r | |
61 | self->block_size_ = 0;\r | |
62 | self->curr_histogram_ix_ = 0;\r | |
63 | self->merge_last_count_ = 0;\r | |
64 | BROTLI_ENSURE_CAPACITY(m, uint8_t,\r | |
65 | split->types, split->types_alloc_size, max_num_blocks);\r | |
66 | BROTLI_ENSURE_CAPACITY(m, uint32_t,\r | |
67 | split->lengths, split->lengths_alloc_size, max_num_blocks);\r | |
68 | if (BROTLI_IS_OOM(m)) return;\r | |
69 | self->split_->num_blocks = max_num_blocks;\r | |
70 | assert(*histograms == 0);\r | |
71 | *histograms_size = max_num_types;\r | |
72 | *histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size);\r | |
73 | self->histograms_ = *histograms;\r | |
74 | if (BROTLI_IS_OOM(m)) return;\r | |
75 | /* Clear only current histogram. */\r | |
76 | FN(HistogramClear)(&self->histograms_[0]);\r | |
77 | self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;\r | |
78 | }\r | |
79 | \r | |
80 | /* Does either of three things:\r | |
81 | (1) emits the current block with a new block type;\r | |
82 | (2) emits the current block with the type of the second last block;\r | |
83 | (3) merges the current block with the last block. */\r | |
84 | static void FN(BlockSplitterFinishBlock)(\r | |
85 | FN(BlockSplitter)* self, BROTLI_BOOL is_final) {\r | |
86 | BlockSplit* split = self->split_;\r | |
87 | double* last_entropy = self->last_entropy_;\r | |
88 | HistogramType* histograms = self->histograms_;\r | |
89 | self->block_size_ =\r | |
90 | BROTLI_MAX(size_t, self->block_size_, self->min_block_size_);\r | |
91 | if (self->num_blocks_ == 0) {\r | |
92 | /* Create first block. */\r | |
93 | split->lengths[0] = (uint32_t)self->block_size_;\r | |
94 | split->types[0] = 0;\r | |
95 | last_entropy[0] =\r | |
96 | BitsEntropy(histograms[0].data_, self->alphabet_size_);\r | |
97 | last_entropy[1] = last_entropy[0];\r | |
98 | ++self->num_blocks_;\r | |
99 | ++split->num_types;\r | |
100 | ++self->curr_histogram_ix_;\r | |
101 | if (self->curr_histogram_ix_ < *self->histograms_size_)\r | |
102 | FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);\r | |
103 | self->block_size_ = 0;\r | |
104 | } else if (self->block_size_ > 0) {\r | |
105 | double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_,\r | |
106 | self->alphabet_size_);\r | |
107 | HistogramType combined_histo[2];\r | |
108 | double combined_entropy[2];\r | |
109 | double diff[2];\r | |
110 | size_t j;\r | |
111 | for (j = 0; j < 2; ++j) {\r | |
112 | size_t last_histogram_ix = self->last_histogram_ix_[j];\r | |
113 | combined_histo[j] = histograms[self->curr_histogram_ix_];\r | |
114 | FN(HistogramAddHistogram)(&combined_histo[j],\r | |
115 | &histograms[last_histogram_ix]);\r | |
116 | combined_entropy[j] = BitsEntropy(\r | |
117 | &combined_histo[j].data_[0], self->alphabet_size_);\r | |
118 | diff[j] = combined_entropy[j] - entropy - last_entropy[j];\r | |
119 | }\r | |
120 | \r | |
121 | if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES &&\r | |
122 | diff[0] > self->split_threshold_ &&\r | |
123 | diff[1] > self->split_threshold_) {\r | |
124 | /* Create new block. */\r | |
125 | split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;\r | |
126 | split->types[self->num_blocks_] = (uint8_t)split->num_types;\r | |
127 | self->last_histogram_ix_[1] = self->last_histogram_ix_[0];\r | |
128 | self->last_histogram_ix_[0] = (uint8_t)split->num_types;\r | |
129 | last_entropy[1] = last_entropy[0];\r | |
130 | last_entropy[0] = entropy;\r | |
131 | ++self->num_blocks_;\r | |
132 | ++split->num_types;\r | |
133 | ++self->curr_histogram_ix_;\r | |
134 | if (self->curr_histogram_ix_ < *self->histograms_size_)\r | |
135 | FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);\r | |
136 | self->block_size_ = 0;\r | |
137 | self->merge_last_count_ = 0;\r | |
138 | self->target_block_size_ = self->min_block_size_;\r | |
139 | } else if (diff[1] < diff[0] - 20.0) {\r | |
140 | /* Combine this block with second last block. */\r | |
141 | split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;\r | |
142 | split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];\r | |
143 | BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);\r | |
144 | histograms[self->last_histogram_ix_[0]] = combined_histo[1];\r | |
145 | last_entropy[1] = last_entropy[0];\r | |
146 | last_entropy[0] = combined_entropy[1];\r | |
147 | ++self->num_blocks_;\r | |
148 | self->block_size_ = 0;\r | |
149 | FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);\r | |
150 | self->merge_last_count_ = 0;\r | |
151 | self->target_block_size_ = self->min_block_size_;\r | |
152 | } else {\r | |
153 | /* Combine this block with last block. */\r | |
154 | split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;\r | |
155 | histograms[self->last_histogram_ix_[0]] = combined_histo[0];\r | |
156 | last_entropy[0] = combined_entropy[0];\r | |
157 | if (split->num_types == 1) {\r | |
158 | last_entropy[1] = last_entropy[0];\r | |
159 | }\r | |
160 | self->block_size_ = 0;\r | |
161 | FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);\r | |
162 | if (++self->merge_last_count_ > 1) {\r | |
163 | self->target_block_size_ += self->min_block_size_;\r | |
164 | }\r | |
165 | }\r | |
166 | }\r | |
167 | if (is_final) {\r | |
168 | *self->histograms_size_ = split->num_types;\r | |
169 | split->num_blocks = self->num_blocks_;\r | |
170 | }\r | |
171 | }\r | |
172 | \r | |
173 | /* Adds the next symbol to the current histogram. When the current histogram\r | |
174 | reaches the target size, decides on merging the block. */\r | |
175 | static void FN(BlockSplitterAddSymbol)(FN(BlockSplitter)* self, size_t symbol) {\r | |
176 | FN(HistogramAdd)(&self->histograms_[self->curr_histogram_ix_], symbol);\r | |
177 | ++self->block_size_;\r | |
178 | if (self->block_size_ == self->target_block_size_) {\r | |
179 | FN(BlockSplitterFinishBlock)(self, /* is_final = */ BROTLI_FALSE);\r | |
180 | }\r | |
181 | }\r | |
182 | \r | |
183 | #undef HistogramType\r |