1 /* Copyright 2013 Google Inc. All Rights Reserved.
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
7 /* Block split point selection utilities. */
9 #include "./block_splitter.h"
12 #include <string.h> /* memcpy, memset */
14 #include "./bit_cost.h"
15 #include "./cluster.h"
16 #include "./command.h"
17 #include "./fast_log.h"
18 #include "./histogram.h"
21 #include "./quality.h"
23 #if defined(__cplusplus) || defined(c_plusplus)
27 static const size_t kMaxLiteralHistograms
= 100;
28 static const size_t kMaxCommandHistograms
= 50;
29 static const double kLiteralBlockSwitchCost
= 28.1;
30 static const double kCommandBlockSwitchCost
= 13.5;
31 static const double kDistanceBlockSwitchCost
= 14.6;
32 static const size_t kLiteralStrideLength
= 70;
33 static const size_t kCommandStrideLength
= 40;
34 static const size_t kSymbolsPerLiteralHistogram
= 544;
35 static const size_t kSymbolsPerCommandHistogram
= 530;
36 static const size_t kSymbolsPerDistanceHistogram
= 544;
37 static const size_t kMinLengthForBlockSplitting
= 128;
38 static const size_t kIterMulForRefining
= 2;
39 static const size_t kMinItersForRefining
= 100;
41 static size_t CountLiterals(const Command
* cmds
, const size_t num_commands
) {
42 /* Count how many we have. */
43 size_t total_length
= 0;
45 for (i
= 0; i
< num_commands
; ++i
) {
46 total_length
+= cmds
[i
].insert_len_
;
51 static void CopyLiteralsToByteArray(const Command
* cmds
,
52 const size_t num_commands
,
58 size_t from_pos
= offset
& mask
;
60 for (i
= 0; i
< num_commands
; ++i
) {
61 size_t insert_len
= cmds
[i
].insert_len_
;
62 if (from_pos
+ insert_len
> mask
) {
63 size_t head_size
= mask
+ 1 - from_pos
;
64 memcpy(literals
+ pos
, data
+ from_pos
, head_size
);
67 insert_len
-= head_size
;
70 memcpy(literals
+ pos
, data
+ from_pos
, insert_len
);
73 from_pos
= (from_pos
+ insert_len
+ CommandCopyLen(&cmds
[i
])) & mask
;
77 static BROTLI_INLINE
unsigned int MyRand(unsigned int* seed
) {
85 static BROTLI_INLINE
double BitCost(size_t count
) {
86 return count
== 0 ? -2.0 : FastLog2(count
);
89 #define HISTOGRAMS_PER_BATCH 64
90 #define CLUSTERS_PER_BATCH 16
92 #define FN(X) X ## Literal
93 #define DataType uint8_t
94 /* NOLINTNEXTLINE(build/include) */
95 #include "./block_splitter_inc.h"
99 #define FN(X) X ## Command
100 #define DataType uint16_t
101 /* NOLINTNEXTLINE(build/include) */
102 #include "./block_splitter_inc.h"
105 #define FN(X) X ## Distance
106 /* NOLINTNEXTLINE(build/include) */
107 #include "./block_splitter_inc.h"
111 void BrotliInitBlockSplit(BlockSplit
* self
) {
113 self
->num_blocks
= 0;
116 self
->types_alloc_size
= 0;
117 self
->lengths_alloc_size
= 0;
120 void BrotliDestroyBlockSplit(MemoryManager
* m
, BlockSplit
* self
) {
121 BROTLI_FREE(m
, self
->types
);
122 BROTLI_FREE(m
, self
->lengths
);
125 void BrotliSplitBlock(MemoryManager
* m
,
127 const size_t num_commands
,
131 const BrotliEncoderParams
* params
,
132 BlockSplit
* literal_split
,
133 BlockSplit
* insert_and_copy_split
,
134 BlockSplit
* dist_split
) {
136 size_t literals_count
= CountLiterals(cmds
, num_commands
);
137 uint8_t* literals
= BROTLI_ALLOC(m
, uint8_t, literals_count
);
138 if (BROTLI_IS_OOM(m
)) return;
139 /* Create a continuous array of literals. */
140 CopyLiteralsToByteArray(cmds
, num_commands
, data
, pos
, mask
, literals
);
141 /* Create the block split on the array of literals.
142 Literal histograms have alphabet size 256. */
143 SplitByteVectorLiteral(
144 m
, literals
, literals_count
,
145 kSymbolsPerLiteralHistogram
, kMaxLiteralHistograms
,
146 kLiteralStrideLength
, kLiteralBlockSwitchCost
, params
,
148 if (BROTLI_IS_OOM(m
)) return;
149 BROTLI_FREE(m
, literals
);
153 /* Compute prefix codes for commands. */
154 uint16_t* insert_and_copy_codes
= BROTLI_ALLOC(m
, uint16_t, num_commands
);
156 if (BROTLI_IS_OOM(m
)) return;
157 for (i
= 0; i
< num_commands
; ++i
) {
158 insert_and_copy_codes
[i
] = cmds
[i
].cmd_prefix_
;
160 /* Create the block split on the array of command prefixes. */
161 SplitByteVectorCommand(
162 m
, insert_and_copy_codes
, num_commands
,
163 kSymbolsPerCommandHistogram
, kMaxCommandHistograms
,
164 kCommandStrideLength
, kCommandBlockSwitchCost
, params
,
165 insert_and_copy_split
);
166 if (BROTLI_IS_OOM(m
)) return;
167 /* TODO: reuse for distances? */
168 BROTLI_FREE(m
, insert_and_copy_codes
);
172 /* Create a continuous array of distance prefixes. */
173 uint16_t* distance_prefixes
= BROTLI_ALLOC(m
, uint16_t, num_commands
);
176 if (BROTLI_IS_OOM(m
)) return;
177 for (i
= 0; i
< num_commands
; ++i
) {
178 const Command
* cmd
= &cmds
[i
];
179 if (CommandCopyLen(cmd
) && cmd
->cmd_prefix_
>= 128) {
180 distance_prefixes
[j
++] = cmd
->dist_prefix_
;
183 /* Create the block split on the array of distance prefixes. */
184 SplitByteVectorDistance(
185 m
, distance_prefixes
, j
,
186 kSymbolsPerDistanceHistogram
, kMaxCommandHistograms
,
187 kCommandStrideLength
, kDistanceBlockSwitchCost
, params
,
189 if (BROTLI_IS_OOM(m
)) return;
190 BROTLI_FREE(m
, distance_prefixes
);
195 #if defined(__cplusplus) || defined(c_plusplus)