1 /* NOLINT(build/header_guard) */
2 /* Copyright 2018 Google Inc. All Rights Reserved.
4 Distributed under MIT license.
5 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
8 /* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */
9 /* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */
10 /* JUMP = skip bytes for speedup */
12 /* Rolling hash for long distance long string matches. Stores one position
13 per bucket, bucket key is computed over a long region. */
15 #define HashRolling HASHER()
17 static const uint32_t FN(kRollingHashMul32
) = 69069;
18 static const uint32_t FN(kInvalidPos
) = 0xffffffff;
20 /* This hasher uses a longer forward length, but returning a higher value here
21 will hurt compression by the main hasher when combined with a composite
22 hasher. The hasher tests for forward itself instead. */
23 static BROTLI_INLINE
size_t FN(HashTypeLength
)(void) { return 4; }
24 static BROTLI_INLINE
size_t FN(StoreLookahead
)(void) { return 4; }
26 /* Computes a code from a single byte. A lookup table of 256 values could be
27 used, but simply adding 1 works about as good. */
28 static uint32_t FN(HashByte
)(uint8_t byte
) {
29 return (uint32_t)byte
+ 1u;
32 static uint32_t FN(HashRollingFunctionInitial
)(uint32_t state
, uint8_t add
,
34 return (uint32_t)(factor
* state
+ FN(HashByte
)(add
));
37 static uint32_t FN(HashRollingFunction
)(uint32_t state
, uint8_t add
,
38 uint8_t rem
, uint32_t factor
,
39 uint32_t factor_remove
) {
40 return (uint32_t)(factor
* state
+
41 FN(HashByte
)(add
) - factor_remove
* FN(HashByte
)(rem
));
44 typedef struct HashRolling
{
51 uint32_t factor_remove
;
54 static BROTLI_INLINE HashRolling
* FN(Self
)(HasherHandle handle
) {
55 return (HashRolling
*)&(GetHasherCommon(handle
)[1]);
58 static void FN(Initialize
)(
59 HasherHandle handle
, const BrotliEncoderParams
* params
) {
60 HashRolling
* self
= FN(Self
)(handle
);
65 self
->factor
= FN(kRollingHashMul32
);
67 /* Compute the factor of the oldest byte to remove: factor**steps modulo
68 0xffffffff (the multiplications rely on 32-bit overflow) */
69 self
->factor_remove
= 1;
70 for (i
= 0; i
< CHUNKLEN
; i
+= JUMP
) {
71 self
->factor_remove
*= self
->factor
;
74 self
->table
= (uint32_t*)((HasherHandle
)self
+ sizeof(HashRolling
));
75 for (i
= 0; i
< NUMBUCKETS
; i
++) {
76 self
->table
[i
] = FN(kInvalidPos
);
79 BROTLI_UNUSED(params
);
82 static void FN(Prepare
)(HasherHandle handle
, BROTLI_BOOL one_shot
,
83 size_t input_size
, const uint8_t* data
) {
84 HashRolling
* self
= FN(Self
)(handle
);
86 /* Too small size, cannot use this hasher. */
87 if (input_size
< CHUNKLEN
) return;
89 for (i
= 0; i
< CHUNKLEN
; i
+= JUMP
) {
90 self
->state
= FN(HashRollingFunctionInitial
)(
91 self
->state
, data
[i
], self
->factor
);
93 BROTLI_UNUSED(one_shot
);
96 static BROTLI_INLINE
size_t FN(HashMemAllocInBytes
)(
97 const BrotliEncoderParams
* params
, BROTLI_BOOL one_shot
,
99 return sizeof(HashRolling
) + NUMBUCKETS
* sizeof(uint32_t);
100 BROTLI_UNUSED(params
);
101 BROTLI_UNUSED(one_shot
);
102 BROTLI_UNUSED(input_size
);
105 static BROTLI_INLINE
void FN(Store
)(HasherHandle BROTLI_RESTRICT handle
,
106 const uint8_t* BROTLI_RESTRICT data
, const size_t mask
, const size_t ix
) {
107 BROTLI_UNUSED(handle
);
113 static BROTLI_INLINE
void FN(StoreRange
)(HasherHandle handle
,
114 const uint8_t* data
, const size_t mask
, const size_t ix_start
,
115 const size_t ix_end
) {
116 BROTLI_UNUSED(handle
);
119 BROTLI_UNUSED(ix_start
);
120 BROTLI_UNUSED(ix_end
);
123 static BROTLI_INLINE
void FN(StitchToPreviousBlock
)(HasherHandle handle
,
124 size_t num_bytes
, size_t position
, const uint8_t* ringbuffer
,
125 size_t ring_buffer_mask
) {
126 /* In this case we must re-initialize the hasher from scratch from the
128 HashRolling
* self
= FN(Self
)(handle
);
129 size_t position_masked
;
130 size_t available
= num_bytes
;
131 if ((position
& (JUMP
- 1)) != 0) {
132 size_t diff
= JUMP
- (position
& (JUMP
- 1));
133 available
= (diff
> available
) ? 0 : (available
- diff
);
136 position_masked
= position
& ring_buffer_mask
;
137 /* wrapping around ringbuffer not handled. */
138 if (available
> ring_buffer_mask
- position_masked
) {
139 available
= ring_buffer_mask
- position_masked
;
142 FN(Prepare
)(handle
, BROTLI_FALSE
, available
,
143 ringbuffer
+ (position
& ring_buffer_mask
));
144 self
->next_ix
= position
;
145 BROTLI_UNUSED(num_bytes
);
148 static BROTLI_INLINE
void FN(PrepareDistanceCache
)(
149 HasherHandle handle
, int* BROTLI_RESTRICT distance_cache
) {
150 BROTLI_UNUSED(handle
);
151 BROTLI_UNUSED(distance_cache
);
154 static BROTLI_INLINE
void FN(FindLongestMatch
)(HasherHandle handle
,
155 const BrotliEncoderDictionary
* dictionary
,
156 const uint8_t* BROTLI_RESTRICT data
, const size_t ring_buffer_mask
,
157 const int* BROTLI_RESTRICT distance_cache
, const size_t cur_ix
,
158 const size_t max_length
, const size_t max_backward
, const size_t gap
,
159 const size_t max_distance
, HasherSearchResult
* BROTLI_RESTRICT out
) {
160 HashRolling
* self
= FN(Self
)(handle
);
161 const size_t cur_ix_masked
= cur_ix
& ring_buffer_mask
;
162 size_t pos
= self
->next_ix
;
164 if ((cur_ix
& (JUMP
- 1)) != 0) return;
166 /* Not enough lookahead */
167 if (max_length
< CHUNKLEN
) return;
169 for (pos
= self
->next_ix
; pos
<= cur_ix
; pos
+= JUMP
) {
170 uint32_t code
= self
->state
& MASK
;
172 uint8_t rem
= data
[pos
& ring_buffer_mask
];
173 uint8_t add
= data
[(pos
+ CHUNKLEN
) & ring_buffer_mask
];
174 size_t found_ix
= FN(kInvalidPos
);
176 self
->state
= FN(HashRollingFunction
)(
177 self
->state
, add
, rem
, self
->factor
, self
->factor_remove
);
179 if (code
< NUMBUCKETS
) {
180 found_ix
= self
->table
[code
];
181 self
->table
[code
] = (uint32_t)pos
;
182 if (pos
== cur_ix
&& found_ix
!= FN(kInvalidPos
)) {
183 /* The cast to 32-bit makes backward distances up to 4GB work even
184 if cur_ix is above 4GB, despite using 32-bit values in the table. */
185 size_t backward
= (uint32_t)(cur_ix
- found_ix
);
186 if (backward
<= max_backward
) {
187 const size_t found_ix_masked
= found_ix
& ring_buffer_mask
;
188 const size_t len
= FindMatchLengthWithLimit(&data
[found_ix_masked
],
189 &data
[cur_ix_masked
],
191 if (len
>= 4 && len
> out
->len
) {
192 score_t score
= BackwardReferenceScore(len
, backward
);
193 if (score
> out
->score
) {
195 out
->distance
= backward
;
197 out
->len_code_delta
= 0;
205 self
->next_ix
= cur_ix
+ JUMP
;
207 /* NOTE: this hasher does not search in the dictionary. It is used as
208 backup-hasher, the main hasher already searches in it. */
209 BROTLI_UNUSED(dictionary
);
210 BROTLI_UNUSED(distance_cache
);
212 BROTLI_UNUSED(max_distance
);