]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | // |
6 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |
7 | // Use of this source code is governed by a BSD-style license that can be | |
8 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
9 | // | |
10 | #pragma once | |
11 | ||
12 | #include <algorithm> | |
13 | #include <limits> | |
494da23a TL |
14 | #ifdef ROCKSDB_MALLOC_USABLE_SIZE |
15 | #ifdef OS_FREEBSD | |
16 | #include <malloc_np.h> | |
17 | #else // OS_FREEBSD | |
18 | #include <malloc.h> | |
19 | #endif // OS_FREEBSD | |
20 | #endif // ROCKSDB_MALLOC_USABLE_SIZE | |
7c673cae FG |
21 | #include <string> |
22 | ||
23 | #include "rocksdb/options.h" | |
494da23a | 24 | #include "rocksdb/table.h" |
7c673cae | 25 | #include "util/coding.h" |
11fdf7f2 | 26 | #include "util/compression_context_cache.h" |
494da23a TL |
27 | #include "util/memory_allocator.h" |
28 | #include "util/string_util.h" | |
7c673cae FG |
29 | |
30 | #ifdef SNAPPY | |
31 | #include <snappy.h> | |
32 | #endif | |
33 | ||
34 | #ifdef ZLIB | |
35 | #include <zlib.h> | |
36 | #endif | |
37 | ||
38 | #ifdef BZIP2 | |
39 | #include <bzlib.h> | |
40 | #endif | |
41 | ||
42 | #if defined(LZ4) | |
43 | #include <lz4.h> | |
44 | #include <lz4hc.h> | |
45 | #endif | |
46 | ||
47 | #if defined(ZSTD) | |
48 | #include <zstd.h> | |
11fdf7f2 TL |
49 | #if ZSTD_VERSION_NUMBER >= 10103 // v1.1.3+ |
50 | #include <zdict.h> | |
51 | #endif // ZSTD_VERSION_NUMBER >= 10103 | |
52 | namespace rocksdb { | |
53 | // Need this for the context allocation override | |
54 | // On windows we need to do this explicitly | |
55 | #if (ZSTD_VERSION_NUMBER >= 500) | |
56 | #if defined(ROCKSDB_JEMALLOC) && defined(OS_WIN) && \ | |
57 | defined(ZSTD_STATIC_LINKING_ONLY) | |
58 | #define ROCKSDB_ZSTD_CUSTOM_MEM | |
59 | namespace port { | |
60 | ZSTD_customMem GetJeZstdAllocationOverrides(); | |
61 | } // namespace port | |
62 | #endif // defined(ROCKSDB_JEMALLOC) && defined(OS_WIN) && | |
63 | // defined(ZSTD_STATIC_LINKING_ONLY) | |
64 | ||
494da23a TL |
65 | // We require `ZSTD_sizeof_DDict` and `ZSTD_createDDict_byReference` to use |
66 | // `ZSTD_DDict`. The former was introduced in v1.0.0 and the latter was | |
67 | // introduced in v1.1.3. But an important bug fix for `ZSTD_sizeof_DDict` came | |
68 | // in v1.1.4, so that is the version we require. As of today's latest version | |
69 | // (v1.3.8), they are both still in the experimental API, which means they are | |
70 | // only exported when the compiler flag `ZSTD_STATIC_LINKING_ONLY` is set. | |
71 | #if defined(ZSTD_STATIC_LINKING_ONLY) && ZSTD_VERSION_NUMBER >= 10104 | |
72 | #define ROCKSDB_ZSTD_DDICT | |
73 | #endif // defined(ZSTD_STATIC_LINKING_ONLY) && ZSTD_VERSION_NUMBER >= 10104 | |
74 | ||
11fdf7f2 TL |
75 | // Cached data represents a portion that can be re-used |
76 | // If, in the future we have more than one native context to | |
77 | // cache we can arrange this as a tuple | |
78 | class ZSTDUncompressCachedData { | |
79 | public: | |
80 | using ZSTDNativeContext = ZSTD_DCtx*; | |
81 | ZSTDUncompressCachedData() {} | |
82 | // Init from cache | |
83 | ZSTDUncompressCachedData(const ZSTDUncompressCachedData& o) = delete; | |
84 | ZSTDUncompressCachedData& operator=(const ZSTDUncompressCachedData&) = delete; | |
85 | ZSTDUncompressCachedData(ZSTDUncompressCachedData&& o) ROCKSDB_NOEXCEPT | |
86 | : ZSTDUncompressCachedData() { | |
87 | *this = std::move(o); | |
88 | } | |
89 | ZSTDUncompressCachedData& operator=(ZSTDUncompressCachedData&& o) | |
90 | ROCKSDB_NOEXCEPT { | |
91 | assert(zstd_ctx_ == nullptr); | |
92 | std::swap(zstd_ctx_, o.zstd_ctx_); | |
93 | std::swap(cache_idx_, o.cache_idx_); | |
94 | return *this; | |
95 | } | |
96 | ZSTDNativeContext Get() const { return zstd_ctx_; } | |
97 | int64_t GetCacheIndex() const { return cache_idx_; } | |
98 | void CreateIfNeeded() { | |
99 | if (zstd_ctx_ == nullptr) { | |
100 | #ifdef ROCKSDB_ZSTD_CUSTOM_MEM | |
101 | zstd_ctx_ = | |
102 | ZSTD_createDCtx_advanced(port::GetJeZstdAllocationOverrides()); | |
103 | #else // ROCKSDB_ZSTD_CUSTOM_MEM | |
104 | zstd_ctx_ = ZSTD_createDCtx(); | |
105 | #endif // ROCKSDB_ZSTD_CUSTOM_MEM | |
106 | cache_idx_ = -1; | |
107 | } | |
108 | } | |
109 | void InitFromCache(const ZSTDUncompressCachedData& o, int64_t idx) { | |
110 | zstd_ctx_ = o.zstd_ctx_; | |
111 | cache_idx_ = idx; | |
112 | } | |
113 | ~ZSTDUncompressCachedData() { | |
114 | if (zstd_ctx_ != nullptr && cache_idx_ == -1) { | |
115 | ZSTD_freeDCtx(zstd_ctx_); | |
116 | } | |
117 | } | |
118 | ||
119 | private: | |
120 | ZSTDNativeContext zstd_ctx_ = nullptr; | |
121 | int64_t cache_idx_ = -1; // -1 means this instance owns the context | |
122 | }; | |
123 | #endif // (ZSTD_VERSION_NUMBER >= 500) | |
124 | } // namespace rocksdb | |
125 | #endif // ZSTD | |
126 | ||
127 | #if !(defined ZSTD) || !(ZSTD_VERSION_NUMBER >= 500) | |
128 | namespace rocksdb { | |
129 | class ZSTDUncompressCachedData { | |
130 | void* padding; // unused | |
131 | public: | |
132 | using ZSTDNativeContext = void*; | |
133 | ZSTDUncompressCachedData() {} | |
134 | ZSTDUncompressCachedData(const ZSTDUncompressCachedData&) {} | |
135 | ZSTDUncompressCachedData& operator=(const ZSTDUncompressCachedData&) = delete; | |
136 | ZSTDUncompressCachedData(ZSTDUncompressCachedData&&) | |
137 | ROCKSDB_NOEXCEPT = default; | |
138 | ZSTDUncompressCachedData& operator=(ZSTDUncompressCachedData&&) | |
139 | ROCKSDB_NOEXCEPT = default; | |
140 | ZSTDNativeContext Get() const { return nullptr; } | |
141 | int64_t GetCacheIndex() const { return -1; } | |
142 | void CreateIfNeeded() {} | |
143 | void InitFromCache(const ZSTDUncompressCachedData&, int64_t) {} | |
144 | private: | |
145 | void ignore_padding__() { padding = nullptr; } | |
146 | }; | |
147 | } // namespace rocksdb | |
7c673cae FG |
148 | #endif |
149 | ||
150 | #if defined(XPRESS) | |
151 | #include "port/xpress.h" | |
152 | #endif | |
153 | ||
154 | namespace rocksdb { | |
155 | ||
494da23a TL |
156 | // Holds dictionary and related data, like ZSTD's digested compression |
157 | // dictionary. | |
158 | struct CompressionDict { | |
159 | #if ZSTD_VERSION_NUMBER >= 700 | |
160 | ZSTD_CDict* zstd_cdict_ = nullptr; | |
161 | #endif // ZSTD_VERSION_NUMBER >= 700 | |
162 | std::string dict_; | |
163 | ||
164 | public: | |
165 | #if ZSTD_VERSION_NUMBER >= 700 | |
166 | CompressionDict(std::string dict, CompressionType type, int level) { | |
167 | #else // ZSTD_VERSION_NUMBER >= 700 | |
168 | CompressionDict(std::string dict, CompressionType /*type*/, int /*level*/) { | |
169 | #endif // ZSTD_VERSION_NUMBER >= 700 | |
170 | dict_ = std::move(dict); | |
171 | #if ZSTD_VERSION_NUMBER >= 700 | |
172 | zstd_cdict_ = nullptr; | |
173 | if (!dict_.empty() && (type == kZSTD || type == kZSTDNotFinalCompression)) { | |
174 | if (level == CompressionOptions::kDefaultCompressionLevel) { | |
175 | // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see | |
176 | // https://github.com/facebook/zstd/issues/1148 | |
177 | level = 3; | |
178 | } | |
179 | // Should be safe (but slower) if below call fails as we'll use the | |
180 | // raw dictionary to compress. | |
181 | zstd_cdict_ = ZSTD_createCDict(dict_.data(), dict_.size(), level); | |
182 | assert(zstd_cdict_ != nullptr); | |
183 | } | |
184 | #endif // ZSTD_VERSION_NUMBER >= 700 | |
185 | } | |
186 | ||
187 | ~CompressionDict() { | |
188 | #if ZSTD_VERSION_NUMBER >= 700 | |
189 | size_t res = 0; | |
190 | if (zstd_cdict_ != nullptr) { | |
191 | res = ZSTD_freeCDict(zstd_cdict_); | |
192 | } | |
193 | assert(res == 0); // Last I checked they can't fail | |
194 | (void)res; // prevent unused var warning | |
195 | #endif // ZSTD_VERSION_NUMBER >= 700 | |
196 | } | |
197 | ||
198 | #if ZSTD_VERSION_NUMBER >= 700 | |
199 | const ZSTD_CDict* GetDigestedZstdCDict() const { return zstd_cdict_; } | |
200 | #endif // ZSTD_VERSION_NUMBER >= 700 | |
201 | ||
202 | Slice GetRawDict() const { return dict_; } | |
203 | ||
204 | static const CompressionDict& GetEmptyDict() { | |
205 | static CompressionDict empty_dict{}; | |
206 | return empty_dict; | |
207 | } | |
208 | ||
209 | CompressionDict() = default; | |
210 | // Disable copy/move | |
211 | CompressionDict(const CompressionDict&) = delete; | |
212 | CompressionDict& operator=(const CompressionDict&) = delete; | |
213 | CompressionDict(CompressionDict&&) = delete; | |
214 | CompressionDict& operator=(CompressionDict&&) = delete; | |
215 | }; | |
216 | ||
217 | // Holds dictionary and related data, like ZSTD's digested uncompression | |
218 | // dictionary. | |
219 | struct UncompressionDict { | |
220 | #ifdef ROCKSDB_ZSTD_DDICT | |
221 | ZSTD_DDict* zstd_ddict_; | |
222 | #endif // ROCKSDB_ZSTD_DDICT | |
223 | // Block containing the data for the compression dictionary. It may be | |
224 | // redundant with the data held in `zstd_ddict_`. | |
225 | std::string dict_; | |
226 | // This `Statistics` pointer is intended to be used upon block cache eviction, | |
227 | // so only needs to be populated on `UncompressionDict`s that'll be inserted | |
228 | // into block cache. | |
229 | Statistics* statistics_; | |
230 | ||
231 | #ifdef ROCKSDB_ZSTD_DDICT | |
232 | UncompressionDict(std::string dict, bool using_zstd, | |
233 | Statistics* _statistics = nullptr) { | |
234 | #else // ROCKSDB_ZSTD_DDICT | |
235 | UncompressionDict(std::string dict, bool /*using_zstd*/, | |
236 | Statistics* _statistics = nullptr) { | |
237 | #endif // ROCKSDB_ZSTD_DDICT | |
238 | dict_ = std::move(dict); | |
239 | statistics_ = _statistics; | |
240 | #ifdef ROCKSDB_ZSTD_DDICT | |
241 | zstd_ddict_ = nullptr; | |
242 | if (!dict_.empty() && using_zstd) { | |
243 | zstd_ddict_ = ZSTD_createDDict_byReference(dict_.data(), dict_.size()); | |
244 | assert(zstd_ddict_ != nullptr); | |
245 | } | |
246 | #endif // ROCKSDB_ZSTD_DDICT | |
247 | } | |
248 | ||
249 | ~UncompressionDict() { | |
250 | #ifdef ROCKSDB_ZSTD_DDICT | |
251 | size_t res = 0; | |
252 | if (zstd_ddict_ != nullptr) { | |
253 | res = ZSTD_freeDDict(zstd_ddict_); | |
254 | } | |
255 | assert(res == 0); // Last I checked they can't fail | |
256 | (void)res; // prevent unused var warning | |
257 | #endif // ROCKSDB_ZSTD_DDICT | |
258 | } | |
259 | ||
260 | #ifdef ROCKSDB_ZSTD_DDICT | |
261 | const ZSTD_DDict* GetDigestedZstdDDict() const { return zstd_ddict_; } | |
262 | #endif // ROCKSDB_ZSTD_DDICT | |
263 | ||
264 | Slice GetRawDict() const { return dict_; } | |
265 | ||
266 | static const UncompressionDict& GetEmptyDict() { | |
267 | static UncompressionDict empty_dict{}; | |
268 | return empty_dict; | |
269 | } | |
270 | ||
271 | Statistics* statistics() const { return statistics_; } | |
272 | ||
273 | size_t ApproximateMemoryUsage() { | |
274 | size_t usage = 0; | |
275 | usage += sizeof(struct UncompressionDict); | |
276 | #ifdef ROCKSDB_ZSTD_DDICT | |
277 | usage += ZSTD_sizeof_DDict(zstd_ddict_); | |
278 | #endif // ROCKSDB_ZSTD_DDICT | |
279 | usage += dict_.size(); | |
280 | return usage; | |
281 | } | |
282 | ||
283 | UncompressionDict() = default; | |
284 | // Disable copy/move | |
285 | UncompressionDict(const CompressionDict&) = delete; | |
286 | UncompressionDict& operator=(const CompressionDict&) = delete; | |
287 | UncompressionDict(CompressionDict&&) = delete; | |
288 | UncompressionDict& operator=(CompressionDict&&) = delete; | |
289 | }; | |
290 | ||
11fdf7f2 TL |
291 | class CompressionContext { |
292 | private: | |
11fdf7f2 TL |
293 | #if defined(ZSTD) && (ZSTD_VERSION_NUMBER >= 500) |
294 | ZSTD_CCtx* zstd_ctx_ = nullptr; | |
494da23a TL |
295 | void CreateNativeContext(CompressionType type) { |
296 | if (type == kZSTD || type == kZSTDNotFinalCompression) { | |
11fdf7f2 TL |
297 | #ifdef ROCKSDB_ZSTD_CUSTOM_MEM |
298 | zstd_ctx_ = | |
299 | ZSTD_createCCtx_advanced(port::GetJeZstdAllocationOverrides()); | |
300 | #else // ROCKSDB_ZSTD_CUSTOM_MEM | |
301 | zstd_ctx_ = ZSTD_createCCtx(); | |
302 | #endif // ROCKSDB_ZSTD_CUSTOM_MEM | |
303 | } | |
304 | } | |
305 | void DestroyNativeContext() { | |
306 | if (zstd_ctx_ != nullptr) { | |
307 | ZSTD_freeCCtx(zstd_ctx_); | |
308 | } | |
309 | } | |
310 | ||
311 | public: | |
312 | // callable inside ZSTD_Compress | |
313 | ZSTD_CCtx* ZSTDPreallocCtx() const { | |
494da23a | 314 | assert(zstd_ctx_ != nullptr); |
11fdf7f2 TL |
315 | return zstd_ctx_; |
316 | } | |
494da23a | 317 | |
11fdf7f2 TL |
318 | #else // ZSTD && (ZSTD_VERSION_NUMBER >= 500) |
319 | private: | |
494da23a | 320 | void CreateNativeContext(CompressionType /* type */) {} |
11fdf7f2 TL |
321 | void DestroyNativeContext() {} |
322 | #endif // ZSTD && (ZSTD_VERSION_NUMBER >= 500) | |
323 | public: | |
494da23a TL |
324 | explicit CompressionContext(CompressionType type) { |
325 | CreateNativeContext(type); | |
11fdf7f2 TL |
326 | } |
327 | ~CompressionContext() { DestroyNativeContext(); } | |
328 | CompressionContext(const CompressionContext&) = delete; | |
329 | CompressionContext& operator=(const CompressionContext&) = delete; | |
494da23a TL |
330 | }; |
331 | ||
332 | class CompressionInfo { | |
333 | const CompressionOptions& opts_; | |
334 | const CompressionContext& context_; | |
335 | const CompressionDict& dict_; | |
336 | const CompressionType type_; | |
337 | const uint64_t sample_for_compression_; | |
338 | ||
339 | public: | |
340 | CompressionInfo(const CompressionOptions& _opts, | |
341 | const CompressionContext& _context, | |
342 | const CompressionDict& _dict, CompressionType _type, | |
343 | uint64_t _sample_for_compression) | |
344 | : opts_(_opts), | |
345 | context_(_context), | |
346 | dict_(_dict), | |
347 | type_(_type), | |
348 | sample_for_compression_(_sample_for_compression) {} | |
11fdf7f2 TL |
349 | |
350 | const CompressionOptions& options() const { return opts_; } | |
494da23a TL |
351 | const CompressionContext& context() const { return context_; } |
352 | const CompressionDict& dict() const { return dict_; } | |
11fdf7f2 | 353 | CompressionType type() const { return type_; } |
494da23a | 354 | uint64_t SampleForCompression() const { return sample_for_compression_; } |
11fdf7f2 TL |
355 | }; |
356 | ||
11fdf7f2 TL |
357 | class UncompressionContext { |
358 | private: | |
11fdf7f2 TL |
359 | CompressionContextCache* ctx_cache_ = nullptr; |
360 | ZSTDUncompressCachedData uncomp_cached_data_; | |
361 | ||
362 | public: | |
363 | struct NoCache {}; | |
364 | // Do not use context cache, used by TableBuilder | |
494da23a TL |
365 | UncompressionContext(NoCache, CompressionType /* type */) {} |
366 | ||
367 | explicit UncompressionContext(CompressionType type) { | |
368 | if (type == kZSTD || type == kZSTDNotFinalCompression) { | |
11fdf7f2 TL |
369 | ctx_cache_ = CompressionContextCache::Instance(); |
370 | uncomp_cached_data_ = ctx_cache_->GetCachedZSTDUncompressData(); | |
371 | } | |
372 | } | |
373 | ~UncompressionContext() { | |
494da23a | 374 | if (uncomp_cached_data_.GetCacheIndex() != -1) { |
11fdf7f2 TL |
375 | assert(ctx_cache_ != nullptr); |
376 | ctx_cache_->ReturnCachedZSTDUncompressData( | |
377 | uncomp_cached_data_.GetCacheIndex()); | |
378 | } | |
379 | } | |
380 | UncompressionContext(const UncompressionContext&) = delete; | |
381 | UncompressionContext& operator=(const UncompressionContext&) = delete; | |
382 | ||
383 | ZSTDUncompressCachedData::ZSTDNativeContext GetZSTDContext() const { | |
384 | return uncomp_cached_data_.Get(); | |
385 | } | |
494da23a TL |
386 | }; |
387 | ||
388 | class UncompressionInfo { | |
389 | const UncompressionContext& context_; | |
390 | const UncompressionDict& dict_; | |
391 | const CompressionType type_; | |
392 | ||
393 | public: | |
394 | UncompressionInfo(const UncompressionContext& _context, | |
395 | const UncompressionDict& _dict, CompressionType _type) | |
396 | : context_(_context), dict_(_dict), type_(_type) {} | |
397 | ||
398 | const UncompressionContext& context() const { return context_; } | |
399 | const UncompressionDict& dict() const { return dict_; } | |
11fdf7f2 | 400 | CompressionType type() const { return type_; } |
11fdf7f2 TL |
401 | }; |
402 | ||
7c673cae FG |
// True when this build was compiled with SNAPPY defined.
inline bool Snappy_Supported() {
#if defined(SNAPPY)
  const bool compiled_in = true;
#else
  const bool compiled_in = false;
#endif
  return compiled_in;
}
410 | ||
// True when this build was compiled with ZLIB defined.
inline bool Zlib_Supported() {
#if defined(ZLIB)
  const bool compiled_in = true;
#else
  const bool compiled_in = false;
#endif
  return compiled_in;
}
418 | ||
// True when this build was compiled with BZIP2 defined.
inline bool BZip2_Supported() {
#if defined(BZIP2)
  const bool compiled_in = true;
#else
  const bool compiled_in = false;
#endif
  return compiled_in;
}
426 | ||
// True when this build was compiled with LZ4 defined.
inline bool LZ4_Supported() {
#if defined(LZ4)
  const bool compiled_in = true;
#else
  const bool compiled_in = false;
#endif
  return compiled_in;
}
434 | ||
// True when this build was compiled with XPRESS defined.
inline bool XPRESS_Supported() {
#if defined(XPRESS)
  const bool compiled_in = true;
#else
  const bool compiled_in = false;
#endif
  return compiled_in;
}
442 | ||
// True when this build was compiled with ZSTD defined and the ZSTD library
// reports a version number of at least 800 at run time.
inline bool ZSTD_Supported() {
#if defined(ZSTD)
  // ZSTD format is finalized since version 0.8.0.
  const bool format_finalized = (ZSTD_versionNumber() >= 800);
  return format_finalized;
#else
  return false;
#endif
}
451 | ||
// True when this build was compiled with ZSTD defined, whatever the library
// version.
inline bool ZSTDNotFinal_Supported() {
#if defined(ZSTD)
  const bool compiled_in = true;
#else
  const bool compiled_in = false;
#endif
  return compiled_in;
}
459 | ||
460 | inline bool CompressionTypeSupported(CompressionType compression_type) { | |
461 | switch (compression_type) { | |
462 | case kNoCompression: | |
463 | return true; | |
464 | case kSnappyCompression: | |
465 | return Snappy_Supported(); | |
466 | case kZlibCompression: | |
467 | return Zlib_Supported(); | |
468 | case kBZip2Compression: | |
469 | return BZip2_Supported(); | |
470 | case kLZ4Compression: | |
471 | return LZ4_Supported(); | |
472 | case kLZ4HCCompression: | |
473 | return LZ4_Supported(); | |
474 | case kXpressCompression: | |
475 | return XPRESS_Supported(); | |
476 | case kZSTDNotFinalCompression: | |
477 | return ZSTDNotFinal_Supported(); | |
478 | case kZSTD: | |
479 | return ZSTD_Supported(); | |
480 | default: | |
481 | assert(false); | |
482 | return false; | |
483 | } | |
484 | } | |
485 | ||
486 | inline std::string CompressionTypeToString(CompressionType compression_type) { | |
487 | switch (compression_type) { | |
488 | case kNoCompression: | |
489 | return "NoCompression"; | |
490 | case kSnappyCompression: | |
491 | return "Snappy"; | |
492 | case kZlibCompression: | |
493 | return "Zlib"; | |
494 | case kBZip2Compression: | |
495 | return "BZip2"; | |
496 | case kLZ4Compression: | |
497 | return "LZ4"; | |
498 | case kLZ4HCCompression: | |
499 | return "LZ4HC"; | |
500 | case kXpressCompression: | |
501 | return "Xpress"; | |
502 | case kZSTD: | |
7c673cae | 503 | return "ZSTD"; |
11fdf7f2 TL |
504 | case kZSTDNotFinalCompression: |
505 | return "ZSTDNotFinal"; | |
7c673cae FG |
506 | default: |
507 | assert(false); | |
508 | return ""; | |
509 | } | |
510 | } | |
511 | ||
494da23a TL |
512 | inline std::string CompressionOptionsToString( |
513 | CompressionOptions& compression_options) { | |
514 | std::string result; | |
515 | result.reserve(512); | |
516 | result.append("window_bits=") | |
517 | .append(ToString(compression_options.window_bits)) | |
518 | .append("; "); | |
519 | result.append("level=") | |
520 | .append(ToString(compression_options.level)) | |
521 | .append("; "); | |
522 | result.append("strategy=") | |
523 | .append(ToString(compression_options.strategy)) | |
524 | .append("; "); | |
525 | result.append("max_dict_bytes=") | |
526 | .append(ToString(compression_options.max_dict_bytes)) | |
527 | .append("; "); | |
528 | result.append("zstd_max_train_bytes=") | |
529 | .append(ToString(compression_options.zstd_max_train_bytes)) | |
530 | .append("; "); | |
531 | result.append("enabled=") | |
532 | .append(ToString(compression_options.enabled)) | |
533 | .append("; "); | |
534 | return result; | |
535 | } | |
536 | ||
7c673cae FG |
537 | // compress_format_version can have two values: |
538 | // 1 -- decompressed sizes for BZip2 and Zlib are not included in the compressed | |
539 | // block. Also, decompressed sizes for LZ4 are encoded in platform-dependent | |
540 | // way. | |
541 | // 2 -- Zlib, BZip2 and LZ4 encode decompressed size as Varint32 just before the | |
542 | // start of compressed block. Snappy format is the same as version 1. | |
543 | ||
494da23a TL |
544 | inline bool Snappy_Compress(const CompressionInfo& /*info*/, const char* input, |
545 | size_t length, ::std::string* output) { | |
7c673cae FG |
546 | #ifdef SNAPPY |
547 | output->resize(snappy::MaxCompressedLength(length)); | |
548 | size_t outlen; | |
549 | snappy::RawCompress(input, length, &(*output)[0], &outlen); | |
550 | output->resize(outlen); | |
551 | return true; | |
11fdf7f2 TL |
552 | #else |
553 | (void)input; | |
554 | (void)length; | |
555 | (void)output; | |
7c673cae | 556 | return false; |
11fdf7f2 | 557 | #endif |
7c673cae FG |
558 | } |
559 | ||
// Forwards to snappy::GetUncompressedLength for the compressed buffer
// `input[0, length)`, storing the length in `*result` and passing on its
// return value. Without SNAPPY compiled in, `*result` is left untouched and
// false is returned.
inline bool Snappy_GetUncompressedLength(const char* input, size_t length,
                                         size_t* result) {
#ifndef SNAPPY
  (void)input;
  (void)length;
  (void)result;
  return false;
#else
  return snappy::GetUncompressedLength(input, length, result);
#endif
}
571 | ||
// Forwards to snappy::RawUncompress to decompress `input[0, length)` into
// `output`, passing on its return value. Without SNAPPY compiled in, `output`
// is left untouched and false is returned.
inline bool Snappy_Uncompress(const char* input, size_t length, char* output) {
#ifndef SNAPPY
  (void)input;
  (void)length;
  (void)output;
  return false;
#else
  return snappy::RawUncompress(input, length, output);
#endif
}
582 | ||
583 | namespace compression { | |
584 | // returns size | |
585 | inline size_t PutDecompressedSizeInfo(std::string* output, uint32_t length) { | |
586 | PutVarint32(output, length); | |
587 | return output->size(); | |
588 | } | |
589 | ||
590 | inline bool GetDecompressedSizeInfo(const char** input_data, | |
591 | size_t* input_length, | |
592 | uint32_t* output_len) { | |
593 | auto new_input_data = | |
594 | GetVarint32Ptr(*input_data, *input_data + *input_length, output_len); | |
595 | if (new_input_data == nullptr) { | |
596 | return false; | |
597 | } | |
598 | *input_length -= (new_input_data - *input_data); | |
599 | *input_data = new_input_data; | |
600 | return true; | |
601 | } | |
602 | } // namespace compression | |
603 | ||
604 | // compress_format_version == 1 -- decompressed size is not included in the | |
605 | // block header | |
606 | // compress_format_version == 2 -- decompressed size is included in the block | |
607 | // header in varint32 format | |
608 | // @param compression_dict Data for presetting the compression library's | |
609 | // dictionary. | |
494da23a | 610 | inline bool Zlib_Compress(const CompressionInfo& info, |
7c673cae | 611 | uint32_t compress_format_version, const char* input, |
11fdf7f2 | 612 | size_t length, ::std::string* output) { |
7c673cae FG |
613 | #ifdef ZLIB |
614 | if (length > std::numeric_limits<uint32_t>::max()) { | |
615 | // Can't compress more than 4GB | |
616 | return false; | |
617 | } | |
618 | ||
619 | size_t output_header_len = 0; | |
620 | if (compress_format_version == 2) { | |
621 | output_header_len = compression::PutDecompressedSizeInfo( | |
622 | output, static_cast<uint32_t>(length)); | |
623 | } | |
624 | // Resize output to be the plain data length. | |
625 | // This may not be big enough if the compression actually expands data. | |
626 | output->resize(output_header_len + length); | |
627 | ||
628 | // The memLevel parameter specifies how much memory should be allocated for | |
629 | // the internal compression state. | |
630 | // memLevel=1 uses minimum memory but is slow and reduces compression ratio. | |
631 | // memLevel=9 uses maximum memory for optimal speed. | |
632 | // The default value is 8. See zconf.h for more details. | |
633 | static const int memLevel = 8; | |
11fdf7f2 | 634 | int level; |
494da23a | 635 | if (info.options().level == CompressionOptions::kDefaultCompressionLevel) { |
11fdf7f2 TL |
636 | level = Z_DEFAULT_COMPRESSION; |
637 | } else { | |
494da23a | 638 | level = info.options().level; |
11fdf7f2 | 639 | } |
7c673cae FG |
640 | z_stream _stream; |
641 | memset(&_stream, 0, sizeof(z_stream)); | |
494da23a TL |
642 | int st = deflateInit2(&_stream, level, Z_DEFLATED, info.options().window_bits, |
643 | memLevel, info.options().strategy); | |
7c673cae FG |
644 | if (st != Z_OK) { |
645 | return false; | |
646 | } | |
647 | ||
494da23a TL |
648 | Slice compression_dict = info.dict().GetRawDict(); |
649 | if (compression_dict.size()) { | |
7c673cae | 650 | // Initialize the compression library's dictionary |
494da23a TL |
651 | st = deflateSetDictionary( |
652 | &_stream, reinterpret_cast<const Bytef*>(compression_dict.data()), | |
653 | static_cast<unsigned int>(compression_dict.size())); | |
7c673cae FG |
654 | if (st != Z_OK) { |
655 | deflateEnd(&_stream); | |
656 | return false; | |
657 | } | |
658 | } | |
659 | ||
660 | // Compress the input, and put compressed data in output. | |
11fdf7f2 | 661 | _stream.next_in = (Bytef*)input; |
7c673cae FG |
662 | _stream.avail_in = static_cast<unsigned int>(length); |
663 | ||
664 | // Initialize the output size. | |
665 | _stream.avail_out = static_cast<unsigned int>(length); | |
666 | _stream.next_out = reinterpret_cast<Bytef*>(&(*output)[output_header_len]); | |
667 | ||
668 | bool compressed = false; | |
669 | st = deflate(&_stream, Z_FINISH); | |
670 | if (st == Z_STREAM_END) { | |
671 | compressed = true; | |
672 | output->resize(output->size() - _stream.avail_out); | |
673 | } | |
674 | // The only return value we really care about is Z_STREAM_END. | |
675 | // Z_OK means insufficient output space. This means the compression is | |
676 | // bigger than decompressed size. Just fail the compression in that case. | |
677 | ||
678 | deflateEnd(&_stream); | |
679 | return compressed; | |
11fdf7f2 | 680 | #else |
494da23a | 681 | (void)info; |
11fdf7f2 TL |
682 | (void)compress_format_version; |
683 | (void)input; | |
684 | (void)length; | |
685 | (void)output; | |
7c673cae | 686 | return false; |
11fdf7f2 | 687 | #endif |
7c673cae FG |
688 | } |
689 | ||
690 | // compress_format_version == 1 -- decompressed size is not included in the | |
691 | // block header | |
692 | // compress_format_version == 2 -- decompressed size is included in the block | |
693 | // header in varint32 format | |
694 | // @param compression_dict Data for presetting the compression library's | |
695 | // dictionary. | |
494da23a TL |
696 | inline CacheAllocationPtr Zlib_Uncompress( |
697 | const UncompressionInfo& info, const char* input_data, size_t input_length, | |
698 | int* decompress_size, uint32_t compress_format_version, | |
699 | MemoryAllocator* allocator = nullptr, int windowBits = -14) { | |
7c673cae FG |
700 | #ifdef ZLIB |
701 | uint32_t output_len = 0; | |
702 | if (compress_format_version == 2) { | |
703 | if (!compression::GetDecompressedSizeInfo(&input_data, &input_length, | |
704 | &output_len)) { | |
705 | return nullptr; | |
706 | } | |
707 | } else { | |
708 | // Assume the decompressed data size will 5x of compressed size, but round | |
709 | // to the page size | |
710 | size_t proposed_output_len = ((input_length * 5) & (~(4096 - 1))) + 4096; | |
711 | output_len = static_cast<uint32_t>( | |
712 | std::min(proposed_output_len, | |
713 | static_cast<size_t>(std::numeric_limits<uint32_t>::max()))); | |
714 | } | |
715 | ||
716 | z_stream _stream; | |
717 | memset(&_stream, 0, sizeof(z_stream)); | |
718 | ||
719 | // For raw inflate, the windowBits should be -8..-15. | |
720 | // If windowBits is bigger than zero, it will use either zlib | |
721 | // header or gzip header. Adding 32 to it will do automatic detection. | |
11fdf7f2 TL |
722 | int st = |
723 | inflateInit2(&_stream, windowBits > 0 ? windowBits + 32 : windowBits); | |
7c673cae FG |
724 | if (st != Z_OK) { |
725 | return nullptr; | |
726 | } | |
727 | ||
494da23a TL |
728 | Slice compression_dict = info.dict().GetRawDict(); |
729 | if (compression_dict.size()) { | |
7c673cae | 730 | // Initialize the compression library's dictionary |
494da23a TL |
731 | st = inflateSetDictionary( |
732 | &_stream, reinterpret_cast<const Bytef*>(compression_dict.data()), | |
733 | static_cast<unsigned int>(compression_dict.size())); | |
7c673cae FG |
734 | if (st != Z_OK) { |
735 | return nullptr; | |
736 | } | |
737 | } | |
738 | ||
11fdf7f2 | 739 | _stream.next_in = (Bytef*)input_data; |
7c673cae FG |
740 | _stream.avail_in = static_cast<unsigned int>(input_length); |
741 | ||
494da23a | 742 | auto output = AllocateBlock(output_len, allocator); |
7c673cae | 743 | |
494da23a | 744 | _stream.next_out = (Bytef*)output.get(); |
7c673cae FG |
745 | _stream.avail_out = static_cast<unsigned int>(output_len); |
746 | ||
747 | bool done = false; | |
748 | while (!done) { | |
749 | st = inflate(&_stream, Z_SYNC_FLUSH); | |
750 | switch (st) { | |
751 | case Z_STREAM_END: | |
752 | done = true; | |
753 | break; | |
754 | case Z_OK: { | |
755 | // No output space. Increase the output space by 20%. | |
756 | // We should never run out of output space if | |
757 | // compress_format_version == 2 | |
758 | assert(compress_format_version != 2); | |
759 | size_t old_sz = output_len; | |
11fdf7f2 | 760 | uint32_t output_len_delta = output_len / 5; |
7c673cae | 761 | output_len += output_len_delta < 10 ? 10 : output_len_delta; |
494da23a TL |
762 | auto tmp = AllocateBlock(output_len, allocator); |
763 | memcpy(tmp.get(), output.get(), old_sz); | |
764 | output = std::move(tmp); | |
7c673cae FG |
765 | |
766 | // Set more output. | |
494da23a | 767 | _stream.next_out = (Bytef*)(output.get() + old_sz); |
7c673cae FG |
768 | _stream.avail_out = static_cast<unsigned int>(output_len - old_sz); |
769 | break; | |
770 | } | |
771 | case Z_BUF_ERROR: | |
772 | default: | |
7c673cae FG |
773 | inflateEnd(&_stream); |
774 | return nullptr; | |
775 | } | |
776 | } | |
777 | ||
778 | // If we encoded decompressed block size, we should have no bytes left | |
779 | assert(compress_format_version != 2 || _stream.avail_out == 0); | |
780 | *decompress_size = static_cast<int>(output_len - _stream.avail_out); | |
781 | inflateEnd(&_stream); | |
782 | return output; | |
11fdf7f2 | 783 | #else |
494da23a | 784 | (void)info; |
11fdf7f2 TL |
785 | (void)input_data; |
786 | (void)input_length; | |
787 | (void)decompress_size; | |
788 | (void)compress_format_version; | |
494da23a | 789 | (void)allocator; |
11fdf7f2 | 790 | (void)windowBits; |
7c673cae | 791 | return nullptr; |
11fdf7f2 | 792 | #endif |
7c673cae FG |
793 | } |
794 | ||
795 | // compress_format_version == 1 -- decompressed size is not included in the | |
796 | // block header | |
797 | // compress_format_version == 2 -- decompressed size is included in the block | |
798 | // header in varint32 format | |
494da23a | 799 | inline bool BZip2_Compress(const CompressionInfo& /*info*/, |
11fdf7f2 TL |
800 | uint32_t compress_format_version, const char* input, |
801 | size_t length, ::std::string* output) { | |
7c673cae FG |
802 | #ifdef BZIP2 |
803 | if (length > std::numeric_limits<uint32_t>::max()) { | |
804 | // Can't compress more than 4GB | |
805 | return false; | |
806 | } | |
807 | size_t output_header_len = 0; | |
808 | if (compress_format_version == 2) { | |
809 | output_header_len = compression::PutDecompressedSizeInfo( | |
810 | output, static_cast<uint32_t>(length)); | |
811 | } | |
812 | // Resize output to be the plain data length. | |
813 | // This may not be big enough if the compression actually expands data. | |
814 | output->resize(output_header_len + length); | |
815 | ||
7c673cae FG |
816 | bz_stream _stream; |
817 | memset(&_stream, 0, sizeof(bz_stream)); | |
818 | ||
819 | // Block size 1 is 100K. | |
820 | // 0 is for silent. | |
821 | // 30 is the default workFactor | |
822 | int st = BZ2_bzCompressInit(&_stream, 1, 0, 30); | |
823 | if (st != BZ_OK) { | |
824 | return false; | |
825 | } | |
826 | ||
827 | // Compress the input, and put compressed data in output. | |
11fdf7f2 | 828 | _stream.next_in = (char*)input; |
7c673cae FG |
829 | _stream.avail_in = static_cast<unsigned int>(length); |
830 | ||
831 | // Initialize the output size. | |
832 | _stream.avail_out = static_cast<unsigned int>(length); | |
833 | _stream.next_out = reinterpret_cast<char*>(&(*output)[output_header_len]); | |
834 | ||
835 | bool compressed = false; | |
836 | st = BZ2_bzCompress(&_stream, BZ_FINISH); | |
837 | if (st == BZ_STREAM_END) { | |
838 | compressed = true; | |
839 | output->resize(output->size() - _stream.avail_out); | |
840 | } | |
841 | // The only return value we really care about is BZ_STREAM_END. | |
842 | // BZ_FINISH_OK means insufficient output space. This means the compression | |
843 | // is bigger than decompressed size. Just fail the compression in that case. | |
844 | ||
845 | BZ2_bzCompressEnd(&_stream); | |
846 | return compressed; | |
11fdf7f2 TL |
847 | #else |
848 | (void)compress_format_version; | |
849 | (void)input; | |
850 | (void)length; | |
851 | (void)output; | |
7c673cae | 852 | return false; |
11fdf7f2 | 853 | #endif |
7c673cae FG |
854 | } |
855 | ||
856 | // compress_format_version == 1 -- decompressed size is not included in the | |
857 | // block header | |
858 | // compress_format_version == 2 -- decompressed size is included in the block | |
859 | // header in varint32 format | |
494da23a TL |
860 | inline CacheAllocationPtr BZip2_Uncompress( |
861 | const char* input_data, size_t input_length, int* decompress_size, | |
862 | uint32_t compress_format_version, MemoryAllocator* allocator = nullptr) { | |
7c673cae FG |
863 | #ifdef BZIP2 |
864 | uint32_t output_len = 0; | |
865 | if (compress_format_version == 2) { | |
866 | if (!compression::GetDecompressedSizeInfo(&input_data, &input_length, | |
867 | &output_len)) { | |
868 | return nullptr; | |
869 | } | |
870 | } else { | |
871 | // Assume the decompressed data size will 5x of compressed size, but round | |
872 | // to the next page size | |
873 | size_t proposed_output_len = ((input_length * 5) & (~(4096 - 1))) + 4096; | |
874 | output_len = static_cast<uint32_t>( | |
875 | std::min(proposed_output_len, | |
876 | static_cast<size_t>(std::numeric_limits<uint32_t>::max()))); | |
877 | } | |
878 | ||
879 | bz_stream _stream; | |
880 | memset(&_stream, 0, sizeof(bz_stream)); | |
881 | ||
882 | int st = BZ2_bzDecompressInit(&_stream, 0, 0); | |
883 | if (st != BZ_OK) { | |
884 | return nullptr; | |
885 | } | |
886 | ||
11fdf7f2 | 887 | _stream.next_in = (char*)input_data; |
7c673cae FG |
888 | _stream.avail_in = static_cast<unsigned int>(input_length); |
889 | ||
494da23a | 890 | auto output = AllocateBlock(output_len, allocator); |
7c673cae | 891 | |
494da23a | 892 | _stream.next_out = (char*)output.get(); |
7c673cae FG |
893 | _stream.avail_out = static_cast<unsigned int>(output_len); |
894 | ||
895 | bool done = false; | |
896 | while (!done) { | |
897 | st = BZ2_bzDecompress(&_stream); | |
898 | switch (st) { | |
899 | case BZ_STREAM_END: | |
900 | done = true; | |
901 | break; | |
902 | case BZ_OK: { | |
903 | // No output space. Increase the output space by 20%. | |
904 | // We should never run out of output space if | |
905 | // compress_format_version == 2 | |
906 | assert(compress_format_version != 2); | |
907 | uint32_t old_sz = output_len; | |
908 | output_len = output_len * 1.2; | |
494da23a TL |
909 | auto tmp = AllocateBlock(output_len, allocator); |
910 | memcpy(tmp.get(), output.get(), old_sz); | |
911 | output = std::move(tmp); | |
7c673cae FG |
912 | |
913 | // Set more output. | |
494da23a | 914 | _stream.next_out = (char*)(output.get() + old_sz); |
7c673cae FG |
915 | _stream.avail_out = static_cast<unsigned int>(output_len - old_sz); |
916 | break; | |
917 | } | |
918 | default: | |
7c673cae FG |
919 | BZ2_bzDecompressEnd(&_stream); |
920 | return nullptr; | |
921 | } | |
922 | } | |
923 | ||
924 | // If we encoded decompressed block size, we should have no bytes left | |
925 | assert(compress_format_version != 2 || _stream.avail_out == 0); | |
926 | *decompress_size = static_cast<int>(output_len - _stream.avail_out); | |
927 | BZ2_bzDecompressEnd(&_stream); | |
928 | return output; | |
11fdf7f2 TL |
929 | #else |
930 | (void)input_data; | |
931 | (void)input_length; | |
932 | (void)decompress_size; | |
933 | (void)compress_format_version; | |
494da23a | 934 | (void)allocator; |
7c673cae | 935 | return nullptr; |
11fdf7f2 | 936 | #endif |
7c673cae FG |
937 | } |
938 | ||
939 | // compress_format_version == 1 -- decompressed size is included in the | |
940 | // block header using memcpy, which makes database non-portable) | |
941 | // compress_format_version == 2 -- decompressed size is included in the block | |
942 | // header in varint32 format | |
943 | // @param compression_dict Data for presetting the compression library's | |
944 | // dictionary. | |
494da23a | 945 | inline bool LZ4_Compress(const CompressionInfo& info, |
7c673cae | 946 | uint32_t compress_format_version, const char* input, |
11fdf7f2 | 947 | size_t length, ::std::string* output) { |
7c673cae FG |
948 | #ifdef LZ4 |
949 | if (length > std::numeric_limits<uint32_t>::max()) { | |
950 | // Can't compress more than 4GB | |
951 | return false; | |
952 | } | |
953 | ||
954 | size_t output_header_len = 0; | |
955 | if (compress_format_version == 2) { | |
956 | // new encoding, using varint32 to store size information | |
957 | output_header_len = compression::PutDecompressedSizeInfo( | |
958 | output, static_cast<uint32_t>(length)); | |
959 | } else { | |
960 | // legacy encoding, which is not really portable (depends on big/little | |
961 | // endianness) | |
962 | output_header_len = 8; | |
963 | output->resize(output_header_len); | |
964 | char* p = const_cast<char*>(output->c_str()); | |
965 | memcpy(p, &length, sizeof(length)); | |
966 | } | |
967 | int compress_bound = LZ4_compressBound(static_cast<int>(length)); | |
968 | output->resize(static_cast<size_t>(output_header_len + compress_bound)); | |
969 | ||
970 | int outlen; | |
971 | #if LZ4_VERSION_NUMBER >= 10400 // r124+ | |
972 | LZ4_stream_t* stream = LZ4_createStream(); | |
494da23a TL |
973 | Slice compression_dict = info.dict().GetRawDict(); |
974 | if (compression_dict.size()) { | |
975 | LZ4_loadDict(stream, compression_dict.data(), | |
976 | static_cast<int>(compression_dict.size())); | |
7c673cae FG |
977 | } |
978 | #if LZ4_VERSION_NUMBER >= 10700 // r129+ | |
11fdf7f2 TL |
979 | outlen = |
980 | LZ4_compress_fast_continue(stream, input, &(*output)[output_header_len], | |
981 | static_cast<int>(length), compress_bound, 1); | |
7c673cae FG |
982 | #else // up to r128 |
983 | outlen = LZ4_compress_limitedOutput_continue( | |
984 | stream, input, &(*output)[output_header_len], static_cast<int>(length), | |
985 | compress_bound); | |
986 | #endif | |
987 | LZ4_freeStream(stream); | |
988 | #else // up to r123 | |
989 | outlen = LZ4_compress_limitedOutput(input, &(*output)[output_header_len], | |
990 | static_cast<int>(length), compress_bound); | |
494da23a | 991 | (void)ctx; |
7c673cae FG |
992 | #endif // LZ4_VERSION_NUMBER >= 10400 |
993 | ||
994 | if (outlen == 0) { | |
995 | return false; | |
996 | } | |
997 | output->resize(static_cast<size_t>(output_header_len + outlen)); | |
998 | return true; | |
11fdf7f2 | 999 | #else // LZ4 |
494da23a | 1000 | (void)info; |
11fdf7f2 TL |
1001 | (void)compress_format_version; |
1002 | (void)input; | |
1003 | (void)length; | |
1004 | (void)output; | |
7c673cae | 1005 | return false; |
11fdf7f2 | 1006 | #endif |
7c673cae FG |
1007 | } |
1008 | ||
1009 | // compress_format_version == 1 -- decompressed size is included in the | |
1010 | // block header using memcpy, which makes database non-portable) | |
1011 | // compress_format_version == 2 -- decompressed size is included in the block | |
1012 | // header in varint32 format | |
1013 | // @param compression_dict Data for presetting the compression library's | |
1014 | // dictionary. | |
494da23a TL |
1015 | inline CacheAllocationPtr LZ4_Uncompress(const UncompressionInfo& info, |
1016 | const char* input_data, | |
1017 | size_t input_length, | |
1018 | int* decompress_size, | |
1019 | uint32_t compress_format_version, | |
1020 | MemoryAllocator* allocator = nullptr) { | |
7c673cae FG |
1021 | #ifdef LZ4 |
1022 | uint32_t output_len = 0; | |
1023 | if (compress_format_version == 2) { | |
1024 | // new encoding, using varint32 to store size information | |
1025 | if (!compression::GetDecompressedSizeInfo(&input_data, &input_length, | |
1026 | &output_len)) { | |
1027 | return nullptr; | |
1028 | } | |
1029 | } else { | |
1030 | // legacy encoding, which is not really portable (depends on big/little | |
1031 | // endianness) | |
1032 | if (input_length < 8) { | |
1033 | return nullptr; | |
1034 | } | |
1035 | memcpy(&output_len, input_data, sizeof(output_len)); | |
1036 | input_length -= 8; | |
1037 | input_data += 8; | |
1038 | } | |
1039 | ||
494da23a | 1040 | auto output = AllocateBlock(output_len, allocator); |
7c673cae FG |
1041 | #if LZ4_VERSION_NUMBER >= 10400 // r124+ |
1042 | LZ4_streamDecode_t* stream = LZ4_createStreamDecode(); | |
494da23a TL |
1043 | Slice compression_dict = info.dict().GetRawDict(); |
1044 | if (compression_dict.size()) { | |
1045 | LZ4_setStreamDecode(stream, compression_dict.data(), | |
1046 | static_cast<int>(compression_dict.size())); | |
7c673cae FG |
1047 | } |
1048 | *decompress_size = LZ4_decompress_safe_continue( | |
494da23a | 1049 | stream, input_data, output.get(), static_cast<int>(input_length), |
7c673cae FG |
1050 | static_cast<int>(output_len)); |
1051 | LZ4_freeStreamDecode(stream); | |
1052 | #else // up to r123 | |
494da23a TL |
1053 | *decompress_size = LZ4_decompress_safe(input_data, output.get(), |
1054 | static_cast<int>(input_length), | |
1055 | static_cast<int>(output_len)); | |
1056 | (void)ctx; | |
7c673cae FG |
1057 | #endif // LZ4_VERSION_NUMBER >= 10400 |
1058 | ||
1059 | if (*decompress_size < 0) { | |
7c673cae FG |
1060 | return nullptr; |
1061 | } | |
1062 | assert(*decompress_size == static_cast<int>(output_len)); | |
1063 | return output; | |
11fdf7f2 | 1064 | #else // LZ4 |
494da23a | 1065 | (void)info; |
11fdf7f2 TL |
1066 | (void)input_data; |
1067 | (void)input_length; | |
1068 | (void)decompress_size; | |
1069 | (void)compress_format_version; | |
494da23a | 1070 | (void)allocator; |
7c673cae | 1071 | return nullptr; |
11fdf7f2 | 1072 | #endif |
7c673cae FG |
1073 | } |
1074 | ||
1075 | // compress_format_version == 1 -- decompressed size is included in the | |
1076 | // block header using memcpy, which makes database non-portable) | |
1077 | // compress_format_version == 2 -- decompressed size is included in the block | |
1078 | // header in varint32 format | |
1079 | // @param compression_dict Data for presetting the compression library's | |
1080 | // dictionary. | |
494da23a | 1081 | inline bool LZ4HC_Compress(const CompressionInfo& info, |
7c673cae | 1082 | uint32_t compress_format_version, const char* input, |
11fdf7f2 | 1083 | size_t length, ::std::string* output) { |
7c673cae FG |
1084 | #ifdef LZ4 |
1085 | if (length > std::numeric_limits<uint32_t>::max()) { | |
1086 | // Can't compress more than 4GB | |
1087 | return false; | |
1088 | } | |
1089 | ||
1090 | size_t output_header_len = 0; | |
1091 | if (compress_format_version == 2) { | |
1092 | // new encoding, using varint32 to store size information | |
1093 | output_header_len = compression::PutDecompressedSizeInfo( | |
1094 | output, static_cast<uint32_t>(length)); | |
1095 | } else { | |
1096 | // legacy encoding, which is not really portable (depends on big/little | |
1097 | // endianness) | |
1098 | output_header_len = 8; | |
1099 | output->resize(output_header_len); | |
1100 | char* p = const_cast<char*>(output->c_str()); | |
1101 | memcpy(p, &length, sizeof(length)); | |
1102 | } | |
1103 | int compress_bound = LZ4_compressBound(static_cast<int>(length)); | |
1104 | output->resize(static_cast<size_t>(output_header_len + compress_bound)); | |
1105 | ||
1106 | int outlen; | |
11fdf7f2 | 1107 | int level; |
494da23a | 1108 | if (info.options().level == CompressionOptions::kDefaultCompressionLevel) { |
11fdf7f2 TL |
1109 | level = 0; // lz4hc.h says any value < 1 will be sanitized to default |
1110 | } else { | |
494da23a | 1111 | level = info.options().level; |
11fdf7f2 | 1112 | } |
7c673cae FG |
1113 | #if LZ4_VERSION_NUMBER >= 10400 // r124+ |
1114 | LZ4_streamHC_t* stream = LZ4_createStreamHC(); | |
11fdf7f2 | 1115 | LZ4_resetStreamHC(stream, level); |
494da23a | 1116 | Slice compression_dict = info.dict().GetRawDict(); |
7c673cae | 1117 | const char* compression_dict_data = |
494da23a TL |
1118 | compression_dict.size() > 0 ? compression_dict.data() : nullptr; |
1119 | size_t compression_dict_size = compression_dict.size(); | |
7c673cae FG |
1120 | LZ4_loadDictHC(stream, compression_dict_data, |
1121 | static_cast<int>(compression_dict_size)); | |
1122 | ||
1123 | #if LZ4_VERSION_NUMBER >= 10700 // r129+ | |
1124 | outlen = | |
1125 | LZ4_compress_HC_continue(stream, input, &(*output)[output_header_len], | |
1126 | static_cast<int>(length), compress_bound); | |
1127 | #else // r124-r128 | |
1128 | outlen = LZ4_compressHC_limitedOutput_continue( | |
1129 | stream, input, &(*output)[output_header_len], static_cast<int>(length), | |
1130 | compress_bound); | |
1131 | #endif // LZ4_VERSION_NUMBER >= 10700 | |
1132 | LZ4_freeStreamHC(stream); | |
1133 | ||
1134 | #elif LZ4_VERSION_MAJOR // r113-r123 | |
1135 | outlen = LZ4_compressHC2_limitedOutput(input, &(*output)[output_header_len], | |
1136 | static_cast<int>(length), | |
11fdf7f2 | 1137 | compress_bound, level); |
7c673cae FG |
1138 | #else // up to r112 |
1139 | outlen = | |
1140 | LZ4_compressHC_limitedOutput(input, &(*output)[output_header_len], | |
1141 | static_cast<int>(length), compress_bound); | |
1142 | #endif // LZ4_VERSION_NUMBER >= 10400 | |
1143 | ||
1144 | if (outlen == 0) { | |
1145 | return false; | |
1146 | } | |
1147 | output->resize(static_cast<size_t>(output_header_len + outlen)); | |
1148 | return true; | |
11fdf7f2 | 1149 | #else // LZ4 |
494da23a | 1150 | (void)info; |
11fdf7f2 TL |
1151 | (void)compress_format_version; |
1152 | (void)input; | |
1153 | (void)length; | |
1154 | (void)output; | |
7c673cae | 1155 | return false; |
11fdf7f2 | 1156 | #endif |
7c673cae FG |
1157 | } |
1158 | ||
// Compresses `length` bytes at `input` into `*output` by forwarding to
// port::xpress::Compress. Always returns false when built without XPRESS.
inline bool XPRESS_Compress(const char* input, size_t length,
                            std::string* output) {
#ifdef XPRESS
  return port::xpress::Compress(input, length, output);
#else
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif
}
7c673cae | 1170 | |
// Decompresses `input_length` bytes at `input_data` by forwarding to
// port::xpress::Decompress, which receives `decompress_size` as well.
// Always returns nullptr when built without XPRESS.
inline char* XPRESS_Uncompress(const char* input_data, size_t input_length,
                               int* decompress_size) {
#ifdef XPRESS
  return port::xpress::Decompress(input_data, input_length, decompress_size);
#else
  (void)input_data;
  (void)input_length;
  (void)decompress_size;
  return nullptr;
#endif
}
7c673cae | 1183 | |
494da23a | 1184 | inline bool ZSTD_Compress(const CompressionInfo& info, const char* input, |
11fdf7f2 | 1185 | size_t length, ::std::string* output) { |
7c673cae FG |
1186 | #ifdef ZSTD |
1187 | if (length > std::numeric_limits<uint32_t>::max()) { | |
1188 | // Can't compress more than 4GB | |
1189 | return false; | |
1190 | } | |
1191 | ||
1192 | size_t output_header_len = compression::PutDecompressedSizeInfo( | |
1193 | output, static_cast<uint32_t>(length)); | |
1194 | ||
1195 | size_t compressBound = ZSTD_compressBound(length); | |
1196 | output->resize(static_cast<size_t>(output_header_len + compressBound)); | |
11fdf7f2 TL |
1197 | size_t outlen = 0; |
1198 | int level; | |
494da23a | 1199 | if (info.options().level == CompressionOptions::kDefaultCompressionLevel) { |
11fdf7f2 TL |
1200 | // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see |
1201 | // https://github.com/facebook/zstd/issues/1148 | |
1202 | level = 3; | |
1203 | } else { | |
494da23a | 1204 | level = info.options().level; |
11fdf7f2 | 1205 | } |
7c673cae | 1206 | #if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+ |
494da23a | 1207 | ZSTD_CCtx* context = info.context().ZSTDPreallocCtx(); |
11fdf7f2 | 1208 | assert(context != nullptr); |
494da23a TL |
1209 | #if ZSTD_VERSION_NUMBER >= 700 // v0.7.0+ |
1210 | if (info.dict().GetDigestedZstdCDict() != nullptr) { | |
1211 | outlen = ZSTD_compress_usingCDict(context, &(*output)[output_header_len], | |
1212 | compressBound, input, length, | |
1213 | info.dict().GetDigestedZstdCDict()); | |
1214 | } | |
1215 | #endif // ZSTD_VERSION_NUMBER >= 700 | |
1216 | if (outlen == 0) { | |
1217 | outlen = ZSTD_compress_usingDict(context, &(*output)[output_header_len], | |
1218 | compressBound, input, length, | |
1219 | info.dict().GetRawDict().data(), | |
1220 | info.dict().GetRawDict().size(), level); | |
1221 | } | |
11fdf7f2 | 1222 | #else // up to v0.4.x |
7c673cae | 1223 | outlen = ZSTD_compress(&(*output)[output_header_len], compressBound, input, |
11fdf7f2 | 1224 | length, level); |
7c673cae FG |
1225 | #endif // ZSTD_VERSION_NUMBER >= 500 |
1226 | if (outlen == 0) { | |
1227 | return false; | |
1228 | } | |
1229 | output->resize(output_header_len + outlen); | |
1230 | return true; | |
11fdf7f2 | 1231 | #else // ZSTD |
494da23a | 1232 | (void)info; |
11fdf7f2 TL |
1233 | (void)input; |
1234 | (void)length; | |
1235 | (void)output; | |
7c673cae | 1236 | return false; |
11fdf7f2 | 1237 | #endif |
7c673cae FG |
1238 | } |
1239 | ||
1240 | // @param compression_dict Data for presetting the compression library's | |
1241 | // dictionary. | |
494da23a TL |
1242 | inline CacheAllocationPtr ZSTD_Uncompress( |
1243 | const UncompressionInfo& info, const char* input_data, size_t input_length, | |
1244 | int* decompress_size, MemoryAllocator* allocator = nullptr) { | |
7c673cae FG |
1245 | #ifdef ZSTD |
1246 | uint32_t output_len = 0; | |
1247 | if (!compression::GetDecompressedSizeInfo(&input_data, &input_length, | |
1248 | &output_len)) { | |
1249 | return nullptr; | |
1250 | } | |
1251 | ||
494da23a TL |
1252 | auto output = AllocateBlock(output_len, allocator); |
1253 | size_t actual_output_length = 0; | |
7c673cae | 1254 | #if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+ |
494da23a | 1255 | ZSTD_DCtx* context = info.context().GetZSTDContext(); |
11fdf7f2 | 1256 | assert(context != nullptr); |
494da23a TL |
1257 | #ifdef ROCKSDB_ZSTD_DDICT |
1258 | if (info.dict().GetDigestedZstdDDict() != nullptr) { | |
1259 | actual_output_length = ZSTD_decompress_usingDDict( | |
1260 | context, output.get(), output_len, input_data, input_length, | |
1261 | info.dict().GetDigestedZstdDDict()); | |
1262 | } | |
1263 | #endif // ROCKSDB_ZSTD_DDICT | |
1264 | if (actual_output_length == 0) { | |
1265 | actual_output_length = ZSTD_decompress_usingDict( | |
1266 | context, output.get(), output_len, input_data, input_length, | |
1267 | info.dict().GetRawDict().data(), info.dict().GetRawDict().size()); | |
1268 | } | |
11fdf7f2 | 1269 | #else // up to v0.4.x |
494da23a | 1270 | (void)info; |
7c673cae | 1271 | actual_output_length = |
494da23a | 1272 | ZSTD_decompress(output.get(), output_len, input_data, input_length); |
7c673cae FG |
1273 | #endif // ZSTD_VERSION_NUMBER >= 500 |
1274 | assert(actual_output_length == output_len); | |
1275 | *decompress_size = static_cast<int>(actual_output_length); | |
1276 | return output; | |
11fdf7f2 | 1277 | #else // ZSTD |
494da23a | 1278 | (void)info; |
11fdf7f2 TL |
1279 | (void)input_data; |
1280 | (void)input_length; | |
1281 | (void)decompress_size; | |
494da23a | 1282 | (void)allocator; |
7c673cae | 1283 | return nullptr; |
11fdf7f2 TL |
1284 | #endif |
1285 | } | |
1286 | ||
494da23a TL |
// Reports whether ZSTD dictionary training can be used: true when built with
// ZSTD and ZSTD_versionNumber() is at least 10103 (v1.1.3), false otherwise.
inline bool ZSTD_TrainDictionarySupported() {
#ifndef ZSTD
  return false;
#else
  // Dictionary trainer is available since v0.6.1 for static linking, but not
  // available for dynamic linking until v1.1.3. For now we enable the feature
  // in v1.1.3+ only.
  const unsigned kFirstSupportedVersion = 10103;
  return ZSTD_versionNumber() >= kFirstSupportedVersion;
#endif
}
1297 | ||
11fdf7f2 TL |
// Trains a ZSTD dictionary of at most `max_dict_bytes` bytes from `samples`,
// a concatenation of samples whose individual lengths are listed in
// `sample_lens`. Returns the dictionary bytes, or an empty string when
// `samples` is empty or ZDICT reports an error.
//
// Dictionary trainer is available since v0.6.1 for static linking, but not
// available for dynamic linking until v1.1.3. For now the feature is enabled
// in v1.1.3+ only; with older headers this function asserts and returns "".
inline std::string ZSTD_TrainDictionary(const std::string& samples,
                                        const std::vector<size_t>& sample_lens,
                                        size_t max_dict_bytes) {
#if ZSTD_VERSION_NUMBER >= 10103  // v1.1.3+
  assert(samples.empty() == sample_lens.empty());
  if (samples.empty()) {
    return "";
  }

  // Train into a buffer of the maximum size, then shrink to what was used.
  std::string dictionary(max_dict_bytes, '\0');
  const unsigned num_samples = static_cast<unsigned>(sample_lens.size());
  const size_t trained_len =
      ZDICT_trainFromBuffer(&dictionary[0], max_dict_bytes, samples.data(),
                            sample_lens.data(), num_samples);
  if (ZDICT_isError(trained_len)) {
    return "";
  }
  assert(trained_len <= max_dict_bytes);
  dictionary.resize(trained_len);
  return dictionary;
#else   // up to v1.1.2
  assert(false);
  (void)samples;
  (void)sample_lens;
  (void)max_dict_bytes;
  return "";
#endif  // ZSTD_VERSION_NUMBER >= 10103
}
1327 | ||
// Convenience overload for fixed-size samples: treats `samples` as a sequence
// of samples of (1 << sample_len_shift) bytes each and forwards to the
// overload that takes explicit sample lengths.
//
// Enabled only when building against ZSTD v1.1.3+; with older headers this
// function asserts and returns "".
inline std::string ZSTD_TrainDictionary(const std::string& samples,
                                        size_t sample_len_shift,
                                        size_t max_dict_bytes) {
#if ZSTD_VERSION_NUMBER >= 10103  // v1.1.3+
  const size_t bytes_per_sample = size_t(1) << sample_len_shift;
  // The shift drops a potential partial sample at the end of "samples".
  const size_t sample_count = samples.size() >> sample_len_shift;
  std::vector<size_t> lens(sample_count, bytes_per_sample);
  return ZSTD_TrainDictionary(samples, lens, max_dict_bytes);
#else   // up to v1.1.2
  assert(false);
  (void)samples;
  (void)sample_len_shift;
  (void)max_dict_bytes;
  return "";
#endif  // ZSTD_VERSION_NUMBER >= 10103
}
1346 | ||
1347 | } // namespace rocksdb |