]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/util/compression.h
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / util / compression.h
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5//
6// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7// Use of this source code is governed by a BSD-style license that can be
8// found in the LICENSE file. See the AUTHORS file for names of contributors.
9//
10#pragma once
11
12#include <algorithm>
13#include <limits>
494da23a
TL
14#ifdef ROCKSDB_MALLOC_USABLE_SIZE
15#ifdef OS_FREEBSD
16#include <malloc_np.h>
17#else // OS_FREEBSD
18#include <malloc.h>
19#endif // OS_FREEBSD
20#endif // ROCKSDB_MALLOC_USABLE_SIZE
7c673cae
FG
21#include <string>
22
23#include "rocksdb/options.h"
494da23a 24#include "rocksdb/table.h"
7c673cae 25#include "util/coding.h"
11fdf7f2 26#include "util/compression_context_cache.h"
494da23a
TL
27#include "util/memory_allocator.h"
28#include "util/string_util.h"
7c673cae
FG
29
30#ifdef SNAPPY
31#include <snappy.h>
32#endif
33
34#ifdef ZLIB
35#include <zlib.h>
36#endif
37
38#ifdef BZIP2
39#include <bzlib.h>
40#endif
41
42#if defined(LZ4)
43#include <lz4.h>
44#include <lz4hc.h>
45#endif
46
47#if defined(ZSTD)
48#include <zstd.h>
11fdf7f2
TL
49#if ZSTD_VERSION_NUMBER >= 10103 // v1.1.3+
50#include <zdict.h>
51#endif // ZSTD_VERSION_NUMBER >= 10103
52namespace rocksdb {
53// Need this for the context allocation override
54// On windows we need to do this explicitly
55#if (ZSTD_VERSION_NUMBER >= 500)
56#if defined(ROCKSDB_JEMALLOC) && defined(OS_WIN) && \
57 defined(ZSTD_STATIC_LINKING_ONLY)
58#define ROCKSDB_ZSTD_CUSTOM_MEM
59namespace port {
60ZSTD_customMem GetJeZstdAllocationOverrides();
61} // namespace port
62#endif // defined(ROCKSDB_JEMALLOC) && defined(OS_WIN) &&
63 // defined(ZSTD_STATIC_LINKING_ONLY)
64
494da23a
TL
65// We require `ZSTD_sizeof_DDict` and `ZSTD_createDDict_byReference` to use
66// `ZSTD_DDict`. The former was introduced in v1.0.0 and the latter was
67// introduced in v1.1.3. But an important bug fix for `ZSTD_sizeof_DDict` came
68// in v1.1.4, so that is the version we require. As of today's latest version
69// (v1.3.8), they are both still in the experimental API, which means they are
70// only exported when the compiler flag `ZSTD_STATIC_LINKING_ONLY` is set.
71#if defined(ZSTD_STATIC_LINKING_ONLY) && ZSTD_VERSION_NUMBER >= 10104
72#define ROCKSDB_ZSTD_DDICT
73#endif // defined(ZSTD_STATIC_LINKING_ONLY) && ZSTD_VERSION_NUMBER >= 10104
74
11fdf7f2
TL
75// Cached data represents a portion that can be re-used
76// If, in the future we have more than one native context to
77// cache we can arrange this as a tuple
78class ZSTDUncompressCachedData {
79 public:
80 using ZSTDNativeContext = ZSTD_DCtx*;
81 ZSTDUncompressCachedData() {}
82 // Init from cache
83 ZSTDUncompressCachedData(const ZSTDUncompressCachedData& o) = delete;
84 ZSTDUncompressCachedData& operator=(const ZSTDUncompressCachedData&) = delete;
85 ZSTDUncompressCachedData(ZSTDUncompressCachedData&& o) ROCKSDB_NOEXCEPT
86 : ZSTDUncompressCachedData() {
87 *this = std::move(o);
88 }
89 ZSTDUncompressCachedData& operator=(ZSTDUncompressCachedData&& o)
90 ROCKSDB_NOEXCEPT {
91 assert(zstd_ctx_ == nullptr);
92 std::swap(zstd_ctx_, o.zstd_ctx_);
93 std::swap(cache_idx_, o.cache_idx_);
94 return *this;
95 }
96 ZSTDNativeContext Get() const { return zstd_ctx_; }
97 int64_t GetCacheIndex() const { return cache_idx_; }
98 void CreateIfNeeded() {
99 if (zstd_ctx_ == nullptr) {
100#ifdef ROCKSDB_ZSTD_CUSTOM_MEM
101 zstd_ctx_ =
102 ZSTD_createDCtx_advanced(port::GetJeZstdAllocationOverrides());
103#else // ROCKSDB_ZSTD_CUSTOM_MEM
104 zstd_ctx_ = ZSTD_createDCtx();
105#endif // ROCKSDB_ZSTD_CUSTOM_MEM
106 cache_idx_ = -1;
107 }
108 }
109 void InitFromCache(const ZSTDUncompressCachedData& o, int64_t idx) {
110 zstd_ctx_ = o.zstd_ctx_;
111 cache_idx_ = idx;
112 }
113 ~ZSTDUncompressCachedData() {
114 if (zstd_ctx_ != nullptr && cache_idx_ == -1) {
115 ZSTD_freeDCtx(zstd_ctx_);
116 }
117 }
118
119 private:
120 ZSTDNativeContext zstd_ctx_ = nullptr;
121 int64_t cache_idx_ = -1; // -1 means this instance owns the context
122};
123#endif // (ZSTD_VERSION_NUMBER >= 500)
124} // namespace rocksdb
125#endif // ZSTD
126
127#if !(defined ZSTD) || !(ZSTD_VERSION_NUMBER >= 500)
128namespace rocksdb {
129class ZSTDUncompressCachedData {
130 void* padding; // unused
131 public:
132 using ZSTDNativeContext = void*;
133 ZSTDUncompressCachedData() {}
134 ZSTDUncompressCachedData(const ZSTDUncompressCachedData&) {}
135 ZSTDUncompressCachedData& operator=(const ZSTDUncompressCachedData&) = delete;
136 ZSTDUncompressCachedData(ZSTDUncompressCachedData&&)
137 ROCKSDB_NOEXCEPT = default;
138 ZSTDUncompressCachedData& operator=(ZSTDUncompressCachedData&&)
139 ROCKSDB_NOEXCEPT = default;
140 ZSTDNativeContext Get() const { return nullptr; }
141 int64_t GetCacheIndex() const { return -1; }
142 void CreateIfNeeded() {}
143 void InitFromCache(const ZSTDUncompressCachedData&, int64_t) {}
144 private:
145 void ignore_padding__() { padding = nullptr; }
146};
147} // namespace rocksdb
7c673cae
FG
148#endif
149
150#if defined(XPRESS)
151#include "port/xpress.h"
152#endif
153
154namespace rocksdb {
155
494da23a
TL
156// Holds dictionary and related data, like ZSTD's digested compression
157// dictionary.
158struct CompressionDict {
159#if ZSTD_VERSION_NUMBER >= 700
160 ZSTD_CDict* zstd_cdict_ = nullptr;
161#endif // ZSTD_VERSION_NUMBER >= 700
162 std::string dict_;
163
164 public:
165#if ZSTD_VERSION_NUMBER >= 700
166 CompressionDict(std::string dict, CompressionType type, int level) {
167#else // ZSTD_VERSION_NUMBER >= 700
168 CompressionDict(std::string dict, CompressionType /*type*/, int /*level*/) {
169#endif // ZSTD_VERSION_NUMBER >= 700
170 dict_ = std::move(dict);
171#if ZSTD_VERSION_NUMBER >= 700
172 zstd_cdict_ = nullptr;
173 if (!dict_.empty() && (type == kZSTD || type == kZSTDNotFinalCompression)) {
174 if (level == CompressionOptions::kDefaultCompressionLevel) {
175 // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see
176 // https://github.com/facebook/zstd/issues/1148
177 level = 3;
178 }
179 // Should be safe (but slower) if below call fails as we'll use the
180 // raw dictionary to compress.
181 zstd_cdict_ = ZSTD_createCDict(dict_.data(), dict_.size(), level);
182 assert(zstd_cdict_ != nullptr);
183 }
184#endif // ZSTD_VERSION_NUMBER >= 700
185 }
186
187 ~CompressionDict() {
188#if ZSTD_VERSION_NUMBER >= 700
189 size_t res = 0;
190 if (zstd_cdict_ != nullptr) {
191 res = ZSTD_freeCDict(zstd_cdict_);
192 }
193 assert(res == 0); // Last I checked they can't fail
194 (void)res; // prevent unused var warning
195#endif // ZSTD_VERSION_NUMBER >= 700
196 }
197
198#if ZSTD_VERSION_NUMBER >= 700
199 const ZSTD_CDict* GetDigestedZstdCDict() const { return zstd_cdict_; }
200#endif // ZSTD_VERSION_NUMBER >= 700
201
202 Slice GetRawDict() const { return dict_; }
203
204 static const CompressionDict& GetEmptyDict() {
205 static CompressionDict empty_dict{};
206 return empty_dict;
207 }
208
209 CompressionDict() = default;
210 // Disable copy/move
211 CompressionDict(const CompressionDict&) = delete;
212 CompressionDict& operator=(const CompressionDict&) = delete;
213 CompressionDict(CompressionDict&&) = delete;
214 CompressionDict& operator=(CompressionDict&&) = delete;
215};
216
217// Holds dictionary and related data, like ZSTD's digested uncompression
218// dictionary.
219struct UncompressionDict {
220#ifdef ROCKSDB_ZSTD_DDICT
221 ZSTD_DDict* zstd_ddict_;
222#endif // ROCKSDB_ZSTD_DDICT
223 // Block containing the data for the compression dictionary. It may be
224 // redundant with the data held in `zstd_ddict_`.
225 std::string dict_;
226 // This `Statistics` pointer is intended to be used upon block cache eviction,
227 // so only needs to be populated on `UncompressionDict`s that'll be inserted
228 // into block cache.
229 Statistics* statistics_;
230
231#ifdef ROCKSDB_ZSTD_DDICT
232 UncompressionDict(std::string dict, bool using_zstd,
233 Statistics* _statistics = nullptr) {
234#else // ROCKSDB_ZSTD_DDICT
235 UncompressionDict(std::string dict, bool /*using_zstd*/,
236 Statistics* _statistics = nullptr) {
237#endif // ROCKSDB_ZSTD_DDICT
238 dict_ = std::move(dict);
239 statistics_ = _statistics;
240#ifdef ROCKSDB_ZSTD_DDICT
241 zstd_ddict_ = nullptr;
242 if (!dict_.empty() && using_zstd) {
243 zstd_ddict_ = ZSTD_createDDict_byReference(dict_.data(), dict_.size());
244 assert(zstd_ddict_ != nullptr);
245 }
246#endif // ROCKSDB_ZSTD_DDICT
247 }
248
249 ~UncompressionDict() {
250#ifdef ROCKSDB_ZSTD_DDICT
251 size_t res = 0;
252 if (zstd_ddict_ != nullptr) {
253 res = ZSTD_freeDDict(zstd_ddict_);
254 }
255 assert(res == 0); // Last I checked they can't fail
256 (void)res; // prevent unused var warning
257#endif // ROCKSDB_ZSTD_DDICT
258 }
259
260#ifdef ROCKSDB_ZSTD_DDICT
261 const ZSTD_DDict* GetDigestedZstdDDict() const { return zstd_ddict_; }
262#endif // ROCKSDB_ZSTD_DDICT
263
264 Slice GetRawDict() const { return dict_; }
265
266 static const UncompressionDict& GetEmptyDict() {
267 static UncompressionDict empty_dict{};
268 return empty_dict;
269 }
270
271 Statistics* statistics() const { return statistics_; }
272
273 size_t ApproximateMemoryUsage() {
274 size_t usage = 0;
275 usage += sizeof(struct UncompressionDict);
276#ifdef ROCKSDB_ZSTD_DDICT
277 usage += ZSTD_sizeof_DDict(zstd_ddict_);
278#endif // ROCKSDB_ZSTD_DDICT
279 usage += dict_.size();
280 return usage;
281 }
282
283 UncompressionDict() = default;
284 // Disable copy/move
285 UncompressionDict(const CompressionDict&) = delete;
286 UncompressionDict& operator=(const CompressionDict&) = delete;
287 UncompressionDict(CompressionDict&&) = delete;
288 UncompressionDict& operator=(CompressionDict&&) = delete;
289};
290
11fdf7f2
TL
291class CompressionContext {
292 private:
11fdf7f2
TL
293#if defined(ZSTD) && (ZSTD_VERSION_NUMBER >= 500)
294 ZSTD_CCtx* zstd_ctx_ = nullptr;
494da23a
TL
295 void CreateNativeContext(CompressionType type) {
296 if (type == kZSTD || type == kZSTDNotFinalCompression) {
11fdf7f2
TL
297#ifdef ROCKSDB_ZSTD_CUSTOM_MEM
298 zstd_ctx_ =
299 ZSTD_createCCtx_advanced(port::GetJeZstdAllocationOverrides());
300#else // ROCKSDB_ZSTD_CUSTOM_MEM
301 zstd_ctx_ = ZSTD_createCCtx();
302#endif // ROCKSDB_ZSTD_CUSTOM_MEM
303 }
304 }
305 void DestroyNativeContext() {
306 if (zstd_ctx_ != nullptr) {
307 ZSTD_freeCCtx(zstd_ctx_);
308 }
309 }
310
311 public:
312 // callable inside ZSTD_Compress
313 ZSTD_CCtx* ZSTDPreallocCtx() const {
494da23a 314 assert(zstd_ctx_ != nullptr);
11fdf7f2
TL
315 return zstd_ctx_;
316 }
494da23a 317
11fdf7f2
TL
318#else // ZSTD && (ZSTD_VERSION_NUMBER >= 500)
319 private:
494da23a 320 void CreateNativeContext(CompressionType /* type */) {}
11fdf7f2
TL
321 void DestroyNativeContext() {}
322#endif // ZSTD && (ZSTD_VERSION_NUMBER >= 500)
323 public:
494da23a
TL
324 explicit CompressionContext(CompressionType type) {
325 CreateNativeContext(type);
11fdf7f2
TL
326 }
327 ~CompressionContext() { DestroyNativeContext(); }
328 CompressionContext(const CompressionContext&) = delete;
329 CompressionContext& operator=(const CompressionContext&) = delete;
494da23a
TL
330};
331
332class CompressionInfo {
333 const CompressionOptions& opts_;
334 const CompressionContext& context_;
335 const CompressionDict& dict_;
336 const CompressionType type_;
337 const uint64_t sample_for_compression_;
338
339 public:
340 CompressionInfo(const CompressionOptions& _opts,
341 const CompressionContext& _context,
342 const CompressionDict& _dict, CompressionType _type,
343 uint64_t _sample_for_compression)
344 : opts_(_opts),
345 context_(_context),
346 dict_(_dict),
347 type_(_type),
348 sample_for_compression_(_sample_for_compression) {}
11fdf7f2
TL
349
350 const CompressionOptions& options() const { return opts_; }
494da23a
TL
351 const CompressionContext& context() const { return context_; }
352 const CompressionDict& dict() const { return dict_; }
11fdf7f2 353 CompressionType type() const { return type_; }
494da23a 354 uint64_t SampleForCompression() const { return sample_for_compression_; }
11fdf7f2
TL
355};
356
11fdf7f2
TL
357class UncompressionContext {
358 private:
11fdf7f2
TL
359 CompressionContextCache* ctx_cache_ = nullptr;
360 ZSTDUncompressCachedData uncomp_cached_data_;
361
362 public:
363 struct NoCache {};
364 // Do not use context cache, used by TableBuilder
494da23a
TL
365 UncompressionContext(NoCache, CompressionType /* type */) {}
366
367 explicit UncompressionContext(CompressionType type) {
368 if (type == kZSTD || type == kZSTDNotFinalCompression) {
11fdf7f2
TL
369 ctx_cache_ = CompressionContextCache::Instance();
370 uncomp_cached_data_ = ctx_cache_->GetCachedZSTDUncompressData();
371 }
372 }
373 ~UncompressionContext() {
494da23a 374 if (uncomp_cached_data_.GetCacheIndex() != -1) {
11fdf7f2
TL
375 assert(ctx_cache_ != nullptr);
376 ctx_cache_->ReturnCachedZSTDUncompressData(
377 uncomp_cached_data_.GetCacheIndex());
378 }
379 }
380 UncompressionContext(const UncompressionContext&) = delete;
381 UncompressionContext& operator=(const UncompressionContext&) = delete;
382
383 ZSTDUncompressCachedData::ZSTDNativeContext GetZSTDContext() const {
384 return uncomp_cached_data_.Get();
385 }
494da23a
TL
386};
387
388class UncompressionInfo {
389 const UncompressionContext& context_;
390 const UncompressionDict& dict_;
391 const CompressionType type_;
392
393 public:
394 UncompressionInfo(const UncompressionContext& _context,
395 const UncompressionDict& _dict, CompressionType _type)
396 : context_(_context), dict_(_dict), type_(_type) {}
397
398 const UncompressionContext& context() const { return context_; }
399 const UncompressionDict& dict() const { return dict_; }
11fdf7f2 400 CompressionType type() const { return type_; }
11fdf7f2
TL
401};
402
7c673cae
FG
// Returns true iff the SNAPPY macro was defined when this header was compiled.
inline bool Snappy_Supported() {
#if defined(SNAPPY)
  const bool compiled_in = true;
#else
  const bool compiled_in = false;
#endif
  return compiled_in;
}
410
// Returns true iff the ZLIB macro was defined when this header was compiled.
inline bool Zlib_Supported() {
#if defined(ZLIB)
  const bool compiled_in = true;
#else
  const bool compiled_in = false;
#endif
  return compiled_in;
}
418
// Returns true iff the BZIP2 macro was defined when this header was compiled.
inline bool BZip2_Supported() {
#if defined(BZIP2)
  const bool compiled_in = true;
#else
  const bool compiled_in = false;
#endif
  return compiled_in;
}
426
// Returns true iff the LZ4 macro was defined when this header was compiled.
inline bool LZ4_Supported() {
#if defined(LZ4)
  const bool compiled_in = true;
#else
  const bool compiled_in = false;
#endif
  return compiled_in;
}
434
// Returns true iff the XPRESS macro was defined when this header was compiled.
inline bool XPRESS_Supported() {
#if defined(XPRESS)
  const bool compiled_in = true;
#else
  const bool compiled_in = false;
#endif
  return compiled_in;
}
442
// Returns true iff ZSTD is compiled in and the library in use reports a
// version number of at least 800.
inline bool ZSTD_Supported() {
#if defined(ZSTD)
  // ZSTD format is finalized since version 0.8.0.
  const unsigned runtime_version = ZSTD_versionNumber();
  return runtime_version >= 800;
#else
  return false;
#endif
}
451
// Returns true iff the ZSTD macro was defined when this header was compiled;
// unlike ZSTD_Supported(), no library version check is made.
inline bool ZSTDNotFinal_Supported() {
#if defined(ZSTD)
  const bool compiled_in = true;
#else
  const bool compiled_in = false;
#endif
  return compiled_in;
}
459
460inline bool CompressionTypeSupported(CompressionType compression_type) {
461 switch (compression_type) {
462 case kNoCompression:
463 return true;
464 case kSnappyCompression:
465 return Snappy_Supported();
466 case kZlibCompression:
467 return Zlib_Supported();
468 case kBZip2Compression:
469 return BZip2_Supported();
470 case kLZ4Compression:
471 return LZ4_Supported();
472 case kLZ4HCCompression:
473 return LZ4_Supported();
474 case kXpressCompression:
475 return XPRESS_Supported();
476 case kZSTDNotFinalCompression:
477 return ZSTDNotFinal_Supported();
478 case kZSTD:
479 return ZSTD_Supported();
480 default:
481 assert(false);
482 return false;
483 }
484}
485
486inline std::string CompressionTypeToString(CompressionType compression_type) {
487 switch (compression_type) {
488 case kNoCompression:
489 return "NoCompression";
490 case kSnappyCompression:
491 return "Snappy";
492 case kZlibCompression:
493 return "Zlib";
494 case kBZip2Compression:
495 return "BZip2";
496 case kLZ4Compression:
497 return "LZ4";
498 case kLZ4HCCompression:
499 return "LZ4HC";
500 case kXpressCompression:
501 return "Xpress";
502 case kZSTD:
7c673cae 503 return "ZSTD";
11fdf7f2
TL
504 case kZSTDNotFinalCompression:
505 return "ZSTDNotFinal";
7c673cae
FG
506 default:
507 assert(false);
508 return "";
509 }
510}
511
494da23a
TL
512inline std::string CompressionOptionsToString(
513 CompressionOptions& compression_options) {
514 std::string result;
515 result.reserve(512);
516 result.append("window_bits=")
517 .append(ToString(compression_options.window_bits))
518 .append("; ");
519 result.append("level=")
520 .append(ToString(compression_options.level))
521 .append("; ");
522 result.append("strategy=")
523 .append(ToString(compression_options.strategy))
524 .append("; ");
525 result.append("max_dict_bytes=")
526 .append(ToString(compression_options.max_dict_bytes))
527 .append("; ");
528 result.append("zstd_max_train_bytes=")
529 .append(ToString(compression_options.zstd_max_train_bytes))
530 .append("; ");
531 result.append("enabled=")
532 .append(ToString(compression_options.enabled))
533 .append("; ");
534 return result;
535}
536
7c673cae
FG
537// compress_format_version can have two values:
538// 1 -- decompressed sizes for BZip2 and Zlib are not included in the compressed
539// block. Also, decompressed sizes for LZ4 are encoded in platform-dependent
540// way.
541// 2 -- Zlib, BZip2 and LZ4 encode decompressed size as Varint32 just before the
542// start of compressed block. Snappy format is the same as version 1.
543
494da23a
TL
544inline bool Snappy_Compress(const CompressionInfo& /*info*/, const char* input,
545 size_t length, ::std::string* output) {
7c673cae
FG
546#ifdef SNAPPY
547 output->resize(snappy::MaxCompressedLength(length));
548 size_t outlen;
549 snappy::RawCompress(input, length, &(*output)[0], &outlen);
550 output->resize(outlen);
551 return true;
11fdf7f2
TL
552#else
553 (void)input;
554 (void)length;
555 (void)output;
7c673cae 556 return false;
11fdf7f2 557#endif
7c673cae
FG
558}
559
// Forwards to snappy::GetUncompressedLength() when Snappy is compiled in.
// Otherwise returns false and does not touch `*result`.
inline bool Snappy_GetUncompressedLength(const char* input, size_t length,
                                         size_t* result) {
#ifdef SNAPPY
  const bool ok = snappy::GetUncompressedLength(input, length, result);
  return ok;
#else
  static_cast<void>(input);
  static_cast<void>(length);
  static_cast<void>(result);
  return false;
#endif
}
571
// Forwards to snappy::RawUncompress() when Snappy is compiled in.
// Otherwise returns false and does not write to `output`.
inline bool Snappy_Uncompress(const char* input, size_t length, char* output) {
#ifdef SNAPPY
  const bool ok = snappy::RawUncompress(input, length, output);
  return ok;
#else
  static_cast<void>(input);
  static_cast<void>(length);
  static_cast<void>(output);
  return false;
#endif
}
582
583namespace compression {
584// returns size
585inline size_t PutDecompressedSizeInfo(std::string* output, uint32_t length) {
586 PutVarint32(output, length);
587 return output->size();
588}
589
590inline bool GetDecompressedSizeInfo(const char** input_data,
591 size_t* input_length,
592 uint32_t* output_len) {
593 auto new_input_data =
594 GetVarint32Ptr(*input_data, *input_data + *input_length, output_len);
595 if (new_input_data == nullptr) {
596 return false;
597 }
598 *input_length -= (new_input_data - *input_data);
599 *input_data = new_input_data;
600 return true;
601}
602} // namespace compression
603
604// compress_format_version == 1 -- decompressed size is not included in the
605// block header
606// compress_format_version == 2 -- decompressed size is included in the block
607// header in varint32 format
608// @param compression_dict Data for presetting the compression library's
609// dictionary.
494da23a 610inline bool Zlib_Compress(const CompressionInfo& info,
7c673cae 611 uint32_t compress_format_version, const char* input,
11fdf7f2 612 size_t length, ::std::string* output) {
7c673cae
FG
613#ifdef ZLIB
614 if (length > std::numeric_limits<uint32_t>::max()) {
615 // Can't compress more than 4GB
616 return false;
617 }
618
619 size_t output_header_len = 0;
620 if (compress_format_version == 2) {
621 output_header_len = compression::PutDecompressedSizeInfo(
622 output, static_cast<uint32_t>(length));
623 }
624 // Resize output to be the plain data length.
625 // This may not be big enough if the compression actually expands data.
626 output->resize(output_header_len + length);
627
628 // The memLevel parameter specifies how much memory should be allocated for
629 // the internal compression state.
630 // memLevel=1 uses minimum memory but is slow and reduces compression ratio.
631 // memLevel=9 uses maximum memory for optimal speed.
632 // The default value is 8. See zconf.h for more details.
633 static const int memLevel = 8;
11fdf7f2 634 int level;
494da23a 635 if (info.options().level == CompressionOptions::kDefaultCompressionLevel) {
11fdf7f2
TL
636 level = Z_DEFAULT_COMPRESSION;
637 } else {
494da23a 638 level = info.options().level;
11fdf7f2 639 }
7c673cae
FG
640 z_stream _stream;
641 memset(&_stream, 0, sizeof(z_stream));
494da23a
TL
642 int st = deflateInit2(&_stream, level, Z_DEFLATED, info.options().window_bits,
643 memLevel, info.options().strategy);
7c673cae
FG
644 if (st != Z_OK) {
645 return false;
646 }
647
494da23a
TL
648 Slice compression_dict = info.dict().GetRawDict();
649 if (compression_dict.size()) {
7c673cae 650 // Initialize the compression library's dictionary
494da23a
TL
651 st = deflateSetDictionary(
652 &_stream, reinterpret_cast<const Bytef*>(compression_dict.data()),
653 static_cast<unsigned int>(compression_dict.size()));
7c673cae
FG
654 if (st != Z_OK) {
655 deflateEnd(&_stream);
656 return false;
657 }
658 }
659
660 // Compress the input, and put compressed data in output.
11fdf7f2 661 _stream.next_in = (Bytef*)input;
7c673cae
FG
662 _stream.avail_in = static_cast<unsigned int>(length);
663
664 // Initialize the output size.
665 _stream.avail_out = static_cast<unsigned int>(length);
666 _stream.next_out = reinterpret_cast<Bytef*>(&(*output)[output_header_len]);
667
668 bool compressed = false;
669 st = deflate(&_stream, Z_FINISH);
670 if (st == Z_STREAM_END) {
671 compressed = true;
672 output->resize(output->size() - _stream.avail_out);
673 }
674 // The only return value we really care about is Z_STREAM_END.
675 // Z_OK means insufficient output space. This means the compression is
676 // bigger than decompressed size. Just fail the compression in that case.
677
678 deflateEnd(&_stream);
679 return compressed;
11fdf7f2 680#else
494da23a 681 (void)info;
11fdf7f2
TL
682 (void)compress_format_version;
683 (void)input;
684 (void)length;
685 (void)output;
7c673cae 686 return false;
11fdf7f2 687#endif
7c673cae
FG
688}
689
690// compress_format_version == 1 -- decompressed size is not included in the
691// block header
692// compress_format_version == 2 -- decompressed size is included in the block
693// header in varint32 format
694// @param compression_dict Data for presetting the compression library's
695// dictionary.
494da23a
TL
696inline CacheAllocationPtr Zlib_Uncompress(
697 const UncompressionInfo& info, const char* input_data, size_t input_length,
698 int* decompress_size, uint32_t compress_format_version,
699 MemoryAllocator* allocator = nullptr, int windowBits = -14) {
7c673cae
FG
700#ifdef ZLIB
701 uint32_t output_len = 0;
702 if (compress_format_version == 2) {
703 if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
704 &output_len)) {
705 return nullptr;
706 }
707 } else {
708 // Assume the decompressed data size will 5x of compressed size, but round
709 // to the page size
710 size_t proposed_output_len = ((input_length * 5) & (~(4096 - 1))) + 4096;
711 output_len = static_cast<uint32_t>(
712 std::min(proposed_output_len,
713 static_cast<size_t>(std::numeric_limits<uint32_t>::max())));
714 }
715
716 z_stream _stream;
717 memset(&_stream, 0, sizeof(z_stream));
718
719 // For raw inflate, the windowBits should be -8..-15.
720 // If windowBits is bigger than zero, it will use either zlib
721 // header or gzip header. Adding 32 to it will do automatic detection.
11fdf7f2
TL
722 int st =
723 inflateInit2(&_stream, windowBits > 0 ? windowBits + 32 : windowBits);
7c673cae
FG
724 if (st != Z_OK) {
725 return nullptr;
726 }
727
494da23a
TL
728 Slice compression_dict = info.dict().GetRawDict();
729 if (compression_dict.size()) {
7c673cae 730 // Initialize the compression library's dictionary
494da23a
TL
731 st = inflateSetDictionary(
732 &_stream, reinterpret_cast<const Bytef*>(compression_dict.data()),
733 static_cast<unsigned int>(compression_dict.size()));
7c673cae
FG
734 if (st != Z_OK) {
735 return nullptr;
736 }
737 }
738
11fdf7f2 739 _stream.next_in = (Bytef*)input_data;
7c673cae
FG
740 _stream.avail_in = static_cast<unsigned int>(input_length);
741
494da23a 742 auto output = AllocateBlock(output_len, allocator);
7c673cae 743
494da23a 744 _stream.next_out = (Bytef*)output.get();
7c673cae
FG
745 _stream.avail_out = static_cast<unsigned int>(output_len);
746
747 bool done = false;
748 while (!done) {
749 st = inflate(&_stream, Z_SYNC_FLUSH);
750 switch (st) {
751 case Z_STREAM_END:
752 done = true;
753 break;
754 case Z_OK: {
755 // No output space. Increase the output space by 20%.
756 // We should never run out of output space if
757 // compress_format_version == 2
758 assert(compress_format_version != 2);
759 size_t old_sz = output_len;
11fdf7f2 760 uint32_t output_len_delta = output_len / 5;
7c673cae 761 output_len += output_len_delta < 10 ? 10 : output_len_delta;
494da23a
TL
762 auto tmp = AllocateBlock(output_len, allocator);
763 memcpy(tmp.get(), output.get(), old_sz);
764 output = std::move(tmp);
7c673cae
FG
765
766 // Set more output.
494da23a 767 _stream.next_out = (Bytef*)(output.get() + old_sz);
7c673cae
FG
768 _stream.avail_out = static_cast<unsigned int>(output_len - old_sz);
769 break;
770 }
771 case Z_BUF_ERROR:
772 default:
7c673cae
FG
773 inflateEnd(&_stream);
774 return nullptr;
775 }
776 }
777
778 // If we encoded decompressed block size, we should have no bytes left
779 assert(compress_format_version != 2 || _stream.avail_out == 0);
780 *decompress_size = static_cast<int>(output_len - _stream.avail_out);
781 inflateEnd(&_stream);
782 return output;
11fdf7f2 783#else
494da23a 784 (void)info;
11fdf7f2
TL
785 (void)input_data;
786 (void)input_length;
787 (void)decompress_size;
788 (void)compress_format_version;
494da23a 789 (void)allocator;
11fdf7f2 790 (void)windowBits;
7c673cae 791 return nullptr;
11fdf7f2 792#endif
7c673cae
FG
793}
794
795// compress_format_version == 1 -- decompressed size is not included in the
796// block header
797// compress_format_version == 2 -- decompressed size is included in the block
798// header in varint32 format
494da23a 799inline bool BZip2_Compress(const CompressionInfo& /*info*/,
11fdf7f2
TL
800 uint32_t compress_format_version, const char* input,
801 size_t length, ::std::string* output) {
7c673cae
FG
802#ifdef BZIP2
803 if (length > std::numeric_limits<uint32_t>::max()) {
804 // Can't compress more than 4GB
805 return false;
806 }
807 size_t output_header_len = 0;
808 if (compress_format_version == 2) {
809 output_header_len = compression::PutDecompressedSizeInfo(
810 output, static_cast<uint32_t>(length));
811 }
812 // Resize output to be the plain data length.
813 // This may not be big enough if the compression actually expands data.
814 output->resize(output_header_len + length);
815
7c673cae
FG
816 bz_stream _stream;
817 memset(&_stream, 0, sizeof(bz_stream));
818
819 // Block size 1 is 100K.
820 // 0 is for silent.
821 // 30 is the default workFactor
822 int st = BZ2_bzCompressInit(&_stream, 1, 0, 30);
823 if (st != BZ_OK) {
824 return false;
825 }
826
827 // Compress the input, and put compressed data in output.
11fdf7f2 828 _stream.next_in = (char*)input;
7c673cae
FG
829 _stream.avail_in = static_cast<unsigned int>(length);
830
831 // Initialize the output size.
832 _stream.avail_out = static_cast<unsigned int>(length);
833 _stream.next_out = reinterpret_cast<char*>(&(*output)[output_header_len]);
834
835 bool compressed = false;
836 st = BZ2_bzCompress(&_stream, BZ_FINISH);
837 if (st == BZ_STREAM_END) {
838 compressed = true;
839 output->resize(output->size() - _stream.avail_out);
840 }
841 // The only return value we really care about is BZ_STREAM_END.
842 // BZ_FINISH_OK means insufficient output space. This means the compression
843 // is bigger than decompressed size. Just fail the compression in that case.
844
845 BZ2_bzCompressEnd(&_stream);
846 return compressed;
11fdf7f2
TL
847#else
848 (void)compress_format_version;
849 (void)input;
850 (void)length;
851 (void)output;
7c673cae 852 return false;
11fdf7f2 853#endif
7c673cae
FG
854}
855
856// compress_format_version == 1 -- decompressed size is not included in the
857// block header
858// compress_format_version == 2 -- decompressed size is included in the block
859// header in varint32 format
494da23a
TL
860inline CacheAllocationPtr BZip2_Uncompress(
861 const char* input_data, size_t input_length, int* decompress_size,
862 uint32_t compress_format_version, MemoryAllocator* allocator = nullptr) {
7c673cae
FG
863#ifdef BZIP2
864 uint32_t output_len = 0;
865 if (compress_format_version == 2) {
866 if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
867 &output_len)) {
868 return nullptr;
869 }
870 } else {
871 // Assume the decompressed data size will 5x of compressed size, but round
872 // to the next page size
873 size_t proposed_output_len = ((input_length * 5) & (~(4096 - 1))) + 4096;
874 output_len = static_cast<uint32_t>(
875 std::min(proposed_output_len,
876 static_cast<size_t>(std::numeric_limits<uint32_t>::max())));
877 }
878
879 bz_stream _stream;
880 memset(&_stream, 0, sizeof(bz_stream));
881
882 int st = BZ2_bzDecompressInit(&_stream, 0, 0);
883 if (st != BZ_OK) {
884 return nullptr;
885 }
886
11fdf7f2 887 _stream.next_in = (char*)input_data;
7c673cae
FG
888 _stream.avail_in = static_cast<unsigned int>(input_length);
889
494da23a 890 auto output = AllocateBlock(output_len, allocator);
7c673cae 891
494da23a 892 _stream.next_out = (char*)output.get();
7c673cae
FG
893 _stream.avail_out = static_cast<unsigned int>(output_len);
894
895 bool done = false;
896 while (!done) {
897 st = BZ2_bzDecompress(&_stream);
898 switch (st) {
899 case BZ_STREAM_END:
900 done = true;
901 break;
902 case BZ_OK: {
903 // No output space. Increase the output space by 20%.
904 // We should never run out of output space if
905 // compress_format_version == 2
906 assert(compress_format_version != 2);
907 uint32_t old_sz = output_len;
908 output_len = output_len * 1.2;
494da23a
TL
909 auto tmp = AllocateBlock(output_len, allocator);
910 memcpy(tmp.get(), output.get(), old_sz);
911 output = std::move(tmp);
7c673cae
FG
912
913 // Set more output.
494da23a 914 _stream.next_out = (char*)(output.get() + old_sz);
7c673cae
FG
915 _stream.avail_out = static_cast<unsigned int>(output_len - old_sz);
916 break;
917 }
918 default:
7c673cae
FG
919 BZ2_bzDecompressEnd(&_stream);
920 return nullptr;
921 }
922 }
923
924 // If we encoded decompressed block size, we should have no bytes left
925 assert(compress_format_version != 2 || _stream.avail_out == 0);
926 *decompress_size = static_cast<int>(output_len - _stream.avail_out);
927 BZ2_bzDecompressEnd(&_stream);
928 return output;
11fdf7f2
TL
929#else
930 (void)input_data;
931 (void)input_length;
932 (void)decompress_size;
933 (void)compress_format_version;
494da23a 934 (void)allocator;
7c673cae 935 return nullptr;
11fdf7f2 936#endif
7c673cae
FG
937}
938
// compress_format_version == 1 -- decompressed size is included in the
// block header using memcpy (which makes the database non-portable)
// compress_format_version == 2 -- decompressed size is included in the block
// header in varint32 format
// The dictionary held by `info.dict()` is used to preset the compression
// library's dictionary.
494da23a 945inline bool LZ4_Compress(const CompressionInfo& info,
7c673cae 946 uint32_t compress_format_version, const char* input,
11fdf7f2 947 size_t length, ::std::string* output) {
7c673cae
FG
948#ifdef LZ4
949 if (length > std::numeric_limits<uint32_t>::max()) {
950 // Can't compress more than 4GB
951 return false;
952 }
953
954 size_t output_header_len = 0;
955 if (compress_format_version == 2) {
956 // new encoding, using varint32 to store size information
957 output_header_len = compression::PutDecompressedSizeInfo(
958 output, static_cast<uint32_t>(length));
959 } else {
960 // legacy encoding, which is not really portable (depends on big/little
961 // endianness)
962 output_header_len = 8;
963 output->resize(output_header_len);
964 char* p = const_cast<char*>(output->c_str());
965 memcpy(p, &length, sizeof(length));
966 }
967 int compress_bound = LZ4_compressBound(static_cast<int>(length));
968 output->resize(static_cast<size_t>(output_header_len + compress_bound));
969
970 int outlen;
971#if LZ4_VERSION_NUMBER >= 10400 // r124+
972 LZ4_stream_t* stream = LZ4_createStream();
494da23a
TL
973 Slice compression_dict = info.dict().GetRawDict();
974 if (compression_dict.size()) {
975 LZ4_loadDict(stream, compression_dict.data(),
976 static_cast<int>(compression_dict.size()));
7c673cae
FG
977 }
978#if LZ4_VERSION_NUMBER >= 10700 // r129+
11fdf7f2
TL
979 outlen =
980 LZ4_compress_fast_continue(stream, input, &(*output)[output_header_len],
981 static_cast<int>(length), compress_bound, 1);
7c673cae
FG
982#else // up to r128
983 outlen = LZ4_compress_limitedOutput_continue(
984 stream, input, &(*output)[output_header_len], static_cast<int>(length),
985 compress_bound);
986#endif
987 LZ4_freeStream(stream);
988#else // up to r123
989 outlen = LZ4_compress_limitedOutput(input, &(*output)[output_header_len],
990 static_cast<int>(length), compress_bound);
494da23a 991 (void)ctx;
7c673cae
FG
992#endif // LZ4_VERSION_NUMBER >= 10400
993
994 if (outlen == 0) {
995 return false;
996 }
997 output->resize(static_cast<size_t>(output_header_len + outlen));
998 return true;
11fdf7f2 999#else // LZ4
494da23a 1000 (void)info;
11fdf7f2
TL
1001 (void)compress_format_version;
1002 (void)input;
1003 (void)length;
1004 (void)output;
7c673cae 1005 return false;
11fdf7f2 1006#endif
7c673cae
FG
1007}
1008
1009// compress_format_version == 1 -- decompressed size is included in the
1010// block header using memcpy, which makes database non-portable)
1011// compress_format_version == 2 -- decompressed size is included in the block
1012// header in varint32 format
1013// @param compression_dict Data for presetting the compression library's
1014// dictionary.
494da23a
TL
1015inline CacheAllocationPtr LZ4_Uncompress(const UncompressionInfo& info,
1016 const char* input_data,
1017 size_t input_length,
1018 int* decompress_size,
1019 uint32_t compress_format_version,
1020 MemoryAllocator* allocator = nullptr) {
7c673cae
FG
1021#ifdef LZ4
1022 uint32_t output_len = 0;
1023 if (compress_format_version == 2) {
1024 // new encoding, using varint32 to store size information
1025 if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
1026 &output_len)) {
1027 return nullptr;
1028 }
1029 } else {
1030 // legacy encoding, which is not really portable (depends on big/little
1031 // endianness)
1032 if (input_length < 8) {
1033 return nullptr;
1034 }
1035 memcpy(&output_len, input_data, sizeof(output_len));
1036 input_length -= 8;
1037 input_data += 8;
1038 }
1039
494da23a 1040 auto output = AllocateBlock(output_len, allocator);
7c673cae
FG
1041#if LZ4_VERSION_NUMBER >= 10400 // r124+
1042 LZ4_streamDecode_t* stream = LZ4_createStreamDecode();
494da23a
TL
1043 Slice compression_dict = info.dict().GetRawDict();
1044 if (compression_dict.size()) {
1045 LZ4_setStreamDecode(stream, compression_dict.data(),
1046 static_cast<int>(compression_dict.size()));
7c673cae
FG
1047 }
1048 *decompress_size = LZ4_decompress_safe_continue(
494da23a 1049 stream, input_data, output.get(), static_cast<int>(input_length),
7c673cae
FG
1050 static_cast<int>(output_len));
1051 LZ4_freeStreamDecode(stream);
1052#else // up to r123
494da23a
TL
1053 *decompress_size = LZ4_decompress_safe(input_data, output.get(),
1054 static_cast<int>(input_length),
1055 static_cast<int>(output_len));
1056 (void)ctx;
7c673cae
FG
1057#endif // LZ4_VERSION_NUMBER >= 10400
1058
1059 if (*decompress_size < 0) {
7c673cae
FG
1060 return nullptr;
1061 }
1062 assert(*decompress_size == static_cast<int>(output_len));
1063 return output;
11fdf7f2 1064#else // LZ4
494da23a 1065 (void)info;
11fdf7f2
TL
1066 (void)input_data;
1067 (void)input_length;
1068 (void)decompress_size;
1069 (void)compress_format_version;
494da23a 1070 (void)allocator;
7c673cae 1071 return nullptr;
11fdf7f2 1072#endif
7c673cae
FG
1073}
1074
1075// compress_format_version == 1 -- decompressed size is included in the
1076// block header using memcpy, which makes database non-portable)
1077// compress_format_version == 2 -- decompressed size is included in the block
1078// header in varint32 format
1079// @param compression_dict Data for presetting the compression library's
1080// dictionary.
494da23a 1081inline bool LZ4HC_Compress(const CompressionInfo& info,
7c673cae 1082 uint32_t compress_format_version, const char* input,
11fdf7f2 1083 size_t length, ::std::string* output) {
7c673cae
FG
1084#ifdef LZ4
1085 if (length > std::numeric_limits<uint32_t>::max()) {
1086 // Can't compress more than 4GB
1087 return false;
1088 }
1089
1090 size_t output_header_len = 0;
1091 if (compress_format_version == 2) {
1092 // new encoding, using varint32 to store size information
1093 output_header_len = compression::PutDecompressedSizeInfo(
1094 output, static_cast<uint32_t>(length));
1095 } else {
1096 // legacy encoding, which is not really portable (depends on big/little
1097 // endianness)
1098 output_header_len = 8;
1099 output->resize(output_header_len);
1100 char* p = const_cast<char*>(output->c_str());
1101 memcpy(p, &length, sizeof(length));
1102 }
1103 int compress_bound = LZ4_compressBound(static_cast<int>(length));
1104 output->resize(static_cast<size_t>(output_header_len + compress_bound));
1105
1106 int outlen;
11fdf7f2 1107 int level;
494da23a 1108 if (info.options().level == CompressionOptions::kDefaultCompressionLevel) {
11fdf7f2
TL
1109 level = 0; // lz4hc.h says any value < 1 will be sanitized to default
1110 } else {
494da23a 1111 level = info.options().level;
11fdf7f2 1112 }
7c673cae
FG
1113#if LZ4_VERSION_NUMBER >= 10400 // r124+
1114 LZ4_streamHC_t* stream = LZ4_createStreamHC();
11fdf7f2 1115 LZ4_resetStreamHC(stream, level);
494da23a 1116 Slice compression_dict = info.dict().GetRawDict();
7c673cae 1117 const char* compression_dict_data =
494da23a
TL
1118 compression_dict.size() > 0 ? compression_dict.data() : nullptr;
1119 size_t compression_dict_size = compression_dict.size();
7c673cae
FG
1120 LZ4_loadDictHC(stream, compression_dict_data,
1121 static_cast<int>(compression_dict_size));
1122
1123#if LZ4_VERSION_NUMBER >= 10700 // r129+
1124 outlen =
1125 LZ4_compress_HC_continue(stream, input, &(*output)[output_header_len],
1126 static_cast<int>(length), compress_bound);
1127#else // r124-r128
1128 outlen = LZ4_compressHC_limitedOutput_continue(
1129 stream, input, &(*output)[output_header_len], static_cast<int>(length),
1130 compress_bound);
1131#endif // LZ4_VERSION_NUMBER >= 10700
1132 LZ4_freeStreamHC(stream);
1133
1134#elif LZ4_VERSION_MAJOR // r113-r123
1135 outlen = LZ4_compressHC2_limitedOutput(input, &(*output)[output_header_len],
1136 static_cast<int>(length),
11fdf7f2 1137 compress_bound, level);
7c673cae
FG
1138#else // up to r112
1139 outlen =
1140 LZ4_compressHC_limitedOutput(input, &(*output)[output_header_len],
1141 static_cast<int>(length), compress_bound);
1142#endif // LZ4_VERSION_NUMBER >= 10400
1143
1144 if (outlen == 0) {
1145 return false;
1146 }
1147 output->resize(static_cast<size_t>(output_header_len + outlen));
1148 return true;
11fdf7f2 1149#else // LZ4
494da23a 1150 (void)info;
11fdf7f2
TL
1151 (void)compress_format_version;
1152 (void)input;
1153 (void)length;
1154 (void)output;
7c673cae 1155 return false;
11fdf7f2 1156#endif
7c673cae
FG
1157}
1158
// Compresses input[0, length) into *output using XPRESS.
// When XPRESS is defined the call is forwarded to port::xpress::Compress();
// otherwise the arguments are ignored and false is returned.
inline bool XPRESS_Compress(const char* input, size_t length,
                            std::string* output) {
#ifdef XPRESS
  return port::xpress::Compress(input, length, output);
#else   // XPRESS
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif  // XPRESS
}
7c673cae 1170
// Decompresses an XPRESS block.
// When XPRESS is defined the call is forwarded to port::xpress::Decompress();
// otherwise the arguments are ignored and nullptr is returned.
inline char* XPRESS_Uncompress(const char* input_data, size_t input_length,
                               int* decompress_size) {
#ifdef XPRESS
  return port::xpress::Decompress(input_data, input_length, decompress_size);
#else   // XPRESS
  (void)input_data;
  (void)input_length;
  (void)decompress_size;
  return nullptr;
#endif  // XPRESS
}
7c673cae 1183
494da23a 1184inline bool ZSTD_Compress(const CompressionInfo& info, const char* input,
11fdf7f2 1185 size_t length, ::std::string* output) {
7c673cae
FG
1186#ifdef ZSTD
1187 if (length > std::numeric_limits<uint32_t>::max()) {
1188 // Can't compress more than 4GB
1189 return false;
1190 }
1191
1192 size_t output_header_len = compression::PutDecompressedSizeInfo(
1193 output, static_cast<uint32_t>(length));
1194
1195 size_t compressBound = ZSTD_compressBound(length);
1196 output->resize(static_cast<size_t>(output_header_len + compressBound));
11fdf7f2
TL
1197 size_t outlen = 0;
1198 int level;
494da23a 1199 if (info.options().level == CompressionOptions::kDefaultCompressionLevel) {
11fdf7f2
TL
1200 // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see
1201 // https://github.com/facebook/zstd/issues/1148
1202 level = 3;
1203 } else {
494da23a 1204 level = info.options().level;
11fdf7f2 1205 }
7c673cae 1206#if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+
494da23a 1207 ZSTD_CCtx* context = info.context().ZSTDPreallocCtx();
11fdf7f2 1208 assert(context != nullptr);
494da23a
TL
1209#if ZSTD_VERSION_NUMBER >= 700 // v0.7.0+
1210 if (info.dict().GetDigestedZstdCDict() != nullptr) {
1211 outlen = ZSTD_compress_usingCDict(context, &(*output)[output_header_len],
1212 compressBound, input, length,
1213 info.dict().GetDigestedZstdCDict());
1214 }
1215#endif // ZSTD_VERSION_NUMBER >= 700
1216 if (outlen == 0) {
1217 outlen = ZSTD_compress_usingDict(context, &(*output)[output_header_len],
1218 compressBound, input, length,
1219 info.dict().GetRawDict().data(),
1220 info.dict().GetRawDict().size(), level);
1221 }
11fdf7f2 1222#else // up to v0.4.x
7c673cae 1223 outlen = ZSTD_compress(&(*output)[output_header_len], compressBound, input,
11fdf7f2 1224 length, level);
7c673cae
FG
1225#endif // ZSTD_VERSION_NUMBER >= 500
1226 if (outlen == 0) {
1227 return false;
1228 }
1229 output->resize(output_header_len + outlen);
1230 return true;
11fdf7f2 1231#else // ZSTD
494da23a 1232 (void)info;
11fdf7f2
TL
1233 (void)input;
1234 (void)length;
1235 (void)output;
7c673cae 1236 return false;
11fdf7f2 1237#endif
7c673cae
FG
1238}
1239
1240// @param compression_dict Data for presetting the compression library's
1241// dictionary.
494da23a
TL
1242inline CacheAllocationPtr ZSTD_Uncompress(
1243 const UncompressionInfo& info, const char* input_data, size_t input_length,
1244 int* decompress_size, MemoryAllocator* allocator = nullptr) {
7c673cae
FG
1245#ifdef ZSTD
1246 uint32_t output_len = 0;
1247 if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
1248 &output_len)) {
1249 return nullptr;
1250 }
1251
494da23a
TL
1252 auto output = AllocateBlock(output_len, allocator);
1253 size_t actual_output_length = 0;
7c673cae 1254#if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+
494da23a 1255 ZSTD_DCtx* context = info.context().GetZSTDContext();
11fdf7f2 1256 assert(context != nullptr);
494da23a
TL
1257#ifdef ROCKSDB_ZSTD_DDICT
1258 if (info.dict().GetDigestedZstdDDict() != nullptr) {
1259 actual_output_length = ZSTD_decompress_usingDDict(
1260 context, output.get(), output_len, input_data, input_length,
1261 info.dict().GetDigestedZstdDDict());
1262 }
1263#endif // ROCKSDB_ZSTD_DDICT
1264 if (actual_output_length == 0) {
1265 actual_output_length = ZSTD_decompress_usingDict(
1266 context, output.get(), output_len, input_data, input_length,
1267 info.dict().GetRawDict().data(), info.dict().GetRawDict().size());
1268 }
11fdf7f2 1269#else // up to v0.4.x
494da23a 1270 (void)info;
7c673cae 1271 actual_output_length =
494da23a 1272 ZSTD_decompress(output.get(), output_len, input_data, input_length);
7c673cae
FG
1273#endif // ZSTD_VERSION_NUMBER >= 500
1274 assert(actual_output_length == output_len);
1275 *decompress_size = static_cast<int>(actual_output_length);
1276 return output;
11fdf7f2 1277#else // ZSTD
494da23a 1278 (void)info;
11fdf7f2
TL
1279 (void)input_data;
1280 (void)input_length;
1281 (void)decompress_size;
494da23a 1282 (void)allocator;
7c673cae 1283 return nullptr;
11fdf7f2
TL
1284#endif
1285}
1286
494da23a
TL
// Reports whether ZSTD dictionary training can be used. Without ZSTD this is
// always false; with ZSTD it depends on the version reported by
// ZSTD_versionNumber().
inline bool ZSTD_TrainDictionarySupported() {
#ifndef ZSTD
  return false;
#else
  // Dictionary trainer is available since v0.6.1 for static linking, but not
  // available for dynamic linking until v1.1.3. For now we enable the feature
  // in v1.1.3+ only.
  const bool trainer_available = ZSTD_versionNumber() >= 10103;
  return trainer_available;
#endif
}
1297
11fdf7f2
TL
// Trains a ZSTD dictionary of at most max_dict_bytes bytes from `samples`,
// where sample_lens holds the length of each sample passed to
// ZDICT_trainFromBuffer(). Returns the trained dictionary, or an empty string
// when `samples` is empty or the trainer reports an error.
//
// Must only be called when built against zstd v1.1.3+; otherwise it asserts
// and returns an empty string.
inline std::string ZSTD_TrainDictionary(const std::string& samples,
                                        const std::vector<size_t>& sample_lens,
                                        size_t max_dict_bytes) {
  // Dictionary trainer is available since v0.6.1 for static linking, but not
  // available for dynamic linking until v1.1.3. For now we enable the feature
  // in v1.1.3+ only.
#if ZSTD_VERSION_NUMBER >= 10103  // v1.1.3+
  assert(samples.empty() == sample_lens.empty());
  if (samples.empty()) {
    return std::string();
  }
  // Reserve the maximum size up front; it is trimmed once training is done.
  std::string trained(max_dict_bytes, '\0');
  const unsigned sample_count = static_cast<unsigned>(sample_lens.size());
  const size_t trained_len = ZDICT_trainFromBuffer(
      &trained[0], max_dict_bytes, &samples[0], &sample_lens[0], sample_count);
  if (ZDICT_isError(trained_len)) {
    return std::string();
  }
  assert(trained_len <= max_dict_bytes);
  trained.resize(trained_len);
  return trained;
#else   // up to v1.1.2
  assert(false);
  (void)samples;
  (void)sample_lens;
  (void)max_dict_bytes;
  return std::string();
#endif  // ZSTD_VERSION_NUMBER >= 10103
}
1327
// Convenience overload for equally sized samples: `samples` is treated as a
// concatenation of samples of (1 << sample_len_shift) bytes each, and a
// trailing partial sample is ignored. Returns the trained dictionary of at
// most max_dict_bytes bytes, or an empty string when there is not a single
// complete sample or training fails.
//
// Must only be called when built against zstd v1.1.3+; otherwise it asserts
// and returns an empty string.
inline std::string ZSTD_TrainDictionary(const std::string& samples,
                                        size_t sample_len_shift,
                                        size_t max_dict_bytes) {
  // Dictionary trainer is available since v0.6.1 for static linking, but not
  // available for dynamic linking until v1.1.3. For now we enable the feature
  // in v1.1.3+ only.
#if ZSTD_VERSION_NUMBER >= 10103  // v1.1.3+
  // Shifting by the full width of size_t (or more) is undefined; such a
  // sample size cannot be represented, so there are no complete samples.
  if (sample_len_shift >=
      static_cast<size_t>(std::numeric_limits<size_t>::digits)) {
    return "";
  }
  // skips potential partial sample at the end of "samples"
  size_t num_samples = samples.size() >> sample_len_shift;
  if (num_samples == 0) {
    // Nothing to train on. The vector overload requires sample_lens to be
    // non-empty whenever samples is non-empty, so do not forward this case.
    return "";
  }
  std::vector<size_t> sample_lens(num_samples, size_t(1) << sample_len_shift);
  return ZSTD_TrainDictionary(samples, sample_lens, max_dict_bytes);
#else   // up to v1.1.2
  assert(false);
  (void)samples;
  (void)sample_len_shift;
  (void)max_dict_bytes;
  return "";
#endif  // ZSTD_VERSION_NUMBER >= 10103
}
1346
1347} // namespace rocksdb