// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "db/column_family.h"

#include <algorithm>
#include <cinttypes>
#include <limits>
#include <sstream>
#include <string>
#include <vector>

#include "db/blob/blob_file_cache.h"
#include "db/compaction/compaction_picker.h"
#include "db/compaction/compaction_picker_fifo.h"
#include "db/compaction/compaction_picker_level.h"
#include "db/compaction/compaction_picker_universal.h"
#include "db/db_impl/db_impl.h"
#include "db/internal_stats.h"
#include "db/job_context.h"
#include "db/range_del_aggregator.h"
#include "db/table_properties_collector.h"
#include "db/version_set.h"
#include "db/write_controller.h"
#include "file/sst_file_manager_impl.h"
#include "memtable/hash_skiplist_rep.h"
#include "monitoring/thread_status_util.h"
#include "options/options_helper.h"
#include "port/port.h"
#include "rocksdb/table.h"
#include "table/merging_iterator.h"
#include "util/autovector.h"
#include "util/cast_util.h"
#include "util/compression.h"

namespace ROCKSDB_NAMESPACE {

ColumnFamilyHandleImpl::ColumnFamilyHandleImpl(
    ColumnFamilyData* column_family_data, DBImpl* db, InstrumentedMutex* mutex)
    : cfd_(column_family_data), db_(db), mutex_(mutex) {
  if (cfd_ != nullptr) {
    cfd_->Ref();
  }
}

ColumnFamilyHandleImpl::~ColumnFamilyHandleImpl() {
  if (cfd_ != nullptr) {
#ifndef ROCKSDB_LITE
    for (auto& listener : cfd_->ioptions()->listeners) {
      listener->OnColumnFamilyHandleDeletionStarted(this);
    }
#endif  // ROCKSDB_LITE
    // Job id == 0 means that this is not our background process, but rather
    // a user thread.
    // Need to hold some shared pointers owned by the initial_cf_options
    // before final cleaning up finishes.
    ColumnFamilyOptions initial_cf_options_copy = cfd_->initial_cf_options();
    JobContext job_context(0);
    mutex_->Lock();
    bool dropped = cfd_->IsDropped();
    if (cfd_->UnrefAndTryDelete()) {
      if (dropped) {
        db_->FindObsoleteFiles(&job_context, false, true);
      }
    }
    mutex_->Unlock();
    if (job_context.HaveSomethingToDelete()) {
      bool defer_purge =
          db_->immutable_db_options().avoid_unnecessary_blocking_io;
      db_->PurgeObsoleteFiles(job_context, defer_purge);
      if (defer_purge) {
        mutex_->Lock();
        db_->SchedulePurge();
        mutex_->Unlock();
      }
    }
    job_context.Clean();
  }
}

uint32_t ColumnFamilyHandleImpl::GetID() const { return cfd()->GetID(); }

const std::string& ColumnFamilyHandleImpl::GetName() const {
  return cfd()->GetName();
}

Status ColumnFamilyHandleImpl::GetDescriptor(ColumnFamilyDescriptor* desc) {
#ifndef ROCKSDB_LITE
  // accessing mutable cf-options requires db mutex.
  InstrumentedMutexLock l(mutex_);
  *desc = ColumnFamilyDescriptor(cfd()->GetName(), cfd()->GetLatestCFOptions());
  return Status::OK();
#else
  (void)desc;
  return Status::NotSupported();
#endif  // !ROCKSDB_LITE
}

const Comparator* ColumnFamilyHandleImpl::GetComparator() const {
  return cfd()->user_comparator();
}

void GetIntTblPropCollectorFactory(
    const ImmutableCFOptions& ioptions,
    std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
        int_tbl_prop_collector_factories) {
  auto& collector_factories = ioptions.table_properties_collector_factories;
  for (size_t i = 0; i < ioptions.table_properties_collector_factories.size();
       ++i) {
    assert(collector_factories[i]);
    int_tbl_prop_collector_factories->emplace_back(
        new UserKeyTablePropertiesCollectorFactory(collector_factories[i]));
  }
}

Status CheckCompressionSupported(const ColumnFamilyOptions& cf_options) {
  if (!cf_options.compression_per_level.empty()) {
    for (size_t level = 0; level < cf_options.compression_per_level.size();
         ++level) {
      if (!CompressionTypeSupported(cf_options.compression_per_level[level])) {
        return Status::InvalidArgument(
            "Compression type " +
            CompressionTypeToString(cf_options.compression_per_level[level]) +
            " is not linked with the binary.");
      }
    }
  } else {
    if (!CompressionTypeSupported(cf_options.compression)) {
      return Status::InvalidArgument(
          "Compression type " +
          CompressionTypeToString(cf_options.compression) +
          " is not linked with the binary.");
    }
  }
  if (cf_options.compression_opts.zstd_max_train_bytes > 0) {
    if (!ZSTD_TrainDictionarySupported()) {
      return Status::InvalidArgument(
          "zstd dictionary trainer cannot be used because ZSTD 1.1.3+ "
          "is not linked with the binary.");
    }
    if (cf_options.compression_opts.max_dict_bytes == 0) {
      return Status::InvalidArgument(
          "The dictionary size limit (`CompressionOptions::max_dict_bytes`) "
          "should be nonzero if we're using zstd's dictionary generator.");
    }
  }

  if (!CompressionTypeSupported(cf_options.blob_compression_type)) {
    std::ostringstream oss;
    oss << "The specified blob compression type "
        << CompressionTypeToString(cf_options.blob_compression_type)
        << " is not available.";

    return Status::InvalidArgument(oss.str());
  }

  return Status::OK();
}

Status CheckConcurrentWritesSupported(const ColumnFamilyOptions& cf_options) {
  if (cf_options.inplace_update_support) {
    return Status::InvalidArgument(
        "In-place memtable updates (inplace_update_support) are not "
        "compatible with concurrent writes "
        "(allow_concurrent_memtable_write)");
  }
  if (!cf_options.memtable_factory->IsInsertConcurrentlySupported()) {
    return Status::InvalidArgument(
        "Memtable doesn't support concurrent writes "
        "(allow_concurrent_memtable_write)");
  }
  return Status::OK();
}

Status CheckCFPathsSupported(const DBOptions& db_options,
                             const ColumnFamilyOptions& cf_options) {
  // More than one cf_path is supported only in universal
  // and level compaction styles. This function also checks the case
  // in which cf_paths is not specified, which results in db_paths
  // being used.
  if ((cf_options.compaction_style != kCompactionStyleUniversal) &&
      (cf_options.compaction_style != kCompactionStyleLevel)) {
    if (cf_options.cf_paths.size() > 1) {
      return Status::NotSupported(
          "More than one CF path is only supported in "
          "universal and level compaction styles. ");
    } else if (cf_options.cf_paths.empty() &&
               db_options.db_paths.size() > 1) {
      return Status::NotSupported(
          "More than one DB path is only supported in "
          "universal and level compaction styles. ");
    }
  }
  return Status::OK();
}

namespace {
const uint64_t kDefaultTtl = 0xfffffffffffffffe;
const uint64_t kDefaultPeriodicCompSecs = 0xfffffffffffffffe;
}  // namespace
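
// The two sentinel values above mean "not set by the user": SanitizeOptions()
// below replaces them with real defaults, so an explicit setting can be told
// apart from an untouched option.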
ColumnFamilyOptions SanitizeOptions(const ImmutableDBOptions& db_options,
                                    const ColumnFamilyOptions& src) {
  ColumnFamilyOptions result = src;
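  // Clamp write_buffer_size to [64 KB, 4 GB) on 32-bit builds and to
  // [64 KB, 64 GB] on 64-bit builds; std::conditional picks the cap that
  // fits in the platform's size_t.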
  size_t clamp_max = std::conditional<
      sizeof(size_t) == 4, std::integral_constant<size_t, 0xffffffff>,
      std::integral_constant<uint64_t, 64ull << 30>>::type::value;
  ClipToRange(&result.write_buffer_size, ((size_t)64) << 10, clamp_max);
  // if user sets arena_block_size, we trust user to use this value. Otherwise,
  // calculate a proper value from write_buffer_size;
  if (result.arena_block_size <= 0) {
    result.arena_block_size = result.write_buffer_size / 8;

    // Align up to 4k
    const size_t align = 4 * 1024;
    result.arena_block_size =
        ((result.arena_block_size + align - 1) / align) * align;
  }
  result.min_write_buffer_number_to_merge =
      std::min(result.min_write_buffer_number_to_merge,
               result.max_write_buffer_number - 1);
  if (result.min_write_buffer_number_to_merge < 1) {
    result.min_write_buffer_number_to_merge = 1;
  }

  if (result.num_levels < 1) {
    result.num_levels = 1;
  }
  if (result.compaction_style == kCompactionStyleLevel &&
      result.num_levels < 2) {
    result.num_levels = 2;
  }

  if (result.compaction_style == kCompactionStyleUniversal &&
      db_options.allow_ingest_behind && result.num_levels < 3) {
    result.num_levels = 3;
  }

  if (result.max_write_buffer_number < 2) {
    result.max_write_buffer_number = 2;
  }
  // fall back to max_write_buffer_number_to_maintain if
  // max_write_buffer_size_to_maintain is not set
  if (result.max_write_buffer_size_to_maintain < 0) {
    result.max_write_buffer_size_to_maintain =
        result.max_write_buffer_number *
        static_cast<int64_t>(result.write_buffer_size);
  } else if (result.max_write_buffer_size_to_maintain == 0 &&
             result.max_write_buffer_number_to_maintain < 0) {
    result.max_write_buffer_number_to_maintain = result.max_write_buffer_number;
  }
  // bloom filter size shouldn't exceed 1/4 of memtable size.
  if (result.memtable_prefix_bloom_size_ratio > 0.25) {
    result.memtable_prefix_bloom_size_ratio = 0.25;
  } else if (result.memtable_prefix_bloom_size_ratio < 0) {
    result.memtable_prefix_bloom_size_ratio = 0;
  }

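  // Hash-based memtable implementations need a prefix extractor to bucket
  // keys; without one they cannot work, so fall back to the default skip
  // list.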
  if (!result.prefix_extractor) {
    assert(result.memtable_factory);
    Slice name = result.memtable_factory->Name();
    if (name.compare("HashSkipListRepFactory") == 0 ||
        name.compare("HashLinkListRepFactory") == 0) {
      result.memtable_factory = std::make_shared<SkipListFactory>();
    }
  }

  if (result.compaction_style == kCompactionStyleFIFO) {
    result.num_levels = 1;
    // since we delete level0 files in FIFO compaction when there are too many
    // of them, these options don't really mean anything
    result.level0_slowdown_writes_trigger = std::numeric_limits<int>::max();
    result.level0_stop_writes_trigger = std::numeric_limits<int>::max();
  }

  if (result.max_bytes_for_level_multiplier <= 0) {
    result.max_bytes_for_level_multiplier = 1;
  }

  if (result.level0_file_num_compaction_trigger == 0) {
    ROCKS_LOG_WARN(db_options.info_log.get(),
                   "level0_file_num_compaction_trigger cannot be 0");
    result.level0_file_num_compaction_trigger = 1;
  }

  if (result.level0_stop_writes_trigger <
          result.level0_slowdown_writes_trigger ||
      result.level0_slowdown_writes_trigger <
          result.level0_file_num_compaction_trigger) {
    ROCKS_LOG_WARN(db_options.info_log.get(),
                   "This condition must be satisfied: "
                   "level0_stop_writes_trigger(%d) >= "
                   "level0_slowdown_writes_trigger(%d) >= "
                   "level0_file_num_compaction_trigger(%d)",
                   result.level0_stop_writes_trigger,
                   result.level0_slowdown_writes_trigger,
                   result.level0_file_num_compaction_trigger);
    if (result.level0_slowdown_writes_trigger <
        result.level0_file_num_compaction_trigger) {
      result.level0_slowdown_writes_trigger =
          result.level0_file_num_compaction_trigger;
    }
    if (result.level0_stop_writes_trigger <
        result.level0_slowdown_writes_trigger) {
      result.level0_stop_writes_trigger = result.level0_slowdown_writes_trigger;
    }
    ROCKS_LOG_WARN(db_options.info_log.get(),
                   "Adjusting the values to "
                   "level0_stop_writes_trigger(%d) "
                   "level0_slowdown_writes_trigger(%d) "
                   "level0_file_num_compaction_trigger(%d)",
                   result.level0_stop_writes_trigger,
                   result.level0_slowdown_writes_trigger,
                   result.level0_file_num_compaction_trigger);
  }

  if (result.soft_pending_compaction_bytes_limit == 0) {
    result.soft_pending_compaction_bytes_limit =
        result.hard_pending_compaction_bytes_limit;
  } else if (result.hard_pending_compaction_bytes_limit > 0 &&
             result.soft_pending_compaction_bytes_limit >
                 result.hard_pending_compaction_bytes_limit) {
    result.soft_pending_compaction_bytes_limit =
        result.hard_pending_compaction_bytes_limit;
  }

#ifndef ROCKSDB_LITE
  // When the DB is stopped, it's possible that there are some .trash files
  // that were not deleted yet; when we open the DB we will find these .trash
  // files and schedule them to be deleted (or delete them immediately if
  // SstFileManager was not used).
  auto sfm =
      static_cast<SstFileManagerImpl*>(db_options.sst_file_manager.get());
  for (size_t i = 0; i < result.cf_paths.size(); i++) {
    DeleteScheduler::CleanupDirectory(db_options.env, sfm,
                                      result.cf_paths[i].path);
  }
#endif

  if (result.cf_paths.empty()) {
    result.cf_paths = db_options.db_paths;
  }

  if (result.level_compaction_dynamic_level_bytes) {
    if (result.compaction_style != kCompactionStyleLevel ||
        result.cf_paths.size() > 1U) {
      // 1. level_compaction_dynamic_level_bytes only makes sense for
      //    level-based compaction.
      // 2. we don't yet know how to make this feature work with multiple
      //    DB paths.
      result.level_compaction_dynamic_level_bytes = false;
    }
  }

  if (result.max_compaction_bytes == 0) {
    result.max_compaction_bytes = result.target_file_size_base * 25;
  }

  bool is_block_based_table = (result.table_factory->IsInstanceOf(
      TableFactory::kBlockBasedTableName()));

  const uint64_t kAdjustedTtl = 30 * 24 * 60 * 60;
  if (result.ttl == kDefaultTtl) {
    if (is_block_based_table &&
        result.compaction_style != kCompactionStyleFIFO) {
      result.ttl = kAdjustedTtl;
    } else {
      result.ttl = 0;
    }
  }

  const uint64_t kAdjustedPeriodicCompSecs = 30 * 24 * 60 * 60;

  // Turn on periodic compactions and set them to occur once every 30 days if
  // compaction filters are used and periodic_compaction_seconds is set to the
  // default value.
  if (result.compaction_style != kCompactionStyleFIFO) {
    if ((result.compaction_filter != nullptr ||
         result.compaction_filter_factory != nullptr) &&
        result.periodic_compaction_seconds == kDefaultPeriodicCompSecs &&
        is_block_based_table) {
      result.periodic_compaction_seconds = kAdjustedPeriodicCompSecs;
    }
  } else {
    // result.compaction_style == kCompactionStyleFIFO
    if (result.ttl == 0) {
      if (is_block_based_table) {
        if (result.periodic_compaction_seconds == kDefaultPeriodicCompSecs) {
          result.periodic_compaction_seconds = kAdjustedPeriodicCompSecs;
        }
        result.ttl = result.periodic_compaction_seconds;
      }
    } else if (result.periodic_compaction_seconds != 0) {
      result.ttl = std::min(result.ttl, result.periodic_compaction_seconds);
    }
  }

  // TTL compactions would work similar to Periodic Compactions in Universal in
  // most of the cases. So, if ttl is set, execute the periodic compaction
  // codepath.
  if (result.compaction_style == kCompactionStyleUniversal && result.ttl != 0) {
    if (result.periodic_compaction_seconds != 0) {
      result.periodic_compaction_seconds =
          std::min(result.ttl, result.periodic_compaction_seconds);
    } else {
      result.periodic_compaction_seconds = result.ttl;
    }
  }

  if (result.periodic_compaction_seconds == kDefaultPeriodicCompSecs) {
    result.periodic_compaction_seconds = 0;
  }

  return result;
}

int SuperVersion::dummy = 0;
void* const SuperVersion::kSVInUse = &SuperVersion::dummy;
void* const SuperVersion::kSVObsolete = nullptr;

SuperVersion::~SuperVersion() {
  for (auto td : to_delete) {
    delete td;
  }
}

SuperVersion* SuperVersion::Ref() {
  refs.fetch_add(1, std::memory_order_relaxed);
  return this;
}

bool SuperVersion::Unref() {
  // fetch_sub returns the previous value of ref
  uint32_t previous_refs = refs.fetch_sub(1);
  assert(previous_refs > 0);
  return previous_refs == 1;
}

void SuperVersion::Cleanup() {
  assert(refs.load(std::memory_order_relaxed) == 0);
  imm->Unref(&to_delete);
  MemTable* m = mem->Unref();
  if (m != nullptr) {
    auto* memory_usage = current->cfd()->imm()->current_memory_usage();
    assert(*memory_usage >= m->ApproximateMemoryUsage());
    *memory_usage -= m->ApproximateMemoryUsage();
    to_delete.push_back(m);
  }
  current->Unref();
  if (cfd->Unref()) {
    delete cfd;
  }
}

void SuperVersion::Init(ColumnFamilyData* new_cfd, MemTable* new_mem,
                        MemTableListVersion* new_imm, Version* new_current) {
  cfd = new_cfd;
  mem = new_mem;
  imm = new_imm;
  current = new_current;
  cfd->Ref();
  mem->Ref();
  imm->Ref();
  current->Ref();
  refs.store(1, std::memory_order_relaxed);
}

namespace {
void SuperVersionUnrefHandle(void* ptr) {
  // UnrefHandle is called when a thread exits or a ThreadLocalPtr gets
  // destroyed. When the former happens, the thread shouldn't see kSVInUse.
  // When the latter happens, we are in ~ColumnFamilyData(), so no get should
  // happen either.
  SuperVersion* sv = static_cast<SuperVersion*>(ptr);
  bool was_last_ref __attribute__((__unused__));
  was_last_ref = sv->Unref();
  // Thread-local SuperVersions can't outlive ColumnFamilyData::super_version_.
  // This is important because we can't do SuperVersion cleanup here.
  // That would require locking DB mutex, which would deadlock because
  // SuperVersionUnrefHandle is called with locked ThreadLocalPtr mutex.
  assert(!was_last_ref);
}
}  // anonymous namespace

std::vector<std::string> ColumnFamilyData::GetDbPaths() const {
  std::vector<std::string> paths;
  paths.reserve(ioptions_.cf_paths.size());
  for (const DbPath& db_path : ioptions_.cf_paths) {
    paths.emplace_back(db_path.path);
  }
  return paths;
}

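// ID reserved for the dummy ColumnFamilyData that anchors ColumnFamilySet's
// circular linked list; it never corresponds to a real column family.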
const uint32_t ColumnFamilyData::kDummyColumnFamilyDataId = port::kMaxUint32;

ColumnFamilyData::ColumnFamilyData(
    uint32_t id, const std::string& name, Version* _dummy_versions,
    Cache* _table_cache, WriteBufferManager* write_buffer_manager,
    const ColumnFamilyOptions& cf_options, const ImmutableDBOptions& db_options,
    const FileOptions& file_options, ColumnFamilySet* column_family_set,
    BlockCacheTracer* const block_cache_tracer,
    const std::shared_ptr<IOTracer>& io_tracer)
    : id_(id),
      name_(name),
      dummy_versions_(_dummy_versions),
      current_(nullptr),
      refs_(0),
      initialized_(false),
      dropped_(false),
      internal_comparator_(cf_options.comparator),
      initial_cf_options_(SanitizeOptions(db_options, cf_options)),
      ioptions_(db_options, initial_cf_options_),
      mutable_cf_options_(initial_cf_options_),
      is_delete_range_supported_(
          cf_options.table_factory->IsDeleteRangeSupported()),
      write_buffer_manager_(write_buffer_manager),
      mem_(nullptr),
      imm_(ioptions_.min_write_buffer_number_to_merge,
           ioptions_.max_write_buffer_number_to_maintain,
           ioptions_.max_write_buffer_size_to_maintain),
      super_version_(nullptr),
      super_version_number_(0),
      local_sv_(new ThreadLocalPtr(&SuperVersionUnrefHandle)),
      next_(nullptr),
      prev_(nullptr),
      log_number_(0),
      flush_reason_(FlushReason::kOthers),
      column_family_set_(column_family_set),
      queued_for_flush_(false),
      queued_for_compaction_(false),
      prev_compaction_needed_bytes_(0),
      allow_2pc_(db_options.allow_2pc),
      last_memtable_id_(0),
      db_paths_registered_(false) {
  if (id_ != kDummyColumnFamilyDataId) {
    // TODO(cc): RegisterDbPaths can be expensive, consider moving it
    // outside of this constructor, which might be called with db mutex held.
    // TODO(cc): consider using ioptions_.fs; currently some tests rely on
    // EnvWrapper, which is the main reason why we use env here.
    Status s = ioptions_.env->RegisterDbPaths(GetDbPaths());
    if (s.ok()) {
      db_paths_registered_ = true;
    } else {
      ROCKS_LOG_ERROR(
          ioptions_.info_log,
          "Failed to register data paths of column family (id: %d, name: %s)",
          id_, name_.c_str());
    }
  }
  Ref();

  // Convert user defined table properties collector factories to internal
  // ones.
  GetIntTblPropCollectorFactory(ioptions_, &int_tbl_prop_collector_factories_);

  // if _dummy_versions is nullptr, then this is a dummy column family.
  if (_dummy_versions != nullptr) {
    internal_stats_.reset(
        new InternalStats(ioptions_.num_levels, db_options.env, this));
    table_cache_.reset(new TableCache(ioptions_, file_options, _table_cache,
                                      block_cache_tracer, io_tracer));
    blob_file_cache_.reset(
        new BlobFileCache(_table_cache, ioptions(), soptions(), id_,
                          internal_stats_->GetBlobFileReadHist()));

    if (ioptions_.compaction_style == kCompactionStyleLevel) {
      compaction_picker_.reset(
          new LevelCompactionPicker(ioptions_, &internal_comparator_));
#ifndef ROCKSDB_LITE
    } else if (ioptions_.compaction_style == kCompactionStyleUniversal) {
      compaction_picker_.reset(
          new UniversalCompactionPicker(ioptions_, &internal_comparator_));
    } else if (ioptions_.compaction_style == kCompactionStyleFIFO) {
      compaction_picker_.reset(
          new FIFOCompactionPicker(ioptions_, &internal_comparator_));
    } else if (ioptions_.compaction_style == kCompactionStyleNone) {
      compaction_picker_.reset(
          new NullCompactionPicker(ioptions_, &internal_comparator_));
      ROCKS_LOG_WARN(ioptions_.info_log,
                     "Column family %s does not use any background compaction. "
                     "Compactions can only be done via CompactFiles\n",
                     GetName().c_str());
#endif  // !ROCKSDB_LITE
    } else {
      ROCKS_LOG_ERROR(ioptions_.info_log,
                      "Unable to recognize the specified compaction style %d. "
                      "Column family %s will use kCompactionStyleLevel.\n",
                      ioptions_.compaction_style, GetName().c_str());
      compaction_picker_.reset(
          new LevelCompactionPicker(ioptions_, &internal_comparator_));
    }

    if (column_family_set_->NumberOfColumnFamilies() < 10) {
      ROCKS_LOG_INFO(ioptions_.info_log,
                     "--------------- Options for column family [%s]:\n",
                     name.c_str());
      initial_cf_options_.Dump(ioptions_.info_log);
    } else {
      ROCKS_LOG_INFO(ioptions_.info_log, "\t(skipping printing options)\n");
    }
  }

  RecalculateWriteStallConditions(mutable_cf_options_);
}

// DB mutex held
ColumnFamilyData::~ColumnFamilyData() {
  assert(refs_.load(std::memory_order_relaxed) == 0);
  // remove from linked list
  auto prev = prev_;
  auto next = next_;
  prev->next_ = next;
  next->prev_ = prev;

  if (!dropped_ && column_family_set_ != nullptr) {
    // If it's dropped, it's already removed from the column family set.
    // If column_family_set_ == nullptr, this is a dummy CFD and not in the
    // ColumnFamilySet.
    column_family_set_->RemoveColumnFamily(this);
  }

  if (current_ != nullptr) {
    current_->Unref();
  }

  // It would be wrong if this ColumnFamilyData is in flush_queue_ or
  // compaction_queue_ and we destroyed it
  assert(!queued_for_flush_);
  assert(!queued_for_compaction_);
  assert(super_version_ == nullptr);

  if (dummy_versions_ != nullptr) {
    // List must be empty
    assert(dummy_versions_->TEST_Next() == dummy_versions_);
    bool deleted __attribute__((__unused__));
    deleted = dummy_versions_->Unref();
    assert(deleted);
  }

  if (mem_ != nullptr) {
    delete mem_->Unref();
  }
  autovector<MemTable*> to_delete;
  imm_.current()->Unref(&to_delete);
  for (MemTable* m : to_delete) {
    delete m;
  }

  if (db_paths_registered_) {
    // TODO(cc): consider using ioptions_.fs; currently some tests rely on
    // EnvWrapper, which is the main reason why we use env here.
    Status s = ioptions_.env->UnregisterDbPaths(GetDbPaths());
    if (!s.ok()) {
      ROCKS_LOG_ERROR(
          ioptions_.info_log,
          "Failed to unregister data paths of column family (id: %d, name: %s)",
          id_, name_.c_str());
    }
  }
}

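// Drops one reference. Deletes *this* when that was the last reference, or
// when only the cached super_version_ still holds it, in which case the
// SuperVersion is torn down first (its Unref may in turn drop the final
// reference to this CFD).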
bool ColumnFamilyData::UnrefAndTryDelete() {
  int old_refs = refs_.fetch_sub(1);
  assert(old_refs > 0);

  if (old_refs == 1) {
    assert(super_version_ == nullptr);
    delete this;
    return true;
  }

  if (old_refs == 2 && super_version_ != nullptr) {
    // Only the super_version_ holds me
    SuperVersion* sv = super_version_;
    super_version_ = nullptr;
    // Release SuperVersion reference kept in ThreadLocalPtr.
    // This must be done outside of mutex_ since unref handler can lock mutex.
    sv->db_mutex->Unlock();
    local_sv_.reset();
    sv->db_mutex->Lock();

    if (sv->Unref()) {
      // May delete this ColumnFamilyData after calling Cleanup()
      sv->Cleanup();
      delete sv;
      return true;
    }
  }
  return false;
}

void ColumnFamilyData::SetDropped() {
  // can't drop default CF
  assert(id_ != 0);
  dropped_ = true;
  write_controller_token_.reset();

  // remove from column_family_set
  column_family_set_->RemoveColumnFamily(this);
}

ColumnFamilyOptions ColumnFamilyData::GetLatestCFOptions() const {
  return BuildColumnFamilyOptions(initial_cf_options_, mutable_cf_options_);
}

uint64_t ColumnFamilyData::OldestLogToKeep() {
  auto current_log = GetLogNumber();

  if (allow_2pc_) {
    autovector<MemTable*> empty_list;
    auto imm_prep_log =
        imm()->PrecomputeMinLogContainingPrepSection(empty_list);
    auto mem_prep_log = mem()->GetMinLogContainingPrepSection();

    if (imm_prep_log > 0 && imm_prep_log < current_log) {
      current_log = imm_prep_log;
    }

    if (mem_prep_log > 0 && mem_prep_log < current_log) {
      current_log = mem_prep_log;
    }
  }

  return current_log;
}

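// Multipliers applied to the delayed write rate: values below 1.0 slow
// writes down further (the "near stop" factor being the harshest), while
// kDelayRecoverSlowdownRatio > 1.0 speeds writes back up once the delay
// condition clears.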
const double kIncSlowdownRatio = 0.8;
const double kDecSlowdownRatio = 1 / kIncSlowdownRatio;
const double kNearStopSlowdownRatio = 0.6;
const double kDelayRecoverSlowdownRatio = 1.4;

namespace {
// If penalize_stop is true, we further reduce slowdown rate.
std::unique_ptr<WriteControllerToken> SetupDelay(
    WriteController* write_controller, uint64_t compaction_needed_bytes,
    uint64_t prev_compaction_need_bytes, bool penalize_stop,
    bool auto_compactions_disabled) {
  const uint64_t kMinWriteRate = 16 * 1024u;  // Minimum write rate 16KB/s.

  uint64_t max_write_rate = write_controller->max_delayed_write_rate();
  uint64_t write_rate = write_controller->delayed_write_rate();

  if (auto_compactions_disabled) {
    // When auto compaction is disabled, always use the value user gave.
    write_rate = max_write_rate;
  } else if (write_controller->NeedsDelay() && max_write_rate > kMinWriteRate) {
    // If user gives rate less than kMinWriteRate, don't adjust it.
    //
    // If already delayed, need to adjust based on previous compaction debt.
    // When two or more column families require delay, we always increase or
    // reduce the write rate based on information for one single column
    // family. It is likely to be OK but we can improve if there is a problem.
    // Ignore the compaction_needed_bytes = 0 case because
    // compaction_needed_bytes is only available in level-based compaction.
    //
    // If the compaction debt stays the same as previously, we also further
    // slow down. It usually means a memtable is full. It's mainly for the
    // case where both flush and compaction are much slower than the speed we
    // insert into memtables, so we need to actively slow down before we get
    // a feedback signal from compactions and flushes, to avoid the full stop
    // caused by hitting the max write buffer number.
    //
    // If the DB just fell into the stop condition, we need to further reduce
    // the write rate to avoid the stop condition.
    if (penalize_stop) {
      // Penalize the near stop or stop condition by more aggressive slowdown.
      // This is to provide the long term slowdown increase signal.
      // The penalty is more than the reward of recovering to the normal
      // condition.
      write_rate = static_cast<uint64_t>(static_cast<double>(write_rate) *
                                         kNearStopSlowdownRatio);
      if (write_rate < kMinWriteRate) {
        write_rate = kMinWriteRate;
      }
    } else if (prev_compaction_need_bytes > 0 &&
               prev_compaction_need_bytes <= compaction_needed_bytes) {
      write_rate = static_cast<uint64_t>(static_cast<double>(write_rate) *
                                         kIncSlowdownRatio);
      if (write_rate < kMinWriteRate) {
        write_rate = kMinWriteRate;
      }
    } else if (prev_compaction_need_bytes > compaction_needed_bytes) {
      // We are speeding up by a ratio of kDecSlowdownRatio when we have paid
      // compaction debt. But we'll never speed up to faster than the write
      // rate given by users.
      write_rate = static_cast<uint64_t>(static_cast<double>(write_rate) *
                                         kDecSlowdownRatio);
      if (write_rate > max_write_rate) {
        write_rate = max_write_rate;
      }
    }
  }
  return write_controller->GetDelayToken(write_rate);
}

int GetL0ThresholdSpeedupCompaction(int level0_file_num_compaction_trigger,
                                    int level0_slowdown_writes_trigger) {
  // SanitizeOptions() ensures it.
  assert(level0_file_num_compaction_trigger <= level0_slowdown_writes_trigger);

  if (level0_file_num_compaction_trigger < 0) {
    return std::numeric_limits<int>::max();
  }

  const int64_t twice_level0_trigger =
      static_cast<int64_t>(level0_file_num_compaction_trigger) * 2;

  const int64_t one_fourth_trigger_slowdown =
      static_cast<int64_t>(level0_file_num_compaction_trigger) +
      ((level0_slowdown_writes_trigger - level0_file_num_compaction_trigger) /
       4);

  assert(twice_level0_trigger >= 0);
  assert(one_fourth_trigger_slowdown >= 0);

  // 1/4 of the way between the L0 compaction trigger threshold and the
  // slowdown condition, or twice the compaction trigger, whichever is
  // smaller.
  int64_t res = std::min(twice_level0_trigger, one_fourth_trigger_slowdown);
  if (res >= port::kMaxInt32) {
    return port::kMaxInt32;
  } else {
    // res fits in int
    return static_cast<int>(res);
  }
}
}  // namespace

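// Conditions are checked from most to least severe: all three stop triggers
// first, then the delay triggers, so a column family that hits both a stop
// and a delay limit reports the stop.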
std::pair<WriteStallCondition, ColumnFamilyData::WriteStallCause>
ColumnFamilyData::GetWriteStallConditionAndCause(
    int num_unflushed_memtables, int num_l0_files,
    uint64_t num_compaction_needed_bytes,
    const MutableCFOptions& mutable_cf_options) {
  if (num_unflushed_memtables >= mutable_cf_options.max_write_buffer_number) {
    return {WriteStallCondition::kStopped, WriteStallCause::kMemtableLimit};
  } else if (!mutable_cf_options.disable_auto_compactions &&
             num_l0_files >= mutable_cf_options.level0_stop_writes_trigger) {
    return {WriteStallCondition::kStopped, WriteStallCause::kL0FileCountLimit};
  } else if (!mutable_cf_options.disable_auto_compactions &&
             mutable_cf_options.hard_pending_compaction_bytes_limit > 0 &&
             num_compaction_needed_bytes >=
                 mutable_cf_options.hard_pending_compaction_bytes_limit) {
    return {WriteStallCondition::kStopped,
            WriteStallCause::kPendingCompactionBytes};
  } else if (mutable_cf_options.max_write_buffer_number > 3 &&
             num_unflushed_memtables >=
                 mutable_cf_options.max_write_buffer_number - 1) {
    return {WriteStallCondition::kDelayed, WriteStallCause::kMemtableLimit};
  } else if (!mutable_cf_options.disable_auto_compactions &&
             mutable_cf_options.level0_slowdown_writes_trigger >= 0 &&
             num_l0_files >=
                 mutable_cf_options.level0_slowdown_writes_trigger) {
    return {WriteStallCondition::kDelayed, WriteStallCause::kL0FileCountLimit};
  } else if (!mutable_cf_options.disable_auto_compactions &&
             mutable_cf_options.soft_pending_compaction_bytes_limit > 0 &&
             num_compaction_needed_bytes >=
                 mutable_cf_options.soft_pending_compaction_bytes_limit) {
    return {WriteStallCondition::kDelayed,
            WriteStallCause::kPendingCompactionBytes};
  }
  return {WriteStallCondition::kNormal, WriteStallCause::kNone};
}

WriteStallCondition ColumnFamilyData::RecalculateWriteStallConditions(
    const MutableCFOptions& mutable_cf_options) {
  auto write_stall_condition = WriteStallCondition::kNormal;
  if (current_ != nullptr) {
    auto* vstorage = current_->storage_info();
    auto write_controller = column_family_set_->write_controller_;
    uint64_t compaction_needed_bytes =
        vstorage->estimated_compaction_needed_bytes();

    auto write_stall_condition_and_cause = GetWriteStallConditionAndCause(
        imm()->NumNotFlushed(), vstorage->l0_delay_trigger_count(),
        vstorage->estimated_compaction_needed_bytes(), mutable_cf_options);
    write_stall_condition = write_stall_condition_and_cause.first;
    auto write_stall_cause = write_stall_condition_and_cause.second;

    bool was_stopped = write_controller->IsStopped();
    bool needed_delay = write_controller->NeedsDelay();

    if (write_stall_condition == WriteStallCondition::kStopped &&
        write_stall_cause == WriteStallCause::kMemtableLimit) {
      write_controller_token_ = write_controller->GetStopToken();
      internal_stats_->AddCFStats(InternalStats::MEMTABLE_LIMIT_STOPS, 1);
      ROCKS_LOG_WARN(
          ioptions_.info_log,
          "[%s] Stopping writes because we have %d immutable memtables "
          "(waiting for flush), max_write_buffer_number is set to %d",
          name_.c_str(), imm()->NumNotFlushed(),
          mutable_cf_options.max_write_buffer_number);
    } else if (write_stall_condition == WriteStallCondition::kStopped &&
               write_stall_cause == WriteStallCause::kL0FileCountLimit) {
      write_controller_token_ = write_controller->GetStopToken();
      internal_stats_->AddCFStats(InternalStats::L0_FILE_COUNT_LIMIT_STOPS, 1);
      if (compaction_picker_->IsLevel0CompactionInProgress()) {
        internal_stats_->AddCFStats(
            InternalStats::LOCKED_L0_FILE_COUNT_LIMIT_STOPS, 1);
      }
      ROCKS_LOG_WARN(ioptions_.info_log,
                     "[%s] Stopping writes because we have %d level-0 files",
                     name_.c_str(), vstorage->l0_delay_trigger_count());
    } else if (write_stall_condition == WriteStallCondition::kStopped &&
               write_stall_cause == WriteStallCause::kPendingCompactionBytes) {
      write_controller_token_ = write_controller->GetStopToken();
      internal_stats_->AddCFStats(
          InternalStats::PENDING_COMPACTION_BYTES_LIMIT_STOPS, 1);
      ROCKS_LOG_WARN(
          ioptions_.info_log,
          "[%s] Stopping writes because of estimated pending compaction "
          "bytes %" PRIu64,
          name_.c_str(), compaction_needed_bytes);
    } else if (write_stall_condition == WriteStallCondition::kDelayed &&
               write_stall_cause == WriteStallCause::kMemtableLimit) {
      write_controller_token_ =
          SetupDelay(write_controller, compaction_needed_bytes,
                     prev_compaction_needed_bytes_, was_stopped,
                     mutable_cf_options.disable_auto_compactions);
      internal_stats_->AddCFStats(InternalStats::MEMTABLE_LIMIT_SLOWDOWNS, 1);
      ROCKS_LOG_WARN(
          ioptions_.info_log,
          "[%s] Stalling writes because we have %d immutable memtables "
          "(waiting for flush), max_write_buffer_number is set to %d "
          "rate %" PRIu64,
          name_.c_str(), imm()->NumNotFlushed(),
          mutable_cf_options.max_write_buffer_number,
          write_controller->delayed_write_rate());
    } else if (write_stall_condition == WriteStallCondition::kDelayed &&
               write_stall_cause == WriteStallCause::kL0FileCountLimit) {
      // L0 is within two files of the stop trigger.
      bool near_stop = vstorage->l0_delay_trigger_count() >=
                       mutable_cf_options.level0_stop_writes_trigger - 2;
      write_controller_token_ =
          SetupDelay(write_controller, compaction_needed_bytes,
                     prev_compaction_needed_bytes_, was_stopped || near_stop,
                     mutable_cf_options.disable_auto_compactions);
      internal_stats_->AddCFStats(InternalStats::L0_FILE_COUNT_LIMIT_SLOWDOWNS,
                                  1);
      if (compaction_picker_->IsLevel0CompactionInProgress()) {
        internal_stats_->AddCFStats(
            InternalStats::LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS, 1);
      }
      ROCKS_LOG_WARN(ioptions_.info_log,
                     "[%s] Stalling writes because we have %d level-0 files "
                     "rate %" PRIu64,
                     name_.c_str(), vstorage->l0_delay_trigger_count(),
                     write_controller->delayed_write_rate());
    } else if (write_stall_condition == WriteStallCondition::kDelayed &&
               write_stall_cause == WriteStallCause::kPendingCompactionBytes) {
      // If the distance to the hard limit is less than 1/4 of the gap between
      // the soft and hard bytes limits, we consider it near stop and speed up
      // the slowdown.
      bool near_stop =
          mutable_cf_options.hard_pending_compaction_bytes_limit > 0 &&
          (compaction_needed_bytes -
           mutable_cf_options.soft_pending_compaction_bytes_limit) >
              3 * (mutable_cf_options.hard_pending_compaction_bytes_limit -
                   mutable_cf_options.soft_pending_compaction_bytes_limit) /
                  4;

      write_controller_token_ =
          SetupDelay(write_controller, compaction_needed_bytes,
                     prev_compaction_needed_bytes_, was_stopped || near_stop,
                     mutable_cf_options.disable_auto_compactions);
      internal_stats_->AddCFStats(
          InternalStats::PENDING_COMPACTION_BYTES_LIMIT_SLOWDOWNS, 1);
      ROCKS_LOG_WARN(
          ioptions_.info_log,
          "[%s] Stalling writes because of estimated pending compaction "
          "bytes %" PRIu64 " rate %" PRIu64,
          name_.c_str(), vstorage->estimated_compaction_needed_bytes(),
          write_controller->delayed_write_rate());
    } else {
      assert(write_stall_condition == WriteStallCondition::kNormal);
      if (vstorage->l0_delay_trigger_count() >=
          GetL0ThresholdSpeedupCompaction(
              mutable_cf_options.level0_file_num_compaction_trigger,
              mutable_cf_options.level0_slowdown_writes_trigger)) {
        write_controller_token_ =
            write_controller->GetCompactionPressureToken();
        ROCKS_LOG_INFO(
            ioptions_.info_log,
            "[%s] Increasing compaction threads because we have %d level-0 "
            "files ",
            name_.c_str(), vstorage->l0_delay_trigger_count());
      } else if (vstorage->estimated_compaction_needed_bytes() >=
                 mutable_cf_options.soft_pending_compaction_bytes_limit / 4) {
        // Increase compaction threads if bytes needed for compaction exceeds
        // 1/4 of threshold for slowing down.
        // If soft pending compaction byte limit is not set, always speed up
        // compaction.
        write_controller_token_ =
            write_controller->GetCompactionPressureToken();
        if (mutable_cf_options.soft_pending_compaction_bytes_limit > 0) {
          ROCKS_LOG_INFO(
              ioptions_.info_log,
              "[%s] Increasing compaction threads because of estimated "
              "pending compaction "
              "bytes %" PRIu64,
              name_.c_str(), vstorage->estimated_compaction_needed_bytes());
        }
      } else {
        write_controller_token_.reset();
      }
      // If the DB recovers from delay conditions, we reward with reducing
      // double the slowdown ratio. This is to balance the long term slowdown
      // increase signal.
      if (needed_delay) {
        uint64_t write_rate = write_controller->delayed_write_rate();
        write_controller->set_delayed_write_rate(static_cast<uint64_t>(
            static_cast<double>(write_rate) * kDelayRecoverSlowdownRatio));
        // Set the low pri limit to be 1/4 the delayed write rate.
        // Note we don't reset this value even after the delay condition is
        // released. Low-pri rate will continue to apply if there is
        // compaction pressure.
        write_controller->low_pri_rate_limiter()->SetBytesPerSecond(write_rate /
                                                                    4);
      }
    }
    prev_compaction_needed_bytes_ = compaction_needed_bytes;
  }
  return write_stall_condition;
}

const FileOptions* ColumnFamilyData::soptions() const {
  return &(column_family_set_->file_options_);
}

void ColumnFamilyData::SetCurrent(Version* current_version) {
  current_ = current_version;
}

uint64_t ColumnFamilyData::GetNumLiveVersions() const {
  return VersionSet::GetNumLiveVersions(dummy_versions_);
}

uint64_t ColumnFamilyData::GetTotalSstFilesSize() const {
  return VersionSet::GetTotalSstFilesSize(dummy_versions_);
}

uint64_t ColumnFamilyData::GetLiveSstFilesSize() const {
  return current_->GetSstFilesSize();
}

MemTable* ColumnFamilyData::ConstructNewMemtable(
    const MutableCFOptions& mutable_cf_options, SequenceNumber earliest_seq) {
  return new MemTable(internal_comparator_, ioptions_, mutable_cf_options,
                      write_buffer_manager_, earliest_seq, id_);
}

void ColumnFamilyData::CreateNewMemtable(
    const MutableCFOptions& mutable_cf_options, SequenceNumber earliest_seq) {
  if (mem_ != nullptr) {
    delete mem_->Unref();
  }
  SetMemtable(ConstructNewMemtable(mutable_cf_options, earliest_seq));
  mem_->Ref();
}

bool ColumnFamilyData::NeedsCompaction() const {
  return !mutable_cf_options_.disable_auto_compactions &&
         compaction_picker_->NeedsCompaction(current_->storage_info());
}

Compaction* ColumnFamilyData::PickCompaction(
    const MutableCFOptions& mutable_options,
    const MutableDBOptions& mutable_db_options, LogBuffer* log_buffer) {
  SequenceNumber earliest_mem_seqno =
      std::min(mem_->GetEarliestSequenceNumber(),
               imm_.current()->GetEarliestSequenceNumber(false));
  auto* result = compaction_picker_->PickCompaction(
      GetName(), mutable_options, mutable_db_options, current_->storage_info(),
      log_buffer, earliest_mem_seqno);
  if (result != nullptr) {
    result->SetInputVersion(current_);
  }
  return result;
}

bool ColumnFamilyData::RangeOverlapWithCompaction(
    const Slice& smallest_user_key, const Slice& largest_user_key,
    int level) const {
  return compaction_picker_->RangeOverlapWithCompaction(
      smallest_user_key, largest_user_key, level);
}

Status ColumnFamilyData::RangesOverlapWithMemtables(
    const autovector<Range>& ranges, SuperVersion* super_version,
    bool allow_data_in_errors, bool* overlap) {
  assert(overlap != nullptr);
  *overlap = false;
  // Create an InternalIterator over all unflushed memtables
  Arena arena;
  ReadOptions read_opts;
  read_opts.total_order_seek = true;
  MergeIteratorBuilder merge_iter_builder(&internal_comparator_, &arena);
  merge_iter_builder.AddIterator(
      super_version->mem->NewIterator(read_opts, &arena));
  super_version->imm->AddIterators(read_opts, &merge_iter_builder);
  ScopedArenaIterator memtable_iter(merge_iter_builder.Finish());

  auto read_seq = super_version->current->version_set()->LastSequence();
  ReadRangeDelAggregator range_del_agg(&internal_comparator_, read_seq);
  auto* active_range_del_iter =
      super_version->mem->NewRangeTombstoneIterator(read_opts, read_seq);
  range_del_agg.AddTombstones(
      std::unique_ptr<FragmentedRangeTombstoneIterator>(active_range_del_iter));
  Status status;
  status = super_version->imm->AddRangeTombstoneIterators(
      read_opts, nullptr /* arena */, &range_del_agg);
  // AddRangeTombstoneIterators always returns Status::OK.
  assert(status.ok());

  for (size_t i = 0; i < ranges.size() && status.ok() && !*overlap; ++i) {
    auto* vstorage = super_version->current->storage_info();
    auto* ucmp = vstorage->InternalComparator()->user_comparator();
    InternalKey range_start(ranges[i].start, kMaxSequenceNumber,
                            kValueTypeForSeek);
    memtable_iter->Seek(range_start.Encode());
    status = memtable_iter->status();
    ParsedInternalKey seek_result;

    if (status.ok() && memtable_iter->Valid()) {
      status = ParseInternalKey(memtable_iter->key(), &seek_result,
                                allow_data_in_errors);
    }

    if (status.ok()) {
      if (memtable_iter->Valid() &&
          ucmp->Compare(seek_result.user_key, ranges[i].limit) <= 0) {
        *overlap = true;
      } else if (range_del_agg.IsRangeOverlapped(ranges[i].start,
                                                 ranges[i].limit)) {
        *overlap = true;
      }
    }
  }
  return status;
}

const int ColumnFamilyData::kCompactAllLevels = -1;
const int ColumnFamilyData::kCompactToBaseLevel = -2;

Compaction* ColumnFamilyData::CompactRange(
    const MutableCFOptions& mutable_cf_options,
    const MutableDBOptions& mutable_db_options, int input_level,
    int output_level, const CompactRangeOptions& compact_range_options,
    const InternalKey* begin, const InternalKey* end,
    InternalKey** compaction_end, bool* conflict,
    uint64_t max_file_num_to_ignore) {
  auto* result = compaction_picker_->CompactRange(
      GetName(), mutable_cf_options, mutable_db_options,
      current_->storage_info(), input_level, output_level,
      compact_range_options, begin, end, compaction_end, conflict,
      max_file_num_to_ignore);
  if (result != nullptr) {
    result->SetInputVersion(current_);
  }
  return result;
}

SuperVersion* ColumnFamilyData::GetReferencedSuperVersion(DBImpl* db) {
  SuperVersion* sv = GetThreadLocalSuperVersion(db);
  sv->Ref();
  if (!ReturnThreadLocalSuperVersion(sv)) {
    // This Unref() corresponds to the Ref() in GetThreadLocalSuperVersion()
    // when the thread-local pointer was populated. So, the Ref() earlier in
    // this function still prevents the returned SuperVersion* from being
    // deleted out from under the caller.
    sv->Unref();
  }
  return sv;
}

SuperVersion* ColumnFamilyData::GetThreadLocalSuperVersion(DBImpl* db) {
  // The SuperVersion is cached in thread local storage to avoid acquiring
  // mutex when SuperVersion does not change since the last use. When a new
  // SuperVersion is installed, the compaction or flush thread cleans up
  // cached SuperVersion in all existing thread local storage. To avoid
  // acquiring mutex for this operation, we use atomic Swap() on the thread
  // local pointer to guarantee exclusive access. If the thread local pointer
  // is being used while a new SuperVersion is installed, the cached
  // SuperVersion can become stale. In that case, the background thread would
  // have swapped in kSVObsolete. We re-check the value when returning the
  // SuperVersion back to thread local, with an atomic compare and swap.
  // The superversion will need to be released if detected to be stale.
  void* ptr = local_sv_->Swap(SuperVersion::kSVInUse);
  // Invariant:
  // (1) Scrape (always) installs kSVObsolete in ThreadLocal storage
  // (2) the Swap above (always) installs kSVInUse, ThreadLocal storage
  // should only keep kSVInUse before ReturnThreadLocalSuperVersion call
  // (if no Scrape happens).
  assert(ptr != SuperVersion::kSVInUse);
  SuperVersion* sv = static_cast<SuperVersion*>(ptr);
  if (sv == SuperVersion::kSVObsolete ||
      sv->version_number != super_version_number_.load()) {
    RecordTick(ioptions_.statistics, NUMBER_SUPERVERSION_ACQUIRES);
    SuperVersion* sv_to_delete = nullptr;

    if (sv && sv->Unref()) {
      RecordTick(ioptions_.statistics, NUMBER_SUPERVERSION_CLEANUPS);
      db->mutex()->Lock();
      // NOTE: underlying resources held by superversion (sst files) might
      // not be released until the next background job.
      sv->Cleanup();
      if (db->immutable_db_options().avoid_unnecessary_blocking_io) {
        db->AddSuperVersionsToFreeQueue(sv);
        db->SchedulePurge();
      } else {
        sv_to_delete = sv;
      }
    } else {
      db->mutex()->Lock();
    }
    sv = super_version_->Ref();
    db->mutex()->Unlock();

    delete sv_to_delete;
  }
  assert(sv != nullptr);
  return sv;
}

bool ColumnFamilyData::ReturnThreadLocalSuperVersion(SuperVersion* sv) {
  assert(sv != nullptr);
  // Put the SuperVersion back
  void* expected = SuperVersion::kSVInUse;
  if (local_sv_->CompareAndSwap(static_cast<void*>(sv), expected)) {
    // When we see kSVInUse in the ThreadLocal, we are sure ThreadLocal
    // storage has not been altered and no Scrape has happened. The
    // SuperVersion is still current.
    return true;
  } else {
    // ThreadLocal scrape happened in the process of this GetImpl call (after
    // thread local Swap() at the beginning and before CompareAndSwap()).
    // This means the SuperVersion it holds is obsolete.
    assert(expected == SuperVersion::kSVObsolete);
  }
  return false;
}

void ColumnFamilyData::InstallSuperVersion(
    SuperVersionContext* sv_context, InstrumentedMutex* db_mutex) {
  db_mutex->AssertHeld();
  return InstallSuperVersion(sv_context, db_mutex, mutable_cf_options_);
}

void ColumnFamilyData::InstallSuperVersion(
    SuperVersionContext* sv_context, InstrumentedMutex* db_mutex,
    const MutableCFOptions& mutable_cf_options) {
  SuperVersion* new_superversion = sv_context->new_superversion.release();
  new_superversion->db_mutex = db_mutex;
  new_superversion->mutable_cf_options = mutable_cf_options;
  new_superversion->Init(this, mem_, imm_.current(), current_);
  SuperVersion* old_superversion = super_version_;
  super_version_ = new_superversion;
  ++super_version_number_;
  super_version_->version_number = super_version_number_;
  super_version_->write_stall_condition =
      RecalculateWriteStallConditions(mutable_cf_options);

  if (old_superversion != nullptr) {
    // Reset SuperVersions cached in thread local storage.
    // This should be done before old_superversion->Unref(). That's to ensure
    // that local_sv_ never holds the last reference to SuperVersion, since
    // it has no means to safely do SuperVersion cleanup.
    ResetThreadLocalSuperVersions();

    if (old_superversion->mutable_cf_options.write_buffer_size !=
        mutable_cf_options.write_buffer_size) {
      mem_->UpdateWriteBufferSize(mutable_cf_options.write_buffer_size);
    }
    if (old_superversion->write_stall_condition !=
        new_superversion->write_stall_condition) {
      sv_context->PushWriteStallNotification(
          old_superversion->write_stall_condition,
          new_superversion->write_stall_condition, GetName(), ioptions());
    }
    if (old_superversion->Unref()) {
      old_superversion->Cleanup();
      sv_context->superversions_to_free.push_back(old_superversion);
    }
  }
}

void ColumnFamilyData::ResetThreadLocalSuperVersions() {
  autovector<void*> sv_ptrs;
  local_sv_->Scrape(&sv_ptrs, SuperVersion::kSVObsolete);
  for (auto ptr : sv_ptrs) {
    assert(ptr);
    if (ptr == SuperVersion::kSVInUse) {
      continue;
    }
    auto sv = static_cast<SuperVersion*>(ptr);
    bool was_last_ref __attribute__((__unused__));
    was_last_ref = sv->Unref();
    // sv couldn't have been the last reference because
    // ResetThreadLocalSuperVersions() is called before
    // unref'ing super_version_.
    assert(!was_last_ref);
  }
}

Status ColumnFamilyData::ValidateOptions(
    const DBOptions& db_options, const ColumnFamilyOptions& cf_options) {
  Status s;
  s = CheckCompressionSupported(cf_options);
  if (s.ok() && db_options.allow_concurrent_memtable_write) {
    s = CheckConcurrentWritesSupported(cf_options);
  }
  if (s.ok() && db_options.unordered_write &&
      cf_options.max_successive_merges != 0) {
    s = Status::InvalidArgument(
        "max_successive_merges > 0 is incompatible with unordered_write");
  }
  if (s.ok()) {
    s = CheckCFPathsSupported(db_options, cf_options);
  }
  if (!s.ok()) {
    return s;
  }

  if (cf_options.ttl > 0 && cf_options.ttl != kDefaultTtl) {
    if (!cf_options.table_factory->IsInstanceOf(
            TableFactory::kBlockBasedTableName())) {
      return Status::NotSupported(
          "TTL is only supported in Block-Based Table format. ");
    }
  }

  if (cf_options.periodic_compaction_seconds > 0 &&
      cf_options.periodic_compaction_seconds != kDefaultPeriodicCompSecs) {
    if (!cf_options.table_factory->IsInstanceOf(
            TableFactory::kBlockBasedTableName())) {
      return Status::NotSupported(
          "Periodic Compaction is only supported in "
          "Block-Based Table format. ");
    }
  }
  return s;
}

#ifndef ROCKSDB_LITE
Status ColumnFamilyData::SetOptions(
    const DBOptions& db_options,
    const std::unordered_map<std::string, std::string>& options_map) {
  MutableCFOptions new_mutable_cf_options;
  Status s =
      GetMutableOptionsFromStrings(mutable_cf_options_, options_map,
                                   ioptions_.info_log, &new_mutable_cf_options);
  if (s.ok()) {
    ColumnFamilyOptions cf_options =
        BuildColumnFamilyOptions(initial_cf_options_, new_mutable_cf_options);
    s = ValidateOptions(db_options, cf_options);
  }
  if (s.ok()) {
    mutable_cf_options_ = new_mutable_cf_options;
    mutable_cf_options_.RefreshDerivedOptions(ioptions_);
  }
  return s;
}
#endif  // ROCKSDB_LITE

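// Map a compaction output level to a write-lifetime hint for the filesystem:
// L0 and anything at or below the base level is expected to be rewritten
// soon (WLTH_MEDIUM), and each level past the base lives progressively
// longer, capped at WLTH_EXTREME two or more levels deep.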
// REQUIRES: DB mutex held
Env::WriteLifeTimeHint ColumnFamilyData::CalculateSSTWriteHint(int level) {
  if (initial_cf_options_.compaction_style != kCompactionStyleLevel) {
    return Env::WLTH_NOT_SET;
  }
  if (level == 0) {
    return Env::WLTH_MEDIUM;
  }
  int base_level = current_->storage_info()->base_level();

  // L1: medium, L2: long, ...
  if (level - base_level >= 2) {
    return Env::WLTH_EXTREME;
  } else if (level < base_level) {
    // There is no restriction which prevents level passed in to be smaller
    // than base_level.
    return Env::WLTH_MEDIUM;
  }
  return static_cast<Env::WriteLifeTimeHint>(
      level - base_level + static_cast<int>(Env::WLTH_MEDIUM));
}

Status ColumnFamilyData::AddDirectories(
    std::map<std::string, std::shared_ptr<FSDirectory>>* created_dirs) {
  Status s;
  assert(created_dirs != nullptr);
  assert(data_dirs_.empty());
  for (auto& p : ioptions_.cf_paths) {
    auto existing_dir = created_dirs->find(p.path);

    if (existing_dir == created_dirs->end()) {
      std::unique_ptr<FSDirectory> path_directory;
      s = DBImpl::CreateAndNewDirectory(ioptions_.fs, p.path, &path_directory);
      if (!s.ok()) {
        return s;
      }
      assert(path_directory != nullptr);
      data_dirs_.emplace_back(path_directory.release());
      (*created_dirs)[p.path] = data_dirs_.back();
    } else {
      data_dirs_.emplace_back(existing_dir->second);
    }
  }
  assert(data_dirs_.size() == ioptions_.cf_paths.size());
  return s;
}

FSDirectory* ColumnFamilyData::GetDataDir(size_t path_id) const {
  if (data_dirs_.empty()) {
    return nullptr;
  }

  assert(path_id < data_dirs_.size());
  return data_dirs_[path_id].get();
}

ColumnFamilySet::ColumnFamilySet(const std::string& dbname,
                                 const ImmutableDBOptions* db_options,
                                 const FileOptions& file_options,
                                 Cache* table_cache,
                                 WriteBufferManager* _write_buffer_manager,
                                 WriteController* _write_controller,
                                 BlockCacheTracer* const block_cache_tracer,
                                 const std::shared_ptr<IOTracer>& io_tracer)
    : max_column_family_(0),
      dummy_cfd_(new ColumnFamilyData(
          ColumnFamilyData::kDummyColumnFamilyDataId, "", nullptr, nullptr,
          nullptr, ColumnFamilyOptions(), *db_options, file_options, nullptr,
          block_cache_tracer, io_tracer)),
      default_cfd_cache_(nullptr),
      db_name_(dbname),
      db_options_(db_options),
      file_options_(file_options),
      table_cache_(table_cache),
      write_buffer_manager_(_write_buffer_manager),
      write_controller_(_write_controller),
      block_cache_tracer_(block_cache_tracer),
      io_tracer_(io_tracer) {
  // initialize linked list
  dummy_cfd_->prev_ = dummy_cfd_;
  dummy_cfd_->next_ = dummy_cfd_;
}

ColumnFamilySet::~ColumnFamilySet() {
  while (column_family_data_.size() > 0) {
    // cfd destructor will delete itself from column_family_data_
    auto cfd = column_family_data_.begin()->second;
    bool last_ref __attribute__((__unused__));
    last_ref = cfd->UnrefAndTryDelete();
    assert(last_ref);
  }
  bool dummy_last_ref __attribute__((__unused__));
  dummy_last_ref = dummy_cfd_->UnrefAndTryDelete();
  assert(dummy_last_ref);
}

ColumnFamilyData* ColumnFamilySet::GetDefault() const {
  assert(default_cfd_cache_ != nullptr);
  return default_cfd_cache_;
}

ColumnFamilyData* ColumnFamilySet::GetColumnFamily(uint32_t id) const {
  auto cfd_iter = column_family_data_.find(id);
  if (cfd_iter != column_family_data_.end()) {
    return cfd_iter->second;
  } else {
    return nullptr;
  }
}

ColumnFamilyData* ColumnFamilySet::GetColumnFamily(const std::string& name)
    const {
  auto cfd_iter = column_families_.find(name);
  if (cfd_iter != column_families_.end()) {
    auto cfd = GetColumnFamily(cfd_iter->second);
    assert(cfd != nullptr);
    return cfd;
  } else {
    return nullptr;
  }
}

uint32_t ColumnFamilySet::GetNextColumnFamilyID() {
  return ++max_column_family_;
}

uint32_t ColumnFamilySet::GetMaxColumnFamily() { return max_column_family_; }

void ColumnFamilySet::UpdateMaxColumnFamily(uint32_t new_max_column_family) {
  max_column_family_ = std::max(new_max_column_family, max_column_family_);
}

size_t ColumnFamilySet::NumberOfColumnFamilies() const {
  return column_families_.size();
}

// under a DB mutex AND write thread
ColumnFamilyData* ColumnFamilySet::CreateColumnFamily(
    const std::string& name, uint32_t id, Version* dummy_versions,
    const ColumnFamilyOptions& options) {
  assert(column_families_.find(name) == column_families_.end());
  ColumnFamilyData* new_cfd = new ColumnFamilyData(
      id, name, dummy_versions, table_cache_, write_buffer_manager_, options,
      *db_options_, file_options_, this, block_cache_tracer_, io_tracer_);
  column_families_.insert({name, id});
  column_family_data_.insert({id, new_cfd});
  max_column_family_ = std::max(max_column_family_, id);
  // add to linked list
  new_cfd->next_ = dummy_cfd_;
  auto prev = dummy_cfd_->prev_;
  new_cfd->prev_ = prev;
  prev->next_ = new_cfd;
  dummy_cfd_->prev_ = new_cfd;
  if (id == 0) {
    default_cfd_cache_ = new_cfd;
  }
  return new_cfd;
}

// REQUIRES: DB mutex held
void ColumnFamilySet::FreeDeadColumnFamilies() {
  autovector<ColumnFamilyData*> to_delete;
  for (auto cfd = dummy_cfd_->next_; cfd != dummy_cfd_; cfd = cfd->next_) {
    if (cfd->refs_.load(std::memory_order_relaxed) == 0) {
      to_delete.push_back(cfd);
    }
  }
  for (auto cfd : to_delete) {
    // this is very rare, so it's not a problem that we do it under a mutex
    delete cfd;
  }
}

// under a DB mutex AND from a write thread
void ColumnFamilySet::RemoveColumnFamily(ColumnFamilyData* cfd) {
  auto cfd_iter = column_family_data_.find(cfd->GetID());
  assert(cfd_iter != column_family_data_.end());
  column_family_data_.erase(cfd_iter);
  column_families_.erase(cfd->GetName());
}

// under a DB mutex OR from a write thread
bool ColumnFamilyMemTablesImpl::Seek(uint32_t column_family_id) {
  if (column_family_id == 0) {
    // optimization for common case
    current_ = column_family_set_->GetDefault();
  } else {
    current_ = column_family_set_->GetColumnFamily(column_family_id);
  }
  handle_.SetCFD(current_);
  return current_ != nullptr;
}

uint64_t ColumnFamilyMemTablesImpl::GetLogNumber() const {
  assert(current_ != nullptr);
  return current_->GetLogNumber();
}

MemTable* ColumnFamilyMemTablesImpl::GetMemTable() const {
  assert(current_ != nullptr);
  return current_->mem();
}

ColumnFamilyHandle* ColumnFamilyMemTablesImpl::GetColumnFamilyHandle() {
  assert(current_ != nullptr);
  return &handle_;
}

uint32_t GetColumnFamilyID(ColumnFamilyHandle* column_family) {
  uint32_t column_family_id = 0;
  if (column_family != nullptr) {
    auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
    column_family_id = cfh->GetID();
  }
  return column_family_id;
}

const Comparator* GetColumnFamilyUserComparator(
    ColumnFamilyHandle* column_family) {
  if (column_family != nullptr) {
    return column_family->GetComparator();
  }
  return nullptr;
}

}  // namespace ROCKSDB_NAMESPACE