1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
12 #include <unordered_map>
17 #include "db/memtable_list.h"
18 #include "db/table_cache.h"
19 #include "db/table_properties_collector.h"
20 #include "db/write_batch_internal.h"
21 #include "db/write_controller.h"
22 #include "options/cf_options.h"
23 #include "rocksdb/compaction_job_stats.h"
24 #include "rocksdb/db.h"
25 #include "rocksdb/env.h"
26 #include "rocksdb/options.h"
27 #include "trace_replay/block_cache_tracer.h"
28 #include "util/thread_local.h"
30 namespace ROCKSDB_NAMESPACE
{
34 class VersionStorageInfo
;
36 class MemTableListVersion
;
37 class CompactionPicker
;
41 class ColumnFamilyData
;
44 class InstrumentedMutex
;
45 class InstrumentedMutexLock
;
46 struct SuperVersionContext
;
49 extern const double kIncSlowdownRatio
;
50 // This file contains a list of data structures for managing column family
53 // The basic relationships among classes declared here are illustrated as
56 // +----------------------+ +----------------------+ +--------+
57 // +---+ ColumnFamilyHandle 1 | +--+ ColumnFamilyHandle 2 | | DBImpl |
58 // | +----------------------+ | +----------------------+ +----+---+
59 // | +--------------------------+ |
60 // | | +-----------------------------+
62 // | | +-----------------------------v-------------------------------+
64 // | | | ColumnFamilySet |
66 // | | +-------------+--------------------------+----------------+---+
68 // | +-------------------------------------+ | |
70 // | +-------------v-------------+ +-----v----v---------+
72 // | | ColumnFamilyData 1 | | ColumnFamilyData 2 | ......
78 // +--------+---+--+-+----+----+ +--------------------++
81 // | | | +-----------------------+
82 // | | +-----------+ |
84 // +--------+--------+ | | |
85 // | | | | +----------v----------+
86 // +---> |SuperVersion 1.a +-----------------> |
87 // | +------+ | | MemTableListVersion |
88 // +---+-------------+ | | | | |
89 // | | | | +----+------------+---+
90 // | current | | | | |
91 // | +-------------+ | |mem | |
93 // +-v---v-------+ +---v--v---+ +-----v----+ +----v-----+
95 // | Version 1.a | | memtable | | memtable | | memtable |
96 // | | | 1.a | | 1.b | | 1.c |
97 // +-------------+ | | | | | |
98 // +----------+ +----------+ +----------+
100 // DBImpl keeps a ColumnFamilySet, which references all column families by
101 // pointing to respective ColumnFamilyData object of each column family.
102 // This is how DBImpl can list and operate on all the column families.
103 // ColumnFamilyHandle also points to ColumnFamilyData directly, so that
104 // when a user executes a query, it can directly find memtables and Version
105 // as well as SuperVersion to the column family, without going through
108 // ColumnFamilySet points to the latest view of the LSM-tree (list of memtables
109 // and SST files) indirectly, while ongoing operations may hold references
110 // to a current or an out-of-date SuperVersion, which in turn points to a
111 // point-in-time view of the LSM-tree. This guarantees the memtables and SST
112 // files being operated on will not go away, until the SuperVersion is
113 // unreferenced to 0 and destroyed.
115 // The following graph illustrates a possible referencing relationships:
117 // Column +--------------+ current +-----------+
118 // Family +---->+ +------------------->+ |
119 // Data | SuperVersion +----------+ | Version A |
121 // Iter2 +----->+ | +-------v------+ +-----------+
122 // +-----+--------+ | MemtableList +----------------> Empty
123 // | | Version r | +-----------+
124 // | +--------------+ | |
125 // +------------------+ current| Version B |
126 // +--------------+ | +----->+ |
127 // | | | | +-----+-----+
128 // Compaction +>+ SuperVersion +-------------+ ^
129 // Job | 2 +------+ | |current
130 // | +----+ | | mem | +------------+
131 // +--------------+ | | +---------------------> |
132 // | +------------------------> MemTable a |
134 // +--------------+ | | +------------+
135 // | +--------------------------+
136 // Iter1 +-----> SuperVersion | | +------------+
137 // | 1 +------------------------------>+ |
138 // | +-+ | mem | MemTable b |
139 // +--------------+ | | | |
140 // | | +--------------+ +-----^------+
141 // | |imm | MemtableList | |
142 // | +--->+ Version s +------------+
143 // | +--------------+
144 // | +--------------+
145 // | | MemtableList |
146 // +------>+ Version t +--------> Empty
147 // imm +--------------+
149 // In this example, even if the current LSM-tree consists of Version A and
150 // memtable a, which is also referenced by SuperVersion, two older SuperVersions,
151 // SuperVersion2 and SuperVersion1, still exist, and are referenced by a
152 // compaction job and an old iterator Iter1, respectively. SuperVersion2
153 // contains Version B, memtable a and memtable b; SuperVersion1 contains
154 // Version B and memtable b (mutable). As a result, Version B and memtable b
155 // are prevented from being destroyed or deleted.
157 // ColumnFamilyHandleImpl is the class that clients use to access different
158 // column families. It has non-trivial destructor, which gets called when client
159 // is done using the column family
// Concrete ColumnFamilyHandle implementation. It stores a ColumnFamilyData
// pointer (cfd_) and an InstrumentedMutex pointer (mutex_), and overrides the
// ColumnFamilyHandle accessors declared below.
160 class ColumnFamilyHandleImpl
: public ColumnFamilyHandle
{
162 // create while holding the mutex
163 ColumnFamilyHandleImpl(
164 ColumnFamilyData
* cfd
, DBImpl
* db
, InstrumentedMutex
* mutex
);
165 // destroy without mutex
166 virtual ~ColumnFamilyHandleImpl();
// Returns the ColumnFamilyData pointer stored in cfd_. Declared virtual, so a
// derived handle may return a different pointer.
167 virtual ColumnFamilyData
* cfd() const { return cfd_
; }
// ColumnFamilyHandle overrides; declared here and defined out of line.
169 virtual uint32_t GetID() const override
;
170 virtual const std::string
& GetName() const override
;
// Takes an output pointer `desc` and returns a Status.
171 virtual Status
GetDescriptor(ColumnFamilyDescriptor
* desc
) override
;
172 virtual const Comparator
* GetComparator() const override
;
// Pointer returned by cfd().
175 ColumnFamilyData
* cfd_
;
// NOTE(review): presumably the `mutex` constructor argument -- confirm in the
// implementation file.
177 InstrumentedMutex
* mutex_
;
180 // Does not ref-count ColumnFamilyData
181 // We use this dummy ColumnFamilyHandleImpl because sometimes MemTableInserter
182 // calls DBImpl methods. When this happens, MemTableInserter needs access to
183 // ColumnFamilyHandle (same as the client would need). In that case, we feed
184 // MemTableInserter dummy ColumnFamilyHandle and enable it to call DBImpl
// Handle whose ColumnFamilyData pointer can be swapped at runtime via SetCFD().
// The ColumnFamilyHandleImpl base is constructed with all-null arguments; the
// pointer actually exposed through cfd() lives in internal_cfd_.
186 class ColumnFamilyHandleInternal
: public ColumnFamilyHandleImpl
{
// Default constructor: null base arguments and a null internal_cfd_.
188 ColumnFamilyHandleInternal()
189 : ColumnFamilyHandleImpl(nullptr, nullptr, nullptr), internal_cfd_(nullptr) {}
// Stores `_cfd` as the pointer that cfd() will return from now on.
191 void SetCFD(ColumnFamilyData
* _cfd
) { internal_cfd_
= _cfd
; }
// Overrides the base accessor to return internal_cfd_ rather than the base
// class's cfd_ (which was constructed as nullptr).
192 virtual ColumnFamilyData
* cfd() const override
{ return internal_cfd_
; }
// Pointer set by SetCFD() and returned by cfd(); starts out as nullptr.
195 ColumnFamilyData
* internal_cfd_
;
198 // holds references to memtable, all immutable memtables and version
199 struct SuperVersion
{
200 // Accessing members of this class is not thread-safe and requires external
201 // synchronization (i.e. db mutex held or on write thread).
// NOTE(review): presumably the owning column family, set by Init() -- confirm.
202 ColumnFamilyData
* cfd
;
// NOTE(review): presumably the immutable-memtable list version passed to
// Init() as new_imm -- confirm.
204 MemTableListVersion
* imm
;
// Mutable column family options held by value in this SuperVersion.
206 MutableCFOptions mutable_cf_options
;
207 // Version number of the current SuperVersion
208 uint64_t version_number
;
209 WriteStallCondition write_stall_condition
;
211 InstrumentedMutex
* db_mutex
;
213 // should be called outside the mutex
214 SuperVersion() = default;
217 // If Unref() returns true, Cleanup() should be called with mutex held
218 // before deleting this SuperVersion.
221 // call these two methods with db mutex held
222 // Cleanup unrefs mem, imm and current. Also, it stores all memtables
223 // that need to be deleted in the to_delete vector. Unrefing those
224 // objects needs to be done in the mutex
226 void Init(ColumnFamilyData
* new_cfd
, MemTable
* new_mem
,
227 MemTableListVersion
* new_imm
, Version
* new_current
);
229 // The value of dummy is not actually used. kSVInUse takes its address as a
230 // mark in the thread local storage to indicate the SuperVersion is in use
231 // by thread. This way, the value of kSVInUse is guaranteed to have no
232 // conflict with SuperVersion object address and portable on different
235 static void* const kSVInUse
;
236 static void* const kSVObsolete
;
// Atomic counter; NOTE(review): presumably the reference count that the
// Unref()/Cleanup() comments above refer to -- confirm.
239 std::atomic
<uint32_t> refs
;
240 // We need to_delete because during Cleanup(), imm->Unref() returns
241 // all memtables that we need to free through this vector. We then
242 // delete all those memtables outside of mutex, during destruction
243 autovector
<MemTable
*> to_delete
;
246 extern Status
CheckCompressionSupported(const ColumnFamilyOptions
& cf_options
);
248 extern Status
CheckConcurrentWritesSupported(
249 const ColumnFamilyOptions
& cf_options
);
251 extern Status
CheckCFPathsSupported(const DBOptions
& db_options
,
252 const ColumnFamilyOptions
& cf_options
);
254 extern ColumnFamilyOptions
SanitizeOptions(const ImmutableDBOptions
& db_options
,
255 const ColumnFamilyOptions
& src
);
256 // Wrap user defined table properties collector factories from `cf_options`
257 // into internal ones in int_tbl_prop_collector_factories. Add a system internal
259 extern void GetIntTblPropCollectorFactory(
260 const ImmutableCFOptions
& ioptions
,
261 std::vector
<std::unique_ptr
<IntTblPropCollectorFactory
>>*
262 int_tbl_prop_collector_factories
);
264 class ColumnFamilySet
;
266 // This class keeps all the data that a column family needs.
267 // Most methods require DB mutex held, unless otherwise noted
268 class ColumnFamilyData
{
273 uint32_t GetID() const { return id_
; }
275 const std::string
& GetName() const { return name_
; }
277 // Ref() can only be called from a context where the caller can guarantee
278 // that ColumnFamilyData is alive (while holding a non-zero ref already,
279 // holding a DB mutex, or as the leader in a write batch group).
280 void Ref() { refs_
.fetch_add(1); }
282 // Unref decreases the reference count, but does not handle deletion
283 // when the count goes to 0. If this method returns true then the
284 // caller should delete the instance immediately, or later, by calling
285 // FreeDeadColumnFamilies(). Unref() can only be called while holding
286 // a DB mutex, or during single-threaded recovery.
288 int old_refs
= refs_
.fetch_sub(1);
289 assert(old_refs
> 0);
290 return old_refs
== 1;
293 // UnrefAndTryDelete() decreases the reference count and frees this object if
294 // needed. Returns true if it was freed, false otherwise. UnrefAndTryDelete() can
295 // only be called while holding a DB mutex, or during single-threaded recovery.
296 bool UnrefAndTryDelete();
298 // SetDropped() can only be called under following conditions:
299 // 1) Holding a DB mutex,
300 // 2) from single-threaded write thread, AND
301 // 3) from single-threaded VersionSet::LogAndApply()
302 // After dropping column family no other operation on that column family
303 // will be executed. All the files and memory will be, however, kept around
304 // until client drops the column family handle. That way, client can still
305 // access data from dropped column family.
306 // Column family can be dropped and still alive. In that state:
307 // *) Compaction and flush is not executed on the dropped column family.
308 // *) Client can continue reading from column family. Writes will fail unless
309 // WriteOptions::ignore_missing_column_families is true
310 // When the dropped column family is unreferenced, then we:
311 // *) Remove column family from the linked list maintained by ColumnFamilySet
312 // *) delete all memory associated with that column family
313 // *) delete all the files associated with that column family
315 bool IsDropped() const { return dropped_
.load(std::memory_order_relaxed
); }
318 int NumberLevels() const { return ioptions_
.num_levels
; }
320 void SetLogNumber(uint64_t log_number
) { log_number_
= log_number
; }
321 uint64_t GetLogNumber() const { return log_number_
; }
323 void SetFlushReason(FlushReason flush_reason
) {
324 flush_reason_
= flush_reason
;
326 FlushReason
GetFlushReason() const { return flush_reason_
; }
328 const FileOptions
* soptions() const;
329 const ImmutableCFOptions
* ioptions() const { return &ioptions_
; }
330 // REQUIRES: DB mutex held
331 // This returns the MutableCFOptions used by current SuperVersion
332 // You should use this API to reference MutableCFOptions most of the time.
333 const MutableCFOptions
* GetCurrentMutableCFOptions() const {
334 return &(super_version_
->mutable_cf_options
);
336 // REQUIRES: DB mutex held
337 // This returns the latest MutableCFOptions, which may be not in effect yet.
338 const MutableCFOptions
* GetLatestMutableCFOptions() const {
339 return &mutable_cf_options_
;
342 // REQUIRES: DB mutex held
343 // Build ColumnFamiliesOptions with immutable options and latest mutable
345 ColumnFamilyOptions
GetLatestCFOptions() const;
347 bool is_delete_range_supported() { return is_delete_range_supported_
; }
349 // Validate CF options against DB options
350 static Status
ValidateOptions(const DBOptions
& db_options
,
351 const ColumnFamilyOptions
& cf_options
);
353 // REQUIRES: DB mutex held
355 const DBOptions
& db_options
,
356 const std::unordered_map
<std::string
, std::string
>& options_map
);
357 #endif // ROCKSDB_LITE
359 InternalStats
* internal_stats() { return internal_stats_
.get(); }
361 MemTableList
* imm() { return &imm_
; }
362 MemTable
* mem() { return mem_
; }
363 Version
* current() { return current_
; }
364 Version
* dummy_versions() { return dummy_versions_
; }
365 void SetCurrent(Version
* _current
);
366 uint64_t GetNumLiveVersions() const; // REQUIRE: DB mutex held
367 uint64_t GetTotalSstFilesSize() const; // REQUIRE: DB mutex held
368 uint64_t GetLiveSstFilesSize() const; // REQUIRE: DB mutex held
369 void SetMemtable(MemTable
* new_mem
) {
370 uint64_t memtable_id
= last_memtable_id_
.fetch_add(1) + 1;
371 new_mem
->SetID(memtable_id
);
375 // calculate the oldest log needed for the durability of this column family
376 uint64_t OldestLogToKeep();
378 // See Memtable constructor for explanation of earliest_seq param.
379 MemTable
* ConstructNewMemtable(const MutableCFOptions
& mutable_cf_options
,
380 SequenceNumber earliest_seq
);
381 void CreateNewMemtable(const MutableCFOptions
& mutable_cf_options
,
382 SequenceNumber earliest_seq
);
384 TableCache
* table_cache() const { return table_cache_
.get(); }
385 BlobFileCache
* blob_file_cache() const { return blob_file_cache_
.get(); }
387 // See documentation in compaction_picker.h
388 // REQUIRES: DB mutex held
389 bool NeedsCompaction() const;
390 // REQUIRES: DB mutex held
391 Compaction
* PickCompaction(const MutableCFOptions
& mutable_options
,
392 const MutableDBOptions
& mutable_db_options
,
393 LogBuffer
* log_buffer
);
395 // Check if the passed range overlap with any running compactions.
396 // REQUIRES: DB mutex held
397 bool RangeOverlapWithCompaction(const Slice
& smallest_user_key
,
398 const Slice
& largest_user_key
,
401 // Check if the passed ranges overlap with any unflushed memtables
402 // (immutable or mutable).
404 // @param super_version A referenced SuperVersion that will be held for the
405 // duration of this function.
408 Status
RangesOverlapWithMemtables(const autovector
<Range
>& ranges
,
409 SuperVersion
* super_version
,
410 bool allow_data_in_errors
, bool* overlap
);
412 // A flag to tell a manual compaction is to compact all levels together
413 // instead of a specific level.
414 static const int kCompactAllLevels
;
415 // A flag to tell a manual compaction's output is base level.
416 static const int kCompactToBaseLevel
;
417 // REQUIRES: DB mutex held
418 Compaction
* CompactRange(const MutableCFOptions
& mutable_cf_options
,
419 const MutableDBOptions
& mutable_db_options
,
420 int input_level
, int output_level
,
421 const CompactRangeOptions
& compact_range_options
,
422 const InternalKey
* begin
, const InternalKey
* end
,
423 InternalKey
** compaction_end
, bool* manual_conflict
,
424 uint64_t max_file_num_to_ignore
);
426 CompactionPicker
* compaction_picker() { return compaction_picker_
.get(); }
428 const Comparator
* user_comparator() const {
429 return internal_comparator_
.user_comparator();
432 const InternalKeyComparator
& internal_comparator() const {
433 return internal_comparator_
;
436 const std::vector
<std::unique_ptr
<IntTblPropCollectorFactory
>>*
437 int_tbl_prop_collector_factories() const {
438 return &int_tbl_prop_collector_factories_
;
441 SuperVersion
* GetSuperVersion() { return super_version_
; }
443 // Return an already referenced SuperVersion to be used safely.
444 SuperVersion
* GetReferencedSuperVersion(DBImpl
* db
);
446 // Get SuperVersion stored in thread local storage. If it does not exist,
447 // get a reference from a current SuperVersion.
448 SuperVersion
* GetThreadLocalSuperVersion(DBImpl
* db
);
449 // Try to return SuperVersion back to thread local storage. Return true on
450 // success and false on failure. It fails when the thread local storage
451 // contains anything other than SuperVersion::kSVInUse flag.
452 bool ReturnThreadLocalSuperVersion(SuperVersion
* sv
);
454 uint64_t GetSuperVersionNumber() const {
455 return super_version_number_
.load();
457 // will return a pointer to SuperVersion* if previous SuperVersion
458 // if its reference count is zero and needs deletion or nullptr if not
459 // As argument takes a pointer to allocated SuperVersion to enable
460 // the clients to allocate SuperVersion outside of mutex.
461 // IMPORTANT: Only call this from DBImpl::InstallSuperVersion()
462 void InstallSuperVersion(SuperVersionContext
* sv_context
,
463 InstrumentedMutex
* db_mutex
,
464 const MutableCFOptions
& mutable_cf_options
);
465 void InstallSuperVersion(SuperVersionContext
* sv_context
,
466 InstrumentedMutex
* db_mutex
);
468 void ResetThreadLocalSuperVersions();
470 // Protected by DB mutex
471 void set_queued_for_flush(bool value
) { queued_for_flush_
= value
; }
472 void set_queued_for_compaction(bool value
) { queued_for_compaction_
= value
; }
473 bool queued_for_flush() { return queued_for_flush_
; }
474 bool queued_for_compaction() { return queued_for_compaction_
; }
476 enum class WriteStallCause
{
480 kPendingCompactionBytes
,
482 static std::pair
<WriteStallCondition
, WriteStallCause
>
483 GetWriteStallConditionAndCause(int num_unflushed_memtables
, int num_l0_files
,
484 uint64_t num_compaction_needed_bytes
,
485 const MutableCFOptions
& mutable_cf_options
);
487 // Recalculate some small conditions, which are changed only during
488 // compaction, adding new memtable and/or
489 // recalculation of compaction score. These values are used in
490 // DBImpl::MakeRoomForWrite function to decide, if it need to make
492 WriteStallCondition
RecalculateWriteStallConditions(
493 const MutableCFOptions
& mutable_cf_options
);
495 void set_initialized() { initialized_
.store(true); }
497 bool initialized() const { return initialized_
.load(); }
499 const ColumnFamilyOptions
& initial_cf_options() {
500 return initial_cf_options_
;
503 Env::WriteLifeTimeHint
CalculateSSTWriteHint(int level
);
505 // created_dirs remembers directory created, so that we don't need to call
506 // the same data creation operation again.
507 Status
AddDirectories(
508 std::map
<std::string
, std::shared_ptr
<FSDirectory
>>* created_dirs
);
510 FSDirectory
* GetDataDir(size_t path_id
) const;
512 ThreadLocalPtr
* TEST_GetLocalSV() { return local_sv_
.get(); }
515 friend class ColumnFamilySet
;
516 static const uint32_t kDummyColumnFamilyDataId
;
517 ColumnFamilyData(uint32_t id
, const std::string
& name
,
518 Version
* dummy_versions
, Cache
* table_cache
,
519 WriteBufferManager
* write_buffer_manager
,
520 const ColumnFamilyOptions
& options
,
521 const ImmutableDBOptions
& db_options
,
522 const FileOptions
& file_options
,
523 ColumnFamilySet
* column_family_set
,
524 BlockCacheTracer
* const block_cache_tracer
,
525 const std::shared_ptr
<IOTracer
>& io_tracer
);
527 std::vector
<std::string
> GetDbPaths() const;
530 const std::string name_
;
531 Version
* dummy_versions_
; // Head of circular doubly-linked list of versions.
532 Version
* current_
; // == dummy_versions->prev_
534 std::atomic
<int> refs_
; // outstanding references to ColumnFamilyData
535 std::atomic
<bool> initialized_
;
536 std::atomic
<bool> dropped_
; // true if client dropped it
538 const InternalKeyComparator internal_comparator_
;
539 std::vector
<std::unique_ptr
<IntTblPropCollectorFactory
>>
540 int_tbl_prop_collector_factories_
;
542 const ColumnFamilyOptions initial_cf_options_
;
543 const ImmutableCFOptions ioptions_
;
544 MutableCFOptions mutable_cf_options_
;
546 const bool is_delete_range_supported_
;
548 std::unique_ptr
<TableCache
> table_cache_
;
549 std::unique_ptr
<BlobFileCache
> blob_file_cache_
;
551 std::unique_ptr
<InternalStats
> internal_stats_
;
553 WriteBufferManager
* write_buffer_manager_
;
557 SuperVersion
* super_version_
;
559 // An ordinal representing the current SuperVersion. Updated by
560 // InstallSuperVersion(), i.e. incremented every time super_version_
562 std::atomic
<uint64_t> super_version_number_
;
564 // Thread's local copy of SuperVersion pointer
565 // This needs to be destructed before mutex_
566 std::unique_ptr
<ThreadLocalPtr
> local_sv_
;
568 // pointers for a circular linked list. we use it to support iterations over
569 // all column families that are alive (note: dropped column families can also
570 // be alive as long as client holds a reference)
571 ColumnFamilyData
* next_
;
572 ColumnFamilyData
* prev_
;
574 // This is the earliest log file number that contains data from this
575 // Column Family. All earlier log files must be ignored and not
577 uint64_t log_number_
;
579 std::atomic
<FlushReason
> flush_reason_
;
581 // An object that keeps all the compaction stats
582 // and picks the next compaction
583 std::unique_ptr
<CompactionPicker
> compaction_picker_
;
585 ColumnFamilySet
* column_family_set_
;
587 std::unique_ptr
<WriteControllerToken
> write_controller_token_
;
589 // If true --> this ColumnFamily is currently present in DBImpl::flush_queue_
590 bool queued_for_flush_
;
592 // If true --> this ColumnFamily is currently present in
593 // DBImpl::compaction_queue_
594 bool queued_for_compaction_
;
596 uint64_t prev_compaction_needed_bytes_
;
598 // if the database was opened with 2pc enabled
601 // Memtable id to track flush.
602 std::atomic
<uint64_t> last_memtable_id_
;
604 // Directories corresponding to cf_paths.
605 std::vector
<std::shared_ptr
<FSDirectory
>> data_dirs_
;
607 bool db_paths_registered_
;
610 // ColumnFamilySet has interesting thread-safety requirements
611 // * CreateColumnFamily() or RemoveColumnFamily() -- need to be protected by DB
612 // mutex AND executed in the write thread.
613 // CreateColumnFamily() should ONLY be called from VersionSet::LogAndApply() AND
614 // single-threaded write thread. It is also called during Recovery and in
616 // RemoveColumnFamily() is only called from SetDropped(). DB mutex needs to be
617 // held and it needs to be executed from the write thread. SetDropped() also
618 // guarantees that it will be called only from single-threaded LogAndApply(),
619 // but this condition is not that important.
620 // * Iteration -- hold DB mutex, but you can release it in the body of
621 // iteration. If you release DB mutex in body, reference the column
622 // family before the mutex and unreference after you unlock, since the column
623 // family might get dropped when the DB mutex is released
624 // * GetDefault() -- thread safe
625 // * GetColumnFamily() -- either inside of DB mutex or from a write thread
626 // * GetNextColumnFamilyID(), GetMaxColumnFamily(), UpdateMaxColumnFamily(),
627 // NumberOfColumnFamilies -- inside of DB mutex
628 class ColumnFamilySet
{
630 // ColumnFamilySet supports iteration
633 explicit iterator(ColumnFamilyData
* cfd
)
635 iterator
& operator++() {
636 // dropped column families might still be included in this iteration
637 // (we're only removing them when client drops the last reference to the
639 // dummy is never dead, so this will never be infinite
641 current_
= current_
->next_
;
642 } while (current_
->refs_
.load(std::memory_order_relaxed
) == 0);
645 bool operator!=(const iterator
& other
) {
646 return this->current_
!= other
.current_
;
648 ColumnFamilyData
* operator*() { return current_
; }
651 ColumnFamilyData
* current_
;
654 ColumnFamilySet(const std::string
& dbname
,
655 const ImmutableDBOptions
* db_options
,
656 const FileOptions
& file_options
, Cache
* table_cache
,
657 WriteBufferManager
* _write_buffer_manager
,
658 WriteController
* _write_controller
,
659 BlockCacheTracer
* const block_cache_tracer
,
660 const std::shared_ptr
<IOTracer
>& io_tracer
);
663 ColumnFamilyData
* GetDefault() const;
664 // GetColumnFamily() calls return nullptr if column family is not found
665 ColumnFamilyData
* GetColumnFamily(uint32_t id
) const;
666 ColumnFamilyData
* GetColumnFamily(const std::string
& name
) const;
667 // this call will return the next available column family ID. it guarantees
668 // that there is no column family with id greater than or equal to the
669 // returned value in the current running instance or anytime in RocksDB
671 uint32_t GetNextColumnFamilyID();
672 uint32_t GetMaxColumnFamily();
673 void UpdateMaxColumnFamily(uint32_t new_max_column_family
);
674 size_t NumberOfColumnFamilies() const;
676 ColumnFamilyData
* CreateColumnFamily(const std::string
& name
, uint32_t id
,
677 Version
* dummy_version
,
678 const ColumnFamilyOptions
& options
);
680 iterator
begin() { return iterator(dummy_cfd_
->next_
); }
681 iterator
end() { return iterator(dummy_cfd_
); }
683 // REQUIRES: DB mutex held
684 // Don't call while iterating over ColumnFamilySet
685 void FreeDeadColumnFamilies();
687 Cache
* get_table_cache() { return table_cache_
; }
689 WriteBufferManager
* write_buffer_manager() { return write_buffer_manager_
; }
691 WriteController
* write_controller() { return write_controller_
; }
694 friend class ColumnFamilyData
;
695 // helper function that gets called from cfd destructor
696 // REQUIRES: DB mutex held
697 void RemoveColumnFamily(ColumnFamilyData
* cfd
);
699 // column_families_ and column_family_data_ need to be protected:
700 // * when mutating both conditions have to be satisfied:
701 // 1. DB mutex locked
702 // 2. thread currently in single-threaded write thread
703 // * when reading, at least one condition needs to be satisfied:
704 // 1. DB mutex locked
705 // 2. accessed from a single-threaded write thread
706 std::unordered_map
<std::string
, uint32_t> column_families_
;
707 std::unordered_map
<uint32_t, ColumnFamilyData
*> column_family_data_
;
709 uint32_t max_column_family_
;
710 ColumnFamilyData
* dummy_cfd_
;
711 // We don't hold the refcount here, since default column family always exists
712 // We are also not responsible for cleaning up default_cfd_cache_. This is
713 // just a cache that makes common case (accessing default column family)
715 ColumnFamilyData
* default_cfd_cache_
;
717 const std::string db_name_
;
718 const ImmutableDBOptions
* const db_options_
;
719 const FileOptions file_options_
;
721 WriteBufferManager
* write_buffer_manager_
;
722 WriteController
* write_controller_
;
723 BlockCacheTracer
* const block_cache_tracer_
;
724 std::shared_ptr
<IOTracer
> io_tracer_
;
727 // We use ColumnFamilyMemTablesImpl to provide WriteBatch a way to access
728 // memtables of different column families (specified by ID in the write batch)
729 class ColumnFamilyMemTablesImpl
: public ColumnFamilyMemTables
{
731 explicit ColumnFamilyMemTablesImpl(ColumnFamilySet
* column_family_set
)
732 : column_family_set_(column_family_set
), current_(nullptr) {}
734 // Constructs a ColumnFamilyMemTablesImpl equivalent to one constructed
735 // with the arguments used to construct *orig.
736 explicit ColumnFamilyMemTablesImpl(ColumnFamilyMemTablesImpl
* orig
)
737 : column_family_set_(orig
->column_family_set_
), current_(nullptr) {}
739 // sets current_ to ColumnFamilyData with column_family_id
740 // returns false if column family doesn't exist
741 // REQUIRES: use this function of DBImpl::column_family_memtables_ should be
742 // under a DB mutex OR from a write thread
743 bool Seek(uint32_t column_family_id
) override
;
745 // Returns log number of the selected column family
746 // REQUIRES: under a DB mutex OR from a write thread
747 uint64_t GetLogNumber() const override
;
749 // REQUIRES: Seek() called first
750 // REQUIRES: use this function of DBImpl::column_family_memtables_ should be
751 // under a DB mutex OR from a write thread
752 virtual MemTable
* GetMemTable() const override
;
754 // Returns column family handle for the selected column family
755 // REQUIRES: use this function of DBImpl::column_family_memtables_ should be
756 // under a DB mutex OR from a write thread
757 virtual ColumnFamilyHandle
* GetColumnFamilyHandle() override
;
759 // Cannot be called while another thread is calling Seek().
760 // REQUIRES: use this function of DBImpl::column_family_memtables_ should be
761 // under a DB mutex OR from a write thread
762 virtual ColumnFamilyData
* current() override
{ return current_
; }
765 ColumnFamilySet
* column_family_set_
;
766 ColumnFamilyData
* current_
;
767 ColumnFamilyHandleInternal handle_
;
770 extern uint32_t GetColumnFamilyID(ColumnFamilyHandle
* column_family
);
772 extern const Comparator
* GetColumnFamilyUserComparator(
773 ColumnFamilyHandle
* column_family
);
775 } // namespace ROCKSDB_NAMESPACE