1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
20 #include "db/column_family.h"
21 #include "db/dbformat.h"
22 #include "db/flush_scheduler.h"
23 #include "db/internal_stats.h"
24 #include "db/job_context.h"
25 #include "db/log_writer.h"
26 #include "db/logs_with_prep_tracker.h"
27 #include "db/memtable_list.h"
28 #include "db/snapshot_impl.h"
29 #include "db/version_edit.h"
30 #include "db/write_controller.h"
31 #include "db/write_thread.h"
32 #include "logging/event_logger.h"
33 #include "monitoring/instrumented_mutex.h"
34 #include "options/db_options.h"
35 #include "port/port.h"
36 #include "rocksdb/db.h"
37 #include "rocksdb/env.h"
38 #include "rocksdb/listener.h"
39 #include "rocksdb/memtablerep.h"
40 #include "rocksdb/transaction_log.h"
41 #include "table/scoped_arena_iterator.h"
42 #include "util/autovector.h"
43 #include "util/stop_watch.h"
44 #include "util/thread_local.h"
46 namespace ROCKSDB_NAMESPACE
{
// Forward declaration. This header only refers to SnapshotChecker through
// pointers (the constructor's snapshot_checker parameter and the
// snapshot_checker_ member), so the complete type is not needed here.
50 class SnapshotChecker
;
// Constructor declaration for FlushJob; the definition is not part of this
// header.
//
// Notes that follow from the declaration and the data members of this class:
//  - dbname, db_options and mutable_cf_options are taken by const reference,
//    and the members dbname_, db_options_ and mutable_cf_options_ are
//    themselves declared as references. Presumably the constructor binds them
//    to these arguments, in which case all three referents (not only
//    mutable_cf_options) must outlive the job -- TODO confirm against the
//    constructor definition.
//  - existing_snapshots and full_history_ts_low are taken by value.
//  - max_memtable_id is a pointer; per the comment on the max_memtable_id_
//    member, a null pointer means all memtables of the column family are
//    selected.
//  - db_id, db_session_id and full_history_ts_low are optional and default to
//    the empty string.
59 // TODO(icanadi) make effort to reduce number of parameters here
60 // IMPORTANT: mutable_cf_options needs to be alive while FlushJob is alive
61 FlushJob(const std::string
& dbname
, ColumnFamilyData
* cfd
,
62 const ImmutableDBOptions
& db_options
,
63 const MutableCFOptions
& mutable_cf_options
,
64 const uint64_t* max_memtable_id
, const FileOptions
& file_options
,
65 VersionSet
* versions
, InstrumentedMutex
* db_mutex
,
66 std::atomic
<bool>* shutting_down
,
// Passed by value (a copy of the caller's snapshot list).
67 std::vector
<SequenceNumber
> existing_snapshots
,
68 SequenceNumber earliest_write_conflict_snapshot
,
69 SnapshotChecker
* snapshot_checker
, JobContext
* job_context
,
70 LogBuffer
* log_buffer
, FSDirectory
* db_directory
,
71 FSDirectory
* output_file_directory
,
72 CompressionType output_compression
, Statistics
* stats
,
73 EventLogger
* event_logger
, bool measure_io_stats
,
// See the comments on the sync_output_directory_ and write_manifest_ members
// for the meaning of the next two flags.
74 const bool sync_output_directory
, const bool write_manifest
,
75 Env::Priority thread_pri
, const std::shared_ptr
<IOTracer
>& io_tracer
,
// Trailing parameters are optional; each defaults to "".
76 const std::string
& db_id
= "", const std::string
& db_session_id
= "",
77 std::string full_history_ts_low
= "");
81 // Require db_mutex held.
82 // Once PickMemTable() is called, either Run() or Cancel() has to be called.
// NOTE(review): the two comment lines above mention PickMemTable() and
// Cancel(), neither of which is declared in the visible part of this header;
// confirm whether they document Run() or a neighbouring declaration.
//
// Declaration of Run(). Both arguments are optional and default to nullptr.
// Returns a Status.
84 Status
Run(LogsWithPrepTracker
* prep_tracker
= nullptr,
85 FileMetaData
* file_meta
= nullptr);
// Returns a read-only reference to mems_, the memtable list of this job.
// The reference aliases the member, so it is only usable while this object
// is alive.
87 const autovector
<MemTable
*>& GetMemTables() const { return mems_
; }
// Returns a pointer to committed_flush_jobs_info_, giving the caller mutable
// access to the list. Non-const accessor.
// NOTE(review): the closing brace of this inline body and the directive that
// opens the !ROCKSDB_LITE conditional closed below are not visible in this
// part of the file.
90 std::list
<std::unique_ptr
<FlushJobInfo
>>* GetCommittedFlushJobsInfo() {
91 return &committed_flush_jobs_info_
;
93 #endif // !ROCKSDB_LITE
95 // Return the IO status
// Const accessor; returns io_status_ by value (a copy of the member).
96 IOStatus
io_status() const { return io_status_
; }
// Helper declarations; the definitions are not part of this header, so the
// notes below are limited to what the signatures show.
99 void ReportStartedFlush();
// Takes the memtable list by const reference.
100 void ReportFlushInputSize(const autovector
<MemTable
*>& mems
);
101 void RecordFlushIOStats();
// Returns a Status.
102 Status
WriteLevel0Table();
// Const member; returns a uniquely-owned FlushJobInfo (std::unique_ptr).
// The #endif below closes a !ROCKSDB_LITE conditional whose opening directive
// is not visible in this part of the file.
104 std::unique_ptr
<FlushJobInfo
> GetFlushJobInfo() const;
105 #endif // !ROCKSDB_LITE
// Data members. The names mirror the constructor parameters (with a trailing
// underscore); presumably each is initialized from the parameter of the same
// name -- the constructor definition is not visible here.
//
// Reference member: aliases an object owned elsewhere.
107 const std::string
& dbname_
;
// Held by value, unlike dbname_.
108 const std::string db_id_
;
109 const std::string db_session_id_
;
// NOTE(review): ownership of the raw-pointer members in this group is not
// visible in this header.
110 ColumnFamilyData
* cfd_
;
// Reference members: alias objects owned elsewhere.
111 const ImmutableDBOptions
& db_options_
;
112 const MutableCFOptions
& mutable_cf_options_
;
113 // Pointer to a variable storing the largest memtable id to flush in this
114 // flush job. RocksDB uses this variable to select the memtables to flush in
115 // this job. All memtables in this column family with an ID smaller than or
116 // equal to *max_memtable_id_ will be selected for flush. If null, then all
117 // memtables in the column family will be selected.
118 const uint64_t* max_memtable_id_
;
// Held by value (the constructor parameter is a const reference).
119 const FileOptions file_options_
;
120 VersionSet
* versions_
;
121 InstrumentedMutex
* db_mutex_
;
122 std::atomic
<bool>* shutting_down_
;
123 std::vector
<SequenceNumber
> existing_snapshots_
;
124 SequenceNumber earliest_write_conflict_snapshot_
;
125 SnapshotChecker
* snapshot_checker_
;
126 JobContext
* job_context_
;
127 LogBuffer
* log_buffer_
;
128 FSDirectory
* db_directory_
;
129 FSDirectory
* output_file_directory_
;
130 CompressionType output_compression_
;
132 EventLogger
* event_logger_
;
133 TableProperties table_properties_
;
134 bool measure_io_stats_
;
135 // True if this flush job should call fsync on the output directory. False
// otherwise.
137 // Usually sync_output_directory_ is true. A flush job needs to call sync on
138 // the output directory before committing to the MANIFEST.
139 // However, an individual flush job does not have to call sync on the output
140 // directory if it is part of an atomic flush. After all flush jobs in the
141 // atomic flush succeed, call sync once on each distinct output directory.
// Declared const: the value cannot change after construction.
142 const bool sync_output_directory_
;
143 // True if this flush job should write to MANIFEST after successfully
144 // flushing memtables. False otherwise.
145 // Usually write_manifest_ is true. A flush job commits to the MANIFEST after
146 // flushing the memtables.
147 // However, an individual flush job cannot rashly write to the MANIFEST
148 // immediately after it finishes the flush if it is part of an atomic flush.
149 // In this case, only after all flush jobs succeed in flush can RocksDB
150 // commit to the MANIFEST.
// Declared const: the value cannot change after construction.
151 const bool write_manifest_
;
152 // The current flush job can commit flush result of a concurrent flush job.
153 // We collect FlushJobInfo of all jobs committed by current job and fire
154 // OnFlushCompleted for them.
// Each entry is uniquely owned (std::unique_ptr). A pointer to this list is
// handed out by GetCommittedFlushJobsInfo().
155 std::list
<std::unique_ptr
<FlushJobInfo
>> committed_flush_jobs_info_
;
157 // Variables below are set by PickMemTable():
// NOTE(review): the heading above presumably covers only the memtable
// selection state (mems_ and the flag after it). io_tracer_ and
// full_history_ts_low_ are const, so they cannot be assigned after
// construction, and together with thread_pri_ they share names with
// constructor parameters -- confirm where the PickMemTable() group ends.
//
// Memtable list of this job; exposed read-only through GetMemTables().
159 autovector
<MemTable
*> mems_
;
// NOTE(review): unlike the other data members, this name has no trailing
// underscore.
162 bool pick_memtable_called
;
163 Env::Priority thread_pri_
;
// Shared ownership of the IOTracer; the pointer itself is const.
166 const std::shared_ptr
<IOTracer
> io_tracer_
;
// Held by value (the constructor takes full_history_ts_low by value as well).
168 const std::string full_history_ts_low_
;
171 } // namespace ROCKSDB_NAMESPACE