]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/db/flush_job.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / rocksdb / db / flush_job.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 #pragma once
10
11 #include <atomic>
12 #include <deque>
13 #include <limits>
14 #include <list>
15 #include <set>
16 #include <string>
17 #include <utility>
18 #include <vector>
19
20 #include "db/column_family.h"
21 #include "db/dbformat.h"
22 #include "db/flush_scheduler.h"
23 #include "db/internal_stats.h"
24 #include "db/job_context.h"
25 #include "db/log_writer.h"
26 #include "db/logs_with_prep_tracker.h"
27 #include "db/memtable_list.h"
28 #include "db/snapshot_impl.h"
29 #include "db/version_edit.h"
30 #include "db/write_controller.h"
31 #include "db/write_thread.h"
32 #include "logging/event_logger.h"
33 #include "monitoring/instrumented_mutex.h"
34 #include "options/db_options.h"
35 #include "port/port.h"
36 #include "rocksdb/db.h"
37 #include "rocksdb/env.h"
38 #include "rocksdb/listener.h"
39 #include "rocksdb/memtablerep.h"
40 #include "rocksdb/transaction_log.h"
41 #include "table/scoped_arena_iterator.h"
42 #include "util/autovector.h"
43 #include "util/stop_watch.h"
44 #include "util/thread_local.h"
45
46 namespace ROCKSDB_NAMESPACE {
47
48 class DBImpl;
49 class MemTable;
50 class SnapshotChecker;
51 class TableCache;
52 class Version;
53 class VersionEdit;
54 class VersionSet;
55 class Arena;
56
// FlushJob bundles the inputs and per-job state for flushing memtables of one
// column family (cfd_). As the member comments below describe, the memtables
// to flush are selected by id, and the job may write to the MANIFEST after
// flushing them unless it is part of an atomic flush.
//
// Usage contract stated by the declarations below: call PickMemTable() with
// db_mutex held, then call either Run() or Cancel().
57 class FlushJob {
58 public:
59 // TODO(icanadi) make effort to reduce number of parameters here
60 // IMPORTANT: mutable_cf_options needs to be alive while FlushJob is alive
// NOTE(review): dbname_, db_options_ and mutable_cf_options_ are all declared
// as reference members below, so presumably the objects passed as dbname and
// db_options must outlive this FlushJob as well -- confirm against the
// constructor definition, which is not in this header.
// existing_snapshots and full_history_ts_low are taken by value.
// db_id, db_session_id and full_history_ts_low default to the empty string.
61 FlushJob(const std::string& dbname, ColumnFamilyData* cfd,
62 const ImmutableDBOptions& db_options,
63 const MutableCFOptions& mutable_cf_options,
64 const uint64_t* max_memtable_id, const FileOptions& file_options,
65 VersionSet* versions, InstrumentedMutex* db_mutex,
66 std::atomic<bool>* shutting_down,
67 std::vector<SequenceNumber> existing_snapshots,
68 SequenceNumber earliest_write_conflict_snapshot,
69 SnapshotChecker* snapshot_checker, JobContext* job_context,
70 LogBuffer* log_buffer, FSDirectory* db_directory,
71 FSDirectory* output_file_directory,
72 CompressionType output_compression, Statistics* stats,
73 EventLogger* event_logger, bool measure_io_stats,
74 const bool sync_output_directory, const bool write_manifest,
75 Env::Priority thread_pri, const std::shared_ptr<IOTracer>& io_tracer,
76 const std::string& db_id = "", const std::string& db_session_id = "",
77 std::string full_history_ts_low = "");
78
79 ~FlushJob();
80
81 // Require db_mutex held.
82 // Once PickMemTable() is called, either Run() or Cancel() has to be called.
83 void PickMemTable();
// Returns a Status. Both arguments are optional and default to nullptr.
// NOTE(review): presumably performs the flush of the picked memtables, with
// file_meta acting as an optional output -- confirm in the definition.
84 Status Run(LogsWithPrepTracker* prep_tracker = nullptr,
85 FileMetaData* file_meta = nullptr);
// The alternative to Run() once PickMemTable() has been called (see above).
86 void Cancel();
// Read-only reference to mems_; valid only while this FlushJob is alive.
87 const autovector<MemTable*>& GetMemTables() const { return mems_; }
88
// Only declared when ROCKSDB_LITE is not defined.
// Returns a non-const pointer to the internal committed_flush_jobs_info_
// list, so the caller can modify the list in place.
89 #ifndef ROCKSDB_LITE
90 std::list<std::unique_ptr<FlushJobInfo>>* GetCommittedFlushJobsInfo() {
91 return &committed_flush_jobs_info_;
92 }
93 #endif // !ROCKSDB_LITE
94
95 // Return the IO status
// (returned by value, i.e. a copy of io_status_).
96 IOStatus io_status() const { return io_status_; }
97
98 private:
// Internal helpers; their definitions are not in this header, so only the
// signatures are documented here. WriteLevel0Table() reports its outcome as a
// Status; ReportFlushInputSize() receives the memtable list by const reference.
99 void ReportStartedFlush();
100 void ReportFlushInputSize(const autovector<MemTable*>& mems);
101 void RecordFlushIOStats();
102 Status WriteLevel0Table();
103 #ifndef ROCKSDB_LITE
104 std::unique_ptr<FlushJobInfo> GetFlushJobInfo() const;
105 #endif // !ROCKSDB_LITE
106
// Reference member: does not own the string (see the lifetime note on the
// constructor).
107 const std::string& dbname_;
// Held by value, unlike dbname_.
108 const std::string db_id_;
109 const std::string db_session_id_;
110 ColumnFamilyData* cfd_;
// Reference members: the referenced option objects are not copied.
111 const ImmutableDBOptions& db_options_;
112 const MutableCFOptions& mutable_cf_options_;
113 // Pointer to a variable storing the largest memtable id to flush in this
114 // flush job. RocksDB uses this variable to select the memtables to flush in
115 // this job. All memtables in this column family with an ID smaller than or
116 // equal to *max_memtable_id_ will be selected for flush. If null, then all
117 // memtables in the column family will be selected.
118 const uint64_t* max_memtable_id_;
// Held by value (the constructor receives it by const reference).
119 const FileOptions file_options_;
// The raw pointers below are declared without any ownership annotation.
// NOTE(review): presumably non-owning and supplied by the caller -- confirm
// against the constructor/destructor definitions.
120 VersionSet* versions_;
121 InstrumentedMutex* db_mutex_;
122 std::atomic<bool>* shutting_down_;
123 std::vector<SequenceNumber> existing_snapshots_;
124 SequenceNumber earliest_write_conflict_snapshot_;
125 SnapshotChecker* snapshot_checker_;
126 JobContext* job_context_;
127 LogBuffer* log_buffer_;
128 FSDirectory* db_directory_;
129 FSDirectory* output_file_directory_;
130 CompressionType output_compression_;
131 Statistics* stats_;
132 EventLogger* event_logger_;
133 TableProperties table_properties_;
134 bool measure_io_stats_;
135 // True if this flush job should call fsync on the output directory. False
136 // otherwise.
137 // Usually sync_output_directory_ is true. A flush job needs to call sync on
138 // the output directory before committing to the MANIFEST.
139 // However, an individual flush job does not have to call sync on the output
140 // directory if it is part of an atomic flush. After all flush jobs in the
141 // atomic flush succeed, call sync once on each distinct output directory.
142 const bool sync_output_directory_;
143 // True if this flush job should write to MANIFEST after successfully
144 // flushing memtables. False otherwise.
145 // Usually write_manifest_ is true. A flush job commits to the MANIFEST after
146 // flushing the memtables.
147 // However, an individual flush job cannot rashly write to the MANIFEST
148 // immediately after it finishes the flush if it is part of an atomic flush.
149 // In this case, only after all flush jobs succeed in flush can RocksDB
150 // commit to the MANIFEST.
151 const bool write_manifest_;
152 // The current flush job can commit flush result of a concurrent flush job.
153 // We collect FlushJobInfo of all jobs committed by current job and fire
154 // OnFlushCompleted for them.
// Exposed to callers through GetCommittedFlushJobsInfo() in non-LITE builds.
155 std::list<std::unique_ptr<FlushJobInfo>> committed_flush_jobs_info_;
156
157 // Variables below are set by PickMemTable():
158 FileMetaData meta_;
// Exposed read-only through GetMemTables().
159 autovector<MemTable*> mems_;
160 VersionEdit* edit_;
161 Version* base_;
// NOTE(review): unlike every other data member, this name has no trailing
// underscore. Presumably it records whether PickMemTable() has run -- confirm
// in the definition before relying on it.
162 bool pick_memtable_called;
163 Env::Priority thread_pri_;
// Returned (by value) from io_status().
164 IOStatus io_status_;
165
166 const std::shared_ptr<IOTracer> io_tracer_;
167
168 const std::string full_history_ts_low_;
169 };
170
171 } // namespace ROCKSDB_NAMESPACE