]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | // |
6 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |
7 | // Use of this source code is governed by a BSD-style license that can be | |
8 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
9 | #pragma once | |
10 | ||
11 | #include <atomic> | |
12 | #include <deque> | |
13 | #include <limits> | |
f67539c2 | 14 | #include <list> |
7c673cae | 15 | #include <set> |
f67539c2 | 16 | #include <string> |
7c673cae FG |
17 | #include <utility> |
18 | #include <vector> | |
7c673cae FG |
19 | |
20 | #include "db/column_family.h" | |
21 | #include "db/dbformat.h" | |
22 | #include "db/flush_scheduler.h" | |
23 | #include "db/internal_stats.h" | |
24 | #include "db/job_context.h" | |
25 | #include "db/log_writer.h" | |
11fdf7f2 | 26 | #include "db/logs_with_prep_tracker.h" |
7c673cae FG |
27 | #include "db/memtable_list.h" |
28 | #include "db/snapshot_impl.h" | |
29 | #include "db/version_edit.h" | |
30 | #include "db/write_controller.h" | |
31 | #include "db/write_thread.h" | |
f67539c2 | 32 | #include "logging/event_logger.h" |
7c673cae FG |
33 | #include "monitoring/instrumented_mutex.h" |
34 | #include "options/db_options.h" | |
35 | #include "port/port.h" | |
36 | #include "rocksdb/db.h" | |
37 | #include "rocksdb/env.h" | |
f67539c2 | 38 | #include "rocksdb/listener.h" |
7c673cae FG |
39 | #include "rocksdb/memtablerep.h" |
40 | #include "rocksdb/transaction_log.h" | |
41 | #include "table/scoped_arena_iterator.h" | |
42 | #include "util/autovector.h" | |
7c673cae FG |
43 | #include "util/stop_watch.h" |
44 | #include "util/thread_local.h" | |
45 | ||
f67539c2 | 46 | namespace ROCKSDB_NAMESPACE { |
7c673cae | 47 | |
11fdf7f2 | 48 | class DBImpl; |
7c673cae | 49 | class MemTable; |
11fdf7f2 | 50 | class SnapshotChecker; |
7c673cae FG |
51 | class TableCache; |
52 | class Version; | |
53 | class VersionEdit; | |
54 | class VersionSet; | |
55 | class Arena; | |
56 | ||
57 | class FlushJob { | |
58 | public: | |
59 | // TODO(icanadi) make effort to reduce number of parameters here | |
60 | // IMPORTANT: mutable_cf_options needs to be alive while FlushJob is alive | |
61 | FlushJob(const std::string& dbname, ColumnFamilyData* cfd, | |
62 | const ImmutableDBOptions& db_options, | |
63 | const MutableCFOptions& mutable_cf_options, | |
f67539c2 | 64 | const uint64_t* max_memtable_id, const FileOptions& file_options, |
494da23a TL |
65 | VersionSet* versions, InstrumentedMutex* db_mutex, |
66 | std::atomic<bool>* shutting_down, | |
7c673cae FG |
67 | std::vector<SequenceNumber> existing_snapshots, |
68 | SequenceNumber earliest_write_conflict_snapshot, | |
11fdf7f2 | 69 | SnapshotChecker* snapshot_checker, JobContext* job_context, |
20effc67 TL |
70 | LogBuffer* log_buffer, FSDirectory* db_directory, |
71 | FSDirectory* output_file_directory, | |
72 | CompressionType output_compression, Statistics* stats, | |
73 | EventLogger* event_logger, bool measure_io_stats, | |
494da23a | 74 | const bool sync_output_directory, const bool write_manifest, |
20effc67 TL |
75 | Env::Priority thread_pri, const std::shared_ptr<IOTracer>& io_tracer, |
76 | const std::string& db_id = "", const std::string& db_session_id = "", | |
77 | std::string full_history_ts_low = ""); | |
7c673cae FG |
78 | |
79 | ~FlushJob(); | |
80 | ||
81 | // Require db_mutex held. | |
11fdf7f2 | 82 | // Once PickMemTable() is called, either Run() or Cancel() has to be called. |
7c673cae | 83 | void PickMemTable(); |
11fdf7f2 TL |
84 | Status Run(LogsWithPrepTracker* prep_tracker = nullptr, |
85 | FileMetaData* file_meta = nullptr); | |
7c673cae | 86 | void Cancel(); |
494da23a | 87 | const autovector<MemTable*>& GetMemTables() const { return mems_; } |
7c673cae | 88 | |
f67539c2 TL |
89 | #ifndef ROCKSDB_LITE |
90 | std::list<std::unique_ptr<FlushJobInfo>>* GetCommittedFlushJobsInfo() { | |
91 | return &committed_flush_jobs_info_; | |
92 | } | |
93 | #endif // !ROCKSDB_LITE | |
94 | ||
20effc67 TL |
95 | // Return the IO status |
96 | IOStatus io_status() const { return io_status_; } | |
97 | ||
7c673cae FG |
98 | private: |
99 | void ReportStartedFlush(); | |
100 | void ReportFlushInputSize(const autovector<MemTable*>& mems); | |
101 | void RecordFlushIOStats(); | |
102 | Status WriteLevel0Table(); | |
f67539c2 TL |
103 | #ifndef ROCKSDB_LITE |
104 | std::unique_ptr<FlushJobInfo> GetFlushJobInfo() const; | |
105 | #endif // !ROCKSDB_LITE | |
494da23a | 106 | |
7c673cae | 107 | const std::string& dbname_; |
20effc67 TL |
108 | const std::string db_id_; |
109 | const std::string db_session_id_; | |
7c673cae FG |
110 | ColumnFamilyData* cfd_; |
111 | const ImmutableDBOptions& db_options_; | |
112 | const MutableCFOptions& mutable_cf_options_; | |
494da23a TL |
113 | // Pointer to a variable storing the largest memtable id to flush in this |
114 | // flush job. RocksDB uses this variable to select the memtables to flush in | |
115 | // this job. All memtables in this column family with an ID smaller than or | |
116 | // equal to *max_memtable_id_ will be selected for flush. If null, then all | |
117 | // memtables in the column family will be selected. | |
118 | const uint64_t* max_memtable_id_; | |
f67539c2 | 119 | const FileOptions file_options_; |
7c673cae FG |
120 | VersionSet* versions_; |
121 | InstrumentedMutex* db_mutex_; | |
122 | std::atomic<bool>* shutting_down_; | |
123 | std::vector<SequenceNumber> existing_snapshots_; | |
124 | SequenceNumber earliest_write_conflict_snapshot_; | |
11fdf7f2 | 125 | SnapshotChecker* snapshot_checker_; |
7c673cae FG |
126 | JobContext* job_context_; |
127 | LogBuffer* log_buffer_; | |
20effc67 TL |
128 | FSDirectory* db_directory_; |
129 | FSDirectory* output_file_directory_; | |
7c673cae FG |
130 | CompressionType output_compression_; |
131 | Statistics* stats_; | |
132 | EventLogger* event_logger_; | |
133 | TableProperties table_properties_; | |
134 | bool measure_io_stats_; | |
494da23a TL |
135 | // True if this flush job should call fsync on the output directory. False |
136 | // otherwise. | |
137 | // Usually sync_output_directory_ is true. A flush job needs to call sync on | |
138 | // the output directory before committing to the MANIFEST. | |
139 | // However, an individual flush job does not have to call sync on the output | |
140 | // directory if it is part of an atomic flush. After all flush jobs in the | |
141 | // atomic flush succeed, call sync once on each distinct output directory. | |
142 | const bool sync_output_directory_; | |
143 | // True if this flush job should write to MANIFEST after successfully | |
144 | // flushing memtables. False otherwise. | |
145 | // Usually write_manifest_ is true. A flush job commits to the MANIFEST after | |
146 | // flushing the memtables. | |
147 | // However, an individual flush job cannot rashly write to the MANIFEST | |
148 | // immediately after it finishes the flush if it is part of an atomic flush. | |
149 | // In this case, only after all flush jobs succeed in flush can RocksDB | |
150 | // commit to the MANIFEST. | |
151 | const bool write_manifest_; | |
f67539c2 TL |
152 | // The current flush job can commit flush result of a concurrent flush job. |
153 | // We collect FlushJobInfo of all jobs committed by current job and fire | |
154 | // OnFlushCompleted for them. | |
155 | std::list<std::unique_ptr<FlushJobInfo>> committed_flush_jobs_info_; | |
7c673cae FG |
156 | |
157 | // Variables below are set by PickMemTable(): | |
158 | FileMetaData meta_; | |
159 | autovector<MemTable*> mems_; | |
160 | VersionEdit* edit_; | |
161 | Version* base_; | |
162 | bool pick_memtable_called; | |
494da23a | 163 | Env::Priority thread_pri_; |
20effc67 TL |
164 | IOStatus io_status_; |
165 | ||
166 | const std::shared_ptr<IOTracer> io_tracer_; | |
167 | ||
168 | const std::string full_history_ts_low_; | |
7c673cae FG |
169 | }; |
170 | ||
f67539c2 | 171 | } // namespace ROCKSDB_NAMESPACE |