]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/db/external_sst_file_ingestion_job.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / rocksdb / db / external_sst_file_ingestion_job.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5
6 #pragma once
7 #include <string>
8 #include <unordered_set>
9 #include <vector>
10
11 #include "db/column_family.h"
12 #include "db/dbformat.h"
13 #include "db/internal_stats.h"
14 #include "db/snapshot_impl.h"
15 #include "env/file_system_tracer.h"
16 #include "logging/event_logger.h"
17 #include "options/db_options.h"
18 #include "rocksdb/db.h"
19 #include "rocksdb/env.h"
20 #include "rocksdb/sst_file_writer.h"
21 #include "util/autovector.h"
22
23 namespace ROCKSDB_NAMESPACE {
24
25 class Directories;
26
27 struct IngestedFileInfo {
28 // External file path
29 std::string external_file_path;
30 // Smallest internal key in external file
31 InternalKey smallest_internal_key;
32 // Largest internal key in external file
33 InternalKey largest_internal_key;
34 // Sequence number for keys in external file
35 SequenceNumber original_seqno;
36 // Offset of the global sequence number field in the file, will
37 // be zero if version is 1 (global seqno is not supported)
38 size_t global_seqno_offset;
39 // External file size
40 uint64_t file_size;
41 // total number of keys in external file
42 uint64_t num_entries;
43 // total number of range deletions in external file
44 uint64_t num_range_deletions;
45 // Id of column family this file shoule be ingested into
46 uint32_t cf_id;
47 // TableProperties read from external file
48 TableProperties table_properties;
49 // Version of external file
50 int version;
51
52 // FileDescriptor for the file inside the DB
53 FileDescriptor fd;
54 // file path that we picked for file inside the DB
55 std::string internal_file_path;
56 // Global sequence number that we picked for the file inside the DB
57 SequenceNumber assigned_seqno = 0;
58 // Level inside the DB we picked for the external file.
59 int picked_level = 0;
60 // Whether to copy or link the external sst file. copy_file will be set to
61 // false if ingestion_options.move_files is true and underlying FS
62 // supports link operation. Need to provide a default value to make the
63 // undefined-behavior sanity check of llvm happy. Since
64 // ingestion_options.move_files is false by default, thus copy_file is true
65 // by default.
66 bool copy_file = true;
67 // The checksum of ingested file
68 std::string file_checksum;
69 // The name of checksum function that generate the checksum
70 std::string file_checksum_func_name;
71 };
72
73 class ExternalSstFileIngestionJob {
74 public:
75 ExternalSstFileIngestionJob(
76 Env* env, VersionSet* versions, ColumnFamilyData* cfd,
77 const ImmutableDBOptions& db_options, const EnvOptions& env_options,
78 SnapshotList* db_snapshots,
79 const IngestExternalFileOptions& ingestion_options,
80 Directories* directories, EventLogger* event_logger,
81 const std::shared_ptr<IOTracer>& io_tracer)
82 : env_(env),
83 fs_(db_options.fs, io_tracer),
84 versions_(versions),
85 cfd_(cfd),
86 db_options_(db_options),
87 env_options_(env_options),
88 db_snapshots_(db_snapshots),
89 ingestion_options_(ingestion_options),
90 directories_(directories),
91 event_logger_(event_logger),
92 job_start_time_(env_->NowMicros()),
93 consumed_seqno_count_(0),
94 io_tracer_(io_tracer) {
95 assert(directories != nullptr);
96 }
97
98 // Prepare the job by copying external files into the DB.
99 Status Prepare(const std::vector<std::string>& external_files_paths,
100 const std::vector<std::string>& files_checksums,
101 const std::vector<std::string>& files_checksum_func_names,
102 uint64_t next_file_number, SuperVersion* sv);
103
104 // Check if we need to flush the memtable before running the ingestion job
105 // This will be true if the files we are ingesting are overlapping with any
106 // key range in the memtable.
107 //
108 // @param super_version A referenced SuperVersion that will be held for the
109 // duration of this function.
110 //
111 // Thread-safe
112 Status NeedsFlush(bool* flush_needed, SuperVersion* super_version);
113
114 // Will execute the ingestion job and prepare edit() to be applied.
115 // REQUIRES: Mutex held
116 Status Run();
117
118 // Update column family stats.
119 // REQUIRES: Mutex held
120 void UpdateStats();
121
122 // Cleanup after successful/failed job
123 void Cleanup(const Status& status);
124
125 VersionEdit* edit() { return &edit_; }
126
127 const autovector<IngestedFileInfo>& files_to_ingest() const {
128 return files_to_ingest_;
129 }
130
131 // How many sequence numbers did we consume as part of the ingest job?
132 int ConsumedSequenceNumbersCount() const { return consumed_seqno_count_; }
133
134 private:
135 // Open the external file and populate `file_to_ingest` with all the
136 // external information we need to ingest this file.
137 Status GetIngestedFileInfo(const std::string& external_file,
138 IngestedFileInfo* file_to_ingest,
139 SuperVersion* sv);
140
141 // Assign `file_to_ingest` the appropriate sequence number and the lowest
142 // possible level that it can be ingested to according to compaction_style.
143 // REQUIRES: Mutex held
144 Status AssignLevelAndSeqnoForIngestedFile(SuperVersion* sv,
145 bool force_global_seqno,
146 CompactionStyle compaction_style,
147 SequenceNumber last_seqno,
148 IngestedFileInfo* file_to_ingest,
149 SequenceNumber* assigned_seqno);
150
151 // File that we want to ingest behind always goes to the lowest level;
152 // we just check that it fits in the level, that DB allows ingest_behind,
153 // and that we don't have 0 seqnums at the upper levels.
154 // REQUIRES: Mutex held
155 Status CheckLevelForIngestedBehindFile(IngestedFileInfo* file_to_ingest);
156
157 // Set the file global sequence number to `seqno`
158 Status AssignGlobalSeqnoForIngestedFile(IngestedFileInfo* file_to_ingest,
159 SequenceNumber seqno);
160 // Generate the file checksum and store in the IngestedFileInfo
161 IOStatus GenerateChecksumForIngestedFile(IngestedFileInfo* file_to_ingest);
162
163 // Check if `file_to_ingest` can fit in level `level`
164 // REQUIRES: Mutex held
165 bool IngestedFileFitInLevel(const IngestedFileInfo* file_to_ingest,
166 int level);
167
168 // Helper method to sync given file.
169 template <typename TWritableFile>
170 Status SyncIngestedFile(TWritableFile* file);
171
172 Env* env_;
173 FileSystemPtr fs_;
174 VersionSet* versions_;
175 ColumnFamilyData* cfd_;
176 const ImmutableDBOptions& db_options_;
177 const EnvOptions& env_options_;
178 SnapshotList* db_snapshots_;
179 autovector<IngestedFileInfo> files_to_ingest_;
180 const IngestExternalFileOptions& ingestion_options_;
181 Directories* directories_;
182 EventLogger* event_logger_;
183 VersionEdit edit_;
184 uint64_t job_start_time_;
185 int consumed_seqno_count_;
186 // Set in ExternalSstFileIngestionJob::Prepare(), if true all files are
187 // ingested in L0
188 bool files_overlap_{false};
189 // Set in ExternalSstFileIngestionJob::Prepare(), if true and DB
190 // file_checksum_gen_factory is set, DB will generate checksum each file.
191 bool need_generate_file_checksum_{true};
192 std::shared_ptr<IOTracer> io_tracer_;
193 };
194
195 } // namespace ROCKSDB_NAMESPACE