git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/db/external_sst_file_ingestion_job.h
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / db / external_sst_file_ingestion_job.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5
6 #pragma once
7 #include <string>
8 #include <unordered_set>
9 #include <vector>
10
11 #include "db/column_family.h"
12 #include "db/dbformat.h"
13 #include "db/internal_stats.h"
14 #include "db/snapshot_impl.h"
15 #include "options/db_options.h"
16 #include "rocksdb/db.h"
17 #include "rocksdb/env.h"
18 #include "rocksdb/sst_file_writer.h"
19 #include "util/autovector.h"
20
21 namespace rocksdb {
22
23 struct IngestedFileInfo {
24 // External file path
25 std::string external_file_path;
26 // Smallest user key in external file
27 std::string smallest_user_key;
28 // Largest user key in external file
29 std::string largest_user_key;
30 // Sequence number for keys in external file
31 SequenceNumber original_seqno;
32 // Offset of the global sequence number field in the file, will
33 // be zero if version is 1 (global seqno is not supported)
34 size_t global_seqno_offset;
35 // External file size
36 uint64_t file_size;
37 // total number of keys in external file
38 uint64_t num_entries;
39 // total number of range deletions in external file
40 uint64_t num_range_deletions;
41 // Id of column family this file shoule be ingested into
42 uint32_t cf_id;
43 // TableProperties read from external file
44 TableProperties table_properties;
45 // Version of external file
46 int version;
47
48 // FileDescriptor for the file inside the DB
49 FileDescriptor fd;
50 // file path that we picked for file inside the DB
51 std::string internal_file_path;
52 // Global sequence number that we picked for the file inside the DB
53 SequenceNumber assigned_seqno = 0;
54 // Level inside the DB we picked for the external file.
55 int picked_level = 0;
56 // Whether to copy or link the external sst file. copy_file will be set to
57 // false if ingestion_options.move_files is true and underlying FS
58 // supports link operation. Need to provide a default value to make the
59 // undefined-behavior sanity check of llvm happy. Since
60 // ingestion_options.move_files is false by default, thus copy_file is true
61 // by default.
62 bool copy_file = true;
63
64 InternalKey smallest_internal_key() const {
65 return InternalKey(smallest_user_key, assigned_seqno,
66 ValueType::kTypeValue);
67 }
68
69 InternalKey largest_internal_key() const {
70 return InternalKey(largest_user_key, assigned_seqno, ValueType::kTypeValue);
71 }
72 };
73
74 class ExternalSstFileIngestionJob {
75 public:
76 ExternalSstFileIngestionJob(
77 Env* env, VersionSet* versions, ColumnFamilyData* cfd,
78 const ImmutableDBOptions& db_options, const EnvOptions& env_options,
79 SnapshotList* db_snapshots,
80 const IngestExternalFileOptions& ingestion_options)
81 : env_(env),
82 versions_(versions),
83 cfd_(cfd),
84 db_options_(db_options),
85 env_options_(env_options),
86 db_snapshots_(db_snapshots),
87 ingestion_options_(ingestion_options),
88 job_start_time_(env_->NowMicros()),
89 consumed_seqno_(false) {}
90
91 // Prepare the job by copying external files into the DB.
92 Status Prepare(const std::vector<std::string>& external_files_paths,
93 uint64_t next_file_number, SuperVersion* sv);
94
95 // Check if we need to flush the memtable before running the ingestion job
96 // This will be true if the files we are ingesting are overlapping with any
97 // key range in the memtable.
98 //
99 // @param super_version A referenced SuperVersion that will be held for the
100 // duration of this function.
101 //
102 // Thread-safe
103 Status NeedsFlush(bool* flush_needed, SuperVersion* super_version);
104
105 // Will execute the ingestion job and prepare edit() to be applied.
106 // REQUIRES: Mutex held
107 Status Run();
108
109 // Update column family stats.
110 // REQUIRES: Mutex held
111 void UpdateStats();
112
113 // Cleanup after successful/failed job
114 void Cleanup(const Status& status);
115
116 VersionEdit* edit() { return &edit_; }
117
118 const autovector<IngestedFileInfo>& files_to_ingest() const {
119 return files_to_ingest_;
120 }
121
122 // Whether to increment VersionSet's seqno after this job runs
123 bool ShouldIncrementLastSequence() const { return consumed_seqno_; }
124
125 private:
126 // Open the external file and populate `file_to_ingest` with all the
127 // external information we need to ingest this file.
128 Status GetIngestedFileInfo(const std::string& external_file,
129 IngestedFileInfo* file_to_ingest,
130 SuperVersion* sv);
131
132 // Assign `file_to_ingest` the appropriate sequence number and the lowest
133 // possible level that it can be ingested to according to compaction_style.
134 // REQUIRES: Mutex held
135 Status AssignLevelAndSeqnoForIngestedFile(SuperVersion* sv,
136 bool force_global_seqno,
137 CompactionStyle compaction_style,
138 IngestedFileInfo* file_to_ingest,
139 SequenceNumber* assigned_seqno);
140
141 // File that we want to ingest behind always goes to the lowest level;
142 // we just check that it fits in the level, that DB allows ingest_behind,
143 // and that we don't have 0 seqnums at the upper levels.
144 // REQUIRES: Mutex held
145 Status CheckLevelForIngestedBehindFile(IngestedFileInfo* file_to_ingest);
146
147 // Set the file global sequence number to `seqno`
148 Status AssignGlobalSeqnoForIngestedFile(IngestedFileInfo* file_to_ingest,
149 SequenceNumber seqno);
150
151 // Check if `file_to_ingest` can fit in level `level`
152 // REQUIRES: Mutex held
153 bool IngestedFileFitInLevel(const IngestedFileInfo* file_to_ingest,
154 int level);
155
156 Env* env_;
157 VersionSet* versions_;
158 ColumnFamilyData* cfd_;
159 const ImmutableDBOptions& db_options_;
160 const EnvOptions& env_options_;
161 SnapshotList* db_snapshots_;
162 autovector<IngestedFileInfo> files_to_ingest_;
163 const IngestExternalFileOptions& ingestion_options_;
164 VersionEdit edit_;
165 uint64_t job_start_time_;
166 bool consumed_seqno_;
167 };
168
169 } // namespace rocksdb