1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
8 #include <unordered_set>
11 #include "db/column_family.h"
12 #include "db/dbformat.h"
13 #include "db/internal_stats.h"
14 #include "db/snapshot_impl.h"
15 #include "env/file_system_tracer.h"
16 #include "logging/event_logger.h"
17 #include "options/db_options.h"
18 #include "rocksdb/db.h"
19 #include "rocksdb/env.h"
20 #include "rocksdb/sst_file_writer.h"
21 #include "util/autovector.h"
23 namespace ROCKSDB_NAMESPACE
{
// Per-file record for one external file handled by an ingestion job.
// The fields hold what is known about the file as it exists outside the DB
// (key range, sequence number, table properties) together with the values
// picked for the file inside the DB (path, assigned sequence number,
// checksum).
// NOTE(review): several comments below (total number of keys, column family
// id, file version, FileDescriptor, picked level) are not followed by a
// declaration in this listing; confirm the corresponding fields are present.
27 struct IngestedFileInfo
{
// Path of the external file
29 std::string external_file_path
;
30 // Smallest internal key in external file
31 InternalKey smallest_internal_key
;
32 // Largest internal key in external file
33 InternalKey largest_internal_key
;
34 // Sequence number for keys in external file
35 SequenceNumber original_seqno
;
36 // Offset of the global sequence number field in the file, will
37 // be zero if version is 1 (global seqno is not supported)
38 size_t global_seqno_offset
;
41 // total number of keys in external file
43 // total number of range deletions in external file
44 uint64_t num_range_deletions
;
45 // Id of column family this file should be ingested into
47 // TableProperties read from external file
48 TableProperties table_properties
;
49 // Version of external file
52 // FileDescriptor for the file inside the DB
54 // file path that we picked for file inside the DB
55 std::string internal_file_path
;
56 // Global sequence number that we picked for the file inside the DB
57 SequenceNumber assigned_seqno
= 0;
58 // Level inside the DB we picked for the external file.
60 // Whether to copy or link the external sst file. copy_file will be set to
61 // false if ingestion_options.move_files is true and underlying FS
62 // supports link operation. Need to provide a default value to make the
63 // undefined-behavior sanity check of llvm happy. Since
64 // ingestion_options.move_files is false by default, thus copy_file is true
66 bool copy_file
= true;
67 // The checksum of ingested file
68 std::string file_checksum
;
69 // The name of the checksum function that generated the checksum
70 std::string file_checksum_func_name
;
// Job object that ingests a set of external SST files (tracked in
// files_to_ingest_) for the column family passed to the constructor as `cfd`.
// The visible steps are Prepare(), NeedsFlush() and Cleanup(); edit() exposes
// the VersionEdit associated with the job.
//
// Lifetime: db_options, env_options and ingestion_options are kept as
// references (db_options_, env_options_, ingestion_options_), so the
// referenced objects must remain valid for as long as the job uses them.
//
// NOTE(review): in this listing some declarations are cut off or absent
// (the two "REQUIRES: Mutex held" comments after NeedsFlush() have no
// declaration, and the parameter lists of GetIngestedFileInfo() and
// IngestedFileFitInLevel() end on a comma); confirm against the full header.
73 class ExternalSstFileIngestionJob
{
// Stores the supplied collaborators in the corresponding members and records
// the job start time. `directories` must not be null.
75 ExternalSstFileIngestionJob(
76 Env
* env
, VersionSet
* versions
, ColumnFamilyData
* cfd
,
77 const ImmutableDBOptions
& db_options
, const EnvOptions
& env_options
,
78 SnapshotList
* db_snapshots
,
79 const IngestExternalFileOptions
& ingestion_options
,
80 Directories
* directories
, EventLogger
* event_logger
,
81 const std::shared_ptr
<IOTracer
>& io_tracer
)
// fs_ is constructed from db_options.fs together with io_tracer.
83 fs_(db_options
.fs
, io_tracer
),
86 db_options_(db_options
),
87 env_options_(env_options
),
88 db_snapshots_(db_snapshots
),
89 ingestion_options_(ingestion_options
),
90 directories_(directories
),
91 event_logger_(event_logger
),
// Reads the clock through the env_ member (not the `env` parameter), so env_
// has to be initialized before job_start_time_.
// NOTE(review): env_'s declaration and initializer are not visible in this
// listing -- confirm the member declaration order.
92 job_start_time_(env_
->NowMicros()),
93 consumed_seqno_count_(0),
94 io_tracer_(io_tracer
) {
// Checked with assert only, i.e. not enforced when NDEBUG is defined.
95 assert(directories
!= nullptr);
98 // Prepare the job by copying external files into the DB.
// NOTE(review): files_checksums and files_checksum_func_names presumably
// correspond entry-by-entry to external_files_paths -- confirm against the
// implementation.
99 Status
Prepare(const std::vector
<std::string
>& external_files_paths
,
100 const std::vector
<std::string
>& files_checksums
,
101 const std::vector
<std::string
>& files_checksum_func_names
,
102 uint64_t next_file_number
, SuperVersion
* sv
);
104 // Check if we need to flush the memtable before running the ingestion job
105 // This will be true if the files we are ingesting are overlapping with any
106 // key range in the memtable.
// @param flush_needed Out-parameter receiving the result of the check.
108 // @param super_version A referenced SuperVersion that will be held for the
109 // duration of this function.
112 Status
NeedsFlush(bool* flush_needed
, SuperVersion
* super_version
);
114 // Will execute the ingestion job and prepare edit() to be applied.
115 // REQUIRES: Mutex held
118 // Update column family stats.
119 // REQUIRES: Mutex held
122 // Cleanup after successful/failed job
// @param status Presumably the outcome of the job, used to tell the
// successful case from the failed one -- confirm against the implementation.
123 void Cleanup(const Status
& status
);
// Returns the address of edit_, giving the caller mutable access to it.
125 VersionEdit
* edit() { return &edit_
; }
// Read-only access to the per-file records held by this job.
127 const autovector
<IngestedFileInfo
>& files_to_ingest() const {
128 return files_to_ingest_
;
131 // How many sequence numbers did we consume as part of the ingest job?
132 int ConsumedSequenceNumbersCount() const { return consumed_seqno_count_
; }
135 // Open the external file and populate `file_to_ingest` with all the
136 // external information we need to ingest this file.
137 Status
GetIngestedFileInfo(const std::string
& external_file
,
138 IngestedFileInfo
* file_to_ingest
,
141 // Assign `file_to_ingest` the appropriate sequence number and the lowest
142 // possible level that it can be ingested to according to compaction_style.
143 // REQUIRES: Mutex held
144 Status
AssignLevelAndSeqnoForIngestedFile(SuperVersion
* sv
,
145 bool force_global_seqno
,
146 CompactionStyle compaction_style
,
147 SequenceNumber last_seqno
,
148 IngestedFileInfo
* file_to_ingest
,
149 SequenceNumber
* assigned_seqno
);
151 // File that we want to ingest behind always goes to the lowest level;
152 // we just check that it fits in the level, that DB allows ingest_behind,
153 // and that we don't have 0 seqnums at the upper levels.
154 // REQUIRES: Mutex held
155 Status
CheckLevelForIngestedBehindFile(IngestedFileInfo
* file_to_ingest
);
157 // Set the file global sequence number to `seqno`
158 Status
AssignGlobalSeqnoForIngestedFile(IngestedFileInfo
* file_to_ingest
,
159 SequenceNumber seqno
);
160 // Generate the file checksum and store in the IngestedFileInfo
161 IOStatus
GenerateChecksumForIngestedFile(IngestedFileInfo
* file_to_ingest
);
163 // Check if `file_to_ingest` can fit in level `level`
164 // REQUIRES: Mutex held
165 bool IngestedFileFitInLevel(const IngestedFileInfo
* file_to_ingest
,
168 // Helper method to sync given file.
169 template <typename TWritableFile
>
170 Status
SyncIngestedFile(TWritableFile
* file
);
// State received through the constructor; pointers and references are stored
// as given rather than copied.
174 VersionSet
* versions_
;
175 ColumnFamilyData
* cfd_
;
176 const ImmutableDBOptions
& db_options_
;
177 const EnvOptions
& env_options_
;
178 SnapshotList
* db_snapshots_
;
// One IngestedFileInfo per file handled by this job; exposed read-only via
// files_to_ingest().
179 autovector
<IngestedFileInfo
> files_to_ingest_
;
180 const IngestExternalFileOptions
& ingestion_options_
;
181 Directories
* directories_
;
182 EventLogger
* event_logger_
;
// Value of env_->NowMicros() captured in the constructor.
184 uint64_t job_start_time_
;
// Starts at 0; reported by ConsumedSequenceNumbersCount().
185 int consumed_seqno_count_
;
186 // Set in ExternalSstFileIngestionJob::Prepare(), if true all files are
188 bool files_overlap_
{false};
189 // Set in ExternalSstFileIngestionJob::Prepare(), if true and DB
190 // file_checksum_gen_factory is set, DB will generate checksum for each file.
191 bool need_generate_file_checksum_
{true};
// Shared IOTracer handle copied from the constructor argument.
192 std::shared_ptr
<IOTracer
> io_tracer_
;
195 } // namespace ROCKSDB_NAMESPACE