1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
8 #include <unordered_set>
11 #include "db/column_family.h"
12 #include "db/dbformat.h"
13 #include "db/internal_stats.h"
14 #include "db/snapshot_impl.h"
15 #include "options/db_options.h"
16 #include "rocksdb/db.h"
17 #include "rocksdb/env.h"
18 #include "rocksdb/sst_file_writer.h"
19 #include "util/autovector.h"
// Describes a single external SST file handled by an ingestion job: the
// information read from the external file, and the values picked for it
// inside the DB (internal path, assigned sequence number, copy-vs-link).
// NOTE(review): several field comments below are not followed by a
// declaration in this excerpt -- confirm against the complete header.
23 struct IngestedFileInfo
{
// External file path
25 std::string external_file_path
;
26 // Smallest user key in external file
27 std::string smallest_user_key
;
28 // Largest user key in external file
29 std::string largest_user_key
;
30 // Sequence number for keys in external file
31 SequenceNumber original_seqno
;
32 // Offset of the global sequence number field in the file, will
33 // be zero if version is 1 (global seqno is not supported)
34 size_t global_seqno_offset
;
37 // Total number of keys in external file
39 // Total number of range deletions in external file
40 uint64_t num_range_deletions
;
41 // Id of column family this file should be ingested into
43 // TableProperties read from external file
44 TableProperties table_properties
;
45 // Version of external file
48 // FileDescriptor for the file inside the DB
50 // File path that we picked for file inside the DB
51 std::string internal_file_path
;
52 // Global sequence number that we picked for the file inside the DB
53 SequenceNumber assigned_seqno
= 0;
54 // Level inside the DB we picked for the external file.
56 // Whether to copy or link the external sst file. copy_file will be set to
57 // false if ingestion_options.move_files is true and underlying FS
58 // supports link operation. Need to provide a default value to make the
59 // undefined-behavior sanity check of llvm happy. Since
60 // ingestion_options.move_files is false by default, copy_file defaults to true.
62 bool copy_file
= true;
// Returns the InternalKey built from smallest_user_key, assigned_seqno and
// ValueType::kTypeValue.
64 InternalKey
smallest_internal_key() const {
65 return InternalKey(smallest_user_key
, assigned_seqno
,
66 ValueType::kTypeValue
);
// Returns the InternalKey built from largest_user_key, assigned_seqno and
// ValueType::kTypeValue.
69 InternalKey
largest_internal_key() const {
70 return InternalKey(largest_user_key
, assigned_seqno
, ValueType::kTypeValue
);
// Job object that ingests external SST files into the DB. Going by the
// per-method comments below, Prepare() copies the files in, NeedsFlush()
// reports memtable overlap, and Cleanup() runs after success or failure;
// edit() and files_to_ingest() expose the job's VersionEdit and file list.
// NOTE(review): some method comments and parameter lists below are not
// followed by their full declaration in this excerpt -- confirm against the
// complete header.
74 class ExternalSstFileIngestionJob
{
// Stores the supplied collaborators and options. db_options, env_options
// and ingestion_options are kept as references (see the member declarations
// at the end of the class), so the referenced objects need to outlive the
// job.
76 ExternalSstFileIngestionJob(
77 Env
* env
, VersionSet
* versions
, ColumnFamilyData
* cfd
,
78 const ImmutableDBOptions
& db_options
, const EnvOptions
& env_options
,
79 SnapshotList
* db_snapshots
,
80 const IngestExternalFileOptions
& ingestion_options
)
84 db_options_(db_options
),
85 env_options_(env_options
),
86 db_snapshots_(db_snapshots
),
87 ingestion_options_(ingestion_options
),
// Start time is taken from the Env clock at construction.
88 job_start_time_(env_
->NowMicros()),
89 consumed_seqno_(false) {}
91 // Prepare the job by copying external files into the DB.
92 Status
Prepare(const std::vector
<std::string
>& external_files_paths
,
93 uint64_t next_file_number
, SuperVersion
* sv
);
95 // Check if we need to flush the memtable before running the ingestion job.
96 // This will be true if the files we are ingesting are overlapping with any
97 // key range in the memtable.
99 // @param super_version A referenced SuperVersion that will be held for the
100 // duration of this function.
103 Status
NeedsFlush(bool* flush_needed
, SuperVersion
* super_version
);
105 // Will execute the ingestion job and prepare edit() to be applied.
106 // REQUIRES: Mutex held
109 // Update column family stats.
110 // REQUIRES: Mutex held
113 // Clean up after a successful or failed job.
114 void Cleanup(const Status
& status
);
// Returns a pointer to the job's VersionEdit (edit_).
116 VersionEdit
* edit() { return &edit_
; }
// Read-only access to the list of files tracked by this job.
118 const autovector
<IngestedFileInfo
>& files_to_ingest() const {
119 return files_to_ingest_
;
122 // Whether to increment VersionSet's seqno after this job runs
123 bool ShouldIncrementLastSequence() const { return consumed_seqno_
; }
126 // Open the external file and populate `file_to_ingest` with all the
127 // external information we need to ingest this file.
128 Status
GetIngestedFileInfo(const std::string
& external_file
,
129 IngestedFileInfo
* file_to_ingest
,
132 // Assign `file_to_ingest` the appropriate sequence number and the lowest
133 // possible level that it can be ingested to according to compaction_style.
134 // REQUIRES: Mutex held
135 Status
AssignLevelAndSeqnoForIngestedFile(SuperVersion
* sv
,
136 bool force_global_seqno
,
137 CompactionStyle compaction_style
,
138 IngestedFileInfo
* file_to_ingest
,
139 SequenceNumber
* assigned_seqno
);
141 // File that we want to ingest behind always goes to the lowest level;
142 // we just check that it fits in the level, that DB allows ingest_behind,
143 // and that we don't have 0 seqnums at the upper levels.
144 // REQUIRES: Mutex held
145 Status
CheckLevelForIngestedBehindFile(IngestedFileInfo
* file_to_ingest
);
147 // Set the file global sequence number to `seqno`.
148 Status
AssignGlobalSeqnoForIngestedFile(IngestedFileInfo
* file_to_ingest
,
149 SequenceNumber seqno
);
151 // Check if `file_to_ingest` can fit in level `level`.
152 // REQUIRES: Mutex held
153 bool IngestedFileFitInLevel(const IngestedFileInfo
* file_to_ingest
,
// Collaborators and options passed to the constructor. The reference members
// do not own the objects they refer to.
157 VersionSet
* versions_
;
158 ColumnFamilyData
* cfd_
;
159 const ImmutableDBOptions
& db_options_
;
160 const EnvOptions
& env_options_
;
161 SnapshotList
* db_snapshots_
;
// Files tracked by this job; exposed through files_to_ingest().
162 autovector
<IngestedFileInfo
> files_to_ingest_
;
163 const IngestExternalFileOptions
& ingestion_options_
;
// Value of env_->NowMicros() captured in the constructor.
165 uint64_t job_start_time_
;
// Starts as false; reported by ShouldIncrementLastSequence().
166 bool consumed_seqno_
;
169 } // namespace rocksdb