]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | |
6 | #pragma once | |
7 | #include <string> | |
8 | #include <unordered_set> | |
9 | #include <vector> | |
10 | ||
11 | #include "db/column_family.h" | |
12 | #include "db/dbformat.h" | |
13 | #include "db/internal_stats.h" | |
14 | #include "db/snapshot_impl.h" | |
15 | #include "options/db_options.h" | |
16 | #include "rocksdb/db.h" | |
17 | #include "rocksdb/env.h" | |
18 | #include "rocksdb/sst_file_writer.h" | |
19 | #include "util/autovector.h" | |
20 | ||
21 | namespace rocksdb { | |
22 | ||
23 | struct IngestedFileInfo { | |
24 | // Path of the external file | |
25 | std::string external_file_path; | |
26 | // Smallest user key in the external file | |
27 | std::string smallest_user_key; | |
28 | // Largest user key in the external file | |
29 | std::string largest_user_key; | |
30 | // Sequence number for keys in the external file | |
31 | SequenceNumber original_seqno; | |
32 | // Offset of the global sequence number field in the file; it will | |
33 | // be zero if version is 1 (global seqno is not supported) | |
34 | size_t global_seqno_offset; | |
35 | // Size of the external file | |
36 | uint64_t file_size; | |
37 | // Total number of keys in the external file | |
38 | uint64_t num_entries; | |
11fdf7f2 TL |
39 | // Total number of range deletions in the external file |
40 | uint64_t num_range_deletions; | |
7c673cae FG |
41 | // ID of the column family this file should be ingested into |
42 | uint32_t cf_id; | |
43 | // TableProperties read from the external file | |
44 | TableProperties table_properties; | |
45 | // Version of the external file | |
46 | int version; | |
47 | ||
48 | // FileDescriptor for the file inside the DB | |
49 | FileDescriptor fd; | |
50 | // File path that we picked for the file inside the DB | |
11fdf7f2 | 51 | std::string internal_file_path; |
7c673cae FG |
52 | // Global sequence number that we picked for the file inside the DB |
53 | SequenceNumber assigned_seqno = 0; | |
54 | // Level inside the DB that we picked for the external file. | |
55 | int picked_level = 0; | |
11fdf7f2 TL |
56 | // Whether to copy or link the external SST file. copy_file will be set to |
57 | // false if ingestion_options.move_files is true and the underlying FS | |
58 | // supports the link operation. A default value is needed to keep the | |
59 | // undefined-behavior sanity check of LLVM happy. Because | |
60 | // ingestion_options.move_files is false by default, copy_file is true | |
61 | // by default. | |
62 | bool copy_file = true; | |
7c673cae FG |
63 |
64 | InternalKey smallest_internal_key() const { | |
65 | return InternalKey(smallest_user_key, assigned_seqno, | |
66 | ValueType::kTypeValue); | |
67 | } | |
68 | ||
69 | InternalKey largest_internal_key() const { | |
70 | return InternalKey(largest_user_key, assigned_seqno, ValueType::kTypeValue); | |
71 | } | |
72 | }; | |
73 | ||
74 | class ExternalSstFileIngestionJob { | |
75 | public: | |
76 | ExternalSstFileIngestionJob( | |
77 | Env* env, VersionSet* versions, ColumnFamilyData* cfd, | |
78 | const ImmutableDBOptions& db_options, const EnvOptions& env_options, | |
79 | SnapshotList* db_snapshots, | |
80 | const IngestExternalFileOptions& ingestion_options) | |
81 | : env_(env), | |
82 | versions_(versions), | |
83 | cfd_(cfd), | |
84 | db_options_(db_options), | |
85 | env_options_(env_options), | |
86 | db_snapshots_(db_snapshots), | |
87 | ingestion_options_(ingestion_options), | |
494da23a TL |
88 | job_start_time_(env_->NowMicros()), |
89 | consumed_seqno_(false) {} | |
7c673cae FG |
90 |
91 | // Prepare the job by copying external files into the DB. | |
11fdf7f2 TL |
92 | Status Prepare(const std::vector<std::string>& external_files_paths, |
93 | uint64_t next_file_number, SuperVersion* sv); | |
7c673cae FG |
94 |
95 | // Check if we need to flush the memtable before running the ingestion job. | |
96 | // This will be true if the files we are ingesting overlap with any | |
97 | // key range in the memtable. | |
11fdf7f2 TL |
98 | // |
99 | // @param super_version A referenced SuperVersion that will be held for the | |
100 | // duration of this function. | |
101 | // | |
102 | // Thread-safe | |
103 | Status NeedsFlush(bool* flush_needed, SuperVersion* super_version); | |
7c673cae FG |
104 |
105 | // Execute the ingestion job and prepare edit() to be applied. | |
106 | // REQUIRES: Mutex held | |
107 | Status Run(); | |
108 | ||
109 | // Update column family stats. | |
110 | // REQUIRES: Mutex held | |
111 | void UpdateStats(); | |
112 | ||
11fdf7f2 | 113 | // Clean up after a successful or failed job. |
7c673cae FG |
114 | void Cleanup(const Status& status); |
115 | ||
116 | VersionEdit* edit() { return &edit_; } | |
117 | ||
118 | const autovector<IngestedFileInfo>& files_to_ingest() const { | |
119 | return files_to_ingest_; | |
120 | } | |
121 | ||
494da23a TL |
122 | // Whether to increment VersionSet's seqno after this job runs. |
123 | bool ShouldIncrementLastSequence() const { return consumed_seqno_; } | |
124 | ||
7c673cae FG |
125 | private: |
126 | // Open the external file and populate `file_to_ingest` with all the | |
127 | // external information we need to ingest this file. | |
128 | Status GetIngestedFileInfo(const std::string& external_file, | |
11fdf7f2 TL |
129 | IngestedFileInfo* file_to_ingest, |
130 | SuperVersion* sv); | |
7c673cae FG |
131 |
132 | // Assign `file_to_ingest` the appropriate sequence number and the lowest | |
133 | // possible level that it can be ingested to according to compaction_style. | |
134 | // REQUIRES: Mutex held | |
135 | Status AssignLevelAndSeqnoForIngestedFile(SuperVersion* sv, | |
136 | bool force_global_seqno, | |
137 | CompactionStyle compaction_style, | |
138 | IngestedFileInfo* file_to_ingest, | |
139 | SequenceNumber* assigned_seqno); | |
140 | ||
11fdf7f2 TL |
141 | // A file that we want to ingest behind always goes to the lowest level; |
142 | // we just check that it fits in the level, that the DB allows ingest_behind, | |
143 | // and that we don't have 0 seqnums at the upper levels. | |
144 | // REQUIRES: Mutex held | |
145 | Status CheckLevelForIngestedBehindFile(IngestedFileInfo* file_to_ingest); | |
146 | ||
7c673cae FG |
147 | // Set the file's global sequence number to `seqno`. |
148 | Status AssignGlobalSeqnoForIngestedFile(IngestedFileInfo* file_to_ingest, | |
149 | SequenceNumber seqno); | |
150 | ||
7c673cae FG |
151 | // Check if `file_to_ingest` can fit in level `level`. |
152 | // REQUIRES: Mutex held | |
153 | bool IngestedFileFitInLevel(const IngestedFileInfo* file_to_ingest, | |
154 | int level); | |
155 | ||
156 | Env* env_; | |
157 | VersionSet* versions_; | |
158 | ColumnFamilyData* cfd_; | |
159 | const ImmutableDBOptions& db_options_; | |
160 | const EnvOptions& env_options_; | |
161 | SnapshotList* db_snapshots_; | |
162 | autovector<IngestedFileInfo> files_to_ingest_; | |
163 | const IngestExternalFileOptions& ingestion_options_; | |
164 | VersionEdit edit_; | |
165 | uint64_t job_start_time_; | |
494da23a | 166 | bool consumed_seqno_; |
7c673cae FG |
167 | }; |
168 | ||
169 | } // namespace rocksdb |