]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | |
6 | #include <functional> | |
7 | ||
8 | #include "db/db_test_util.h" | |
20effc67 | 9 | #include "db/version_edit.h" |
7c673cae FG |
10 | #include "port/port.h" |
11 | #include "port/stack_trace.h" | |
12 | #include "rocksdb/sst_file_writer.h" | |
f67539c2 | 13 | #include "test_util/testutil.h" |
20effc67 TL |
14 | #include "util/random.h" |
15 | #include "utilities/fault_injection_env.h" | |
7c673cae | 16 | |
f67539c2 | 17 | namespace ROCKSDB_NAMESPACE { |
7c673cae FG |
18 | |
19 | #ifndef ROCKSDB_LITE | |
494da23a TL |
20 | class ExternalSSTFileBasicTest |
21 | : public DBTestBase, | |
22 | public ::testing::WithParamInterface<std::tuple<bool, bool>> { | |
7c673cae | 23 | public: |
20effc67 TL |
24 | ExternalSSTFileBasicTest() |
25 | : DBTestBase("/external_sst_file_basic_test", /*env_do_fsync=*/true) { | |
7c673cae | 26 | sst_files_dir_ = dbname_ + "/sst_files/"; |
20effc67 | 27 | fault_injection_test_env_.reset(new FaultInjectionTestEnv(env_)); |
7c673cae FG |
28 | DestroyAndRecreateExternalSSTFilesDir(); |
29 | } | |
30 | ||
31 | void DestroyAndRecreateExternalSSTFilesDir() { | |
20effc67 | 32 | DestroyDir(env_, sst_files_dir_); |
7c673cae FG |
33 | env_->CreateDir(sst_files_dir_); |
34 | } | |
35 | ||
36 | Status DeprecatedAddFile(const std::vector<std::string>& files, | |
37 | bool move_files = false, | |
38 | bool skip_snapshot_check = false) { | |
39 | IngestExternalFileOptions opts; | |
40 | opts.move_files = move_files; | |
41 | opts.snapshot_consistency = !skip_snapshot_check; | |
42 | opts.allow_global_seqno = false; | |
43 | opts.allow_blocking_flush = false; | |
44 | return db_->IngestExternalFile(files, opts); | |
45 | } | |
46 | ||
20effc67 TL |
47 | Status AddFileWithFileChecksum( |
48 | const std::vector<std::string>& files, | |
49 | const std::vector<std::string>& files_checksums, | |
50 | const std::vector<std::string>& files_checksum_func_names, | |
51 | bool verify_file_checksum = true, bool move_files = false, | |
52 | bool skip_snapshot_check = false, bool write_global_seqno = true) { | |
53 | IngestExternalFileOptions opts; | |
54 | opts.move_files = move_files; | |
55 | opts.snapshot_consistency = !skip_snapshot_check; | |
56 | opts.allow_global_seqno = false; | |
57 | opts.allow_blocking_flush = false; | |
58 | opts.write_global_seqno = write_global_seqno; | |
59 | opts.verify_file_checksum = verify_file_checksum; | |
60 | ||
61 | IngestExternalFileArg arg; | |
62 | arg.column_family = db_->DefaultColumnFamily(); | |
63 | arg.external_files = files; | |
64 | arg.options = opts; | |
65 | arg.files_checksums = files_checksums; | |
66 | arg.files_checksum_func_names = files_checksum_func_names; | |
67 | return db_->IngestExternalFiles({arg}); | |
68 | } | |
69 | ||
7c673cae | 70 | Status GenerateAndAddExternalFile( |
11fdf7f2 TL |
71 | const Options options, std::vector<int> keys, |
72 | const std::vector<ValueType>& value_types, | |
73 | std::vector<std::pair<int, int>> range_deletions, int file_id, | |
494da23a | 74 | bool write_global_seqno, bool verify_checksums_before_ingest, |
7c673cae | 75 | std::map<std::string, std::string>* true_data) { |
11fdf7f2 | 76 | assert(value_types.size() == 1 || keys.size() == value_types.size()); |
7c673cae FG |
77 | std::string file_path = sst_files_dir_ + ToString(file_id); |
78 | SstFileWriter sst_file_writer(EnvOptions(), options); | |
79 | ||
80 | Status s = sst_file_writer.Open(file_path); | |
81 | if (!s.ok()) { | |
82 | return s; | |
83 | } | |
11fdf7f2 TL |
84 | for (size_t i = 0; i < range_deletions.size(); i++) { |
85 | // Account for the effect of range deletions on true_data before | |
86 | // all point operators, even though sst_file_writer.DeleteRange | |
87 | // must be called before other sst_file_writer methods. This is | |
88 | // because point writes take precedence over range deletions | |
89 | // in the same ingested sst. | |
90 | std::string start_key = Key(range_deletions[i].first); | |
91 | std::string end_key = Key(range_deletions[i].second); | |
92 | s = sst_file_writer.DeleteRange(start_key, end_key); | |
93 | if (!s.ok()) { | |
94 | sst_file_writer.Finish(); | |
95 | return s; | |
96 | } | |
97 | auto start_key_it = true_data->find(start_key); | |
98 | if (start_key_it == true_data->end()) { | |
99 | start_key_it = true_data->upper_bound(start_key); | |
100 | } | |
101 | auto end_key_it = true_data->find(end_key); | |
102 | if (end_key_it == true_data->end()) { | |
103 | end_key_it = true_data->upper_bound(end_key); | |
104 | } | |
105 | true_data->erase(start_key_it, end_key_it); | |
106 | } | |
107 | for (size_t i = 0; i < keys.size(); i++) { | |
108 | std::string key = Key(keys[i]); | |
109 | std::string value = Key(keys[i]) + ToString(file_id); | |
110 | ValueType value_type = | |
111 | (value_types.size() == 1 ? value_types[0] : value_types[i]); | |
112 | switch (value_type) { | |
113 | case ValueType::kTypeValue: | |
114 | s = sst_file_writer.Put(key, value); | |
115 | (*true_data)[key] = value; | |
116 | break; | |
117 | case ValueType::kTypeMerge: | |
118 | s = sst_file_writer.Merge(key, value); | |
119 | // we only use TestPutOperator in this test | |
120 | (*true_data)[key] = value; | |
121 | break; | |
122 | case ValueType::kTypeDeletion: | |
123 | s = sst_file_writer.Delete(key); | |
124 | true_data->erase(key); | |
125 | break; | |
126 | default: | |
127 | return Status::InvalidArgument("Value type is not supported"); | |
128 | } | |
7c673cae FG |
129 | if (!s.ok()) { |
130 | sst_file_writer.Finish(); | |
131 | return s; | |
132 | } | |
133 | } | |
134 | s = sst_file_writer.Finish(); | |
135 | ||
136 | if (s.ok()) { | |
137 | IngestExternalFileOptions ifo; | |
138 | ifo.allow_global_seqno = true; | |
494da23a TL |
139 | ifo.write_global_seqno = write_global_seqno; |
140 | ifo.verify_checksums_before_ingest = verify_checksums_before_ingest; | |
7c673cae FG |
141 | s = db_->IngestExternalFile({file_path}, ifo); |
142 | } | |
7c673cae FG |
143 | return s; |
144 | } | |
145 | ||
11fdf7f2 TL |
146 | Status GenerateAndAddExternalFile( |
147 | const Options options, std::vector<int> keys, | |
148 | const std::vector<ValueType>& value_types, int file_id, | |
494da23a | 149 | bool write_global_seqno, bool verify_checksums_before_ingest, |
11fdf7f2 | 150 | std::map<std::string, std::string>* true_data) { |
494da23a TL |
151 | return GenerateAndAddExternalFile( |
152 | options, keys, value_types, {}, file_id, write_global_seqno, | |
153 | verify_checksums_before_ingest, true_data); | |
11fdf7f2 TL |
154 | } |
155 | ||
156 | Status GenerateAndAddExternalFile( | |
157 | const Options options, std::vector<int> keys, const ValueType value_type, | |
494da23a TL |
158 | int file_id, bool write_global_seqno, bool verify_checksums_before_ingest, |
159 | std::map<std::string, std::string>* true_data) { | |
160 | return GenerateAndAddExternalFile( | |
161 | options, keys, std::vector<ValueType>(1, value_type), file_id, | |
162 | write_global_seqno, verify_checksums_before_ingest, true_data); | |
11fdf7f2 TL |
163 | } |
164 | ||
20effc67 | 165 | ~ExternalSSTFileBasicTest() override { DestroyDir(env_, sst_files_dir_); } |
7c673cae FG |
166 | |
167 | protected: | |
168 | std::string sst_files_dir_; | |
f67539c2 | 169 | std::unique_ptr<FaultInjectionTestEnv> fault_injection_test_env_; |
7c673cae FG |
170 | }; |
171 | ||
172 | TEST_F(ExternalSSTFileBasicTest, Basic) { | |
173 | Options options = CurrentOptions(); | |
174 | ||
175 | SstFileWriter sst_file_writer(EnvOptions(), options); | |
176 | ||
177 | // Current file size should be 0 after sst_file_writer init and before open a | |
178 | // file. | |
179 | ASSERT_EQ(sst_file_writer.FileSize(), 0); | |
180 | ||
181 | // file1.sst (0 => 99) | |
182 | std::string file1 = sst_files_dir_ + "file1.sst"; | |
183 | ASSERT_OK(sst_file_writer.Open(file1)); | |
184 | for (int k = 0; k < 100; k++) { | |
11fdf7f2 | 185 | ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); |
7c673cae FG |
186 | } |
187 | ExternalSstFileInfo file1_info; | |
188 | Status s = sst_file_writer.Finish(&file1_info); | |
189 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
190 | ||
191 | // Current file size should be non-zero after success write. | |
192 | ASSERT_GT(sst_file_writer.FileSize(), 0); | |
193 | ||
194 | ASSERT_EQ(file1_info.file_path, file1); | |
195 | ASSERT_EQ(file1_info.num_entries, 100); | |
196 | ASSERT_EQ(file1_info.smallest_key, Key(0)); | |
197 | ASSERT_EQ(file1_info.largest_key, Key(99)); | |
11fdf7f2 TL |
198 | ASSERT_EQ(file1_info.num_range_del_entries, 0); |
199 | ASSERT_EQ(file1_info.smallest_range_del_key, ""); | |
200 | ASSERT_EQ(file1_info.largest_range_del_key, ""); | |
20effc67 TL |
201 | ASSERT_EQ(file1_info.file_checksum, kUnknownFileChecksum); |
202 | ASSERT_EQ(file1_info.file_checksum_func_name, kUnknownFileChecksumFuncName); | |
203 | // sst_file_writer already finished, cannot add this value | |
204 | s = sst_file_writer.Put(Key(100), "bad_val"); | |
205 | ASSERT_FALSE(s.ok()) << s.ToString(); | |
206 | s = sst_file_writer.DeleteRange(Key(100), Key(200)); | |
207 | ASSERT_FALSE(s.ok()) << s.ToString(); | |
208 | ||
209 | DestroyAndReopen(options); | |
210 | // Add file using file path | |
211 | s = DeprecatedAddFile({file1}); | |
212 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
213 | ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); | |
214 | for (int k = 0; k < 100; k++) { | |
215 | ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); | |
216 | } | |
217 | ||
218 | DestroyAndRecreateExternalSSTFilesDir(); | |
219 | } | |
220 | ||
221 | class ChecksumVerifyHelper { | |
222 | private: | |
223 | Options options_; | |
224 | ||
225 | public: | |
226 | ChecksumVerifyHelper(Options& options) : options_(options) {} | |
227 | ~ChecksumVerifyHelper() {} | |
228 | ||
229 | Status GetSingleFileChecksumAndFuncName( | |
230 | const std::string& file_path, std::string* file_checksum, | |
231 | std::string* file_checksum_func_name) { | |
232 | Status s; | |
233 | EnvOptions soptions; | |
234 | std::unique_ptr<SequentialFile> file_reader; | |
235 | s = options_.env->NewSequentialFile(file_path, &file_reader, soptions); | |
236 | if (!s.ok()) { | |
237 | return s; | |
238 | } | |
239 | std::unique_ptr<char[]> scratch(new char[2048]); | |
240 | Slice result; | |
241 | FileChecksumGenFactory* file_checksum_gen_factory = | |
242 | options_.file_checksum_gen_factory.get(); | |
243 | if (file_checksum_gen_factory == nullptr) { | |
244 | *file_checksum = kUnknownFileChecksum; | |
245 | *file_checksum_func_name = kUnknownFileChecksumFuncName; | |
246 | return Status::OK(); | |
247 | } else { | |
248 | FileChecksumGenContext gen_context; | |
249 | std::unique_ptr<FileChecksumGenerator> file_checksum_gen = | |
250 | file_checksum_gen_factory->CreateFileChecksumGenerator(gen_context); | |
251 | *file_checksum_func_name = file_checksum_gen->Name(); | |
252 | s = file_reader->Read(2048, &result, scratch.get()); | |
253 | if (!s.ok()) { | |
254 | return s; | |
255 | } | |
256 | while (result.size() != 0) { | |
257 | file_checksum_gen->Update(scratch.get(), result.size()); | |
258 | s = file_reader->Read(2048, &result, scratch.get()); | |
259 | if (!s.ok()) { | |
260 | return s; | |
261 | } | |
262 | } | |
263 | file_checksum_gen->Finalize(); | |
264 | *file_checksum = file_checksum_gen->GetChecksum(); | |
265 | } | |
266 | return Status::OK(); | |
267 | } | |
268 | }; | |
269 | ||
270 | TEST_F(ExternalSSTFileBasicTest, BasicWithFileChecksumCrc32c) { | |
271 | Options options = CurrentOptions(); | |
272 | options.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory(); | |
273 | ChecksumVerifyHelper checksum_helper(options); | |
274 | ||
275 | SstFileWriter sst_file_writer(EnvOptions(), options); | |
276 | ||
277 | // Current file size should be 0 after sst_file_writer init and before open a | |
278 | // file. | |
279 | ASSERT_EQ(sst_file_writer.FileSize(), 0); | |
280 | ||
281 | // file1.sst (0 => 99) | |
282 | std::string file1 = sst_files_dir_ + "file1.sst"; | |
283 | ASSERT_OK(sst_file_writer.Open(file1)); | |
284 | for (int k = 0; k < 100; k++) { | |
285 | ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); | |
286 | } | |
287 | ExternalSstFileInfo file1_info; | |
288 | Status s = sst_file_writer.Finish(&file1_info); | |
289 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
290 | std::string file_checksum, file_checksum_func_name; | |
291 | ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( | |
292 | file1, &file_checksum, &file_checksum_func_name)); | |
293 | ||
294 | // Current file size should be non-zero after success write. | |
295 | ASSERT_GT(sst_file_writer.FileSize(), 0); | |
296 | ||
297 | ASSERT_EQ(file1_info.file_path, file1); | |
298 | ASSERT_EQ(file1_info.num_entries, 100); | |
299 | ASSERT_EQ(file1_info.smallest_key, Key(0)); | |
300 | ASSERT_EQ(file1_info.largest_key, Key(99)); | |
301 | ASSERT_EQ(file1_info.num_range_del_entries, 0); | |
302 | ASSERT_EQ(file1_info.smallest_range_del_key, ""); | |
303 | ASSERT_EQ(file1_info.largest_range_del_key, ""); | |
304 | ASSERT_EQ(file1_info.file_checksum, file_checksum); | |
305 | ASSERT_EQ(file1_info.file_checksum_func_name, file_checksum_func_name); | |
7c673cae | 306 | // sst_file_writer already finished, cannot add this value |
11fdf7f2 TL |
307 | s = sst_file_writer.Put(Key(100), "bad_val"); |
308 | ASSERT_FALSE(s.ok()) << s.ToString(); | |
309 | s = sst_file_writer.DeleteRange(Key(100), Key(200)); | |
7c673cae FG |
310 | ASSERT_FALSE(s.ok()) << s.ToString(); |
311 | ||
312 | DestroyAndReopen(options); | |
313 | // Add file using file path | |
314 | s = DeprecatedAddFile({file1}); | |
315 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
316 | ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); | |
317 | for (int k = 0; k < 100; k++) { | |
318 | ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); | |
319 | } | |
320 | ||
321 | DestroyAndRecreateExternalSSTFilesDir(); | |
322 | } | |
323 | ||
20effc67 TL |
324 | TEST_F(ExternalSSTFileBasicTest, IngestFileWithFileChecksum) { |
325 | Options old_options = CurrentOptions(); | |
326 | Options options = CurrentOptions(); | |
327 | options.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory(); | |
328 | const ImmutableCFOptions ioptions(options); | |
329 | ChecksumVerifyHelper checksum_helper(options); | |
330 | ||
331 | SstFileWriter sst_file_writer(EnvOptions(), options); | |
332 | ||
333 | // file01.sst (1000 => 1099) | |
334 | std::string file1 = sst_files_dir_ + "file01.sst"; | |
335 | ASSERT_OK(sst_file_writer.Open(file1)); | |
336 | for (int k = 1000; k < 1100; k++) { | |
337 | ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); | |
338 | } | |
339 | ExternalSstFileInfo file1_info; | |
340 | Status s = sst_file_writer.Finish(&file1_info); | |
341 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
342 | ASSERT_EQ(file1_info.file_path, file1); | |
343 | ASSERT_EQ(file1_info.num_entries, 100); | |
344 | ASSERT_EQ(file1_info.smallest_key, Key(1000)); | |
345 | ASSERT_EQ(file1_info.largest_key, Key(1099)); | |
346 | std::string file_checksum1, file_checksum_func_name1; | |
347 | ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( | |
348 | file1, &file_checksum1, &file_checksum_func_name1)); | |
349 | ASSERT_EQ(file1_info.file_checksum, file_checksum1); | |
350 | ASSERT_EQ(file1_info.file_checksum_func_name, file_checksum_func_name1); | |
351 | ||
352 | // file02.sst (1100 => 1299) | |
353 | std::string file2 = sst_files_dir_ + "file02.sst"; | |
354 | ASSERT_OK(sst_file_writer.Open(file2)); | |
355 | for (int k = 1100; k < 1300; k++) { | |
356 | ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); | |
357 | } | |
358 | ExternalSstFileInfo file2_info; | |
359 | s = sst_file_writer.Finish(&file2_info); | |
360 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
361 | ASSERT_EQ(file2_info.file_path, file2); | |
362 | ASSERT_EQ(file2_info.num_entries, 200); | |
363 | ASSERT_EQ(file2_info.smallest_key, Key(1100)); | |
364 | ASSERT_EQ(file2_info.largest_key, Key(1299)); | |
365 | std::string file_checksum2, file_checksum_func_name2; | |
366 | ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( | |
367 | file2, &file_checksum2, &file_checksum_func_name2)); | |
368 | ASSERT_EQ(file2_info.file_checksum, file_checksum2); | |
369 | ASSERT_EQ(file2_info.file_checksum_func_name, file_checksum_func_name2); | |
370 | ||
371 | // file03.sst (1300 => 1499) | |
372 | std::string file3 = sst_files_dir_ + "file03.sst"; | |
373 | ASSERT_OK(sst_file_writer.Open(file3)); | |
374 | for (int k = 1300; k < 1500; k++) { | |
375 | ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); | |
376 | } | |
377 | ExternalSstFileInfo file3_info; | |
378 | s = sst_file_writer.Finish(&file3_info); | |
379 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
380 | ASSERT_EQ(file3_info.file_path, file3); | |
381 | ASSERT_EQ(file3_info.num_entries, 200); | |
382 | ASSERT_EQ(file3_info.smallest_key, Key(1300)); | |
383 | ASSERT_EQ(file3_info.largest_key, Key(1499)); | |
384 | std::string file_checksum3, file_checksum_func_name3; | |
385 | ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( | |
386 | file3, &file_checksum3, &file_checksum_func_name3)); | |
387 | ASSERT_EQ(file3_info.file_checksum, file_checksum3); | |
388 | ASSERT_EQ(file3_info.file_checksum_func_name, file_checksum_func_name3); | |
389 | ||
390 | // file04.sst (1500 => 1799) | |
391 | std::string file4 = sst_files_dir_ + "file04.sst"; | |
392 | ASSERT_OK(sst_file_writer.Open(file4)); | |
393 | for (int k = 1500; k < 1800; k++) { | |
394 | ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); | |
395 | } | |
396 | ExternalSstFileInfo file4_info; | |
397 | s = sst_file_writer.Finish(&file4_info); | |
398 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
399 | ASSERT_EQ(file4_info.file_path, file4); | |
400 | ASSERT_EQ(file4_info.num_entries, 300); | |
401 | ASSERT_EQ(file4_info.smallest_key, Key(1500)); | |
402 | ASSERT_EQ(file4_info.largest_key, Key(1799)); | |
403 | std::string file_checksum4, file_checksum_func_name4; | |
404 | ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( | |
405 | file4, &file_checksum4, &file_checksum_func_name4)); | |
406 | ASSERT_EQ(file4_info.file_checksum, file_checksum4); | |
407 | ASSERT_EQ(file4_info.file_checksum_func_name, file_checksum_func_name4); | |
408 | ||
409 | // file05.sst (1800 => 1899) | |
410 | std::string file5 = sst_files_dir_ + "file05.sst"; | |
411 | ASSERT_OK(sst_file_writer.Open(file5)); | |
412 | for (int k = 1800; k < 2000; k++) { | |
413 | ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); | |
414 | } | |
415 | ExternalSstFileInfo file5_info; | |
416 | s = sst_file_writer.Finish(&file5_info); | |
417 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
418 | ASSERT_EQ(file5_info.file_path, file5); | |
419 | ASSERT_EQ(file5_info.num_entries, 200); | |
420 | ASSERT_EQ(file5_info.smallest_key, Key(1800)); | |
421 | ASSERT_EQ(file5_info.largest_key, Key(1999)); | |
422 | std::string file_checksum5, file_checksum_func_name5; | |
423 | ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( | |
424 | file5, &file_checksum5, &file_checksum_func_name5)); | |
425 | ASSERT_EQ(file5_info.file_checksum, file_checksum5); | |
426 | ASSERT_EQ(file5_info.file_checksum_func_name, file_checksum_func_name5); | |
427 | ||
428 | // file06.sst (2000 => 2199) | |
429 | std::string file6 = sst_files_dir_ + "file06.sst"; | |
430 | ASSERT_OK(sst_file_writer.Open(file6)); | |
431 | for (int k = 2000; k < 2200; k++) { | |
432 | ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); | |
433 | } | |
434 | ExternalSstFileInfo file6_info; | |
435 | s = sst_file_writer.Finish(&file6_info); | |
436 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
437 | ASSERT_EQ(file6_info.file_path, file6); | |
438 | ASSERT_EQ(file6_info.num_entries, 200); | |
439 | ASSERT_EQ(file6_info.smallest_key, Key(2000)); | |
440 | ASSERT_EQ(file6_info.largest_key, Key(2199)); | |
441 | std::string file_checksum6, file_checksum_func_name6; | |
442 | ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( | |
443 | file6, &file_checksum6, &file_checksum_func_name6)); | |
444 | ASSERT_EQ(file6_info.file_checksum, file_checksum6); | |
445 | ASSERT_EQ(file6_info.file_checksum_func_name, file_checksum_func_name6); | |
446 | ||
447 | s = AddFileWithFileChecksum({file1}, {file_checksum1, "xyz"}, | |
448 | {file_checksum1}, true, false, false, false); | |
449 | // does not care the checksum input since db does not enable file checksum | |
450 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
451 | ASSERT_OK(env_->FileExists(file1)); | |
452 | std::vector<LiveFileMetaData> live_files; | |
453 | dbfull()->GetLiveFilesMetaData(&live_files); | |
454 | std::set<std::string> set1; | |
455 | for (auto f : live_files) { | |
456 | set1.insert(f.name); | |
457 | ASSERT_EQ(f.file_checksum, kUnknownFileChecksum); | |
458 | ASSERT_EQ(f.file_checksum_func_name, kUnknownFileChecksumFuncName); | |
459 | } | |
460 | ||
461 | // Reopen Db with checksum enabled | |
462 | Reopen(options); | |
463 | // Enable verify_file_checksum option | |
464 | // The checksum vector does not match, fail the ingestion | |
465 | s = AddFileWithFileChecksum({file2}, {file_checksum2, "xyz"}, | |
466 | {file_checksum_func_name2}, true, false, false, | |
467 | false); | |
468 | ASSERT_FALSE(s.ok()) << s.ToString(); | |
469 | ||
470 | // Enable verify_file_checksum option | |
471 | // The checksum name does not match, fail the ingestion | |
472 | s = AddFileWithFileChecksum({file2}, {file_checksum2}, {"xyz"}, true, false, | |
473 | false, false); | |
474 | ASSERT_FALSE(s.ok()) << s.ToString(); | |
475 | ||
476 | // Enable verify_file_checksum option | |
477 | // The checksum itself does not match, fail the ingestion | |
478 | s = AddFileWithFileChecksum({file2}, {"xyz"}, {file_checksum_func_name2}, | |
479 | true, false, false, false); | |
480 | ASSERT_FALSE(s.ok()) << s.ToString(); | |
481 | ||
482 | // Enable verify_file_checksum option | |
483 | // All matches, ingestion is successful | |
484 | s = AddFileWithFileChecksum({file2}, {file_checksum2}, | |
485 | {file_checksum_func_name2}, true, false, false, | |
486 | false); | |
487 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
488 | std::vector<LiveFileMetaData> live_files1; | |
489 | dbfull()->GetLiveFilesMetaData(&live_files1); | |
490 | for (auto f : live_files1) { | |
491 | if (set1.find(f.name) == set1.end()) { | |
492 | ASSERT_EQ(f.file_checksum, file_checksum2); | |
493 | ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name2); | |
494 | set1.insert(f.name); | |
495 | } | |
496 | } | |
497 | ASSERT_OK(env_->FileExists(file2)); | |
498 | ||
499 | // Enable verify_file_checksum option | |
500 | // No checksum information is provided, generate it when ingesting | |
501 | std::vector<std::string> checksum, checksum_func; | |
502 | s = AddFileWithFileChecksum({file3}, checksum, checksum_func, true, false, | |
503 | false, false); | |
504 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
505 | std::vector<LiveFileMetaData> live_files2; | |
506 | dbfull()->GetLiveFilesMetaData(&live_files2); | |
507 | for (auto f : live_files2) { | |
508 | if (set1.find(f.name) == set1.end()) { | |
509 | ASSERT_EQ(f.file_checksum, file_checksum3); | |
510 | ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name3); | |
511 | set1.insert(f.name); | |
512 | } | |
513 | } | |
514 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
515 | ASSERT_OK(env_->FileExists(file3)); | |
516 | ||
517 | // Does not enable verify_file_checksum options | |
518 | // The checksum name does not match, fail the ingestion | |
519 | s = AddFileWithFileChecksum({file4}, {file_checksum4}, {"xyz"}, false, false, | |
520 | false, false); | |
521 | ASSERT_FALSE(s.ok()) << s.ToString(); | |
522 | ||
523 | // Does not enable verify_file_checksum options | |
524 | // Checksum function name matches, store the checksum being ingested. | |
525 | s = AddFileWithFileChecksum({file4}, {"asd"}, {file_checksum_func_name4}, | |
526 | false, false, false, false); | |
527 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
528 | std::vector<LiveFileMetaData> live_files3; | |
529 | dbfull()->GetLiveFilesMetaData(&live_files3); | |
530 | for (auto f : live_files3) { | |
531 | if (set1.find(f.name) == set1.end()) { | |
532 | ASSERT_FALSE(f.file_checksum == file_checksum4); | |
533 | ASSERT_EQ(f.file_checksum, "asd"); | |
534 | ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name4); | |
535 | set1.insert(f.name); | |
536 | } | |
537 | } | |
538 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
539 | ASSERT_OK(env_->FileExists(file4)); | |
540 | ||
541 | // enable verify_file_checksum options, DB enable checksum, and enable | |
542 | // write_global_seq. So the checksum stored is different from the one | |
543 | // ingested due to the sequence number changes. | |
544 | s = AddFileWithFileChecksum({file5}, {file_checksum5}, | |
545 | {file_checksum_func_name5}, true, false, false, | |
546 | true); | |
547 | ASSERT_OK(s); | |
548 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
549 | std::vector<LiveFileMetaData> live_files4; | |
550 | dbfull()->GetLiveFilesMetaData(&live_files4); | |
551 | for (auto f : live_files4) { | |
552 | if (set1.find(f.name) == set1.end()) { | |
553 | std::string cur_checksum5, cur_checksum_func_name5; | |
554 | ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( | |
555 | dbname_ + f.name, &cur_checksum5, &cur_checksum_func_name5)); | |
556 | ASSERT_EQ(f.file_checksum, cur_checksum5); | |
557 | ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name5); | |
558 | set1.insert(f.name); | |
559 | } | |
560 | } | |
561 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
562 | ASSERT_OK(env_->FileExists(file5)); | |
563 | ||
564 | // Does not enable verify_file_checksum options and also the ingested file | |
565 | // checksum information is empty. DB will generate and store the checksum | |
566 | // in Manifest. | |
567 | std::vector<std::string> files_c6, files_name6; | |
568 | s = AddFileWithFileChecksum({file6}, files_c6, files_name6, false, false, | |
569 | false, false); | |
570 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
571 | std::vector<LiveFileMetaData> live_files6; | |
572 | dbfull()->GetLiveFilesMetaData(&live_files6); | |
573 | for (auto f : live_files6) { | |
574 | if (set1.find(f.name) == set1.end()) { | |
575 | ASSERT_EQ(f.file_checksum, file_checksum6); | |
576 | ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name6); | |
577 | set1.insert(f.name); | |
578 | } | |
579 | } | |
580 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
581 | ASSERT_OK(env_->FileExists(file6)); | |
582 | } | |
583 | ||
7c673cae FG |
584 | TEST_F(ExternalSSTFileBasicTest, NoCopy) { |
585 | Options options = CurrentOptions(); | |
586 | const ImmutableCFOptions ioptions(options); | |
587 | ||
588 | SstFileWriter sst_file_writer(EnvOptions(), options); | |
589 | ||
590 | // file1.sst (0 => 99) | |
591 | std::string file1 = sst_files_dir_ + "file1.sst"; | |
592 | ASSERT_OK(sst_file_writer.Open(file1)); | |
593 | for (int k = 0; k < 100; k++) { | |
11fdf7f2 | 594 | ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); |
7c673cae FG |
595 | } |
596 | ExternalSstFileInfo file1_info; | |
597 | Status s = sst_file_writer.Finish(&file1_info); | |
598 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
599 | ASSERT_EQ(file1_info.file_path, file1); | |
600 | ASSERT_EQ(file1_info.num_entries, 100); | |
601 | ASSERT_EQ(file1_info.smallest_key, Key(0)); | |
602 | ASSERT_EQ(file1_info.largest_key, Key(99)); | |
603 | ||
604 | // file2.sst (100 => 299) | |
605 | std::string file2 = sst_files_dir_ + "file2.sst"; | |
606 | ASSERT_OK(sst_file_writer.Open(file2)); | |
607 | for (int k = 100; k < 300; k++) { | |
11fdf7f2 | 608 | ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); |
7c673cae FG |
609 | } |
610 | ExternalSstFileInfo file2_info; | |
611 | s = sst_file_writer.Finish(&file2_info); | |
612 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
613 | ASSERT_EQ(file2_info.file_path, file2); | |
614 | ASSERT_EQ(file2_info.num_entries, 200); | |
615 | ASSERT_EQ(file2_info.smallest_key, Key(100)); | |
616 | ASSERT_EQ(file2_info.largest_key, Key(299)); | |
617 | ||
618 | // file3.sst (110 => 124) .. overlap with file2.sst | |
619 | std::string file3 = sst_files_dir_ + "file3.sst"; | |
620 | ASSERT_OK(sst_file_writer.Open(file3)); | |
621 | for (int k = 110; k < 125; k++) { | |
11fdf7f2 | 622 | ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); |
7c673cae FG |
623 | } |
624 | ExternalSstFileInfo file3_info; | |
625 | s = sst_file_writer.Finish(&file3_info); | |
626 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
627 | ASSERT_EQ(file3_info.file_path, file3); | |
628 | ASSERT_EQ(file3_info.num_entries, 15); | |
629 | ASSERT_EQ(file3_info.smallest_key, Key(110)); | |
630 | ASSERT_EQ(file3_info.largest_key, Key(124)); | |
11fdf7f2 | 631 | |
7c673cae FG |
632 | s = DeprecatedAddFile({file1}, true /* move file */); |
633 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
634 | ASSERT_EQ(Status::NotFound(), env_->FileExists(file1)); | |
635 | ||
636 | s = DeprecatedAddFile({file2}, false /* copy file */); | |
637 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
638 | ASSERT_OK(env_->FileExists(file2)); | |
639 | ||
11fdf7f2 TL |
640 | // This file has overlapping values with the existing data |
641 | s = DeprecatedAddFile({file3}, true /* move file */); | |
7c673cae FG |
642 | ASSERT_FALSE(s.ok()) << s.ToString(); |
643 | ASSERT_OK(env_->FileExists(file3)); | |
644 | ||
645 | for (int k = 0; k < 300; k++) { | |
646 | ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); | |
647 | } | |
648 | } | |
649 | ||
494da23a TL |
650 | TEST_P(ExternalSSTFileBasicTest, IngestFileWithGlobalSeqnoPickedSeqno) { |
651 | bool write_global_seqno = std::get<0>(GetParam()); | |
652 | bool verify_checksums_before_ingest = std::get<1>(GetParam()); | |
7c673cae FG |
653 | do { |
654 | Options options = CurrentOptions(); | |
655 | DestroyAndReopen(options); | |
656 | std::map<std::string, std::string> true_data; | |
657 | ||
658 | int file_id = 1; | |
659 | ||
494da23a TL |
660 | ASSERT_OK(GenerateAndAddExternalFile( |
661 | options, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue, file_id++, | |
662 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 | 663 | // File doesn't overwrite any keys, no seqno needed |
7c673cae FG |
664 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0); |
665 | ||
494da23a TL |
666 | ASSERT_OK(GenerateAndAddExternalFile( |
667 | options, {10, 11, 12, 13}, ValueType::kTypeValue, file_id++, | |
668 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 | 669 | // File doesn't overwrite any keys, no seqno needed |
7c673cae FG |
670 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0); |
671 | ||
11fdf7f2 | 672 | ASSERT_OK(GenerateAndAddExternalFile( |
494da23a TL |
673 | options, {1, 4, 6}, ValueType::kTypeValue, file_id++, |
674 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 | 675 | // File overwrites some keys, a seqno will be assigned |
7c673cae FG |
676 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1); |
677 | ||
11fdf7f2 | 678 | ASSERT_OK(GenerateAndAddExternalFile( |
494da23a TL |
679 | options, {11, 15, 19}, ValueType::kTypeValue, file_id++, |
680 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 | 681 | // File overwrites some keys, a seqno will be assigned |
7c673cae FG |
682 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2); |
683 | ||
11fdf7f2 | 684 | ASSERT_OK(GenerateAndAddExternalFile( |
494da23a TL |
685 | options, {120, 130}, ValueType::kTypeValue, file_id++, |
686 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 | 687 | // File doesn't overwrite any keys, no seqno needed |
7c673cae FG |
688 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2); |
689 | ||
11fdf7f2 | 690 | ASSERT_OK(GenerateAndAddExternalFile( |
494da23a TL |
691 | options, {1, 130}, ValueType::kTypeValue, file_id++, write_global_seqno, |
692 | verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 | 693 | // File overwrites some keys, a seqno will be assigned |
7c673cae FG |
694 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3); |
695 | ||
696 | // Write some keys through normal write path | |
697 | for (int i = 0; i < 50; i++) { | |
698 | ASSERT_OK(Put(Key(i), "memtable")); | |
699 | true_data[Key(i)] = "memtable"; | |
700 | } | |
701 | SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber(); | |
702 | ||
11fdf7f2 | 703 | ASSERT_OK(GenerateAndAddExternalFile( |
494da23a TL |
704 | options, {60, 61, 62}, ValueType::kTypeValue, file_id++, |
705 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 | 706 | // File doesn't overwrite any keys, no seqno needed |
7c673cae FG |
707 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno); |
708 | ||
11fdf7f2 | 709 | ASSERT_OK(GenerateAndAddExternalFile( |
494da23a TL |
710 | options, {40, 41, 42}, ValueType::kTypeValue, file_id++, |
711 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
712 | // File overwrites some keys, a seqno will be assigned |
713 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 1); | |
714 | ||
715 | ASSERT_OK(GenerateAndAddExternalFile( | |
494da23a TL |
716 | options, {20, 30, 40}, ValueType::kTypeValue, file_id++, |
717 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
718 | // File overwrites some keys, a seqno will be assigned |
719 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 2); | |
720 | ||
721 | const Snapshot* snapshot = db_->GetSnapshot(); | |
722 | ||
723 | // We will need a seqno for the file regardless if the file overwrite | |
724 | // keys in the DB or not because we have a snapshot | |
725 | ASSERT_OK(GenerateAndAddExternalFile( | |
494da23a TL |
726 | options, {1000, 1002}, ValueType::kTypeValue, file_id++, |
727 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
728 | // A global seqno will be assigned anyway because of the snapshot |
729 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 3); | |
730 | ||
731 | ASSERT_OK(GenerateAndAddExternalFile( | |
494da23a TL |
732 | options, {2000, 3002}, ValueType::kTypeValue, file_id++, |
733 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
734 | // A global seqno will be assigned anyway because of the snapshot |
735 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 4); | |
736 | ||
494da23a TL |
737 | ASSERT_OK(GenerateAndAddExternalFile( |
738 | options, {1, 20, 40, 100, 150}, ValueType::kTypeValue, file_id++, | |
739 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
740 | // A global seqno will be assigned anyway because of the snapshot |
741 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5); | |
742 | ||
743 | db_->ReleaseSnapshot(snapshot); | |
744 | ||
745 | ASSERT_OK(GenerateAndAddExternalFile( | |
494da23a TL |
746 | options, {5000, 5001}, ValueType::kTypeValue, file_id++, |
747 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
748 | // No snapshot anymore, no need to assign a seqno |
749 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5); | |
750 | ||
751 | size_t kcnt = 0; | |
752 | VerifyDBFromMap(true_data, &kcnt, false); | |
494da23a | 753 | } while (ChangeOptionsForFileIngestionTest()); |
11fdf7f2 TL |
754 | } |
755 | ||
494da23a TL |
756 | TEST_P(ExternalSSTFileBasicTest, IngestFileWithMultipleValueType) { |
757 | bool write_global_seqno = std::get<0>(GetParam()); | |
758 | bool verify_checksums_before_ingest = std::get<1>(GetParam()); | |
11fdf7f2 TL |
759 | do { |
760 | Options options = CurrentOptions(); | |
761 | options.merge_operator.reset(new TestPutOperator()); | |
762 | DestroyAndReopen(options); | |
763 | std::map<std::string, std::string> true_data; | |
764 | ||
765 | int file_id = 1; | |
766 | ||
494da23a TL |
767 | ASSERT_OK(GenerateAndAddExternalFile( |
768 | options, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue, file_id++, | |
769 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
770 | // File doesn't overwrite any keys, no seqno needed |
771 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0); | |
772 | ||
494da23a TL |
773 | ASSERT_OK(GenerateAndAddExternalFile( |
774 | options, {10, 11, 12, 13}, ValueType::kTypeValue, file_id++, | |
775 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
776 | // File doesn't overwrite any keys, no seqno needed |
777 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0); | |
778 | ||
779 | ASSERT_OK(GenerateAndAddExternalFile( | |
494da23a TL |
780 | options, {1, 4, 6}, ValueType::kTypeMerge, file_id++, |
781 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
782 | // File overwrites some keys, a seqno will be assigned |
783 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1); | |
784 | ||
494da23a TL |
785 | ASSERT_OK(GenerateAndAddExternalFile( |
786 | options, {11, 15, 19}, ValueType::kTypeDeletion, file_id++, | |
787 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
788 | // File overwrites some keys, a seqno will be assigned |
789 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2); | |
790 | ||
791 | ASSERT_OK(GenerateAndAddExternalFile( | |
494da23a TL |
792 | options, {120, 130}, ValueType::kTypeMerge, file_id++, |
793 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
794 | // File doesn't overwrite any keys, no seqno needed |
795 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2); | |
796 | ||
797 | ASSERT_OK(GenerateAndAddExternalFile( | |
494da23a TL |
798 | options, {1, 130}, ValueType::kTypeDeletion, file_id++, |
799 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
800 | // File overwrites some keys, a seqno will be assigned |
801 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3); | |
802 | ||
494da23a TL |
803 | ASSERT_OK(GenerateAndAddExternalFile( |
804 | options, {120}, {ValueType::kTypeValue}, {{120, 135}}, file_id++, | |
805 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
806 | // File overwrites some keys, a seqno will be assigned |
807 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 4); | |
808 | ||
494da23a TL |
809 | ASSERT_OK(GenerateAndAddExternalFile( |
810 | options, {}, {}, {{110, 120}}, file_id++, write_global_seqno, | |
811 | verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
812 | // The range deletion ends on a key, but it doesn't actually delete |
813 | // this key because the largest key in the range is exclusive. Still, | |
814 | // it counts as an overlap so a new seqno will be assigned. | |
815 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5); | |
816 | ||
494da23a TL |
817 | ASSERT_OK(GenerateAndAddExternalFile( |
818 | options, {}, {}, {{100, 109}}, file_id++, write_global_seqno, | |
819 | verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
820 | // File doesn't overwrite any keys, no seqno needed |
821 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5); | |
822 | ||
823 | // Write some keys through normal write path | |
824 | for (int i = 0; i < 50; i++) { | |
825 | ASSERT_OK(Put(Key(i), "memtable")); | |
826 | true_data[Key(i)] = "memtable"; | |
827 | } | |
828 | SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber(); | |
829 | ||
830 | ASSERT_OK(GenerateAndAddExternalFile( | |
494da23a TL |
831 | options, {60, 61, 62}, ValueType::kTypeValue, file_id++, |
832 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
833 | // File doesn't overwrite any keys, no seqno needed |
834 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno); | |
835 | ||
836 | ASSERT_OK(GenerateAndAddExternalFile( | |
494da23a TL |
837 | options, {40, 41, 42}, ValueType::kTypeMerge, file_id++, |
838 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 | 839 | // File overwrites some keys, a seqno will be assigned |
7c673cae FG |
840 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 1); |
841 | ||
494da23a TL |
842 | ASSERT_OK(GenerateAndAddExternalFile( |
843 | options, {20, 30, 40}, ValueType::kTypeDeletion, file_id++, | |
844 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 | 845 | // File overwrites some keys, a seqno will be assigned |
7c673cae FG |
846 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 2); |
847 | ||
848 | const Snapshot* snapshot = db_->GetSnapshot(); | |
849 | ||
850 | // We will need a seqno for the file regardless if the file overwrite | |
851 | // keys in the DB or not because we have a snapshot | |
11fdf7f2 | 852 | ASSERT_OK(GenerateAndAddExternalFile( |
494da23a TL |
853 | options, {1000, 1002}, ValueType::kTypeMerge, file_id++, |
854 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
7c673cae FG |
855 | // A global seqno will be assigned anyway because of the snapshot |
856 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 3); | |
857 | ||
11fdf7f2 | 858 | ASSERT_OK(GenerateAndAddExternalFile( |
494da23a TL |
859 | options, {2000, 3002}, ValueType::kTypeMerge, file_id++, |
860 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
7c673cae FG |
861 | // A global seqno will be assigned anyway because of the snapshot |
862 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 4); | |
863 | ||
494da23a TL |
864 | ASSERT_OK(GenerateAndAddExternalFile( |
865 | options, {1, 20, 40, 100, 150}, ValueType::kTypeMerge, file_id++, | |
866 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
7c673cae FG |
867 | // A global seqno will be assigned anyway because of the snapshot |
868 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5); | |
869 | ||
870 | db_->ReleaseSnapshot(snapshot); | |
871 | ||
11fdf7f2 | 872 | ASSERT_OK(GenerateAndAddExternalFile( |
494da23a TL |
873 | options, {5000, 5001}, ValueType::kTypeValue, file_id++, |
874 | write_global_seqno, verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
875 | // No snapshot anymore, no need to assign a seqno |
876 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5); | |
877 | ||
878 | size_t kcnt = 0; | |
879 | VerifyDBFromMap(true_data, &kcnt, false); | |
494da23a | 880 | } while (ChangeOptionsForFileIngestionTest()); |
11fdf7f2 TL |
881 | } |
882 | ||
494da23a TL |
883 | TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) { |
884 | bool write_global_seqno = std::get<0>(GetParam()); | |
885 | bool verify_checksums_before_ingest = std::get<1>(GetParam()); | |
11fdf7f2 TL |
886 | do { |
887 | Options options = CurrentOptions(); | |
888 | options.merge_operator.reset(new TestPutOperator()); | |
889 | DestroyAndReopen(options); | |
890 | std::map<std::string, std::string> true_data; | |
891 | ||
892 | int file_id = 1; | |
893 | ||
894 | ASSERT_OK(GenerateAndAddExternalFile( | |
895 | options, {1, 2, 3, 4, 5, 6}, | |
896 | {ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue, | |
897 | ValueType::kTypeMerge, ValueType::kTypeValue, ValueType::kTypeMerge}, | |
494da23a TL |
898 | file_id++, write_global_seqno, verify_checksums_before_ingest, |
899 | &true_data)); | |
11fdf7f2 TL |
900 | // File doesn't overwrite any keys, no seqno needed |
901 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0); | |
902 | ||
903 | ASSERT_OK(GenerateAndAddExternalFile( | |
904 | options, {10, 11, 12, 13}, | |
905 | {ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue, | |
906 | ValueType::kTypeMerge}, | |
494da23a TL |
907 | file_id++, write_global_seqno, verify_checksums_before_ingest, |
908 | &true_data)); | |
11fdf7f2 TL |
909 | // File doesn't overwrite any keys, no seqno needed |
910 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0); | |
911 | ||
912 | ASSERT_OK(GenerateAndAddExternalFile( | |
494da23a TL |
913 | options, {1, 4, 6}, |
914 | {ValueType::kTypeDeletion, ValueType::kTypeValue, | |
915 | ValueType::kTypeMerge}, | |
916 | file_id++, write_global_seqno, verify_checksums_before_ingest, | |
917 | &true_data)); | |
11fdf7f2 TL |
918 | // File overwrites some keys, a seqno will be assigned |
919 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1); | |
920 | ||
921 | ASSERT_OK(GenerateAndAddExternalFile( | |
494da23a TL |
922 | options, {11, 15, 19}, |
923 | {ValueType::kTypeDeletion, ValueType::kTypeMerge, | |
924 | ValueType::kTypeValue}, | |
925 | file_id++, write_global_seqno, verify_checksums_before_ingest, | |
926 | &true_data)); | |
11fdf7f2 TL |
927 | // File overwrites some keys, a seqno will be assigned |
928 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2); | |
929 | ||
930 | ASSERT_OK(GenerateAndAddExternalFile( | |
931 | options, {120, 130}, {ValueType::kTypeValue, ValueType::kTypeMerge}, | |
494da23a TL |
932 | file_id++, write_global_seqno, verify_checksums_before_ingest, |
933 | &true_data)); | |
11fdf7f2 TL |
934 | // File doesn't overwrite any keys, no seqno needed |
935 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2); | |
936 | ||
937 | ASSERT_OK(GenerateAndAddExternalFile( | |
938 | options, {1, 130}, {ValueType::kTypeMerge, ValueType::kTypeDeletion}, | |
494da23a TL |
939 | file_id++, write_global_seqno, verify_checksums_before_ingest, |
940 | &true_data)); | |
11fdf7f2 TL |
941 | // File overwrites some keys, a seqno will be assigned |
942 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3); | |
943 | ||
944 | ASSERT_OK(GenerateAndAddExternalFile( | |
945 | options, {150, 151, 152}, | |
946 | {ValueType::kTypeValue, ValueType::kTypeMerge, | |
947 | ValueType::kTypeDeletion}, | |
494da23a TL |
948 | {{150, 160}, {180, 190}}, file_id++, write_global_seqno, |
949 | verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
950 | // File doesn't overwrite any keys, no seqno needed |
951 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3); | |
952 | ||
953 | ASSERT_OK(GenerateAndAddExternalFile( | |
954 | options, {150, 151, 152}, | |
955 | {ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue}, | |
494da23a TL |
956 | {{200, 250}}, file_id++, write_global_seqno, |
957 | verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
958 | // File overwrites some keys, a seqno will be assigned |
959 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 4); | |
960 | ||
961 | ASSERT_OK(GenerateAndAddExternalFile( | |
962 | options, {300, 301, 302}, | |
963 | {ValueType::kTypeValue, ValueType::kTypeMerge, | |
964 | ValueType::kTypeDeletion}, | |
494da23a TL |
965 | {{1, 2}, {152, 154}}, file_id++, write_global_seqno, |
966 | verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
967 | // File overwrites some keys, a seqno will be assigned |
968 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5); | |
969 | ||
970 | // Write some keys through normal write path | |
971 | for (int i = 0; i < 50; i++) { | |
972 | ASSERT_OK(Put(Key(i), "memtable")); | |
973 | true_data[Key(i)] = "memtable"; | |
974 | } | |
975 | SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber(); | |
976 | ||
977 | ASSERT_OK(GenerateAndAddExternalFile( | |
978 | options, {60, 61, 62}, | |
979 | {ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue}, | |
494da23a TL |
980 | file_id++, write_global_seqno, verify_checksums_before_ingest, |
981 | &true_data)); | |
11fdf7f2 TL |
982 | // File doesn't overwrite any keys, no seqno needed |
983 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno); | |
984 | ||
985 | ASSERT_OK(GenerateAndAddExternalFile( | |
986 | options, {40, 41, 42}, | |
987 | {ValueType::kTypeValue, ValueType::kTypeDeletion, | |
988 | ValueType::kTypeDeletion}, | |
494da23a TL |
989 | file_id++, write_global_seqno, verify_checksums_before_ingest, |
990 | &true_data)); | |
11fdf7f2 TL |
991 | // File overwrites some keys, a seqno will be assigned |
992 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 1); | |
993 | ||
994 | ASSERT_OK(GenerateAndAddExternalFile( | |
995 | options, {20, 30, 40}, | |
996 | {ValueType::kTypeDeletion, ValueType::kTypeDeletion, | |
997 | ValueType::kTypeDeletion}, | |
494da23a TL |
998 | file_id++, write_global_seqno, verify_checksums_before_ingest, |
999 | &true_data)); | |
11fdf7f2 TL |
1000 | // File overwrites some keys, a seqno will be assigned |
1001 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 2); | |
1002 | ||
1003 | const Snapshot* snapshot = db_->GetSnapshot(); | |
1004 | ||
1005 | // We will need a seqno for the file regardless if the file overwrite | |
1006 | // keys in the DB or not because we have a snapshot | |
1007 | ASSERT_OK(GenerateAndAddExternalFile( | |
1008 | options, {1000, 1002}, {ValueType::kTypeValue, ValueType::kTypeMerge}, | |
494da23a TL |
1009 | file_id++, write_global_seqno, verify_checksums_before_ingest, |
1010 | &true_data)); | |
11fdf7f2 TL |
1011 | // A global seqno will be assigned anyway because of the snapshot |
1012 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 3); | |
1013 | ||
1014 | ASSERT_OK(GenerateAndAddExternalFile( | |
1015 | options, {2000, 3002}, {ValueType::kTypeValue, ValueType::kTypeMerge}, | |
494da23a TL |
1016 | file_id++, write_global_seqno, verify_checksums_before_ingest, |
1017 | &true_data)); | |
11fdf7f2 TL |
1018 | // A global seqno will be assigned anyway because of the snapshot |
1019 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 4); | |
1020 | ||
1021 | ASSERT_OK(GenerateAndAddExternalFile( | |
1022 | options, {1, 20, 40, 100, 150}, | |
1023 | {ValueType::kTypeDeletion, ValueType::kTypeDeletion, | |
1024 | ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeMerge}, | |
494da23a TL |
1025 | file_id++, write_global_seqno, verify_checksums_before_ingest, |
1026 | &true_data)); | |
11fdf7f2 TL |
1027 | // A global seqno will be assigned anyway because of the snapshot |
1028 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5); | |
1029 | ||
1030 | db_->ReleaseSnapshot(snapshot); | |
1031 | ||
1032 | ASSERT_OK(GenerateAndAddExternalFile( | |
1033 | options, {5000, 5001}, {ValueType::kTypeValue, ValueType::kTypeMerge}, | |
494da23a TL |
1034 | file_id++, write_global_seqno, verify_checksums_before_ingest, |
1035 | &true_data)); | |
7c673cae FG |
1036 | // No snapshot anymore, no need to assign a seqno |
1037 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5); | |
1038 | ||
1039 | size_t kcnt = 0; | |
1040 | VerifyDBFromMap(true_data, &kcnt, false); | |
494da23a | 1041 | } while (ChangeOptionsForFileIngestionTest()); |
7c673cae FG |
1042 | } |
1043 | ||
1044 | TEST_F(ExternalSSTFileBasicTest, FadviseTrigger) { | |
1045 | Options options = CurrentOptions(); | |
1046 | const int kNumKeys = 10000; | |
1047 | ||
1048 | size_t total_fadvised_bytes = 0; | |
f67539c2 | 1049 | ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( |
11fdf7f2 | 1050 | "SstFileWriter::Rep::InvalidatePageCache", [&](void* arg) { |
7c673cae FG |
1051 | size_t fadvise_size = *(reinterpret_cast<size_t*>(arg)); |
1052 | total_fadvised_bytes += fadvise_size; | |
1053 | }); | |
f67539c2 | 1054 | ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); |
7c673cae FG |
1055 | |
1056 | std::unique_ptr<SstFileWriter> sst_file_writer; | |
1057 | ||
1058 | std::string sst_file_path = sst_files_dir_ + "file_fadvise_disable.sst"; | |
1059 | sst_file_writer.reset( | |
1060 | new SstFileWriter(EnvOptions(), options, nullptr, false)); | |
1061 | ASSERT_OK(sst_file_writer->Open(sst_file_path)); | |
1062 | for (int i = 0; i < kNumKeys; i++) { | |
11fdf7f2 | 1063 | ASSERT_OK(sst_file_writer->Put(Key(i), Key(i))); |
7c673cae FG |
1064 | } |
1065 | ASSERT_OK(sst_file_writer->Finish()); | |
1066 | // fadvise disabled | |
1067 | ASSERT_EQ(total_fadvised_bytes, 0); | |
1068 | ||
7c673cae FG |
1069 | sst_file_path = sst_files_dir_ + "file_fadvise_enable.sst"; |
1070 | sst_file_writer.reset( | |
1071 | new SstFileWriter(EnvOptions(), options, nullptr, true)); | |
1072 | ASSERT_OK(sst_file_writer->Open(sst_file_path)); | |
1073 | for (int i = 0; i < kNumKeys; i++) { | |
11fdf7f2 | 1074 | ASSERT_OK(sst_file_writer->Put(Key(i), Key(i))); |
7c673cae FG |
1075 | } |
1076 | ASSERT_OK(sst_file_writer->Finish()); | |
1077 | // fadvise enabled | |
1078 | ASSERT_EQ(total_fadvised_bytes, sst_file_writer->FileSize()); | |
1079 | ASSERT_GT(total_fadvised_bytes, 0); | |
1080 | ||
f67539c2 TL |
1081 | ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); |
1082 | } | |
1083 | ||
1084 | TEST_F(ExternalSSTFileBasicTest, SyncFailure) { | |
1085 | Options options; | |
1086 | options.create_if_missing = true; | |
1087 | options.env = fault_injection_test_env_.get(); | |
1088 | ||
1089 | std::vector<std::pair<std::string, std::string>> test_cases = { | |
1090 | {"ExternalSstFileIngestionJob::BeforeSyncIngestedFile", | |
1091 | "ExternalSstFileIngestionJob::AfterSyncIngestedFile"}, | |
1092 | {"ExternalSstFileIngestionJob::BeforeSyncDir", | |
1093 | "ExternalSstFileIngestionJob::AfterSyncDir"}, | |
1094 | {"ExternalSstFileIngestionJob::BeforeSyncGlobalSeqno", | |
1095 | "ExternalSstFileIngestionJob::AfterSyncGlobalSeqno"}}; | |
1096 | ||
1097 | for (size_t i = 0; i < test_cases.size(); i++) { | |
1098 | SyncPoint::GetInstance()->SetCallBack(test_cases[i].first, [&](void*) { | |
1099 | fault_injection_test_env_->SetFilesystemActive(false); | |
1100 | }); | |
1101 | SyncPoint::GetInstance()->SetCallBack(test_cases[i].second, [&](void*) { | |
1102 | fault_injection_test_env_->SetFilesystemActive(true); | |
1103 | }); | |
1104 | SyncPoint::GetInstance()->EnableProcessing(); | |
1105 | ||
1106 | DestroyAndReopen(options); | |
1107 | if (i == 2) { | |
1108 | ASSERT_OK(Put("foo", "v1")); | |
1109 | } | |
1110 | ||
1111 | Options sst_file_writer_options; | |
20effc67 | 1112 | sst_file_writer_options.env = env_; |
f67539c2 TL |
1113 | std::unique_ptr<SstFileWriter> sst_file_writer( |
1114 | new SstFileWriter(EnvOptions(), sst_file_writer_options)); | |
1115 | std::string file_name = | |
1116 | sst_files_dir_ + "sync_failure_test_" + ToString(i) + ".sst"; | |
1117 | ASSERT_OK(sst_file_writer->Open(file_name)); | |
1118 | ASSERT_OK(sst_file_writer->Put("bar", "v2")); | |
1119 | ASSERT_OK(sst_file_writer->Finish()); | |
1120 | ||
1121 | IngestExternalFileOptions ingest_opt; | |
1122 | if (i == 0) { | |
1123 | ingest_opt.move_files = true; | |
1124 | } | |
1125 | const Snapshot* snapshot = db_->GetSnapshot(); | |
1126 | if (i == 2) { | |
1127 | ingest_opt.write_global_seqno = true; | |
1128 | } | |
1129 | ASSERT_FALSE(db_->IngestExternalFile({file_name}, ingest_opt).ok()); | |
1130 | db_->ReleaseSnapshot(snapshot); | |
1131 | ||
1132 | SyncPoint::GetInstance()->DisableProcessing(); | |
1133 | SyncPoint::GetInstance()->ClearAllCallBacks(); | |
1134 | Destroy(options); | |
1135 | } | |
1136 | } | |
1137 | ||
1138 | TEST_F(ExternalSSTFileBasicTest, VerifyChecksumReadahead) { | |
1139 | Options options; | |
1140 | options.create_if_missing = true; | |
20effc67 | 1141 | SpecialEnv senv(env_); |
f67539c2 TL |
1142 | options.env = &senv; |
1143 | DestroyAndReopen(options); | |
1144 | ||
1145 | Options sst_file_writer_options; | |
20effc67 | 1146 | sst_file_writer_options.env = env_; |
f67539c2 TL |
1147 | std::unique_ptr<SstFileWriter> sst_file_writer( |
1148 | new SstFileWriter(EnvOptions(), sst_file_writer_options)); | |
1149 | std::string file_name = sst_files_dir_ + "verify_checksum_readahead_test.sst"; | |
1150 | ASSERT_OK(sst_file_writer->Open(file_name)); | |
1151 | Random rnd(301); | |
20effc67 | 1152 | std::string value = rnd.RandomString(4000); |
f67539c2 TL |
1153 | for (int i = 0; i < 5000; i++) { |
1154 | ASSERT_OK(sst_file_writer->Put(DBTestBase::Key(i), value)); | |
1155 | } | |
1156 | ASSERT_OK(sst_file_writer->Finish()); | |
1157 | ||
1158 | // Ingest it once without verifying checksums to see the baseline | |
1159 | // preads. | |
1160 | IngestExternalFileOptions ingest_opt; | |
1161 | ingest_opt.move_files = false; | |
1162 | senv.count_random_reads_ = true; | |
1163 | senv.random_read_bytes_counter_ = 0; | |
1164 | ASSERT_OK(db_->IngestExternalFile({file_name}, ingest_opt)); | |
1165 | ||
1166 | auto base_num_reads = senv.random_read_counter_.Read(); | |
1167 | // Make sure the counter is enabled. | |
1168 | ASSERT_GT(base_num_reads, 0); | |
1169 | ||
1170 | // Ingest again and observe the reads made for for readahead. | |
1171 | ingest_opt.move_files = false; | |
1172 | ingest_opt.verify_checksums_before_ingest = true; | |
1173 | ingest_opt.verify_checksums_readahead_size = size_t{2 * 1024 * 1024}; | |
1174 | ||
1175 | senv.count_random_reads_ = true; | |
1176 | senv.random_read_bytes_counter_ = 0; | |
1177 | ASSERT_OK(db_->IngestExternalFile({file_name}, ingest_opt)); | |
1178 | ||
1179 | // Make sure the counter is enabled. | |
1180 | ASSERT_GT(senv.random_read_counter_.Read() - base_num_reads, 0); | |
1181 | ||
1182 | // The SST file is about 20MB. Readahead size is 2MB. | |
1183 | // Give a conservative 15 reads for metadata blocks, the number | |
1184 | // of random reads should be within 20 MB / 2MB + 15 = 25. | |
1185 | ASSERT_LE(senv.random_read_counter_.Read() - base_num_reads, 40); | |
1186 | ||
1187 | Destroy(options); | |
7c673cae FG |
1188 | } |
1189 | ||
20effc67 TL |
1190 | TEST_F(ExternalSSTFileBasicTest, IngestRangeDeletionTombstoneWithGlobalSeqno) { |
1191 | for (int i = 5; i < 25; i++) { | |
1192 | ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(i), | |
1193 | Key(i) + "_val")); | |
1194 | } | |
1195 | ||
1196 | Options options = CurrentOptions(); | |
1197 | options.disable_auto_compactions = true; | |
1198 | Reopen(options); | |
1199 | SstFileWriter sst_file_writer(EnvOptions(), options); | |
1200 | ||
1201 | // file.sst (delete 0 => 30) | |
1202 | std::string file = sst_files_dir_ + "file.sst"; | |
1203 | ASSERT_OK(sst_file_writer.Open(file)); | |
1204 | ASSERT_OK(sst_file_writer.DeleteRange(Key(0), Key(30))); | |
1205 | ExternalSstFileInfo file_info; | |
1206 | ASSERT_OK(sst_file_writer.Finish(&file_info)); | |
1207 | ASSERT_EQ(file_info.file_path, file); | |
1208 | ASSERT_EQ(file_info.num_entries, 0); | |
1209 | ASSERT_EQ(file_info.smallest_key, ""); | |
1210 | ASSERT_EQ(file_info.largest_key, ""); | |
1211 | ASSERT_EQ(file_info.num_range_del_entries, 1); | |
1212 | ASSERT_EQ(file_info.smallest_range_del_key, Key(0)); | |
1213 | ASSERT_EQ(file_info.largest_range_del_key, Key(30)); | |
1214 | ||
1215 | IngestExternalFileOptions ifo; | |
1216 | ifo.move_files = true; | |
1217 | ifo.snapshot_consistency = true; | |
1218 | ifo.allow_global_seqno = true; | |
1219 | ifo.write_global_seqno = true; | |
1220 | ifo.verify_checksums_before_ingest = false; | |
1221 | ASSERT_OK(db_->IngestExternalFile({file}, ifo)); | |
1222 | ||
1223 | for (int i = 5; i < 25; i++) { | |
1224 | std::string res; | |
1225 | ASSERT_TRUE(db_->Get(ReadOptions(), Key(i), &res).IsNotFound()); | |
1226 | } | |
1227 | } | |
1228 | ||
494da23a | 1229 | TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) { |
11fdf7f2 TL |
1230 | int kNumLevels = 7; |
1231 | Options options = CurrentOptions(); | |
1232 | options.disable_auto_compactions = true; | |
1233 | options.num_levels = kNumLevels; | |
1234 | Reopen(options); | |
1235 | ||
1236 | std::map<std::string, std::string> true_data; | |
1237 | int file_id = 1; | |
1238 | // prevent range deletions from being dropped due to becoming obsolete. | |
1239 | const Snapshot* snapshot = db_->GetSnapshot(); | |
1240 | ||
1241 | // range del [0, 50) in L6 file, [50, 100) in L0 file, [100, 150) in memtable | |
1242 | for (int i = 0; i < 3; i++) { | |
1243 | if (i != 0) { | |
1244 | db_->Flush(FlushOptions()); | |
1245 | if (i == 1) { | |
1246 | MoveFilesToLevel(kNumLevels - 1); | |
1247 | } | |
1248 | } | |
1249 | ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), | |
1250 | Key(50 * i), Key(50 * (i + 1)))); | |
1251 | } | |
1252 | ASSERT_EQ(1, NumTableFilesAtLevel(0)); | |
1253 | ASSERT_EQ(0, NumTableFilesAtLevel(kNumLevels - 2)); | |
1254 | ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 1)); | |
1255 | ||
494da23a TL |
1256 | bool write_global_seqno = std::get<0>(GetParam()); |
1257 | bool verify_checksums_before_ingest = std::get<1>(GetParam()); | |
11fdf7f2 TL |
1258 | // overlaps with L0 file but not memtable, so flush is skipped and file is |
1259 | // ingested into L0 | |
1260 | SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber(); | |
1261 | ASSERT_OK(GenerateAndAddExternalFile( | |
1262 | options, {60, 90}, {ValueType::kTypeValue, ValueType::kTypeValue}, | |
494da23a TL |
1263 | {{65, 70}, {70, 85}}, file_id++, write_global_seqno, |
1264 | verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
1265 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno); |
1266 | ASSERT_EQ(2, NumTableFilesAtLevel(0)); | |
1267 | ASSERT_EQ(0, NumTableFilesAtLevel(kNumLevels - 2)); | |
1268 | ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 1)); | |
1269 | ||
1270 | // overlaps with L6 file but not memtable or L0 file, so flush is skipped and | |
1271 | // file is ingested into L5 | |
1272 | ASSERT_OK(GenerateAndAddExternalFile( | |
1273 | options, {10, 40}, {ValueType::kTypeValue, ValueType::kTypeValue}, | |
494da23a TL |
1274 | file_id++, write_global_seqno, verify_checksums_before_ingest, |
1275 | &true_data)); | |
11fdf7f2 TL |
1276 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno); |
1277 | ASSERT_EQ(2, NumTableFilesAtLevel(0)); | |
1278 | ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2)); | |
1279 | ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 1)); | |
1280 | ||
1281 | // overlaps with L5 file but not memtable or L0 file, so flush is skipped and | |
1282 | // file is ingested into L4 | |
494da23a TL |
1283 | ASSERT_OK(GenerateAndAddExternalFile( |
1284 | options, {}, {}, {{5, 15}}, file_id++, write_global_seqno, | |
1285 | verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
1286 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno); |
1287 | ASSERT_EQ(2, NumTableFilesAtLevel(0)); | |
1288 | ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2)); | |
1289 | ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 2)); | |
1290 | ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 1)); | |
1291 | ||
1292 | // ingested file overlaps with memtable, so flush is triggered before the file | |
1293 | // is ingested such that the ingested data is considered newest. So L0 file | |
1294 | // count increases by two. | |
1295 | ASSERT_OK(GenerateAndAddExternalFile( | |
1296 | options, {100, 140}, {ValueType::kTypeValue, ValueType::kTypeValue}, | |
494da23a TL |
1297 | file_id++, write_global_seqno, verify_checksums_before_ingest, |
1298 | &true_data)); | |
11fdf7f2 TL |
1299 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno); |
1300 | ASSERT_EQ(4, NumTableFilesAtLevel(0)); | |
1301 | ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2)); | |
1302 | ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 1)); | |
1303 | ||
1304 | // snapshot unneeded now that all range deletions are persisted | |
1305 | db_->ReleaseSnapshot(snapshot); | |
1306 | ||
1307 | // overlaps with nothing, so places at bottom level and skips incrementing | |
1308 | // seqnum. | |
1309 | ASSERT_OK(GenerateAndAddExternalFile( | |
1310 | options, {151, 175}, {ValueType::kTypeValue, ValueType::kTypeValue}, | |
494da23a TL |
1311 | {{160, 200}}, file_id++, write_global_seqno, |
1312 | verify_checksums_before_ingest, &true_data)); | |
11fdf7f2 TL |
1313 | ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno); |
1314 | ASSERT_EQ(4, NumTableFilesAtLevel(0)); | |
1315 | ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2)); | |
1316 | ASSERT_EQ(2, NumTableFilesAtLevel(options.num_levels - 1)); | |
1317 | } | |
1318 | ||
f67539c2 TL |
1319 | TEST_F(ExternalSSTFileBasicTest, AdjacentRangeDeletionTombstones) { |
1320 | Options options = CurrentOptions(); | |
1321 | SstFileWriter sst_file_writer(EnvOptions(), options); | |
1322 | ||
1323 | // file8.sst (delete 300 => 400) | |
1324 | std::string file8 = sst_files_dir_ + "file8.sst"; | |
1325 | ASSERT_OK(sst_file_writer.Open(file8)); | |
1326 | ASSERT_OK(sst_file_writer.DeleteRange(Key(300), Key(400))); | |
1327 | ExternalSstFileInfo file8_info; | |
1328 | Status s = sst_file_writer.Finish(&file8_info); | |
1329 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
1330 | ASSERT_EQ(file8_info.file_path, file8); | |
1331 | ASSERT_EQ(file8_info.num_entries, 0); | |
1332 | ASSERT_EQ(file8_info.smallest_key, ""); | |
1333 | ASSERT_EQ(file8_info.largest_key, ""); | |
1334 | ASSERT_EQ(file8_info.num_range_del_entries, 1); | |
1335 | ASSERT_EQ(file8_info.smallest_range_del_key, Key(300)); | |
1336 | ASSERT_EQ(file8_info.largest_range_del_key, Key(400)); | |
1337 | ||
1338 | // file9.sst (delete 400 => 500) | |
1339 | std::string file9 = sst_files_dir_ + "file9.sst"; | |
1340 | ASSERT_OK(sst_file_writer.Open(file9)); | |
1341 | ASSERT_OK(sst_file_writer.DeleteRange(Key(400), Key(500))); | |
1342 | ExternalSstFileInfo file9_info; | |
1343 | s = sst_file_writer.Finish(&file9_info); | |
1344 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
1345 | ASSERT_EQ(file9_info.file_path, file9); | |
1346 | ASSERT_EQ(file9_info.num_entries, 0); | |
1347 | ASSERT_EQ(file9_info.smallest_key, ""); | |
1348 | ASSERT_EQ(file9_info.largest_key, ""); | |
1349 | ASSERT_EQ(file9_info.num_range_del_entries, 1); | |
1350 | ASSERT_EQ(file9_info.smallest_range_del_key, Key(400)); | |
1351 | ASSERT_EQ(file9_info.largest_range_del_key, Key(500)); | |
1352 | ||
1353 | // Range deletion tombstones are exclusive on their end key, so these SSTs | |
1354 | // should not be considered as overlapping. | |
1355 | s = DeprecatedAddFile({file8, file9}); | |
1356 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
1357 | ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); | |
1358 | DestroyAndRecreateExternalSSTFilesDir(); | |
1359 | } | |
1360 | ||
494da23a TL |
1361 | TEST_P(ExternalSSTFileBasicTest, IngestFileWithBadBlockChecksum) { |
1362 | bool change_checksum_called = false; | |
1363 | const auto& change_checksum = [&](void* arg) { | |
1364 | if (!change_checksum_called) { | |
1365 | char* buf = reinterpret_cast<char*>(arg); | |
1366 | assert(nullptr != buf); | |
1367 | buf[0] ^= 0x1; | |
1368 | change_checksum_called = true; | |
1369 | } | |
1370 | }; | |
1371 | SyncPoint::GetInstance()->DisableProcessing(); | |
1372 | SyncPoint::GetInstance()->ClearAllCallBacks(); | |
1373 | SyncPoint::GetInstance()->SetCallBack( | |
1374 | "BlockBasedTableBuilder::WriteRawBlock:TamperWithChecksum", | |
1375 | change_checksum); | |
1376 | SyncPoint::GetInstance()->EnableProcessing(); | |
1377 | int file_id = 0; | |
1378 | bool write_global_seqno = std::get<0>(GetParam()); | |
1379 | bool verify_checksums_before_ingest = std::get<1>(GetParam()); | |
1380 | do { | |
1381 | Options options = CurrentOptions(); | |
1382 | DestroyAndReopen(options); | |
1383 | std::map<std::string, std::string> true_data; | |
1384 | Status s = GenerateAndAddExternalFile( | |
1385 | options, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue, file_id++, | |
1386 | write_global_seqno, verify_checksums_before_ingest, &true_data); | |
1387 | if (verify_checksums_before_ingest) { | |
1388 | ASSERT_NOK(s); | |
1389 | } else { | |
1390 | ASSERT_OK(s); | |
1391 | } | |
1392 | change_checksum_called = false; | |
1393 | } while (ChangeOptionsForFileIngestionTest()); | |
1394 | } | |
1395 | ||
1396 | TEST_P(ExternalSSTFileBasicTest, IngestFileWithFirstByteTampered) { | |
1397 | SyncPoint::GetInstance()->DisableProcessing(); | |
1398 | int file_id = 0; | |
1399 | EnvOptions env_options; | |
1400 | do { | |
1401 | Options options = CurrentOptions(); | |
1402 | std::string file_path = sst_files_dir_ + ToString(file_id++); | |
1403 | SstFileWriter sst_file_writer(env_options, options); | |
1404 | Status s = sst_file_writer.Open(file_path); | |
1405 | ASSERT_OK(s); | |
1406 | for (int i = 0; i != 100; ++i) { | |
1407 | std::string key = Key(i); | |
1408 | std::string value = Key(i) + ToString(0); | |
1409 | ASSERT_OK(sst_file_writer.Put(key, value)); | |
1410 | } | |
1411 | ASSERT_OK(sst_file_writer.Finish()); | |
1412 | { | |
1413 | // Get file size | |
1414 | uint64_t file_size = 0; | |
1415 | ASSERT_OK(env_->GetFileSize(file_path, &file_size)); | |
1416 | ASSERT_GT(file_size, 8); | |
1417 | std::unique_ptr<RandomRWFile> rwfile; | |
1418 | ASSERT_OK(env_->NewRandomRWFile(file_path, &rwfile, EnvOptions())); | |
1419 | // Manually corrupt the file | |
1420 | // We deterministically corrupt the first byte because we currently | |
1421 | // cannot choose a random offset. The reason for this limitation is that | |
1422 | // we do not checksum property block at present. | |
1423 | const uint64_t offset = 0; | |
1424 | char scratch[8] = {0}; | |
1425 | Slice buf; | |
1426 | ASSERT_OK(rwfile->Read(offset, sizeof(scratch), &buf, scratch)); | |
1427 | scratch[0] ^= 0xff; // flip one bit | |
1428 | ASSERT_OK(rwfile->Write(offset, buf)); | |
1429 | } | |
1430 | // Ingest file. | |
1431 | IngestExternalFileOptions ifo; | |
1432 | ifo.write_global_seqno = std::get<0>(GetParam()); | |
1433 | ifo.verify_checksums_before_ingest = std::get<1>(GetParam()); | |
1434 | s = db_->IngestExternalFile({file_path}, ifo); | |
1435 | if (ifo.verify_checksums_before_ingest) { | |
1436 | ASSERT_NOK(s); | |
1437 | } else { | |
1438 | ASSERT_OK(s); | |
1439 | } | |
1440 | } while (ChangeOptionsForFileIngestionTest()); | |
1441 | } | |
1442 | ||
1443 | TEST_P(ExternalSSTFileBasicTest, IngestExternalFileWithCorruptedPropsBlock) { | |
1444 | bool verify_checksums_before_ingest = std::get<1>(GetParam()); | |
1445 | if (!verify_checksums_before_ingest) { | |
1446 | return; | |
1447 | } | |
1448 | uint64_t props_block_offset = 0; | |
1449 | size_t props_block_size = 0; | |
1450 | const auto& get_props_block_offset = [&](void* arg) { | |
1451 | props_block_offset = *reinterpret_cast<uint64_t*>(arg); | |
1452 | }; | |
1453 | const auto& get_props_block_size = [&](void* arg) { | |
1454 | props_block_size = *reinterpret_cast<uint64_t*>(arg); | |
1455 | }; | |
1456 | SyncPoint::GetInstance()->DisableProcessing(); | |
1457 | SyncPoint::GetInstance()->ClearAllCallBacks(); | |
1458 | SyncPoint::GetInstance()->SetCallBack( | |
1459 | "BlockBasedTableBuilder::WritePropertiesBlock:GetPropsBlockOffset", | |
1460 | get_props_block_offset); | |
1461 | SyncPoint::GetInstance()->SetCallBack( | |
1462 | "BlockBasedTableBuilder::WritePropertiesBlock:GetPropsBlockSize", | |
1463 | get_props_block_size); | |
1464 | SyncPoint::GetInstance()->EnableProcessing(); | |
1465 | int file_id = 0; | |
1466 | Random64 rand(time(nullptr)); | |
1467 | do { | |
1468 | std::string file_path = sst_files_dir_ + ToString(file_id++); | |
1469 | Options options = CurrentOptions(); | |
1470 | SstFileWriter sst_file_writer(EnvOptions(), options); | |
1471 | Status s = sst_file_writer.Open(file_path); | |
1472 | ASSERT_OK(s); | |
1473 | for (int i = 0; i != 100; ++i) { | |
1474 | std::string key = Key(i); | |
1475 | std::string value = Key(i) + ToString(0); | |
1476 | ASSERT_OK(sst_file_writer.Put(key, value)); | |
1477 | } | |
1478 | ASSERT_OK(sst_file_writer.Finish()); | |
1479 | ||
1480 | { | |
1481 | std::unique_ptr<RandomRWFile> rwfile; | |
1482 | ASSERT_OK(env_->NewRandomRWFile(file_path, &rwfile, EnvOptions())); | |
1483 | // Manually corrupt the file | |
1484 | ASSERT_GT(props_block_size, 8); | |
1485 | uint64_t offset = | |
1486 | props_block_offset + rand.Next() % (props_block_size - 8); | |
1487 | char scratch[8] = {0}; | |
1488 | Slice buf; | |
1489 | ASSERT_OK(rwfile->Read(offset, sizeof(scratch), &buf, scratch)); | |
1490 | scratch[0] ^= 0xff; // flip one bit | |
1491 | ASSERT_OK(rwfile->Write(offset, buf)); | |
1492 | } | |
1493 | ||
1494 | // Ingest file. | |
1495 | IngestExternalFileOptions ifo; | |
1496 | ifo.write_global_seqno = std::get<0>(GetParam()); | |
1497 | ifo.verify_checksums_before_ingest = true; | |
1498 | s = db_->IngestExternalFile({file_path}, ifo); | |
1499 | ASSERT_NOK(s); | |
1500 | } while (ChangeOptionsForFileIngestionTest()); | |
1501 | } | |
1502 | ||
f67539c2 TL |
1503 | TEST_F(ExternalSSTFileBasicTest, OverlappingFiles) { |
1504 | Options options = CurrentOptions(); | |
1505 | ||
1506 | std::vector<std::string> files; | |
1507 | { | |
1508 | SstFileWriter sst_file_writer(EnvOptions(), options); | |
1509 | std::string file1 = sst_files_dir_ + "file1.sst"; | |
1510 | ASSERT_OK(sst_file_writer.Open(file1)); | |
1511 | ASSERT_OK(sst_file_writer.Put("a", "z")); | |
1512 | ASSERT_OK(sst_file_writer.Put("i", "m")); | |
1513 | ExternalSstFileInfo file1_info; | |
1514 | ASSERT_OK(sst_file_writer.Finish(&file1_info)); | |
1515 | files.push_back(std::move(file1)); | |
1516 | } | |
1517 | { | |
1518 | SstFileWriter sst_file_writer(EnvOptions(), options); | |
1519 | std::string file2 = sst_files_dir_ + "file2.sst"; | |
1520 | ASSERT_OK(sst_file_writer.Open(file2)); | |
1521 | ASSERT_OK(sst_file_writer.Put("i", "k")); | |
1522 | ExternalSstFileInfo file2_info; | |
1523 | ASSERT_OK(sst_file_writer.Finish(&file2_info)); | |
1524 | files.push_back(std::move(file2)); | |
1525 | } | |
1526 | ||
1527 | IngestExternalFileOptions ifo; | |
1528 | ASSERT_OK(db_->IngestExternalFile(files, ifo)); | |
1529 | ASSERT_EQ(Get("a"), "z"); | |
1530 | ASSERT_EQ(Get("i"), "k"); | |
1531 | ||
1532 | int total_keys = 0; | |
1533 | Iterator* iter = db_->NewIterator(ReadOptions()); | |
1534 | for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { | |
1535 | ASSERT_OK(iter->status()); | |
1536 | total_keys++; | |
1537 | } | |
1538 | delete iter; | |
1539 | ASSERT_EQ(total_keys, 2); | |
1540 | ||
1541 | ASSERT_EQ(2, NumTableFilesAtLevel(0)); | |
1542 | } | |
1543 | ||
494da23a TL |
1544 | INSTANTIATE_TEST_CASE_P(ExternalSSTFileBasicTest, ExternalSSTFileBasicTest, |
1545 | testing::Values(std::make_tuple(true, true), | |
1546 | std::make_tuple(true, false), | |
1547 | std::make_tuple(false, true), | |
1548 | std::make_tuple(false, false))); | |
1549 | ||
7c673cae FG |
1550 | #endif // ROCKSDB_LITE |
1551 | ||
f67539c2 | 1552 | } // namespace ROCKSDB_NAMESPACE |
7c673cae FG |
1553 | |
1554 | int main(int argc, char** argv) { | |
f67539c2 | 1555 | ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); |
7c673cae FG |
1556 | ::testing::InitGoogleTest(&argc, argv); |
1557 | return RUN_ALL_TESTS(); | |
1558 | } |