]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/db/external_sst_file_basic_test.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / rocksdb / db / external_sst_file_basic_test.cc
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5
6#include <functional>
7
8#include "db/db_test_util.h"
20effc67 9#include "db/version_edit.h"
7c673cae
FG
10#include "port/port.h"
11#include "port/stack_trace.h"
12#include "rocksdb/sst_file_writer.h"
f67539c2 13#include "test_util/testutil.h"
20effc67
TL
14#include "util/random.h"
15#include "utilities/fault_injection_env.h"
7c673cae 16
f67539c2 17namespace ROCKSDB_NAMESPACE {
7c673cae
FG
18
19#ifndef ROCKSDB_LITE
494da23a
TL
20class ExternalSSTFileBasicTest
21 : public DBTestBase,
22 public ::testing::WithParamInterface<std::tuple<bool, bool>> {
7c673cae 23 public:
20effc67
TL
24 ExternalSSTFileBasicTest()
25 : DBTestBase("/external_sst_file_basic_test", /*env_do_fsync=*/true) {
7c673cae 26 sst_files_dir_ = dbname_ + "/sst_files/";
20effc67 27 fault_injection_test_env_.reset(new FaultInjectionTestEnv(env_));
7c673cae
FG
28 DestroyAndRecreateExternalSSTFilesDir();
29 }
30
31 void DestroyAndRecreateExternalSSTFilesDir() {
20effc67 32 DestroyDir(env_, sst_files_dir_);
7c673cae
FG
33 env_->CreateDir(sst_files_dir_);
34 }
35
36 Status DeprecatedAddFile(const std::vector<std::string>& files,
37 bool move_files = false,
38 bool skip_snapshot_check = false) {
39 IngestExternalFileOptions opts;
40 opts.move_files = move_files;
41 opts.snapshot_consistency = !skip_snapshot_check;
42 opts.allow_global_seqno = false;
43 opts.allow_blocking_flush = false;
44 return db_->IngestExternalFile(files, opts);
45 }
46
20effc67
TL
47 Status AddFileWithFileChecksum(
48 const std::vector<std::string>& files,
49 const std::vector<std::string>& files_checksums,
50 const std::vector<std::string>& files_checksum_func_names,
51 bool verify_file_checksum = true, bool move_files = false,
52 bool skip_snapshot_check = false, bool write_global_seqno = true) {
53 IngestExternalFileOptions opts;
54 opts.move_files = move_files;
55 opts.snapshot_consistency = !skip_snapshot_check;
56 opts.allow_global_seqno = false;
57 opts.allow_blocking_flush = false;
58 opts.write_global_seqno = write_global_seqno;
59 opts.verify_file_checksum = verify_file_checksum;
60
61 IngestExternalFileArg arg;
62 arg.column_family = db_->DefaultColumnFamily();
63 arg.external_files = files;
64 arg.options = opts;
65 arg.files_checksums = files_checksums;
66 arg.files_checksum_func_names = files_checksum_func_names;
67 return db_->IngestExternalFiles({arg});
68 }
69
7c673cae 70 Status GenerateAndAddExternalFile(
11fdf7f2
TL
71 const Options options, std::vector<int> keys,
72 const std::vector<ValueType>& value_types,
73 std::vector<std::pair<int, int>> range_deletions, int file_id,
494da23a 74 bool write_global_seqno, bool verify_checksums_before_ingest,
7c673cae 75 std::map<std::string, std::string>* true_data) {
11fdf7f2 76 assert(value_types.size() == 1 || keys.size() == value_types.size());
7c673cae
FG
77 std::string file_path = sst_files_dir_ + ToString(file_id);
78 SstFileWriter sst_file_writer(EnvOptions(), options);
79
80 Status s = sst_file_writer.Open(file_path);
81 if (!s.ok()) {
82 return s;
83 }
11fdf7f2
TL
84 for (size_t i = 0; i < range_deletions.size(); i++) {
85 // Account for the effect of range deletions on true_data before
86 // all point operators, even though sst_file_writer.DeleteRange
87 // must be called before other sst_file_writer methods. This is
88 // because point writes take precedence over range deletions
89 // in the same ingested sst.
90 std::string start_key = Key(range_deletions[i].first);
91 std::string end_key = Key(range_deletions[i].second);
92 s = sst_file_writer.DeleteRange(start_key, end_key);
93 if (!s.ok()) {
94 sst_file_writer.Finish();
95 return s;
96 }
97 auto start_key_it = true_data->find(start_key);
98 if (start_key_it == true_data->end()) {
99 start_key_it = true_data->upper_bound(start_key);
100 }
101 auto end_key_it = true_data->find(end_key);
102 if (end_key_it == true_data->end()) {
103 end_key_it = true_data->upper_bound(end_key);
104 }
105 true_data->erase(start_key_it, end_key_it);
106 }
107 for (size_t i = 0; i < keys.size(); i++) {
108 std::string key = Key(keys[i]);
109 std::string value = Key(keys[i]) + ToString(file_id);
110 ValueType value_type =
111 (value_types.size() == 1 ? value_types[0] : value_types[i]);
112 switch (value_type) {
113 case ValueType::kTypeValue:
114 s = sst_file_writer.Put(key, value);
115 (*true_data)[key] = value;
116 break;
117 case ValueType::kTypeMerge:
118 s = sst_file_writer.Merge(key, value);
119 // we only use TestPutOperator in this test
120 (*true_data)[key] = value;
121 break;
122 case ValueType::kTypeDeletion:
123 s = sst_file_writer.Delete(key);
124 true_data->erase(key);
125 break;
126 default:
127 return Status::InvalidArgument("Value type is not supported");
128 }
7c673cae
FG
129 if (!s.ok()) {
130 sst_file_writer.Finish();
131 return s;
132 }
133 }
134 s = sst_file_writer.Finish();
135
136 if (s.ok()) {
137 IngestExternalFileOptions ifo;
138 ifo.allow_global_seqno = true;
494da23a
TL
139 ifo.write_global_seqno = write_global_seqno;
140 ifo.verify_checksums_before_ingest = verify_checksums_before_ingest;
7c673cae
FG
141 s = db_->IngestExternalFile({file_path}, ifo);
142 }
7c673cae
FG
143 return s;
144 }
145
11fdf7f2
TL
146 Status GenerateAndAddExternalFile(
147 const Options options, std::vector<int> keys,
148 const std::vector<ValueType>& value_types, int file_id,
494da23a 149 bool write_global_seqno, bool verify_checksums_before_ingest,
11fdf7f2 150 std::map<std::string, std::string>* true_data) {
494da23a
TL
151 return GenerateAndAddExternalFile(
152 options, keys, value_types, {}, file_id, write_global_seqno,
153 verify_checksums_before_ingest, true_data);
11fdf7f2
TL
154 }
155
156 Status GenerateAndAddExternalFile(
157 const Options options, std::vector<int> keys, const ValueType value_type,
494da23a
TL
158 int file_id, bool write_global_seqno, bool verify_checksums_before_ingest,
159 std::map<std::string, std::string>* true_data) {
160 return GenerateAndAddExternalFile(
161 options, keys, std::vector<ValueType>(1, value_type), file_id,
162 write_global_seqno, verify_checksums_before_ingest, true_data);
11fdf7f2
TL
163 }
164
20effc67 165 ~ExternalSSTFileBasicTest() override { DestroyDir(env_, sst_files_dir_); }
7c673cae
FG
166
167 protected:
168 std::string sst_files_dir_;
f67539c2 169 std::unique_ptr<FaultInjectionTestEnv> fault_injection_test_env_;
7c673cae
FG
170};
171
172TEST_F(ExternalSSTFileBasicTest, Basic) {
173 Options options = CurrentOptions();
174
175 SstFileWriter sst_file_writer(EnvOptions(), options);
176
177 // Current file size should be 0 after sst_file_writer init and before open a
178 // file.
179 ASSERT_EQ(sst_file_writer.FileSize(), 0);
180
181 // file1.sst (0 => 99)
182 std::string file1 = sst_files_dir_ + "file1.sst";
183 ASSERT_OK(sst_file_writer.Open(file1));
184 for (int k = 0; k < 100; k++) {
11fdf7f2 185 ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
7c673cae
FG
186 }
187 ExternalSstFileInfo file1_info;
188 Status s = sst_file_writer.Finish(&file1_info);
189 ASSERT_TRUE(s.ok()) << s.ToString();
190
191 // Current file size should be non-zero after success write.
192 ASSERT_GT(sst_file_writer.FileSize(), 0);
193
194 ASSERT_EQ(file1_info.file_path, file1);
195 ASSERT_EQ(file1_info.num_entries, 100);
196 ASSERT_EQ(file1_info.smallest_key, Key(0));
197 ASSERT_EQ(file1_info.largest_key, Key(99));
11fdf7f2
TL
198 ASSERT_EQ(file1_info.num_range_del_entries, 0);
199 ASSERT_EQ(file1_info.smallest_range_del_key, "");
200 ASSERT_EQ(file1_info.largest_range_del_key, "");
20effc67
TL
201 ASSERT_EQ(file1_info.file_checksum, kUnknownFileChecksum);
202 ASSERT_EQ(file1_info.file_checksum_func_name, kUnknownFileChecksumFuncName);
203 // sst_file_writer already finished, cannot add this value
204 s = sst_file_writer.Put(Key(100), "bad_val");
205 ASSERT_FALSE(s.ok()) << s.ToString();
206 s = sst_file_writer.DeleteRange(Key(100), Key(200));
207 ASSERT_FALSE(s.ok()) << s.ToString();
208
209 DestroyAndReopen(options);
210 // Add file using file path
211 s = DeprecatedAddFile({file1});
212 ASSERT_TRUE(s.ok()) << s.ToString();
213 ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
214 for (int k = 0; k < 100; k++) {
215 ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
216 }
217
218 DestroyAndRecreateExternalSSTFilesDir();
219}
220
221class ChecksumVerifyHelper {
222 private:
223 Options options_;
224
225 public:
226 ChecksumVerifyHelper(Options& options) : options_(options) {}
227 ~ChecksumVerifyHelper() {}
228
229 Status GetSingleFileChecksumAndFuncName(
230 const std::string& file_path, std::string* file_checksum,
231 std::string* file_checksum_func_name) {
232 Status s;
233 EnvOptions soptions;
234 std::unique_ptr<SequentialFile> file_reader;
235 s = options_.env->NewSequentialFile(file_path, &file_reader, soptions);
236 if (!s.ok()) {
237 return s;
238 }
239 std::unique_ptr<char[]> scratch(new char[2048]);
240 Slice result;
241 FileChecksumGenFactory* file_checksum_gen_factory =
242 options_.file_checksum_gen_factory.get();
243 if (file_checksum_gen_factory == nullptr) {
244 *file_checksum = kUnknownFileChecksum;
245 *file_checksum_func_name = kUnknownFileChecksumFuncName;
246 return Status::OK();
247 } else {
248 FileChecksumGenContext gen_context;
249 std::unique_ptr<FileChecksumGenerator> file_checksum_gen =
250 file_checksum_gen_factory->CreateFileChecksumGenerator(gen_context);
251 *file_checksum_func_name = file_checksum_gen->Name();
252 s = file_reader->Read(2048, &result, scratch.get());
253 if (!s.ok()) {
254 return s;
255 }
256 while (result.size() != 0) {
257 file_checksum_gen->Update(scratch.get(), result.size());
258 s = file_reader->Read(2048, &result, scratch.get());
259 if (!s.ok()) {
260 return s;
261 }
262 }
263 file_checksum_gen->Finalize();
264 *file_checksum = file_checksum_gen->GetChecksum();
265 }
266 return Status::OK();
267 }
268};
269
270TEST_F(ExternalSSTFileBasicTest, BasicWithFileChecksumCrc32c) {
271 Options options = CurrentOptions();
272 options.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
273 ChecksumVerifyHelper checksum_helper(options);
274
275 SstFileWriter sst_file_writer(EnvOptions(), options);
276
277 // Current file size should be 0 after sst_file_writer init and before open a
278 // file.
279 ASSERT_EQ(sst_file_writer.FileSize(), 0);
280
281 // file1.sst (0 => 99)
282 std::string file1 = sst_files_dir_ + "file1.sst";
283 ASSERT_OK(sst_file_writer.Open(file1));
284 for (int k = 0; k < 100; k++) {
285 ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
286 }
287 ExternalSstFileInfo file1_info;
288 Status s = sst_file_writer.Finish(&file1_info);
289 ASSERT_TRUE(s.ok()) << s.ToString();
290 std::string file_checksum, file_checksum_func_name;
291 ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName(
292 file1, &file_checksum, &file_checksum_func_name));
293
294 // Current file size should be non-zero after success write.
295 ASSERT_GT(sst_file_writer.FileSize(), 0);
296
297 ASSERT_EQ(file1_info.file_path, file1);
298 ASSERT_EQ(file1_info.num_entries, 100);
299 ASSERT_EQ(file1_info.smallest_key, Key(0));
300 ASSERT_EQ(file1_info.largest_key, Key(99));
301 ASSERT_EQ(file1_info.num_range_del_entries, 0);
302 ASSERT_EQ(file1_info.smallest_range_del_key, "");
303 ASSERT_EQ(file1_info.largest_range_del_key, "");
304 ASSERT_EQ(file1_info.file_checksum, file_checksum);
305 ASSERT_EQ(file1_info.file_checksum_func_name, file_checksum_func_name);
7c673cae 306 // sst_file_writer already finished, cannot add this value
11fdf7f2
TL
307 s = sst_file_writer.Put(Key(100), "bad_val");
308 ASSERT_FALSE(s.ok()) << s.ToString();
309 s = sst_file_writer.DeleteRange(Key(100), Key(200));
7c673cae
FG
310 ASSERT_FALSE(s.ok()) << s.ToString();
311
312 DestroyAndReopen(options);
313 // Add file using file path
314 s = DeprecatedAddFile({file1});
315 ASSERT_TRUE(s.ok()) << s.ToString();
316 ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
317 for (int k = 0; k < 100; k++) {
318 ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
319 }
320
321 DestroyAndRecreateExternalSSTFilesDir();
322}
323
20effc67
TL
324TEST_F(ExternalSSTFileBasicTest, IngestFileWithFileChecksum) {
325 Options old_options = CurrentOptions();
326 Options options = CurrentOptions();
327 options.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
328 const ImmutableCFOptions ioptions(options);
329 ChecksumVerifyHelper checksum_helper(options);
330
331 SstFileWriter sst_file_writer(EnvOptions(), options);
332
333 // file01.sst (1000 => 1099)
334 std::string file1 = sst_files_dir_ + "file01.sst";
335 ASSERT_OK(sst_file_writer.Open(file1));
336 for (int k = 1000; k < 1100; k++) {
337 ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
338 }
339 ExternalSstFileInfo file1_info;
340 Status s = sst_file_writer.Finish(&file1_info);
341 ASSERT_TRUE(s.ok()) << s.ToString();
342 ASSERT_EQ(file1_info.file_path, file1);
343 ASSERT_EQ(file1_info.num_entries, 100);
344 ASSERT_EQ(file1_info.smallest_key, Key(1000));
345 ASSERT_EQ(file1_info.largest_key, Key(1099));
346 std::string file_checksum1, file_checksum_func_name1;
347 ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName(
348 file1, &file_checksum1, &file_checksum_func_name1));
349 ASSERT_EQ(file1_info.file_checksum, file_checksum1);
350 ASSERT_EQ(file1_info.file_checksum_func_name, file_checksum_func_name1);
351
352 // file02.sst (1100 => 1299)
353 std::string file2 = sst_files_dir_ + "file02.sst";
354 ASSERT_OK(sst_file_writer.Open(file2));
355 for (int k = 1100; k < 1300; k++) {
356 ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
357 }
358 ExternalSstFileInfo file2_info;
359 s = sst_file_writer.Finish(&file2_info);
360 ASSERT_TRUE(s.ok()) << s.ToString();
361 ASSERT_EQ(file2_info.file_path, file2);
362 ASSERT_EQ(file2_info.num_entries, 200);
363 ASSERT_EQ(file2_info.smallest_key, Key(1100));
364 ASSERT_EQ(file2_info.largest_key, Key(1299));
365 std::string file_checksum2, file_checksum_func_name2;
366 ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName(
367 file2, &file_checksum2, &file_checksum_func_name2));
368 ASSERT_EQ(file2_info.file_checksum, file_checksum2);
369 ASSERT_EQ(file2_info.file_checksum_func_name, file_checksum_func_name2);
370
371 // file03.sst (1300 => 1499)
372 std::string file3 = sst_files_dir_ + "file03.sst";
373 ASSERT_OK(sst_file_writer.Open(file3));
374 for (int k = 1300; k < 1500; k++) {
375 ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap"));
376 }
377 ExternalSstFileInfo file3_info;
378 s = sst_file_writer.Finish(&file3_info);
379 ASSERT_TRUE(s.ok()) << s.ToString();
380 ASSERT_EQ(file3_info.file_path, file3);
381 ASSERT_EQ(file3_info.num_entries, 200);
382 ASSERT_EQ(file3_info.smallest_key, Key(1300));
383 ASSERT_EQ(file3_info.largest_key, Key(1499));
384 std::string file_checksum3, file_checksum_func_name3;
385 ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName(
386 file3, &file_checksum3, &file_checksum_func_name3));
387 ASSERT_EQ(file3_info.file_checksum, file_checksum3);
388 ASSERT_EQ(file3_info.file_checksum_func_name, file_checksum_func_name3);
389
390 // file04.sst (1500 => 1799)
391 std::string file4 = sst_files_dir_ + "file04.sst";
392 ASSERT_OK(sst_file_writer.Open(file4));
393 for (int k = 1500; k < 1800; k++) {
394 ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap"));
395 }
396 ExternalSstFileInfo file4_info;
397 s = sst_file_writer.Finish(&file4_info);
398 ASSERT_TRUE(s.ok()) << s.ToString();
399 ASSERT_EQ(file4_info.file_path, file4);
400 ASSERT_EQ(file4_info.num_entries, 300);
401 ASSERT_EQ(file4_info.smallest_key, Key(1500));
402 ASSERT_EQ(file4_info.largest_key, Key(1799));
403 std::string file_checksum4, file_checksum_func_name4;
404 ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName(
405 file4, &file_checksum4, &file_checksum_func_name4));
406 ASSERT_EQ(file4_info.file_checksum, file_checksum4);
407 ASSERT_EQ(file4_info.file_checksum_func_name, file_checksum_func_name4);
408
409 // file05.sst (1800 => 1899)
410 std::string file5 = sst_files_dir_ + "file05.sst";
411 ASSERT_OK(sst_file_writer.Open(file5));
412 for (int k = 1800; k < 2000; k++) {
413 ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap"));
414 }
415 ExternalSstFileInfo file5_info;
416 s = sst_file_writer.Finish(&file5_info);
417 ASSERT_TRUE(s.ok()) << s.ToString();
418 ASSERT_EQ(file5_info.file_path, file5);
419 ASSERT_EQ(file5_info.num_entries, 200);
420 ASSERT_EQ(file5_info.smallest_key, Key(1800));
421 ASSERT_EQ(file5_info.largest_key, Key(1999));
422 std::string file_checksum5, file_checksum_func_name5;
423 ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName(
424 file5, &file_checksum5, &file_checksum_func_name5));
425 ASSERT_EQ(file5_info.file_checksum, file_checksum5);
426 ASSERT_EQ(file5_info.file_checksum_func_name, file_checksum_func_name5);
427
428 // file06.sst (2000 => 2199)
429 std::string file6 = sst_files_dir_ + "file06.sst";
430 ASSERT_OK(sst_file_writer.Open(file6));
431 for (int k = 2000; k < 2200; k++) {
432 ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap"));
433 }
434 ExternalSstFileInfo file6_info;
435 s = sst_file_writer.Finish(&file6_info);
436 ASSERT_TRUE(s.ok()) << s.ToString();
437 ASSERT_EQ(file6_info.file_path, file6);
438 ASSERT_EQ(file6_info.num_entries, 200);
439 ASSERT_EQ(file6_info.smallest_key, Key(2000));
440 ASSERT_EQ(file6_info.largest_key, Key(2199));
441 std::string file_checksum6, file_checksum_func_name6;
442 ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName(
443 file6, &file_checksum6, &file_checksum_func_name6));
444 ASSERT_EQ(file6_info.file_checksum, file_checksum6);
445 ASSERT_EQ(file6_info.file_checksum_func_name, file_checksum_func_name6);
446
447 s = AddFileWithFileChecksum({file1}, {file_checksum1, "xyz"},
448 {file_checksum1}, true, false, false, false);
449 // does not care the checksum input since db does not enable file checksum
450 ASSERT_TRUE(s.ok()) << s.ToString();
451 ASSERT_OK(env_->FileExists(file1));
452 std::vector<LiveFileMetaData> live_files;
453 dbfull()->GetLiveFilesMetaData(&live_files);
454 std::set<std::string> set1;
455 for (auto f : live_files) {
456 set1.insert(f.name);
457 ASSERT_EQ(f.file_checksum, kUnknownFileChecksum);
458 ASSERT_EQ(f.file_checksum_func_name, kUnknownFileChecksumFuncName);
459 }
460
461 // Reopen Db with checksum enabled
462 Reopen(options);
463 // Enable verify_file_checksum option
464 // The checksum vector does not match, fail the ingestion
465 s = AddFileWithFileChecksum({file2}, {file_checksum2, "xyz"},
466 {file_checksum_func_name2}, true, false, false,
467 false);
468 ASSERT_FALSE(s.ok()) << s.ToString();
469
470 // Enable verify_file_checksum option
471 // The checksum name does not match, fail the ingestion
472 s = AddFileWithFileChecksum({file2}, {file_checksum2}, {"xyz"}, true, false,
473 false, false);
474 ASSERT_FALSE(s.ok()) << s.ToString();
475
476 // Enable verify_file_checksum option
477 // The checksum itself does not match, fail the ingestion
478 s = AddFileWithFileChecksum({file2}, {"xyz"}, {file_checksum_func_name2},
479 true, false, false, false);
480 ASSERT_FALSE(s.ok()) << s.ToString();
481
482 // Enable verify_file_checksum option
483 // All matches, ingestion is successful
484 s = AddFileWithFileChecksum({file2}, {file_checksum2},
485 {file_checksum_func_name2}, true, false, false,
486 false);
487 ASSERT_TRUE(s.ok()) << s.ToString();
488 std::vector<LiveFileMetaData> live_files1;
489 dbfull()->GetLiveFilesMetaData(&live_files1);
490 for (auto f : live_files1) {
491 if (set1.find(f.name) == set1.end()) {
492 ASSERT_EQ(f.file_checksum, file_checksum2);
493 ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name2);
494 set1.insert(f.name);
495 }
496 }
497 ASSERT_OK(env_->FileExists(file2));
498
499 // Enable verify_file_checksum option
500 // No checksum information is provided, generate it when ingesting
501 std::vector<std::string> checksum, checksum_func;
502 s = AddFileWithFileChecksum({file3}, checksum, checksum_func, true, false,
503 false, false);
504 ASSERT_TRUE(s.ok()) << s.ToString();
505 std::vector<LiveFileMetaData> live_files2;
506 dbfull()->GetLiveFilesMetaData(&live_files2);
507 for (auto f : live_files2) {
508 if (set1.find(f.name) == set1.end()) {
509 ASSERT_EQ(f.file_checksum, file_checksum3);
510 ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name3);
511 set1.insert(f.name);
512 }
513 }
514 ASSERT_TRUE(s.ok()) << s.ToString();
515 ASSERT_OK(env_->FileExists(file3));
516
517 // Does not enable verify_file_checksum options
518 // The checksum name does not match, fail the ingestion
519 s = AddFileWithFileChecksum({file4}, {file_checksum4}, {"xyz"}, false, false,
520 false, false);
521 ASSERT_FALSE(s.ok()) << s.ToString();
522
523 // Does not enable verify_file_checksum options
524 // Checksum function name matches, store the checksum being ingested.
525 s = AddFileWithFileChecksum({file4}, {"asd"}, {file_checksum_func_name4},
526 false, false, false, false);
527 ASSERT_TRUE(s.ok()) << s.ToString();
528 std::vector<LiveFileMetaData> live_files3;
529 dbfull()->GetLiveFilesMetaData(&live_files3);
530 for (auto f : live_files3) {
531 if (set1.find(f.name) == set1.end()) {
532 ASSERT_FALSE(f.file_checksum == file_checksum4);
533 ASSERT_EQ(f.file_checksum, "asd");
534 ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name4);
535 set1.insert(f.name);
536 }
537 }
538 ASSERT_TRUE(s.ok()) << s.ToString();
539 ASSERT_OK(env_->FileExists(file4));
540
541 // enable verify_file_checksum options, DB enable checksum, and enable
542 // write_global_seq. So the checksum stored is different from the one
543 // ingested due to the sequence number changes.
544 s = AddFileWithFileChecksum({file5}, {file_checksum5},
545 {file_checksum_func_name5}, true, false, false,
546 true);
547 ASSERT_OK(s);
548 ASSERT_TRUE(s.ok()) << s.ToString();
549 std::vector<LiveFileMetaData> live_files4;
550 dbfull()->GetLiveFilesMetaData(&live_files4);
551 for (auto f : live_files4) {
552 if (set1.find(f.name) == set1.end()) {
553 std::string cur_checksum5, cur_checksum_func_name5;
554 ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName(
555 dbname_ + f.name, &cur_checksum5, &cur_checksum_func_name5));
556 ASSERT_EQ(f.file_checksum, cur_checksum5);
557 ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name5);
558 set1.insert(f.name);
559 }
560 }
561 ASSERT_TRUE(s.ok()) << s.ToString();
562 ASSERT_OK(env_->FileExists(file5));
563
564 // Does not enable verify_file_checksum options and also the ingested file
565 // checksum information is empty. DB will generate and store the checksum
566 // in Manifest.
567 std::vector<std::string> files_c6, files_name6;
568 s = AddFileWithFileChecksum({file6}, files_c6, files_name6, false, false,
569 false, false);
570 ASSERT_TRUE(s.ok()) << s.ToString();
571 std::vector<LiveFileMetaData> live_files6;
572 dbfull()->GetLiveFilesMetaData(&live_files6);
573 for (auto f : live_files6) {
574 if (set1.find(f.name) == set1.end()) {
575 ASSERT_EQ(f.file_checksum, file_checksum6);
576 ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name6);
577 set1.insert(f.name);
578 }
579 }
580 ASSERT_TRUE(s.ok()) << s.ToString();
581 ASSERT_OK(env_->FileExists(file6));
582}
583
7c673cae
FG
584TEST_F(ExternalSSTFileBasicTest, NoCopy) {
585 Options options = CurrentOptions();
586 const ImmutableCFOptions ioptions(options);
587
588 SstFileWriter sst_file_writer(EnvOptions(), options);
589
590 // file1.sst (0 => 99)
591 std::string file1 = sst_files_dir_ + "file1.sst";
592 ASSERT_OK(sst_file_writer.Open(file1));
593 for (int k = 0; k < 100; k++) {
11fdf7f2 594 ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
7c673cae
FG
595 }
596 ExternalSstFileInfo file1_info;
597 Status s = sst_file_writer.Finish(&file1_info);
598 ASSERT_TRUE(s.ok()) << s.ToString();
599 ASSERT_EQ(file1_info.file_path, file1);
600 ASSERT_EQ(file1_info.num_entries, 100);
601 ASSERT_EQ(file1_info.smallest_key, Key(0));
602 ASSERT_EQ(file1_info.largest_key, Key(99));
603
604 // file2.sst (100 => 299)
605 std::string file2 = sst_files_dir_ + "file2.sst";
606 ASSERT_OK(sst_file_writer.Open(file2));
607 for (int k = 100; k < 300; k++) {
11fdf7f2 608 ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
7c673cae
FG
609 }
610 ExternalSstFileInfo file2_info;
611 s = sst_file_writer.Finish(&file2_info);
612 ASSERT_TRUE(s.ok()) << s.ToString();
613 ASSERT_EQ(file2_info.file_path, file2);
614 ASSERT_EQ(file2_info.num_entries, 200);
615 ASSERT_EQ(file2_info.smallest_key, Key(100));
616 ASSERT_EQ(file2_info.largest_key, Key(299));
617
618 // file3.sst (110 => 124) .. overlap with file2.sst
619 std::string file3 = sst_files_dir_ + "file3.sst";
620 ASSERT_OK(sst_file_writer.Open(file3));
621 for (int k = 110; k < 125; k++) {
11fdf7f2 622 ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap"));
7c673cae
FG
623 }
624 ExternalSstFileInfo file3_info;
625 s = sst_file_writer.Finish(&file3_info);
626 ASSERT_TRUE(s.ok()) << s.ToString();
627 ASSERT_EQ(file3_info.file_path, file3);
628 ASSERT_EQ(file3_info.num_entries, 15);
629 ASSERT_EQ(file3_info.smallest_key, Key(110));
630 ASSERT_EQ(file3_info.largest_key, Key(124));
11fdf7f2 631
7c673cae
FG
632 s = DeprecatedAddFile({file1}, true /* move file */);
633 ASSERT_TRUE(s.ok()) << s.ToString();
634 ASSERT_EQ(Status::NotFound(), env_->FileExists(file1));
635
636 s = DeprecatedAddFile({file2}, false /* copy file */);
637 ASSERT_TRUE(s.ok()) << s.ToString();
638 ASSERT_OK(env_->FileExists(file2));
639
11fdf7f2
TL
640 // This file has overlapping values with the existing data
641 s = DeprecatedAddFile({file3}, true /* move file */);
7c673cae
FG
642 ASSERT_FALSE(s.ok()) << s.ToString();
643 ASSERT_OK(env_->FileExists(file3));
644
645 for (int k = 0; k < 300; k++) {
646 ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
647 }
648}
649
494da23a
TL
650TEST_P(ExternalSSTFileBasicTest, IngestFileWithGlobalSeqnoPickedSeqno) {
651 bool write_global_seqno = std::get<0>(GetParam());
652 bool verify_checksums_before_ingest = std::get<1>(GetParam());
7c673cae
FG
653 do {
654 Options options = CurrentOptions();
655 DestroyAndReopen(options);
656 std::map<std::string, std::string> true_data;
657
658 int file_id = 1;
659
494da23a
TL
660 ASSERT_OK(GenerateAndAddExternalFile(
661 options, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue, file_id++,
662 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2 663 // File doesn't overwrite any keys, no seqno needed
7c673cae
FG
664 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
665
494da23a
TL
666 ASSERT_OK(GenerateAndAddExternalFile(
667 options, {10, 11, 12, 13}, ValueType::kTypeValue, file_id++,
668 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2 669 // File doesn't overwrite any keys, no seqno needed
7c673cae
FG
670 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
671
11fdf7f2 672 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
673 options, {1, 4, 6}, ValueType::kTypeValue, file_id++,
674 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2 675 // File overwrites some keys, a seqno will be assigned
7c673cae
FG
676 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1);
677
11fdf7f2 678 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
679 options, {11, 15, 19}, ValueType::kTypeValue, file_id++,
680 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2 681 // File overwrites some keys, a seqno will be assigned
7c673cae
FG
682 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
683
11fdf7f2 684 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
685 options, {120, 130}, ValueType::kTypeValue, file_id++,
686 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2 687 // File doesn't overwrite any keys, no seqno needed
7c673cae
FG
688 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
689
11fdf7f2 690 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
691 options, {1, 130}, ValueType::kTypeValue, file_id++, write_global_seqno,
692 verify_checksums_before_ingest, &true_data));
11fdf7f2 693 // File overwrites some keys, a seqno will be assigned
7c673cae
FG
694 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3);
695
696 // Write some keys through normal write path
697 for (int i = 0; i < 50; i++) {
698 ASSERT_OK(Put(Key(i), "memtable"));
699 true_data[Key(i)] = "memtable";
700 }
701 SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber();
702
11fdf7f2 703 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
704 options, {60, 61, 62}, ValueType::kTypeValue, file_id++,
705 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2 706 // File doesn't overwrite any keys, no seqno needed
7c673cae
FG
707 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno);
708
11fdf7f2 709 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
710 options, {40, 41, 42}, ValueType::kTypeValue, file_id++,
711 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
712 // File overwrites some keys, a seqno will be assigned
713 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 1);
714
715 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
716 options, {20, 30, 40}, ValueType::kTypeValue, file_id++,
717 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
718 // File overwrites some keys, a seqno will be assigned
719 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 2);
720
721 const Snapshot* snapshot = db_->GetSnapshot();
722
723 // We will need a seqno for the file regardless if the file overwrite
724 // keys in the DB or not because we have a snapshot
725 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
726 options, {1000, 1002}, ValueType::kTypeValue, file_id++,
727 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
728 // A global seqno will be assigned anyway because of the snapshot
729 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 3);
730
731 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
732 options, {2000, 3002}, ValueType::kTypeValue, file_id++,
733 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
734 // A global seqno will be assigned anyway because of the snapshot
735 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 4);
736
494da23a
TL
737 ASSERT_OK(GenerateAndAddExternalFile(
738 options, {1, 20, 40, 100, 150}, ValueType::kTypeValue, file_id++,
739 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
740 // A global seqno will be assigned anyway because of the snapshot
741 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5);
742
743 db_->ReleaseSnapshot(snapshot);
744
745 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
746 options, {5000, 5001}, ValueType::kTypeValue, file_id++,
747 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
748 // No snapshot anymore, no need to assign a seqno
749 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5);
750
751 size_t kcnt = 0;
752 VerifyDBFromMap(true_data, &kcnt, false);
494da23a 753 } while (ChangeOptionsForFileIngestionTest());
11fdf7f2
TL
754}
755
494da23a
TL
756TEST_P(ExternalSSTFileBasicTest, IngestFileWithMultipleValueType) {
757 bool write_global_seqno = std::get<0>(GetParam());
758 bool verify_checksums_before_ingest = std::get<1>(GetParam());
11fdf7f2
TL
759 do {
760 Options options = CurrentOptions();
761 options.merge_operator.reset(new TestPutOperator());
762 DestroyAndReopen(options);
763 std::map<std::string, std::string> true_data;
764
765 int file_id = 1;
766
494da23a
TL
767 ASSERT_OK(GenerateAndAddExternalFile(
768 options, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue, file_id++,
769 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
770 // File doesn't overwrite any keys, no seqno needed
771 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
772
494da23a
TL
773 ASSERT_OK(GenerateAndAddExternalFile(
774 options, {10, 11, 12, 13}, ValueType::kTypeValue, file_id++,
775 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
776 // File doesn't overwrite any keys, no seqno needed
777 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
778
779 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
780 options, {1, 4, 6}, ValueType::kTypeMerge, file_id++,
781 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
782 // File overwrites some keys, a seqno will be assigned
783 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1);
784
494da23a
TL
785 ASSERT_OK(GenerateAndAddExternalFile(
786 options, {11, 15, 19}, ValueType::kTypeDeletion, file_id++,
787 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
788 // File overwrites some keys, a seqno will be assigned
789 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
790
791 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
792 options, {120, 130}, ValueType::kTypeMerge, file_id++,
793 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
794 // File doesn't overwrite any keys, no seqno needed
795 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
796
797 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
798 options, {1, 130}, ValueType::kTypeDeletion, file_id++,
799 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
800 // File overwrites some keys, a seqno will be assigned
801 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3);
802
494da23a
TL
803 ASSERT_OK(GenerateAndAddExternalFile(
804 options, {120}, {ValueType::kTypeValue}, {{120, 135}}, file_id++,
805 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
806 // File overwrites some keys, a seqno will be assigned
807 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 4);
808
494da23a
TL
809 ASSERT_OK(GenerateAndAddExternalFile(
810 options, {}, {}, {{110, 120}}, file_id++, write_global_seqno,
811 verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
812 // The range deletion ends on a key, but it doesn't actually delete
813 // this key because the largest key in the range is exclusive. Still,
814 // it counts as an overlap so a new seqno will be assigned.
815 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5);
816
494da23a
TL
817 ASSERT_OK(GenerateAndAddExternalFile(
818 options, {}, {}, {{100, 109}}, file_id++, write_global_seqno,
819 verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
820 // File doesn't overwrite any keys, no seqno needed
821 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5);
822
823 // Write some keys through normal write path
824 for (int i = 0; i < 50; i++) {
825 ASSERT_OK(Put(Key(i), "memtable"));
826 true_data[Key(i)] = "memtable";
827 }
828 SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber();
829
830 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
831 options, {60, 61, 62}, ValueType::kTypeValue, file_id++,
832 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
833 // File doesn't overwrite any keys, no seqno needed
834 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno);
835
836 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
837 options, {40, 41, 42}, ValueType::kTypeMerge, file_id++,
838 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2 839 // File overwrites some keys, a seqno will be assigned
7c673cae
FG
840 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 1);
841
494da23a
TL
842 ASSERT_OK(GenerateAndAddExternalFile(
843 options, {20, 30, 40}, ValueType::kTypeDeletion, file_id++,
844 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2 845 // File overwrites some keys, a seqno will be assigned
7c673cae
FG
846 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 2);
847
848 const Snapshot* snapshot = db_->GetSnapshot();
849
850 // We will need a seqno for the file regardless if the file overwrite
851 // keys in the DB or not because we have a snapshot
11fdf7f2 852 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
853 options, {1000, 1002}, ValueType::kTypeMerge, file_id++,
854 write_global_seqno, verify_checksums_before_ingest, &true_data));
7c673cae
FG
855 // A global seqno will be assigned anyway because of the snapshot
856 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 3);
857
11fdf7f2 858 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
859 options, {2000, 3002}, ValueType::kTypeMerge, file_id++,
860 write_global_seqno, verify_checksums_before_ingest, &true_data));
7c673cae
FG
861 // A global seqno will be assigned anyway because of the snapshot
862 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 4);
863
494da23a
TL
864 ASSERT_OK(GenerateAndAddExternalFile(
865 options, {1, 20, 40, 100, 150}, ValueType::kTypeMerge, file_id++,
866 write_global_seqno, verify_checksums_before_ingest, &true_data));
7c673cae
FG
867 // A global seqno will be assigned anyway because of the snapshot
868 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5);
869
870 db_->ReleaseSnapshot(snapshot);
871
11fdf7f2 872 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
873 options, {5000, 5001}, ValueType::kTypeValue, file_id++,
874 write_global_seqno, verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
875 // No snapshot anymore, no need to assign a seqno
876 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5);
877
878 size_t kcnt = 0;
879 VerifyDBFromMap(true_data, &kcnt, false);
494da23a 880 } while (ChangeOptionsForFileIngestionTest());
11fdf7f2
TL
881}
882
494da23a
TL
883TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
884 bool write_global_seqno = std::get<0>(GetParam());
885 bool verify_checksums_before_ingest = std::get<1>(GetParam());
11fdf7f2
TL
886 do {
887 Options options = CurrentOptions();
888 options.merge_operator.reset(new TestPutOperator());
889 DestroyAndReopen(options);
890 std::map<std::string, std::string> true_data;
891
892 int file_id = 1;
893
894 ASSERT_OK(GenerateAndAddExternalFile(
895 options, {1, 2, 3, 4, 5, 6},
896 {ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue,
897 ValueType::kTypeMerge, ValueType::kTypeValue, ValueType::kTypeMerge},
494da23a
TL
898 file_id++, write_global_seqno, verify_checksums_before_ingest,
899 &true_data));
11fdf7f2
TL
900 // File doesn't overwrite any keys, no seqno needed
901 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
902
903 ASSERT_OK(GenerateAndAddExternalFile(
904 options, {10, 11, 12, 13},
905 {ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue,
906 ValueType::kTypeMerge},
494da23a
TL
907 file_id++, write_global_seqno, verify_checksums_before_ingest,
908 &true_data));
11fdf7f2
TL
909 // File doesn't overwrite any keys, no seqno needed
910 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
911
912 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
913 options, {1, 4, 6},
914 {ValueType::kTypeDeletion, ValueType::kTypeValue,
915 ValueType::kTypeMerge},
916 file_id++, write_global_seqno, verify_checksums_before_ingest,
917 &true_data));
11fdf7f2
TL
918 // File overwrites some keys, a seqno will be assigned
919 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1);
920
921 ASSERT_OK(GenerateAndAddExternalFile(
494da23a
TL
922 options, {11, 15, 19},
923 {ValueType::kTypeDeletion, ValueType::kTypeMerge,
924 ValueType::kTypeValue},
925 file_id++, write_global_seqno, verify_checksums_before_ingest,
926 &true_data));
11fdf7f2
TL
927 // File overwrites some keys, a seqno will be assigned
928 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
929
930 ASSERT_OK(GenerateAndAddExternalFile(
931 options, {120, 130}, {ValueType::kTypeValue, ValueType::kTypeMerge},
494da23a
TL
932 file_id++, write_global_seqno, verify_checksums_before_ingest,
933 &true_data));
11fdf7f2
TL
934 // File doesn't overwrite any keys, no seqno needed
935 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
936
937 ASSERT_OK(GenerateAndAddExternalFile(
938 options, {1, 130}, {ValueType::kTypeMerge, ValueType::kTypeDeletion},
494da23a
TL
939 file_id++, write_global_seqno, verify_checksums_before_ingest,
940 &true_data));
11fdf7f2
TL
941 // File overwrites some keys, a seqno will be assigned
942 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3);
943
944 ASSERT_OK(GenerateAndAddExternalFile(
945 options, {150, 151, 152},
946 {ValueType::kTypeValue, ValueType::kTypeMerge,
947 ValueType::kTypeDeletion},
494da23a
TL
948 {{150, 160}, {180, 190}}, file_id++, write_global_seqno,
949 verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
950 // File doesn't overwrite any keys, no seqno needed
951 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3);
952
953 ASSERT_OK(GenerateAndAddExternalFile(
954 options, {150, 151, 152},
955 {ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue},
494da23a
TL
956 {{200, 250}}, file_id++, write_global_seqno,
957 verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
958 // File overwrites some keys, a seqno will be assigned
959 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 4);
960
961 ASSERT_OK(GenerateAndAddExternalFile(
962 options, {300, 301, 302},
963 {ValueType::kTypeValue, ValueType::kTypeMerge,
964 ValueType::kTypeDeletion},
494da23a
TL
965 {{1, 2}, {152, 154}}, file_id++, write_global_seqno,
966 verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
967 // File overwrites some keys, a seqno will be assigned
968 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5);
969
970 // Write some keys through normal write path
971 for (int i = 0; i < 50; i++) {
972 ASSERT_OK(Put(Key(i), "memtable"));
973 true_data[Key(i)] = "memtable";
974 }
975 SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber();
976
977 ASSERT_OK(GenerateAndAddExternalFile(
978 options, {60, 61, 62},
979 {ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue},
494da23a
TL
980 file_id++, write_global_seqno, verify_checksums_before_ingest,
981 &true_data));
11fdf7f2
TL
982 // File doesn't overwrite any keys, no seqno needed
983 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno);
984
985 ASSERT_OK(GenerateAndAddExternalFile(
986 options, {40, 41, 42},
987 {ValueType::kTypeValue, ValueType::kTypeDeletion,
988 ValueType::kTypeDeletion},
494da23a
TL
989 file_id++, write_global_seqno, verify_checksums_before_ingest,
990 &true_data));
11fdf7f2
TL
991 // File overwrites some keys, a seqno will be assigned
992 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 1);
993
994 ASSERT_OK(GenerateAndAddExternalFile(
995 options, {20, 30, 40},
996 {ValueType::kTypeDeletion, ValueType::kTypeDeletion,
997 ValueType::kTypeDeletion},
494da23a
TL
998 file_id++, write_global_seqno, verify_checksums_before_ingest,
999 &true_data));
11fdf7f2
TL
1000 // File overwrites some keys, a seqno will be assigned
1001 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 2);
1002
1003 const Snapshot* snapshot = db_->GetSnapshot();
1004
1005 // We will need a seqno for the file regardless if the file overwrite
1006 // keys in the DB or not because we have a snapshot
1007 ASSERT_OK(GenerateAndAddExternalFile(
1008 options, {1000, 1002}, {ValueType::kTypeValue, ValueType::kTypeMerge},
494da23a
TL
1009 file_id++, write_global_seqno, verify_checksums_before_ingest,
1010 &true_data));
11fdf7f2
TL
1011 // A global seqno will be assigned anyway because of the snapshot
1012 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 3);
1013
1014 ASSERT_OK(GenerateAndAddExternalFile(
1015 options, {2000, 3002}, {ValueType::kTypeValue, ValueType::kTypeMerge},
494da23a
TL
1016 file_id++, write_global_seqno, verify_checksums_before_ingest,
1017 &true_data));
11fdf7f2
TL
1018 // A global seqno will be assigned anyway because of the snapshot
1019 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 4);
1020
1021 ASSERT_OK(GenerateAndAddExternalFile(
1022 options, {1, 20, 40, 100, 150},
1023 {ValueType::kTypeDeletion, ValueType::kTypeDeletion,
1024 ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeMerge},
494da23a
TL
1025 file_id++, write_global_seqno, verify_checksums_before_ingest,
1026 &true_data));
11fdf7f2
TL
1027 // A global seqno will be assigned anyway because of the snapshot
1028 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5);
1029
1030 db_->ReleaseSnapshot(snapshot);
1031
1032 ASSERT_OK(GenerateAndAddExternalFile(
1033 options, {5000, 5001}, {ValueType::kTypeValue, ValueType::kTypeMerge},
494da23a
TL
1034 file_id++, write_global_seqno, verify_checksums_before_ingest,
1035 &true_data));
7c673cae
FG
1036 // No snapshot anymore, no need to assign a seqno
1037 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5);
1038
1039 size_t kcnt = 0;
1040 VerifyDBFromMap(true_data, &kcnt, false);
494da23a 1041 } while (ChangeOptionsForFileIngestionTest());
7c673cae
FG
1042}
1043
1044TEST_F(ExternalSSTFileBasicTest, FadviseTrigger) {
1045 Options options = CurrentOptions();
1046 const int kNumKeys = 10000;
1047
1048 size_t total_fadvised_bytes = 0;
f67539c2 1049 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
11fdf7f2 1050 "SstFileWriter::Rep::InvalidatePageCache", [&](void* arg) {
7c673cae
FG
1051 size_t fadvise_size = *(reinterpret_cast<size_t*>(arg));
1052 total_fadvised_bytes += fadvise_size;
1053 });
f67539c2 1054 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
7c673cae
FG
1055
1056 std::unique_ptr<SstFileWriter> sst_file_writer;
1057
1058 std::string sst_file_path = sst_files_dir_ + "file_fadvise_disable.sst";
1059 sst_file_writer.reset(
1060 new SstFileWriter(EnvOptions(), options, nullptr, false));
1061 ASSERT_OK(sst_file_writer->Open(sst_file_path));
1062 for (int i = 0; i < kNumKeys; i++) {
11fdf7f2 1063 ASSERT_OK(sst_file_writer->Put(Key(i), Key(i)));
7c673cae
FG
1064 }
1065 ASSERT_OK(sst_file_writer->Finish());
1066 // fadvise disabled
1067 ASSERT_EQ(total_fadvised_bytes, 0);
1068
7c673cae
FG
1069 sst_file_path = sst_files_dir_ + "file_fadvise_enable.sst";
1070 sst_file_writer.reset(
1071 new SstFileWriter(EnvOptions(), options, nullptr, true));
1072 ASSERT_OK(sst_file_writer->Open(sst_file_path));
1073 for (int i = 0; i < kNumKeys; i++) {
11fdf7f2 1074 ASSERT_OK(sst_file_writer->Put(Key(i), Key(i)));
7c673cae
FG
1075 }
1076 ASSERT_OK(sst_file_writer->Finish());
1077 // fadvise enabled
1078 ASSERT_EQ(total_fadvised_bytes, sst_file_writer->FileSize());
1079 ASSERT_GT(total_fadvised_bytes, 0);
1080
f67539c2
TL
1081 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
1082}
1083
1084TEST_F(ExternalSSTFileBasicTest, SyncFailure) {
1085 Options options;
1086 options.create_if_missing = true;
1087 options.env = fault_injection_test_env_.get();
1088
1089 std::vector<std::pair<std::string, std::string>> test_cases = {
1090 {"ExternalSstFileIngestionJob::BeforeSyncIngestedFile",
1091 "ExternalSstFileIngestionJob::AfterSyncIngestedFile"},
1092 {"ExternalSstFileIngestionJob::BeforeSyncDir",
1093 "ExternalSstFileIngestionJob::AfterSyncDir"},
1094 {"ExternalSstFileIngestionJob::BeforeSyncGlobalSeqno",
1095 "ExternalSstFileIngestionJob::AfterSyncGlobalSeqno"}};
1096
1097 for (size_t i = 0; i < test_cases.size(); i++) {
1098 SyncPoint::GetInstance()->SetCallBack(test_cases[i].first, [&](void*) {
1099 fault_injection_test_env_->SetFilesystemActive(false);
1100 });
1101 SyncPoint::GetInstance()->SetCallBack(test_cases[i].second, [&](void*) {
1102 fault_injection_test_env_->SetFilesystemActive(true);
1103 });
1104 SyncPoint::GetInstance()->EnableProcessing();
1105
1106 DestroyAndReopen(options);
1107 if (i == 2) {
1108 ASSERT_OK(Put("foo", "v1"));
1109 }
1110
1111 Options sst_file_writer_options;
20effc67 1112 sst_file_writer_options.env = env_;
f67539c2
TL
1113 std::unique_ptr<SstFileWriter> sst_file_writer(
1114 new SstFileWriter(EnvOptions(), sst_file_writer_options));
1115 std::string file_name =
1116 sst_files_dir_ + "sync_failure_test_" + ToString(i) + ".sst";
1117 ASSERT_OK(sst_file_writer->Open(file_name));
1118 ASSERT_OK(sst_file_writer->Put("bar", "v2"));
1119 ASSERT_OK(sst_file_writer->Finish());
1120
1121 IngestExternalFileOptions ingest_opt;
1122 if (i == 0) {
1123 ingest_opt.move_files = true;
1124 }
1125 const Snapshot* snapshot = db_->GetSnapshot();
1126 if (i == 2) {
1127 ingest_opt.write_global_seqno = true;
1128 }
1129 ASSERT_FALSE(db_->IngestExternalFile({file_name}, ingest_opt).ok());
1130 db_->ReleaseSnapshot(snapshot);
1131
1132 SyncPoint::GetInstance()->DisableProcessing();
1133 SyncPoint::GetInstance()->ClearAllCallBacks();
1134 Destroy(options);
1135 }
1136}
1137
1138TEST_F(ExternalSSTFileBasicTest, VerifyChecksumReadahead) {
1139 Options options;
1140 options.create_if_missing = true;
20effc67 1141 SpecialEnv senv(env_);
f67539c2
TL
1142 options.env = &senv;
1143 DestroyAndReopen(options);
1144
1145 Options sst_file_writer_options;
20effc67 1146 sst_file_writer_options.env = env_;
f67539c2
TL
1147 std::unique_ptr<SstFileWriter> sst_file_writer(
1148 new SstFileWriter(EnvOptions(), sst_file_writer_options));
1149 std::string file_name = sst_files_dir_ + "verify_checksum_readahead_test.sst";
1150 ASSERT_OK(sst_file_writer->Open(file_name));
1151 Random rnd(301);
20effc67 1152 std::string value = rnd.RandomString(4000);
f67539c2
TL
1153 for (int i = 0; i < 5000; i++) {
1154 ASSERT_OK(sst_file_writer->Put(DBTestBase::Key(i), value));
1155 }
1156 ASSERT_OK(sst_file_writer->Finish());
1157
1158 // Ingest it once without verifying checksums to see the baseline
1159 // preads.
1160 IngestExternalFileOptions ingest_opt;
1161 ingest_opt.move_files = false;
1162 senv.count_random_reads_ = true;
1163 senv.random_read_bytes_counter_ = 0;
1164 ASSERT_OK(db_->IngestExternalFile({file_name}, ingest_opt));
1165
1166 auto base_num_reads = senv.random_read_counter_.Read();
1167 // Make sure the counter is enabled.
1168 ASSERT_GT(base_num_reads, 0);
1169
1170 // Ingest again and observe the reads made for for readahead.
1171 ingest_opt.move_files = false;
1172 ingest_opt.verify_checksums_before_ingest = true;
1173 ingest_opt.verify_checksums_readahead_size = size_t{2 * 1024 * 1024};
1174
1175 senv.count_random_reads_ = true;
1176 senv.random_read_bytes_counter_ = 0;
1177 ASSERT_OK(db_->IngestExternalFile({file_name}, ingest_opt));
1178
1179 // Make sure the counter is enabled.
1180 ASSERT_GT(senv.random_read_counter_.Read() - base_num_reads, 0);
1181
1182 // The SST file is about 20MB. Readahead size is 2MB.
1183 // Give a conservative 15 reads for metadata blocks, the number
1184 // of random reads should be within 20 MB / 2MB + 15 = 25.
1185 ASSERT_LE(senv.random_read_counter_.Read() - base_num_reads, 40);
1186
1187 Destroy(options);
7c673cae
FG
1188}
1189
20effc67
TL
1190TEST_F(ExternalSSTFileBasicTest, IngestRangeDeletionTombstoneWithGlobalSeqno) {
1191 for (int i = 5; i < 25; i++) {
1192 ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(i),
1193 Key(i) + "_val"));
1194 }
1195
1196 Options options = CurrentOptions();
1197 options.disable_auto_compactions = true;
1198 Reopen(options);
1199 SstFileWriter sst_file_writer(EnvOptions(), options);
1200
1201 // file.sst (delete 0 => 30)
1202 std::string file = sst_files_dir_ + "file.sst";
1203 ASSERT_OK(sst_file_writer.Open(file));
1204 ASSERT_OK(sst_file_writer.DeleteRange(Key(0), Key(30)));
1205 ExternalSstFileInfo file_info;
1206 ASSERT_OK(sst_file_writer.Finish(&file_info));
1207 ASSERT_EQ(file_info.file_path, file);
1208 ASSERT_EQ(file_info.num_entries, 0);
1209 ASSERT_EQ(file_info.smallest_key, "");
1210 ASSERT_EQ(file_info.largest_key, "");
1211 ASSERT_EQ(file_info.num_range_del_entries, 1);
1212 ASSERT_EQ(file_info.smallest_range_del_key, Key(0));
1213 ASSERT_EQ(file_info.largest_range_del_key, Key(30));
1214
1215 IngestExternalFileOptions ifo;
1216 ifo.move_files = true;
1217 ifo.snapshot_consistency = true;
1218 ifo.allow_global_seqno = true;
1219 ifo.write_global_seqno = true;
1220 ifo.verify_checksums_before_ingest = false;
1221 ASSERT_OK(db_->IngestExternalFile({file}, ifo));
1222
1223 for (int i = 5; i < 25; i++) {
1224 std::string res;
1225 ASSERT_TRUE(db_->Get(ReadOptions(), Key(i), &res).IsNotFound());
1226 }
1227}
1228
494da23a 1229TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) {
11fdf7f2
TL
1230 int kNumLevels = 7;
1231 Options options = CurrentOptions();
1232 options.disable_auto_compactions = true;
1233 options.num_levels = kNumLevels;
1234 Reopen(options);
1235
1236 std::map<std::string, std::string> true_data;
1237 int file_id = 1;
1238 // prevent range deletions from being dropped due to becoming obsolete.
1239 const Snapshot* snapshot = db_->GetSnapshot();
1240
1241 // range del [0, 50) in L6 file, [50, 100) in L0 file, [100, 150) in memtable
1242 for (int i = 0; i < 3; i++) {
1243 if (i != 0) {
1244 db_->Flush(FlushOptions());
1245 if (i == 1) {
1246 MoveFilesToLevel(kNumLevels - 1);
1247 }
1248 }
1249 ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
1250 Key(50 * i), Key(50 * (i + 1))));
1251 }
1252 ASSERT_EQ(1, NumTableFilesAtLevel(0));
1253 ASSERT_EQ(0, NumTableFilesAtLevel(kNumLevels - 2));
1254 ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 1));
1255
494da23a
TL
1256 bool write_global_seqno = std::get<0>(GetParam());
1257 bool verify_checksums_before_ingest = std::get<1>(GetParam());
11fdf7f2
TL
1258 // overlaps with L0 file but not memtable, so flush is skipped and file is
1259 // ingested into L0
1260 SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber();
1261 ASSERT_OK(GenerateAndAddExternalFile(
1262 options, {60, 90}, {ValueType::kTypeValue, ValueType::kTypeValue},
494da23a
TL
1263 {{65, 70}, {70, 85}}, file_id++, write_global_seqno,
1264 verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
1265 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno);
1266 ASSERT_EQ(2, NumTableFilesAtLevel(0));
1267 ASSERT_EQ(0, NumTableFilesAtLevel(kNumLevels - 2));
1268 ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 1));
1269
1270 // overlaps with L6 file but not memtable or L0 file, so flush is skipped and
1271 // file is ingested into L5
1272 ASSERT_OK(GenerateAndAddExternalFile(
1273 options, {10, 40}, {ValueType::kTypeValue, ValueType::kTypeValue},
494da23a
TL
1274 file_id++, write_global_seqno, verify_checksums_before_ingest,
1275 &true_data));
11fdf7f2
TL
1276 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno);
1277 ASSERT_EQ(2, NumTableFilesAtLevel(0));
1278 ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2));
1279 ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 1));
1280
1281 // overlaps with L5 file but not memtable or L0 file, so flush is skipped and
1282 // file is ingested into L4
494da23a
TL
1283 ASSERT_OK(GenerateAndAddExternalFile(
1284 options, {}, {}, {{5, 15}}, file_id++, write_global_seqno,
1285 verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
1286 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno);
1287 ASSERT_EQ(2, NumTableFilesAtLevel(0));
1288 ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2));
1289 ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 2));
1290 ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 1));
1291
1292 // ingested file overlaps with memtable, so flush is triggered before the file
1293 // is ingested such that the ingested data is considered newest. So L0 file
1294 // count increases by two.
1295 ASSERT_OK(GenerateAndAddExternalFile(
1296 options, {100, 140}, {ValueType::kTypeValue, ValueType::kTypeValue},
494da23a
TL
1297 file_id++, write_global_seqno, verify_checksums_before_ingest,
1298 &true_data));
11fdf7f2
TL
1299 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno);
1300 ASSERT_EQ(4, NumTableFilesAtLevel(0));
1301 ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2));
1302 ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 1));
1303
1304 // snapshot unneeded now that all range deletions are persisted
1305 db_->ReleaseSnapshot(snapshot);
1306
1307 // overlaps with nothing, so places at bottom level and skips incrementing
1308 // seqnum.
1309 ASSERT_OK(GenerateAndAddExternalFile(
1310 options, {151, 175}, {ValueType::kTypeValue, ValueType::kTypeValue},
494da23a
TL
1311 {{160, 200}}, file_id++, write_global_seqno,
1312 verify_checksums_before_ingest, &true_data));
11fdf7f2
TL
1313 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno);
1314 ASSERT_EQ(4, NumTableFilesAtLevel(0));
1315 ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2));
1316 ASSERT_EQ(2, NumTableFilesAtLevel(options.num_levels - 1));
1317}
1318
f67539c2
TL
1319TEST_F(ExternalSSTFileBasicTest, AdjacentRangeDeletionTombstones) {
1320 Options options = CurrentOptions();
1321 SstFileWriter sst_file_writer(EnvOptions(), options);
1322
1323 // file8.sst (delete 300 => 400)
1324 std::string file8 = sst_files_dir_ + "file8.sst";
1325 ASSERT_OK(sst_file_writer.Open(file8));
1326 ASSERT_OK(sst_file_writer.DeleteRange(Key(300), Key(400)));
1327 ExternalSstFileInfo file8_info;
1328 Status s = sst_file_writer.Finish(&file8_info);
1329 ASSERT_TRUE(s.ok()) << s.ToString();
1330 ASSERT_EQ(file8_info.file_path, file8);
1331 ASSERT_EQ(file8_info.num_entries, 0);
1332 ASSERT_EQ(file8_info.smallest_key, "");
1333 ASSERT_EQ(file8_info.largest_key, "");
1334 ASSERT_EQ(file8_info.num_range_del_entries, 1);
1335 ASSERT_EQ(file8_info.smallest_range_del_key, Key(300));
1336 ASSERT_EQ(file8_info.largest_range_del_key, Key(400));
1337
1338 // file9.sst (delete 400 => 500)
1339 std::string file9 = sst_files_dir_ + "file9.sst";
1340 ASSERT_OK(sst_file_writer.Open(file9));
1341 ASSERT_OK(sst_file_writer.DeleteRange(Key(400), Key(500)));
1342 ExternalSstFileInfo file9_info;
1343 s = sst_file_writer.Finish(&file9_info);
1344 ASSERT_TRUE(s.ok()) << s.ToString();
1345 ASSERT_EQ(file9_info.file_path, file9);
1346 ASSERT_EQ(file9_info.num_entries, 0);
1347 ASSERT_EQ(file9_info.smallest_key, "");
1348 ASSERT_EQ(file9_info.largest_key, "");
1349 ASSERT_EQ(file9_info.num_range_del_entries, 1);
1350 ASSERT_EQ(file9_info.smallest_range_del_key, Key(400));
1351 ASSERT_EQ(file9_info.largest_range_del_key, Key(500));
1352
1353 // Range deletion tombstones are exclusive on their end key, so these SSTs
1354 // should not be considered as overlapping.
1355 s = DeprecatedAddFile({file8, file9});
1356 ASSERT_TRUE(s.ok()) << s.ToString();
1357 ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
1358 DestroyAndRecreateExternalSSTFilesDir();
1359}
1360
494da23a
TL
1361TEST_P(ExternalSSTFileBasicTest, IngestFileWithBadBlockChecksum) {
1362 bool change_checksum_called = false;
1363 const auto& change_checksum = [&](void* arg) {
1364 if (!change_checksum_called) {
1365 char* buf = reinterpret_cast<char*>(arg);
1366 assert(nullptr != buf);
1367 buf[0] ^= 0x1;
1368 change_checksum_called = true;
1369 }
1370 };
1371 SyncPoint::GetInstance()->DisableProcessing();
1372 SyncPoint::GetInstance()->ClearAllCallBacks();
1373 SyncPoint::GetInstance()->SetCallBack(
1374 "BlockBasedTableBuilder::WriteRawBlock:TamperWithChecksum",
1375 change_checksum);
1376 SyncPoint::GetInstance()->EnableProcessing();
1377 int file_id = 0;
1378 bool write_global_seqno = std::get<0>(GetParam());
1379 bool verify_checksums_before_ingest = std::get<1>(GetParam());
1380 do {
1381 Options options = CurrentOptions();
1382 DestroyAndReopen(options);
1383 std::map<std::string, std::string> true_data;
1384 Status s = GenerateAndAddExternalFile(
1385 options, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue, file_id++,
1386 write_global_seqno, verify_checksums_before_ingest, &true_data);
1387 if (verify_checksums_before_ingest) {
1388 ASSERT_NOK(s);
1389 } else {
1390 ASSERT_OK(s);
1391 }
1392 change_checksum_called = false;
1393 } while (ChangeOptionsForFileIngestionTest());
1394}
1395
1396TEST_P(ExternalSSTFileBasicTest, IngestFileWithFirstByteTampered) {
1397 SyncPoint::GetInstance()->DisableProcessing();
1398 int file_id = 0;
1399 EnvOptions env_options;
1400 do {
1401 Options options = CurrentOptions();
1402 std::string file_path = sst_files_dir_ + ToString(file_id++);
1403 SstFileWriter sst_file_writer(env_options, options);
1404 Status s = sst_file_writer.Open(file_path);
1405 ASSERT_OK(s);
1406 for (int i = 0; i != 100; ++i) {
1407 std::string key = Key(i);
1408 std::string value = Key(i) + ToString(0);
1409 ASSERT_OK(sst_file_writer.Put(key, value));
1410 }
1411 ASSERT_OK(sst_file_writer.Finish());
1412 {
1413 // Get file size
1414 uint64_t file_size = 0;
1415 ASSERT_OK(env_->GetFileSize(file_path, &file_size));
1416 ASSERT_GT(file_size, 8);
1417 std::unique_ptr<RandomRWFile> rwfile;
1418 ASSERT_OK(env_->NewRandomRWFile(file_path, &rwfile, EnvOptions()));
1419 // Manually corrupt the file
1420 // We deterministically corrupt the first byte because we currently
1421 // cannot choose a random offset. The reason for this limitation is that
1422 // we do not checksum property block at present.
1423 const uint64_t offset = 0;
1424 char scratch[8] = {0};
1425 Slice buf;
1426 ASSERT_OK(rwfile->Read(offset, sizeof(scratch), &buf, scratch));
1427 scratch[0] ^= 0xff; // flip one bit
1428 ASSERT_OK(rwfile->Write(offset, buf));
1429 }
1430 // Ingest file.
1431 IngestExternalFileOptions ifo;
1432 ifo.write_global_seqno = std::get<0>(GetParam());
1433 ifo.verify_checksums_before_ingest = std::get<1>(GetParam());
1434 s = db_->IngestExternalFile({file_path}, ifo);
1435 if (ifo.verify_checksums_before_ingest) {
1436 ASSERT_NOK(s);
1437 } else {
1438 ASSERT_OK(s);
1439 }
1440 } while (ChangeOptionsForFileIngestionTest());
1441}
1442
1443TEST_P(ExternalSSTFileBasicTest, IngestExternalFileWithCorruptedPropsBlock) {
1444 bool verify_checksums_before_ingest = std::get<1>(GetParam());
1445 if (!verify_checksums_before_ingest) {
1446 return;
1447 }
1448 uint64_t props_block_offset = 0;
1449 size_t props_block_size = 0;
1450 const auto& get_props_block_offset = [&](void* arg) {
1451 props_block_offset = *reinterpret_cast<uint64_t*>(arg);
1452 };
1453 const auto& get_props_block_size = [&](void* arg) {
1454 props_block_size = *reinterpret_cast<uint64_t*>(arg);
1455 };
1456 SyncPoint::GetInstance()->DisableProcessing();
1457 SyncPoint::GetInstance()->ClearAllCallBacks();
1458 SyncPoint::GetInstance()->SetCallBack(
1459 "BlockBasedTableBuilder::WritePropertiesBlock:GetPropsBlockOffset",
1460 get_props_block_offset);
1461 SyncPoint::GetInstance()->SetCallBack(
1462 "BlockBasedTableBuilder::WritePropertiesBlock:GetPropsBlockSize",
1463 get_props_block_size);
1464 SyncPoint::GetInstance()->EnableProcessing();
1465 int file_id = 0;
1466 Random64 rand(time(nullptr));
1467 do {
1468 std::string file_path = sst_files_dir_ + ToString(file_id++);
1469 Options options = CurrentOptions();
1470 SstFileWriter sst_file_writer(EnvOptions(), options);
1471 Status s = sst_file_writer.Open(file_path);
1472 ASSERT_OK(s);
1473 for (int i = 0; i != 100; ++i) {
1474 std::string key = Key(i);
1475 std::string value = Key(i) + ToString(0);
1476 ASSERT_OK(sst_file_writer.Put(key, value));
1477 }
1478 ASSERT_OK(sst_file_writer.Finish());
1479
1480 {
1481 std::unique_ptr<RandomRWFile> rwfile;
1482 ASSERT_OK(env_->NewRandomRWFile(file_path, &rwfile, EnvOptions()));
1483 // Manually corrupt the file
1484 ASSERT_GT(props_block_size, 8);
1485 uint64_t offset =
1486 props_block_offset + rand.Next() % (props_block_size - 8);
1487 char scratch[8] = {0};
1488 Slice buf;
1489 ASSERT_OK(rwfile->Read(offset, sizeof(scratch), &buf, scratch));
1490 scratch[0] ^= 0xff; // flip one bit
1491 ASSERT_OK(rwfile->Write(offset, buf));
1492 }
1493
1494 // Ingest file.
1495 IngestExternalFileOptions ifo;
1496 ifo.write_global_seqno = std::get<0>(GetParam());
1497 ifo.verify_checksums_before_ingest = true;
1498 s = db_->IngestExternalFile({file_path}, ifo);
1499 ASSERT_NOK(s);
1500 } while (ChangeOptionsForFileIngestionTest());
1501}
1502
f67539c2
TL
1503TEST_F(ExternalSSTFileBasicTest, OverlappingFiles) {
1504 Options options = CurrentOptions();
1505
1506 std::vector<std::string> files;
1507 {
1508 SstFileWriter sst_file_writer(EnvOptions(), options);
1509 std::string file1 = sst_files_dir_ + "file1.sst";
1510 ASSERT_OK(sst_file_writer.Open(file1));
1511 ASSERT_OK(sst_file_writer.Put("a", "z"));
1512 ASSERT_OK(sst_file_writer.Put("i", "m"));
1513 ExternalSstFileInfo file1_info;
1514 ASSERT_OK(sst_file_writer.Finish(&file1_info));
1515 files.push_back(std::move(file1));
1516 }
1517 {
1518 SstFileWriter sst_file_writer(EnvOptions(), options);
1519 std::string file2 = sst_files_dir_ + "file2.sst";
1520 ASSERT_OK(sst_file_writer.Open(file2));
1521 ASSERT_OK(sst_file_writer.Put("i", "k"));
1522 ExternalSstFileInfo file2_info;
1523 ASSERT_OK(sst_file_writer.Finish(&file2_info));
1524 files.push_back(std::move(file2));
1525 }
1526
1527 IngestExternalFileOptions ifo;
1528 ASSERT_OK(db_->IngestExternalFile(files, ifo));
1529 ASSERT_EQ(Get("a"), "z");
1530 ASSERT_EQ(Get("i"), "k");
1531
1532 int total_keys = 0;
1533 Iterator* iter = db_->NewIterator(ReadOptions());
1534 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1535 ASSERT_OK(iter->status());
1536 total_keys++;
1537 }
1538 delete iter;
1539 ASSERT_EQ(total_keys, 2);
1540
1541 ASSERT_EQ(2, NumTableFilesAtLevel(0));
1542}
1543
494da23a
TL
// Instantiates the value-parameterized ExternalSSTFileBasicTest cases with all
// four combinations of the (bool, bool) parameter tuple.
// NOTE(review): element 0 is read as IngestExternalFileOptions::write_global_seqno
// by the test above; element 1 is presumably verify_checksums_before_ingest --
// confirm against the TEST_P bodies.
1544INSTANTIATE_TEST_CASE_P(ExternalSSTFileBasicTest, ExternalSSTFileBasicTest,
1545 testing::Values(std::make_tuple(true, true),
1546 std::make_tuple(true, false),
1547 std::make_tuple(false, true),
1548 std::make_tuple(false, false)));
1549
7c673cae
FG
1550#endif // ROCKSDB_LITE
1551
f67539c2 1552} // namespace ROCKSDB_NAMESPACE
7c673cae
FG
1553
1554int main(int argc, char** argv) {
f67539c2 1555 ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
7c673cae
FG
1556 ::testing::InitGoogleTest(&argc, argv);
1557 return RUN_ALL_TESTS();
1558}