1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
8 #include "db/db_test_util.h"
9 #include "db/version_edit.h"
10 #include "port/port.h"
11 #include "port/stack_trace.h"
12 #include "rocksdb/sst_file_writer.h"
13 #include "test_util/testharness.h"
14 #include "test_util/testutil.h"
15 #include "util/random.h"
16 #include "utilities/fault_injection_env.h"
18 namespace ROCKSDB_NAMESPACE
{
// Test fixture for the external SST file ingestion tests in this file.
// It is parameterized over a std::tuple<bool, bool>; the TEST_P cases below
// read the two values as (write_global_seqno, verify_checksums_before_ingest).
// NOTE(review): this listing looks like a lossy extraction -- the embedded
// original line numbers skip (e.g. 21 -> 23), so some lines (closing braces,
// access specifiers, a few statements) are not visible here.
21 class ExternalSSTFileBasicTest
23 public ::testing::WithParamInterface
<std::tuple
<bool, bool>> {
// Constructor: derives sst_files_dir_ from dbname_, wraps env_ in a
// FaultInjectionTestEnv, recreates the SST directory, and then probes whether
// env_ supports RandomRWFile, storing the answer in random_rwfile_supported_.
25 ExternalSSTFileBasicTest()
26 : DBTestBase("external_sst_file_basic_test", /*env_do_fsync=*/true) {
27 sst_files_dir_
= dbname_
+ "_sst_files/";
28 fault_injection_test_env_
.reset(new FaultInjectionTestEnv(env_
));
29 DestroyAndRecreateExternalSSTFilesDir();
31 // Check if the Env supports RandomRWFile
32 std::string file_path
= sst_files_dir_
+ "test_random_rw_file";
33 std::unique_ptr
<WritableFile
> wfile
;
// NOTE(review): NewWritableFile() is called inside assert(). When NDEBUG is
// defined the standard assert macro discards its argument, so in such a build
// this line would not create the probe file.
34 assert(env_
->NewWritableFile(file_path
, &wfile
, EnvOptions()).ok());
36 std::unique_ptr
<RandomRWFile
> rwfile
;
37 Status s
= env_
->NewRandomRWFile(file_path
, &rwfile
, EnvOptions());
// A NotSupported status marks RandomRWFile as unavailable; the other visible
// assignment marks it as available.
38 if (s
.IsNotSupported()) {
39 random_rwfile_supported_
= false;
42 random_rwfile_supported_
= true;
// Remove the probe file again.
45 EXPECT_OK(env_
->DeleteFile(file_path
));
// Removes sst_files_dir_ with DestroyDir() and creates it again; both steps
// are asserted to succeed.
48 void DestroyAndRecreateExternalSSTFilesDir() {
49 ASSERT_OK(DestroyDir(env_
, sst_files_dir_
));
50 ASSERT_OK(env_
->CreateDir(sst_files_dir_
));
// Ingests `files` through db_->IngestExternalFile() with global seqno and
// blocking flush both disallowed.
//   files               - paths of the SST files to ingest.
//   move_files          - forwarded to IngestExternalFileOptions::move_files.
//   skip_snapshot_check - inverted into snapshot_consistency.
// Returns the Status of the ingestion call.
53 Status
DeprecatedAddFile(const std::vector
<std::string
>& files
,
54 bool move_files
= false,
55 bool skip_snapshot_check
= false) {
56 IngestExternalFileOptions opts
;
57 opts
.move_files
= move_files
;
58 opts
.snapshot_consistency
= !skip_snapshot_check
;
59 opts
.allow_global_seqno
= false;
60 opts
.allow_blocking_flush
= false;
61 return db_
->IngestExternalFile(files
, opts
);
// Ingests `files` into the default column family via
// db_->IngestExternalFiles(), passing along caller-supplied checksums and
// checksum function names.
//   files                     - paths of the SST files to ingest.
//   files_checksums           - checksum values handed to the ingestion arg.
//   files_checksum_func_names - checksum function names handed to the arg.
//   verify_file_checksum      - copied into opts.verify_file_checksum.
//   move_files                - copied into opts.move_files.
//   skip_snapshot_check       - inverted into opts.snapshot_consistency.
//   write_global_seqno        - copied into opts.write_global_seqno.
// Returns the Status of the ingestion call.
64 Status
AddFileWithFileChecksum(
65 const std::vector
<std::string
>& files
,
66 const std::vector
<std::string
>& files_checksums
,
67 const std::vector
<std::string
>& files_checksum_func_names
,
68 bool verify_file_checksum
= true, bool move_files
= false,
69 bool skip_snapshot_check
= false, bool write_global_seqno
= true) {
70 IngestExternalFileOptions opts
;
71 opts
.move_files
= move_files
;
72 opts
.snapshot_consistency
= !skip_snapshot_check
;
73 opts
.allow_global_seqno
= false;
74 opts
.allow_blocking_flush
= false;
75 opts
.write_global_seqno
= write_global_seqno
;
76 opts
.verify_file_checksum
= verify_file_checksum
;
78 IngestExternalFileArg arg
;
79 arg
.column_family
= db_
->DefaultColumnFamily();
80 arg
.external_files
= files
;
// NOTE(review): no statement copying `opts` into `arg` is visible in this
// listing (original line 81 is absent) -- confirm against the full file.
82 arg
.files_checksums
= files_checksums
;
83 arg
.files_checksum_func_names
= files_checksum_func_names
;
84 return db_
->IngestExternalFiles({arg
});
// Writes one SST file at sst_files_dir_ + std::to_string(file_id) containing
// the requested range deletions and point operations, mirrors their effect in
// *true_data, and ingests the file with allow_global_seqno = true.
//   options         - options used to construct the SstFileWriter.
//   keys            - integer keys, converted with Key().
//   value_types     - either one entry (applied to every key) or exactly one
//                     entry per key (see the assert below).
//   range_deletions - (start, end) integer pairs passed to DeleteRange().
//   file_id         - used for the file name and appended to each value.
//   write_global_seqno / verify_checksums_before_ingest
//                   - copied into the IngestExternalFileOptions.
//   true_data       - expected DB contents, updated in place.
// NOTE(review): taking `options`, `keys` and `range_deletions` by value copies
// them on every call.
87 Status
GenerateAndAddExternalFile(
88 const Options options
, std::vector
<int> keys
,
89 const std::vector
<ValueType
>& value_types
,
90 std::vector
<std::pair
<int, int>> range_deletions
, int file_id
,
91 bool write_global_seqno
, bool verify_checksums_before_ingest
,
92 std::map
<std::string
, std::string
>* true_data
) {
93 assert(value_types
.size() == 1 || keys
.size() == value_types
.size());
94 std::string file_path
= sst_files_dir_
+ std::to_string(file_id
);
95 SstFileWriter
sst_file_writer(EnvOptions(), options
);
97 Status s
= sst_file_writer
.Open(file_path
);
101 for (size_t i
= 0; i
< range_deletions
.size(); i
++) {
102 // Account for the effect of range deletions on true_data before
103 // all point operators, even though sst_file_writer.DeleteRange
104 // must be called before other sst_file_writer methods. This is
105 // because point writes take precedence over range deletions
106 // in the same ingested sst.
107 std::string start_key
= Key(range_deletions
[i
].first
);
108 std::string end_key
= Key(range_deletions
[i
].second
);
109 s
= sst_file_writer
.DeleteRange(start_key
, end_key
);
// NOTE(review): the Status returned by this Finish() call is not inspected.
111 sst_file_writer
.Finish();
// Locate the half-open span [start_key, end_key) in true_data. When a bound
// is not an existing key, upper_bound() yields the first key after it.
114 auto start_key_it
= true_data
->find(start_key
);
115 if (start_key_it
== true_data
->end()) {
116 start_key_it
= true_data
->upper_bound(start_key
);
118 auto end_key_it
= true_data
->find(end_key
);
119 if (end_key_it
== true_data
->end()) {
120 end_key_it
= true_data
->upper_bound(end_key
);
// std::map::erase(first, last) leaves `last` in place, so end_key survives.
122 true_data
->erase(start_key_it
, end_key_it
);
// Point operations: the value written for each key is Key(k) followed by the
// file id.
124 for (size_t i
= 0; i
< keys
.size(); i
++) {
125 std::string key
= Key(keys
[i
]);
126 std::string value
= Key(keys
[i
]) + std::to_string(file_id
);
// A single-entry value_types applies to every key.
127 ValueType value_type
=
128 (value_types
.size() == 1 ? value_types
[0] : value_types
[i
]);
129 switch (value_type
) {
130 case ValueType::kTypeValue
:
131 s
= sst_file_writer
.Put(key
, value
);
132 (*true_data
)[key
] = value
;
134 case ValueType::kTypeMerge
:
135 s
= sst_file_writer
.Merge(key
, value
);
136 // we only use TestPutOperator in this test
137 (*true_data
)[key
] = value
;
139 case ValueType::kTypeDeletion
:
140 s
= sst_file_writer
.Delete(key
);
141 true_data
->erase(key
);
// Presumably the fallback for any other value type -- the enclosing label is
// not visible in this listing.
144 return Status::InvalidArgument("Value type is not supported");
// NOTE(review): the Status returned by this Finish() call is not inspected.
147 sst_file_writer
.Finish();
151 s
= sst_file_writer
.Finish();
// Ingest the finished file; global sequence numbers are allowed here.
154 IngestExternalFileOptions ifo
;
155 ifo
.allow_global_seqno
= true;
156 ifo
.write_global_seqno
= write_global_seqno
;
157 ifo
.verify_checksums_before_ingest
= verify_checksums_before_ingest
;
158 s
= db_
->IngestExternalFile({file_path
}, ifo
);
// Convenience overload without range deletions: forwards to the full version
// with an empty range_deletions list.
163 Status
GenerateAndAddExternalFile(
164 const Options options
, std::vector
<int> keys
,
165 const std::vector
<ValueType
>& value_types
, int file_id
,
166 bool write_global_seqno
, bool verify_checksums_before_ingest
,
167 std::map
<std::string
, std::string
>* true_data
) {
168 return GenerateAndAddExternalFile(
169 options
, keys
, value_types
, {}, file_id
, write_global_seqno
,
170 verify_checksums_before_ingest
, true_data
);
// Convenience overload taking one value type for all keys: wraps value_type
// in a one-element vector and forwards.
173 Status
GenerateAndAddExternalFile(
174 const Options options
, std::vector
<int> keys
, const ValueType value_type
,
175 int file_id
, bool write_global_seqno
, bool verify_checksums_before_ingest
,
176 std::map
<std::string
, std::string
>* true_data
) {
177 return GenerateAndAddExternalFile(
178 options
, keys
, std::vector
<ValueType
>(1, value_type
), file_id
,
179 write_global_seqno
, verify_checksums_before_ingest
, true_data
);
// Destructor: removes the SST directory; the resulting Status is explicitly
// marked as permitted to go unchecked.
182 ~ExternalSSTFileBasicTest() override
{
183 DestroyDir(env_
, sst_files_dir_
).PermitUncheckedError();
// Directory that holds the generated external SST files (dbname_ +
// "_sst_files/").
187 std::string sst_files_dir_
;
// FaultInjectionTestEnv wrapping env_, created in the constructor.
188 std::unique_ptr
<FaultInjectionTestEnv
> fault_injection_test_env_
;
// Result of the RandomRWFile support probe done in the constructor.
189 bool random_rwfile_supported_
;
// Writes keys 0..99 into file1.sst with SstFileWriter, checks the reported
// ExternalSstFileInfo, verifies that writes after Finish() are rejected, then
// ingests the file into a freshly recreated DB and reads every key back.
192 TEST_F(ExternalSSTFileBasicTest
, Basic
) {
193 Options options
= CurrentOptions();
195 SstFileWriter
sst_file_writer(EnvOptions(), options
);
197 // Current file size should be 0 after sst_file_writer init and before open a
199 ASSERT_EQ(sst_file_writer
.FileSize(), 0);
201 // file1.sst (0 => 99)
202 std::string file1
= sst_files_dir_
+ "file1.sst";
203 ASSERT_OK(sst_file_writer
.Open(file1
));
204 for (int k
= 0; k
< 100; k
++) {
205 ASSERT_OK(sst_file_writer
.Put(Key(k
), Key(k
) + "_val"));
207 ExternalSstFileInfo file1_info
;
208 Status s
= sst_file_writer
.Finish(&file1_info
);
209 ASSERT_OK(s
) << s
.ToString();
211 // Current file size should be non-zero after a successful write.
212 ASSERT_GT(sst_file_writer
.FileSize(), 0);
// The file info must describe exactly the 100 point entries written above and
// no range deletions.
214 ASSERT_EQ(file1_info
.file_path
, file1
);
215 ASSERT_EQ(file1_info
.num_entries
, 100);
216 ASSERT_EQ(file1_info
.smallest_key
, Key(0));
217 ASSERT_EQ(file1_info
.largest_key
, Key(99));
218 ASSERT_EQ(file1_info
.num_range_del_entries
, 0);
219 ASSERT_EQ(file1_info
.smallest_range_del_key
, "");
220 ASSERT_EQ(file1_info
.largest_range_del_key
, "");
// This test does not set a file checksum factory on `options` itself, and the
// checksum fields are expected to hold the "unknown" constants.
221 ASSERT_EQ(file1_info
.file_checksum
, kUnknownFileChecksum
);
222 ASSERT_EQ(file1_info
.file_checksum_func_name
, kUnknownFileChecksumFuncName
);
223 // sst_file_writer already finished, cannot add this value
224 s
= sst_file_writer
.Put(Key(100), "bad_val");
225 ASSERT_NOK(s
) << s
.ToString();
226 s
= sst_file_writer
.DeleteRange(Key(100), Key(200));
227 ASSERT_NOK(s
) << s
.ToString();
229 DestroyAndReopen(options
);
230 // Add file using file path
231 s
= DeprecatedAddFile({file1
});
232 ASSERT_OK(s
) << s
.ToString();
// After this ingestion the latest sequence number is expected to still be 0.
233 ASSERT_EQ(db_
->GetLatestSequenceNumber(), 0U);
234 for (int k
= 0; k
< 100; k
++) {
235 ASSERT_EQ(Get(Key(k
)), Key(k
) + "_val");
238 DestroyAndRecreateExternalSSTFilesDir();
// Helper that recomputes a file's checksum using the checksum generator
// factory configured in the Options it was constructed with. The tests below
// compare its output against the checksum reported by SstFileWriter and by
// the DB's live file metadata.
241 class ChecksumVerifyHelper
{
// NOTE(review): single-argument constructor that is not marked explicit.
246 ChecksumVerifyHelper(Options
& options
) : options_(options
) {}
247 ~ChecksumVerifyHelper() {}
// Reads `file_path` through options_.env in 2048-byte chunks and feeds each
// chunk to a FileChecksumGenerator.
//   file_path               - file to checksum.
//   file_checksum           - out: resulting checksum, or kUnknownFileChecksum
//                             when no factory is configured.
//   file_checksum_func_name - out: generator name, or
//                             kUnknownFileChecksumFuncName when no factory is
//                             configured.
// NOTE(review): the declarations of `s`, `soptions` and `result`, and the
// return statements, are not visible in this listing.
249 Status
GetSingleFileChecksumAndFuncName(
250 const std::string
& file_path
, std::string
* file_checksum
,
251 std::string
* file_checksum_func_name
) {
254 std::unique_ptr
<SequentialFile
> file_reader
;
255 s
= options_
.env
->NewSequentialFile(file_path
, &file_reader
, soptions
);
// Scratch buffer handed to every Read() call; its size matches the 2048 bytes
// requested per read.
259 std::unique_ptr
<char[]> scratch(new char[2048]);
261 FileChecksumGenFactory
* file_checksum_gen_factory
=
262 options_
.file_checksum_gen_factory
.get();
// Without a factory there is nothing to compute: report the "unknown"
// constants.
263 if (file_checksum_gen_factory
== nullptr) {
264 *file_checksum
= kUnknownFileChecksum
;
265 *file_checksum_func_name
= kUnknownFileChecksumFuncName
;
268 FileChecksumGenContext gen_context
;
269 std::unique_ptr
<FileChecksumGenerator
> file_checksum_gen
=
270 file_checksum_gen_factory
->CreateFileChecksumGenerator(gen_context
);
271 *file_checksum_func_name
= file_checksum_gen
->Name();
272 s
= file_reader
->Read(2048, &result
, scratch
.get());
// Keep reading until a read yields no bytes.
// NOTE(review): Update() is fed scratch.get() rather than result.data(); this
// assumes Read() placed the returned bytes at the start of scratch -- confirm
// for the Env in use.
276 while (result
.size() != 0) {
277 file_checksum_gen
->Update(scratch
.get(), result
.size());
278 s
= file_reader
->Read(2048, &result
, scratch
.get());
// Finalize() is called before the checksum is fetched with GetChecksum().
283 file_checksum_gen
->Finalize();
284 *file_checksum
= file_checksum_gen
->GetChecksum();
// Same flow as the Basic test, but with the crc32c file checksum factory set
// on the options: the checksum and function name reported in
// ExternalSstFileInfo must equal the values recomputed by
// ChecksumVerifyHelper from the file on disk.
290 TEST_F(ExternalSSTFileBasicTest
, BasicWithFileChecksumCrc32c
) {
291 Options options
= CurrentOptions();
292 options
.file_checksum_gen_factory
= GetFileChecksumGenCrc32cFactory();
293 ChecksumVerifyHelper
checksum_helper(options
);
295 SstFileWriter
sst_file_writer(EnvOptions(), options
);
297 // Current file size should be 0 after sst_file_writer init and before open a
299 ASSERT_EQ(sst_file_writer
.FileSize(), 0);
301 // file1.sst (0 => 99)
302 std::string file1
= sst_files_dir_
+ "file1.sst";
303 ASSERT_OK(sst_file_writer
.Open(file1
));
304 for (int k
= 0; k
< 100; k
++) {
305 ASSERT_OK(sst_file_writer
.Put(Key(k
), Key(k
) + "_val"));
307 ExternalSstFileInfo file1_info
;
308 Status s
= sst_file_writer
.Finish(&file1_info
);
309 ASSERT_OK(s
) << s
.ToString();
// Recompute the checksum independently from the finished file.
310 std::string file_checksum
, file_checksum_func_name
;
311 ASSERT_OK(checksum_helper
.GetSingleFileChecksumAndFuncName(
312 file1
, &file_checksum
, &file_checksum_func_name
));
314 // Current file size should be non-zero after a successful write.
315 ASSERT_GT(sst_file_writer
.FileSize(), 0);
317 ASSERT_EQ(file1_info
.file_path
, file1
);
318 ASSERT_EQ(file1_info
.num_entries
, 100);
319 ASSERT_EQ(file1_info
.smallest_key
, Key(0));
320 ASSERT_EQ(file1_info
.largest_key
, Key(99));
321 ASSERT_EQ(file1_info
.num_range_del_entries
, 0);
322 ASSERT_EQ(file1_info
.smallest_range_del_key
, "");
323 ASSERT_EQ(file1_info
.largest_range_del_key
, "");
// The writer-reported checksum must match the recomputed one.
324 ASSERT_EQ(file1_info
.file_checksum
, file_checksum
);
325 ASSERT_EQ(file1_info
.file_checksum_func_name
, file_checksum_func_name
);
326 // sst_file_writer already finished, cannot add this value
327 s
= sst_file_writer
.Put(Key(100), "bad_val");
328 ASSERT_NOK(s
) << s
.ToString();
329 s
= sst_file_writer
.DeleteRange(Key(100), Key(200));
330 ASSERT_NOK(s
) << s
.ToString();
332 DestroyAndReopen(options
);
333 // Add file using file path
334 s
= DeprecatedAddFile({file1
});
335 ASSERT_OK(s
) << s
.ToString();
// After this ingestion the latest sequence number is expected to still be 0.
336 ASSERT_EQ(db_
->GetLatestSequenceNumber(), 0U);
337 for (int k
= 0; k
< 100; k
++) {
338 ASSERT_EQ(Get(Key(k
)), Key(k
) + "_val");
341 DestroyAndRecreateExternalSSTFilesDir();
// Generates six SST files (file01..file06) with a crc32c-checksum-enabled
// SstFileWriter, recomputing each file's checksum with ChecksumVerifyHelper,
// and then calls AddFileWithFileChecksum() with matching, mismatching and
// empty checksum inputs, checking the live-file metadata after the
// ingestions that are expected to succeed.
// NOTE(review): several lines of this test are not visible in this listing
// (the embedded line numbers skip, e.g. 500 -> 502), including the trailing
// arguments of some AddFileWithFileChecksum() calls and the statements
// between the "Reopen Db with checksum enabled" comment and the next call.
344 TEST_F(ExternalSSTFileBasicTest
, IngestFileWithFileChecksum
) {
345 Options old_options
= CurrentOptions();
346 Options options
= CurrentOptions();
347 options
.file_checksum_gen_factory
= GetFileChecksumGenCrc32cFactory();
348 const ImmutableCFOptions
ioptions(options
);
349 ChecksumVerifyHelper
checksum_helper(options
);
351 SstFileWriter
sst_file_writer(EnvOptions(), options
);
353 // file01.sst (1000 => 1099)
354 std::string file1
= sst_files_dir_
+ "file01.sst";
355 ASSERT_OK(sst_file_writer
.Open(file1
));
356 for (int k
= 1000; k
< 1100; k
++) {
357 ASSERT_OK(sst_file_writer
.Put(Key(k
), Key(k
) + "_val"));
359 ExternalSstFileInfo file1_info
;
360 Status s
= sst_file_writer
.Finish(&file1_info
);
361 ASSERT_OK(s
) << s
.ToString();
362 ASSERT_EQ(file1_info
.file_path
, file1
);
363 ASSERT_EQ(file1_info
.num_entries
, 100);
364 ASSERT_EQ(file1_info
.smallest_key
, Key(1000));
365 ASSERT_EQ(file1_info
.largest_key
, Key(1099));
// For each file, the writer-reported checksum must match the value recomputed
// from the file on disk.
366 std::string file_checksum1
, file_checksum_func_name1
;
367 ASSERT_OK(checksum_helper
.GetSingleFileChecksumAndFuncName(
368 file1
, &file_checksum1
, &file_checksum_func_name1
));
369 ASSERT_EQ(file1_info
.file_checksum
, file_checksum1
);
370 ASSERT_EQ(file1_info
.file_checksum_func_name
, file_checksum_func_name1
);
372 // file02.sst (1100 => 1299)
373 std::string file2
= sst_files_dir_
+ "file02.sst";
374 ASSERT_OK(sst_file_writer
.Open(file2
));
375 for (int k
= 1100; k
< 1300; k
++) {
376 ASSERT_OK(sst_file_writer
.Put(Key(k
), Key(k
) + "_val"));
378 ExternalSstFileInfo file2_info
;
379 s
= sst_file_writer
.Finish(&file2_info
);
380 ASSERT_OK(s
) << s
.ToString();
381 ASSERT_EQ(file2_info
.file_path
, file2
);
382 ASSERT_EQ(file2_info
.num_entries
, 200);
383 ASSERT_EQ(file2_info
.smallest_key
, Key(1100));
384 ASSERT_EQ(file2_info
.largest_key
, Key(1299));
385 std::string file_checksum2
, file_checksum_func_name2
;
386 ASSERT_OK(checksum_helper
.GetSingleFileChecksumAndFuncName(
387 file2
, &file_checksum2
, &file_checksum_func_name2
));
388 ASSERT_EQ(file2_info
.file_checksum
, file_checksum2
);
389 ASSERT_EQ(file2_info
.file_checksum_func_name
, file_checksum_func_name2
);
391 // file03.sst (1300 => 1499)
392 std::string file3
= sst_files_dir_
+ "file03.sst";
393 ASSERT_OK(sst_file_writer
.Open(file3
));
394 for (int k
= 1300; k
< 1500; k
++) {
395 ASSERT_OK(sst_file_writer
.Put(Key(k
), Key(k
) + "_val_overlap"));
397 ExternalSstFileInfo file3_info
;
398 s
= sst_file_writer
.Finish(&file3_info
);
399 ASSERT_OK(s
) << s
.ToString();
400 ASSERT_EQ(file3_info
.file_path
, file3
);
401 ASSERT_EQ(file3_info
.num_entries
, 200);
402 ASSERT_EQ(file3_info
.smallest_key
, Key(1300));
403 ASSERT_EQ(file3_info
.largest_key
, Key(1499));
404 std::string file_checksum3
, file_checksum_func_name3
;
405 ASSERT_OK(checksum_helper
.GetSingleFileChecksumAndFuncName(
406 file3
, &file_checksum3
, &file_checksum_func_name3
));
407 ASSERT_EQ(file3_info
.file_checksum
, file_checksum3
);
408 ASSERT_EQ(file3_info
.file_checksum_func_name
, file_checksum_func_name3
);
410 // file04.sst (1500 => 1799)
411 std::string file4
= sst_files_dir_
+ "file04.sst";
412 ASSERT_OK(sst_file_writer
.Open(file4
));
413 for (int k
= 1500; k
< 1800; k
++) {
414 ASSERT_OK(sst_file_writer
.Put(Key(k
), Key(k
) + "_val_overlap"));
416 ExternalSstFileInfo file4_info
;
417 s
= sst_file_writer
.Finish(&file4_info
);
418 ASSERT_OK(s
) << s
.ToString();
419 ASSERT_EQ(file4_info
.file_path
, file4
);
420 ASSERT_EQ(file4_info
.num_entries
, 300);
421 ASSERT_EQ(file4_info
.smallest_key
, Key(1500));
422 ASSERT_EQ(file4_info
.largest_key
, Key(1799));
423 std::string file_checksum4
, file_checksum_func_name4
;
424 ASSERT_OK(checksum_helper
.GetSingleFileChecksumAndFuncName(
425 file4
, &file_checksum4
, &file_checksum_func_name4
));
426 ASSERT_EQ(file4_info
.file_checksum
, file_checksum4
);
427 ASSERT_EQ(file4_info
.file_checksum_func_name
, file_checksum_func_name4
);
429 // file05.sst (1800 => 1999)
430 std::string file5
= sst_files_dir_
+ "file05.sst";
431 ASSERT_OK(sst_file_writer
.Open(file5
));
432 for (int k
= 1800; k
< 2000; k
++) {
433 ASSERT_OK(sst_file_writer
.Put(Key(k
), Key(k
) + "_val_overlap"));
435 ExternalSstFileInfo file5_info
;
436 s
= sst_file_writer
.Finish(&file5_info
);
437 ASSERT_OK(s
) << s
.ToString();
438 ASSERT_EQ(file5_info
.file_path
, file5
);
439 ASSERT_EQ(file5_info
.num_entries
, 200);
440 ASSERT_EQ(file5_info
.smallest_key
, Key(1800));
441 ASSERT_EQ(file5_info
.largest_key
, Key(1999));
442 std::string file_checksum5
, file_checksum_func_name5
;
443 ASSERT_OK(checksum_helper
.GetSingleFileChecksumAndFuncName(
444 file5
, &file_checksum5
, &file_checksum_func_name5
));
445 ASSERT_EQ(file5_info
.file_checksum
, file_checksum5
);
446 ASSERT_EQ(file5_info
.file_checksum_func_name
, file_checksum_func_name5
);
448 // file06.sst (2000 => 2199)
449 std::string file6
= sst_files_dir_
+ "file06.sst";
450 ASSERT_OK(sst_file_writer
.Open(file6
));
451 for (int k
= 2000; k
< 2200; k
++) {
452 ASSERT_OK(sst_file_writer
.Put(Key(k
), Key(k
) + "_val_overlap"));
454 ExternalSstFileInfo file6_info
;
455 s
= sst_file_writer
.Finish(&file6_info
);
456 ASSERT_OK(s
) << s
.ToString();
457 ASSERT_EQ(file6_info
.file_path
, file6
);
458 ASSERT_EQ(file6_info
.num_entries
, 200);
459 ASSERT_EQ(file6_info
.smallest_key
, Key(2000));
460 ASSERT_EQ(file6_info
.largest_key
, Key(2199));
461 std::string file_checksum6
, file_checksum_func_name6
;
462 ASSERT_OK(checksum_helper
.GetSingleFileChecksumAndFuncName(
463 file6
, &file_checksum6
, &file_checksum_func_name6
));
464 ASSERT_EQ(file6_info
.file_checksum
, file_checksum6
);
465 ASSERT_EQ(file6_info
.file_checksum_func_name
, file_checksum_func_name6
);
// First ingestion: the checksum list has two entries for one file, and the
// function-name argument is given a checksum value; the ingestion is still
// expected to succeed (see the comment below the call).
467 s
= AddFileWithFileChecksum({file1
}, {file_checksum1
, "xyz"},
468 {file_checksum1
}, true, false, false, false);
469 // The checksum input is not examined since the DB does not enable file checksum
470 ASSERT_OK(s
) << s
.ToString();
// move_files was false, so the source file must still exist.
471 ASSERT_OK(env_
->FileExists(file1
));
472 std::vector
<LiveFileMetaData
> live_files
;
473 dbfull()->GetLiveFilesMetaData(&live_files
);
474 std::set
<std::string
> set1
;
// NOTE(review): `auto f` copies each LiveFileMetaData per iteration here and
// in the similar loops further down.
475 for (auto f
: live_files
) {
477 ASSERT_EQ(f
.file_checksum
, kUnknownFileChecksum
);
478 ASSERT_EQ(f
.file_checksum_func_name
, kUnknownFileChecksumFuncName
);
481 // check the temperature of the file being ingested
482 ColumnFamilyMetaData metadata
;
483 db_
->GetColumnFamilyMetaData(&metadata
);
484 ASSERT_EQ(1, metadata
.file_count
);
// The single ingested file is looked up at level index 6.
485 ASSERT_EQ(Temperature::kUnknown
, metadata
.levels
[6].files
[0].temperature
);
// NOTE(review): the assertions on `size` after each GetSstSizeHelper() call
// are not visible in this listing.
486 auto size
= GetSstSizeHelper(Temperature::kUnknown
);
488 size
= GetSstSizeHelper(Temperature::kWarm
);
490 size
= GetSstSizeHelper(Temperature::kHot
);
492 size
= GetSstSizeHelper(Temperature::kCold
);
495 // Reopen Db with checksum enabled
497 // Enable verify_file_checksum option
498 // The checksum vector does not match, fail the ingestion
499 s
= AddFileWithFileChecksum({file2
}, {file_checksum2
, "xyz"},
500 {file_checksum_func_name2
}, true, false, false,
502 ASSERT_NOK(s
) << s
.ToString();
504 // Enable verify_file_checksum option
505 // The checksum name does not match, fail the ingestion
506 s
= AddFileWithFileChecksum({file2
}, {file_checksum2
}, {"xyz"}, true, false,
508 ASSERT_NOK(s
) << s
.ToString();
510 // Enable verify_file_checksum option
511 // The checksum itself does not match, fail the ingestion
512 s
= AddFileWithFileChecksum({file2
}, {"xyz"}, {file_checksum_func_name2
},
513 true, false, false, false);
514 ASSERT_NOK(s
) << s
.ToString();
516 // Enable verify_file_checksum option
517 // All matches, ingestion is successful
518 s
= AddFileWithFileChecksum({file2
}, {file_checksum2
},
519 {file_checksum_func_name2
}, true, false, false,
521 ASSERT_OK(s
) << s
.ToString();
522 std::vector
<LiveFileMetaData
> live_files1
;
523 dbfull()->GetLiveFilesMetaData(&live_files1
);
// Only live files whose name is not yet in set1 are checked against the
// checksum of the file just ingested.
524 for (auto f
: live_files1
) {
525 if (set1
.find(f
.name
) == set1
.end()) {
526 ASSERT_EQ(f
.file_checksum
, file_checksum2
);
527 ASSERT_EQ(f
.file_checksum_func_name
, file_checksum_func_name2
);
531 ASSERT_OK(env_
->FileExists(file2
));
533 // Enable verify_file_checksum option
534 // No checksum information is provided, generate it when ingesting
535 std::vector
<std::string
> checksum
, checksum_func
;
536 s
= AddFileWithFileChecksum({file3
}, checksum
, checksum_func
, true, false,
538 ASSERT_OK(s
) << s
.ToString();
539 std::vector
<LiveFileMetaData
> live_files2
;
540 dbfull()->GetLiveFilesMetaData(&live_files2
);
541 for (auto f
: live_files2
) {
542 if (set1
.find(f
.name
) == set1
.end()) {
543 ASSERT_EQ(f
.file_checksum
, file_checksum3
);
544 ASSERT_EQ(f
.file_checksum_func_name
, file_checksum_func_name3
);
548 ASSERT_OK(s
) << s
.ToString();
549 ASSERT_OK(env_
->FileExists(file3
));
551 // Does not enable verify_file_checksum options
552 // The checksum name does not match, fail the ingestion
553 s
= AddFileWithFileChecksum({file4
}, {file_checksum4
}, {"xyz"}, false, false,
555 ASSERT_NOK(s
) << s
.ToString();
557 // Does not enable verify_file_checksum options
558 // Checksum function name matches, store the checksum being ingested.
559 s
= AddFileWithFileChecksum({file4
}, {"asd"}, {file_checksum_func_name4
},
560 false, false, false, false);
561 ASSERT_OK(s
) << s
.ToString();
562 std::vector
<LiveFileMetaData
> live_files3
;
563 dbfull()->GetLiveFilesMetaData(&live_files3
);
// With verification off, the stored checksum is the caller-supplied "asd",
// not the real checksum of file4.
564 for (auto f
: live_files3
) {
565 if (set1
.find(f
.name
) == set1
.end()) {
566 ASSERT_FALSE(f
.file_checksum
== file_checksum4
);
567 ASSERT_EQ(f
.file_checksum
, "asd");
568 ASSERT_EQ(f
.file_checksum_func_name
, file_checksum_func_name4
);
572 ASSERT_OK(s
) << s
.ToString();
573 ASSERT_OK(env_
->FileExists(file4
));
575 // enable verify_file_checksum options, DB enable checksum, and enable
576 // write_global_seq. So the checksum stored is different from the one
577 // ingested due to the sequence number changes.
578 s
= AddFileWithFileChecksum({file5
}, {file_checksum5
},
579 {file_checksum_func_name5
}, true, false, false,
581 ASSERT_OK(s
) << s
.ToString();
582 std::vector
<LiveFileMetaData
> live_files4
;
583 dbfull()->GetLiveFilesMetaData(&live_files4
);
// The stored checksum is compared against one recomputed from the DB's own
// copy of the file (dbname_ + f.name).
584 for (auto f
: live_files4
) {
585 if (set1
.find(f
.name
) == set1
.end()) {
586 std::string cur_checksum5
, cur_checksum_func_name5
;
587 ASSERT_OK(checksum_helper
.GetSingleFileChecksumAndFuncName(
588 dbname_
+ f
.name
, &cur_checksum5
, &cur_checksum_func_name5
));
589 ASSERT_EQ(f
.file_checksum
, cur_checksum5
);
590 ASSERT_EQ(f
.file_checksum_func_name
, file_checksum_func_name5
);
594 ASSERT_OK(s
) << s
.ToString();
595 ASSERT_OK(env_
->FileExists(file5
));
597 // Does not enable verify_file_checksum options and also the ingested file
598 // checksum information is empty. DB will generate and store the checksum
600 std::vector
<std::string
> files_c6
, files_name6
;
601 s
= AddFileWithFileChecksum({file6
}, files_c6
, files_name6
, false, false,
603 ASSERT_OK(s
) << s
.ToString();
604 std::vector
<LiveFileMetaData
> live_files6
;
605 dbfull()->GetLiveFilesMetaData(&live_files6
);
606 for (auto f
: live_files6
) {
607 if (set1
.find(f
.name
) == set1
.end()) {
608 ASSERT_EQ(f
.file_checksum
, file_checksum6
);
609 ASSERT_EQ(f
.file_checksum_func_name
, file_checksum_func_name6
);
613 ASSERT_OK(s
) << s
.ToString();
614 ASSERT_OK(env_
->FileExists(file6
));
// Re-read the column family metadata and per-temperature SST sizes after all
// ingestions.
615 db_
->GetColumnFamilyMetaData(&metadata
);
616 size
= GetSstSizeHelper(Temperature::kUnknown
);
618 size
= GetSstSizeHelper(Temperature::kWarm
);
620 size
= GetSstSizeHelper(Temperature::kHot
);
622 size
= GetSstSizeHelper(Temperature::kCold
);
// Checks the move_files flag of DeprecatedAddFile(): a file ingested with
// move_files = true no longer exists at its original path, a file ingested
// with move_files = false is still there, and a rejected ingestion of an
// overlapping file leaves the source file in place.
626 TEST_F(ExternalSSTFileBasicTest
, NoCopy
) {
627 Options options
= CurrentOptions();
628 const ImmutableCFOptions
ioptions(options
);
630 SstFileWriter
sst_file_writer(EnvOptions(), options
);
632 // file1.sst (0 => 99)
633 std::string file1
= sst_files_dir_
+ "file1.sst";
634 ASSERT_OK(sst_file_writer
.Open(file1
));
635 for (int k
= 0; k
< 100; k
++) {
636 ASSERT_OK(sst_file_writer
.Put(Key(k
), Key(k
) + "_val"));
638 ExternalSstFileInfo file1_info
;
639 Status s
= sst_file_writer
.Finish(&file1_info
);
640 ASSERT_OK(s
) << s
.ToString();
641 ASSERT_EQ(file1_info
.file_path
, file1
);
642 ASSERT_EQ(file1_info
.num_entries
, 100);
643 ASSERT_EQ(file1_info
.smallest_key
, Key(0));
644 ASSERT_EQ(file1_info
.largest_key
, Key(99));
646 // file2.sst (100 => 299)
647 std::string file2
= sst_files_dir_
+ "file2.sst";
648 ASSERT_OK(sst_file_writer
.Open(file2
));
649 for (int k
= 100; k
< 300; k
++) {
650 ASSERT_OK(sst_file_writer
.Put(Key(k
), Key(k
) + "_val"));
652 ExternalSstFileInfo file2_info
;
653 s
= sst_file_writer
.Finish(&file2_info
);
654 ASSERT_OK(s
) << s
.ToString();
655 ASSERT_EQ(file2_info
.file_path
, file2
);
656 ASSERT_EQ(file2_info
.num_entries
, 200);
657 ASSERT_EQ(file2_info
.smallest_key
, Key(100));
658 ASSERT_EQ(file2_info
.largest_key
, Key(299));
660 // file3.sst (110 => 124) .. overlap with file2.sst
661 std::string file3
= sst_files_dir_
+ "file3.sst";
662 ASSERT_OK(sst_file_writer
.Open(file3
));
663 for (int k
= 110; k
< 125; k
++) {
664 ASSERT_OK(sst_file_writer
.Put(Key(k
), Key(k
) + "_val_overlap"));
666 ExternalSstFileInfo file3_info
;
667 s
= sst_file_writer
.Finish(&file3_info
);
668 ASSERT_OK(s
) << s
.ToString();
669 ASSERT_EQ(file3_info
.file_path
, file3
);
670 ASSERT_EQ(file3_info
.num_entries
, 15);
671 ASSERT_EQ(file3_info
.smallest_key
, Key(110));
672 ASSERT_EQ(file3_info
.largest_key
, Key(124));
// Moved file: the original path must report NotFound afterwards.
674 s
= DeprecatedAddFile({file1
}, true /* move file */);
675 ASSERT_OK(s
) << s
.ToString();
676 ASSERT_EQ(Status::NotFound(), env_
->FileExists(file1
));
// Copied file: the original path must still exist.
678 s
= DeprecatedAddFile({file2
}, false /* copy file */);
679 ASSERT_OK(s
) << s
.ToString();
680 ASSERT_OK(env_
->FileExists(file2
));
682 // This file has overlapping values with the existing data
683 s
= DeprecatedAddFile({file3
}, true /* move file */);
684 ASSERT_NOK(s
) << s
.ToString();
// The failed ingestion must not have removed the source file.
685 ASSERT_OK(env_
->FileExists(file3
));
// Keys 0..299 still carry the "_val" values from file1/file2, i.e. nothing
// from the rejected file3 ("_val_overlap") is visible.
687 for (int k
= 0; k
< 300; k
++) {
688 ASSERT_EQ(Get(Key(k
)), Key(k
) + "_val");
// Parameterized test that follows dbfull()->GetLatestSequenceNumber() across
// a series of GenerateAndAddExternalFile() calls. Per the assertions below,
// the number stays the same when an ingested file does not overwrite existing
// keys and no snapshot is held, and advances by one when the file overwrites
// keys or a snapshot is held. The expected contents are tracked in true_data
// and checked with VerifyDBFromMap() at the end.
// NOTE(review): the declarations of `file_id` and `kcnt`, and the opening of
// the do-loop that is closed on original line 795, are not visible in this
// listing.
692 TEST_P(ExternalSSTFileBasicTest
, IngestFileWithGlobalSeqnoPickedSeqno
) {
// Test parameters: (write_global_seqno, verify_checksums_before_ingest).
693 bool write_global_seqno
= std::get
<0>(GetParam());
694 bool verify_checksums_before_ingest
= std::get
<1>(GetParam());
696 Options options
= CurrentOptions();
697 DestroyAndReopen(options
);
698 std::map
<std::string
, std::string
> true_data
;
702 ASSERT_OK(GenerateAndAddExternalFile(
703 options
, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue
, file_id
++,
704 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
705 // File doesn't overwrite any keys, no seqno needed
706 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
708 ASSERT_OK(GenerateAndAddExternalFile(
709 options
, {10, 11, 12, 13}, ValueType::kTypeValue
, file_id
++,
710 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
711 // File doesn't overwrite any keys, no seqno needed
712 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
714 ASSERT_OK(GenerateAndAddExternalFile(
715 options
, {1, 4, 6}, ValueType::kTypeValue
, file_id
++,
716 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
717 // File overwrites some keys, a seqno will be assigned
718 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1);
720 ASSERT_OK(GenerateAndAddExternalFile(
721 options
, {11, 15, 19}, ValueType::kTypeValue
, file_id
++,
722 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
723 // File overwrites some keys, a seqno will be assigned
724 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
726 ASSERT_OK(GenerateAndAddExternalFile(
727 options
, {120, 130}, ValueType::kTypeValue
, file_id
++,
728 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
729 // File doesn't overwrite any keys, no seqno needed
730 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
732 ASSERT_OK(GenerateAndAddExternalFile(
733 options
, {1, 130}, ValueType::kTypeValue
, file_id
++, write_global_seqno
,
734 verify_checksums_before_ingest
, &true_data
));
735 // File overwrites some keys, a seqno will be assigned
736 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3);
738 // Write some keys through normal write path
739 for (int i
= 0; i
< 50; i
++) {
740 ASSERT_OK(Put(Key(i
), "memtable"));
741 true_data
[Key(i
)] = "memtable";
// Baseline for the remaining checks: the sequence number after the regular
// Put() calls above.
743 SequenceNumber last_seqno
= dbfull()->GetLatestSequenceNumber();
745 ASSERT_OK(GenerateAndAddExternalFile(
746 options
, {60, 61, 62}, ValueType::kTypeValue
, file_id
++,
747 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
748 // File doesn't overwrite any keys, no seqno needed
749 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
);
751 ASSERT_OK(GenerateAndAddExternalFile(
752 options
, {40, 41, 42}, ValueType::kTypeValue
, file_id
++,
753 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
754 // File overwrites some keys, a seqno will be assigned
755 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
+ 1);
757 ASSERT_OK(GenerateAndAddExternalFile(
758 options
, {20, 30, 40}, ValueType::kTypeValue
, file_id
++,
759 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
760 // File overwrites some keys, a seqno will be assigned
761 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
+ 2);
// Hold a snapshot for the next three ingestions; it is released below.
763 const Snapshot
* snapshot
= db_
->GetSnapshot();
765 // We will need a seqno for the file regardless if the file overwrite
766 // keys in the DB or not because we have a snapshot
767 ASSERT_OK(GenerateAndAddExternalFile(
768 options
, {1000, 1002}, ValueType::kTypeValue
, file_id
++,
769 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
770 // A global seqno will be assigned anyway because of the snapshot
771 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
+ 3);
773 ASSERT_OK(GenerateAndAddExternalFile(
774 options
, {2000, 3002}, ValueType::kTypeValue
, file_id
++,
775 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
776 // A global seqno will be assigned anyway because of the snapshot
777 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
+ 4);
779 ASSERT_OK(GenerateAndAddExternalFile(
780 options
, {1, 20, 40, 100, 150}, ValueType::kTypeValue
, file_id
++,
781 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
782 // A global seqno will be assigned anyway because of the snapshot
783 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
+ 5);
785 db_
->ReleaseSnapshot(snapshot
);
787 ASSERT_OK(GenerateAndAddExternalFile(
788 options
, {5000, 5001}, ValueType::kTypeValue
, file_id
++,
789 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
790 // No snapshot anymore, no need to assign a seqno
791 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
+ 5);
// Compare the DB contents against the expected map built up above.
794 VerifyDBFromMap(true_data
, &kcnt
, false);
// The whole scenario is repeated for as long as
// ChangeOptionsForFileIngestionTest() returns true.
795 } while (ChangeOptionsForFileIngestionTest());
// Ingests a sequence of generated external files in which every key of a
// file shares one value type (kTypeValue, kTypeMerge or kTypeDeletion), or
// which contain only range deletions, and after each ingestion compares
// dbfull()->GetLatestSequenceNumber() with the expected value. Parameterized
// on (write_global_seqno, verify_checksums_before_ingest).
// NOTE(review): the embedded line numbers skip values (e.g. 801, 806-808,
// 869, 919-920) and `file_id` / `kcnt` are used without a visible
// declaration, so this listing appears to have lost lines - confirm against
// the original file before editing the code.
798 TEST_P(ExternalSSTFileBasicTest
, IngestFileWithMultipleValueType
) {
799 bool write_global_seqno
= std::get
<0>(GetParam());
800 bool verify_checksums_before_ingest
= std::get
<1>(GetParam());
802 Options options
= CurrentOptions();
// A merge operator is installed because some ingested files carry
// kTypeMerge entries.
803 options
.merge_operator
.reset(new TestPutOperator());
804 DestroyAndReopen(options
);
// Expected DB contents; passed to every GenerateAndAddExternalFile call and
// checked at the end with VerifyDBFromMap.
805 std::map
<std::string
, std::string
> true_data
;
809 ASSERT_OK(GenerateAndAddExternalFile(
810 options
, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue
, file_id
++,
811 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
812 // File doesn't overwrite any keys, no seqno needed
813 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
815 ASSERT_OK(GenerateAndAddExternalFile(
816 options
, {10, 11, 12, 13}, ValueType::kTypeValue
, file_id
++,
817 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
818 // File doesn't overwrite any keys, no seqno needed
819 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
821 ASSERT_OK(GenerateAndAddExternalFile(
822 options
, {1, 4, 6}, ValueType::kTypeMerge
, file_id
++,
823 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
824 // File overwrites some keys, a seqno will be assigned
825 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1);
827 ASSERT_OK(GenerateAndAddExternalFile(
828 options
, {11, 15, 19}, ValueType::kTypeDeletion
, file_id
++,
829 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
830 // File overwrites some keys, a seqno will be assigned
831 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
833 ASSERT_OK(GenerateAndAddExternalFile(
834 options
, {120, 130}, ValueType::kTypeMerge
, file_id
++,
835 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
836 // File doesn't overwrite any keys, no seqno needed
837 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
839 ASSERT_OK(GenerateAndAddExternalFile(
840 options
, {1, 130}, ValueType::kTypeDeletion
, file_id
++,
841 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
842 // File overwrites some keys, a seqno will be assigned
843 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3);
// The next three calls use the overload that also takes a list of
// {start, end} pairs; the comments below treat those pairs as range
// deletions.
845 ASSERT_OK(GenerateAndAddExternalFile(
846 options
, {120}, {ValueType::kTypeValue
}, {{120, 135}}, file_id
++,
847 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
848 // File overwrites some keys, a seqno will be assigned
849 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 4);
851 ASSERT_OK(GenerateAndAddExternalFile(
852 options
, {}, {}, {{110, 120}}, file_id
++, write_global_seqno
,
853 verify_checksums_before_ingest
, &true_data
));
854 // The range deletion ends on a key, but it doesn't actually delete
855 // this key because the largest key in the range is exclusive. Still,
856 // it counts as an overlap so a new seqno will be assigned.
857 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5);
859 ASSERT_OK(GenerateAndAddExternalFile(
860 options
, {}, {}, {{100, 109}}, file_id
++, write_global_seqno
,
861 verify_checksums_before_ingest
, &true_data
));
862 // File doesn't overwrite any keys, no seqno needed
863 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5);
865 // Write some keys through normal write path
866 for (int i
= 0; i
< 50; i
++) {
867 ASSERT_OK(Put(Key(i
), "memtable"));
868 true_data
[Key(i
)] = "memtable";
// From here on, expectations are expressed relative to the sequence number
// reached after the Put() calls above.
870 SequenceNumber last_seqno
= dbfull()->GetLatestSequenceNumber();
872 ASSERT_OK(GenerateAndAddExternalFile(
873 options
, {60, 61, 62}, ValueType::kTypeValue
, file_id
++,
874 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
875 // File doesn't overwrite any keys, no seqno needed
876 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
);
878 ASSERT_OK(GenerateAndAddExternalFile(
879 options
, {40, 41, 42}, ValueType::kTypeMerge
, file_id
++,
880 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
881 // File overwrites some keys, a seqno will be assigned
882 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
+ 1);
884 ASSERT_OK(GenerateAndAddExternalFile(
885 options
, {20, 30, 40}, ValueType::kTypeDeletion
, file_id
++,
886 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
887 // File overwrites some keys, a seqno will be assigned
888 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
+ 2);
890 const Snapshot
* snapshot
= db_
->GetSnapshot();
892 // We will need a seqno for the file regardless of whether the file
893 // overwrites keys in the DB, because we have a snapshot
894 ASSERT_OK(GenerateAndAddExternalFile(
895 options
, {1000, 1002}, ValueType::kTypeMerge
, file_id
++,
896 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
897 // A global seqno will be assigned anyway because of the snapshot
898 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
+ 3);
900 ASSERT_OK(GenerateAndAddExternalFile(
901 options
, {2000, 3002}, ValueType::kTypeMerge
, file_id
++,
902 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
903 // A global seqno will be assigned anyway because of the snapshot
904 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
+ 4);
906 ASSERT_OK(GenerateAndAddExternalFile(
907 options
, {1, 20, 40, 100, 150}, ValueType::kTypeMerge
, file_id
++,
908 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
909 // A global seqno will be assigned anyway because of the snapshot
910 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
+ 5);
912 db_
->ReleaseSnapshot(snapshot
);
914 ASSERT_OK(GenerateAndAddExternalFile(
915 options
, {5000, 5001}, ValueType::kTypeValue
, file_id
++,
916 write_global_seqno
, verify_checksums_before_ingest
, &true_data
));
917 // No snapshot anymore, no need to assign a seqno
918 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
+ 5);
// Final check of the DB contents against the expected map.
921 VerifyDBFromMap(true_data
, &kcnt
, false);
// The scenario is repeated for each configuration produced by
// ChangeOptionsForFileIngestionTest(); the matching `do {` is not visible in
// this listing.
922 } while (ChangeOptionsForFileIngestionTest());
// Same scenario as the single-value-type test, except that each generated
// file mixes value types: the second braced list gives one ValueType per key
// of the first list. After each ingestion the latest sequence number is
// compared with the expected value. Parameterized on (write_global_seqno,
// verify_checksums_before_ingest).
// NOTE(review): this listing has lost lines. Several
// GenerateAndAddExternalFile calls are visibly unterminated (embedded
// numbers 941, 950, 955, 959, ... are absent) and `file_id` / `kcnt` have no
// visible declaration - confirm against the original file before editing.
925 TEST_P(ExternalSSTFileBasicTest
, IngestFileWithMixedValueType
) {
926 bool write_global_seqno
= std::get
<0>(GetParam());
927 bool verify_checksums_before_ingest
= std::get
<1>(GetParam());
929 Options options
= CurrentOptions();
// A merge operator is installed because the ingested files carry kTypeMerge
// entries.
930 options
.merge_operator
.reset(new TestPutOperator());
931 DestroyAndReopen(options
);
// Expected DB contents, checked at the end with VerifyDBFromMap.
932 std::map
<std::string
, std::string
> true_data
;
936 ASSERT_OK(GenerateAndAddExternalFile(
937 options
, {1, 2, 3, 4, 5, 6},
938 {ValueType::kTypeValue
, ValueType::kTypeMerge
, ValueType::kTypeValue
,
939 ValueType::kTypeMerge
, ValueType::kTypeValue
, ValueType::kTypeMerge
},
940 file_id
++, write_global_seqno
, verify_checksums_before_ingest
,
942 // File doesn't overwrite any keys, no seqno needed
943 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
945 ASSERT_OK(GenerateAndAddExternalFile(
946 options
, {10, 11, 12, 13},
947 {ValueType::kTypeValue
, ValueType::kTypeMerge
, ValueType::kTypeValue
,
948 ValueType::kTypeMerge
},
949 file_id
++, write_global_seqno
, verify_checksums_before_ingest
,
951 // File doesn't overwrite any keys, no seqno needed
952 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
// NOTE(review): the key list of the following call (embedded number 955) is
// not visible in this listing.
954 ASSERT_OK(GenerateAndAddExternalFile(
956 {ValueType::kTypeDeletion
, ValueType::kTypeValue
,
957 ValueType::kTypeMerge
},
958 file_id
++, write_global_seqno
, verify_checksums_before_ingest
,
960 // File overwrites some keys, a seqno will be assigned
961 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1);
963 ASSERT_OK(GenerateAndAddExternalFile(
964 options
, {11, 15, 19},
965 {ValueType::kTypeDeletion
, ValueType::kTypeMerge
,
966 ValueType::kTypeValue
},
967 file_id
++, write_global_seqno
, verify_checksums_before_ingest
,
969 // File overwrites some keys, a seqno will be assigned
970 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
972 ASSERT_OK(GenerateAndAddExternalFile(
973 options
, {120, 130}, {ValueType::kTypeValue
, ValueType::kTypeMerge
},
974 file_id
++, write_global_seqno
, verify_checksums_before_ingest
,
976 // File doesn't overwrite any keys, no seqno needed
977 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
979 ASSERT_OK(GenerateAndAddExternalFile(
980 options
, {1, 130}, {ValueType::kTypeMerge
, ValueType::kTypeDeletion
},
981 file_id
++, write_global_seqno
, verify_checksums_before_ingest
,
983 // File overwrites some keys, a seqno will be assigned
984 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3);
// The next three calls additionally pass a list of {start, end} pairs
// alongside the point keys.
986 ASSERT_OK(GenerateAndAddExternalFile(
987 options
, {150, 151, 152},
988 {ValueType::kTypeValue
, ValueType::kTypeMerge
,
989 ValueType::kTypeDeletion
},
990 {{150, 160}, {180, 190}}, file_id
++, write_global_seqno
,
991 verify_checksums_before_ingest
, &true_data
));
992 // File doesn't overwrite any keys, no seqno needed
993 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3);
995 ASSERT_OK(GenerateAndAddExternalFile(
996 options
, {150, 151, 152},
997 {ValueType::kTypeValue
, ValueType::kTypeMerge
, ValueType::kTypeValue
},
998 {{200, 250}}, file_id
++, write_global_seqno
,
999 verify_checksums_before_ingest
, &true_data
));
1000 // File overwrites some keys, a seqno will be assigned
1001 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 4);
1003 ASSERT_OK(GenerateAndAddExternalFile(
1004 options
, {300, 301, 302},
1005 {ValueType::kTypeValue
, ValueType::kTypeMerge
,
1006 ValueType::kTypeDeletion
},
1007 {{1, 2}, {152, 154}}, file_id
++, write_global_seqno
,
1008 verify_checksums_before_ingest
, &true_data
));
1009 // File overwrites some keys, a seqno will be assigned
1010 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5);
1012 // Write some keys through normal write path
1013 for (int i
= 0; i
< 50; i
++) {
1014 ASSERT_OK(Put(Key(i
), "memtable"));
1015 true_data
[Key(i
)] = "memtable";
// From here on, expectations are expressed relative to the sequence number
// reached after the Put() calls above.
1017 SequenceNumber last_seqno
= dbfull()->GetLatestSequenceNumber();
1019 ASSERT_OK(GenerateAndAddExternalFile(
1020 options
, {60, 61, 62},
1021 {ValueType::kTypeValue
, ValueType::kTypeMerge
, ValueType::kTypeValue
},
1022 file_id
++, write_global_seqno
, verify_checksums_before_ingest
,
1024 // File doesn't overwrite any keys, no seqno needed
1025 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
);
1027 ASSERT_OK(GenerateAndAddExternalFile(
1028 options
, {40, 41, 42},
1029 {ValueType::kTypeValue
, ValueType::kTypeDeletion
,
1030 ValueType::kTypeDeletion
},
1031 file_id
++, write_global_seqno
, verify_checksums_before_ingest
,
1033 // File overwrites some keys, a seqno will be assigned
1034 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
+ 1);
1036 ASSERT_OK(GenerateAndAddExternalFile(
1037 options
, {20, 30, 40},
1038 {ValueType::kTypeDeletion
, ValueType::kTypeDeletion
,
1039 ValueType::kTypeDeletion
},
1040 file_id
++, write_global_seqno
, verify_checksums_before_ingest
,
1042 // File overwrites some keys, a seqno will be assigned
1043 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
+ 2);
1045 const Snapshot
* snapshot
= db_
->GetSnapshot();
1047 // We will need a seqno for the file regardless of whether the file
1048 // overwrites keys in the DB, because we have a snapshot
1049 ASSERT_OK(GenerateAndAddExternalFile(
1050 options
, {1000, 1002}, {ValueType::kTypeValue
, ValueType::kTypeMerge
},
1051 file_id
++, write_global_seqno
, verify_checksums_before_ingest
,
1053 // A global seqno will be assigned anyway because of the snapshot
1054 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
+ 3);
1056 ASSERT_OK(GenerateAndAddExternalFile(
1057 options
, {2000, 3002}, {ValueType::kTypeValue
, ValueType::kTypeMerge
},
1058 file_id
++, write_global_seqno
, verify_checksums_before_ingest
,
1060 // A global seqno will be assigned anyway because of the snapshot
1061 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
+ 4);
1063 ASSERT_OK(GenerateAndAddExternalFile(
1064 options
, {1, 20, 40, 100, 150},
1065 {ValueType::kTypeDeletion
, ValueType::kTypeDeletion
,
1066 ValueType::kTypeValue
, ValueType::kTypeMerge
, ValueType::kTypeMerge
},
1067 file_id
++, write_global_seqno
, verify_checksums_before_ingest
,
1069 // A global seqno will be assigned anyway because of the snapshot
1070 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
+ 5);
1072 db_
->ReleaseSnapshot(snapshot
);
1074 ASSERT_OK(GenerateAndAddExternalFile(
1075 options
, {5000, 5001}, {ValueType::kTypeValue
, ValueType::kTypeMerge
},
1076 file_id
++, write_global_seqno
, verify_checksums_before_ingest
,
1078 // No snapshot anymore, no need to assign a seqno
1079 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
+ 5);
// Final check of the DB contents against the expected map.
1082 VerifyDBFromMap(true_data
, &kcnt
, false);
// Repeated for each configuration produced by
// ChangeOptionsForFileIngestionTest(); the matching `do {` is not visible in
// this listing.
1083 } while (ChangeOptionsForFileIngestionTest());
// Writes the same kNumKeys key/value pairs through two SstFileWriter
// instances that differ only in the last constructor argument (false, then
// true) and uses a SyncPoint callback to total the sizes reported at
// "SstFileWriter::Rep::InvalidatePageCache". With `false` nothing may be
// reported; with `true` the total must equal the writer's FileSize() and be
// non-zero.
// NOTE(review): the last constructor argument is presumably SstFileWriter's
// invalidate-page-cache flag - confirm against its declaration. Closing
// braces of the lambda, loops and test body are not visible in this listing.
1086 TEST_F(ExternalSSTFileBasicTest
, FadviseTrigger
) {
1087 Options options
= CurrentOptions();
1088 const int kNumKeys
= 10000;
// Running total of the sizes passed to the sync point callback below.
1090 size_t total_fadvised_bytes
= 0;
1091 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1092 "SstFileWriter::Rep::InvalidatePageCache", [&](void* arg
) {
// The sync point argument is read as a pointer to size_t.
1093 size_t fadvise_size
= *(reinterpret_cast<size_t*>(arg
));
1094 total_fadvised_bytes
+= fadvise_size
;
1096 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
1098 std::unique_ptr
<SstFileWriter
> sst_file_writer
;
// First pass: writer constructed with `false` as its last argument.
1100 std::string sst_file_path
= sst_files_dir_
+ "file_fadvise_disable.sst";
1101 sst_file_writer
.reset(
1102 new SstFileWriter(EnvOptions(), options
, nullptr, false));
1103 ASSERT_OK(sst_file_writer
->Open(sst_file_path
));
1104 for (int i
= 0; i
< kNumKeys
; i
++) {
1105 ASSERT_OK(sst_file_writer
->Put(Key(i
), Key(i
)));
1107 ASSERT_OK(sst_file_writer
->Finish());
// The callback must not have reported any bytes during the first pass.
1109 ASSERT_EQ(total_fadvised_bytes
, 0);
// Second pass: identical data, writer constructed with `true`.
1111 sst_file_path
= sst_files_dir_
+ "file_fadvise_enable.sst";
1112 sst_file_writer
.reset(
1113 new SstFileWriter(EnvOptions(), options
, nullptr, true));
1114 ASSERT_OK(sst_file_writer
->Open(sst_file_path
));
1115 for (int i
= 0; i
< kNumKeys
; i
++) {
1116 ASSERT_OK(sst_file_writer
->Put(Key(i
), Key(i
)));
1118 ASSERT_OK(sst_file_writer
->Finish());
// The reported sizes must add up to exactly the size of the written file.
1120 ASSERT_EQ(total_fadvised_bytes
, sst_file_writer
->FileSize());
1121 ASSERT_GT(total_fadvised_bytes
, 0);
1123 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
// For each (before, after) pair of ExternalSstFileIngestionJob sync points in
// `test_cases`, deactivates the fault-injection env's filesystem at the
// "before" point and reactivates it at the "after" point, then ingests a
// one-key SST file with move_files and write_global_seqno enabled while a
// snapshot is held.
// NOTE(review): this listing has lost lines: the declaration of `options`,
// the bodies of the two IsNotSupported() branches, and whatever checks the
// ingestion Status `s` (embedded numbers 1191-1195) are not visible. Confirm
// against the original file before editing.
1126 TEST_F(ExternalSSTFileBasicTest
, SyncFailure
) {
1128 options
.create_if_missing
= true;
// The DB runs on the fault-injection env created in the fixture constructor.
1129 options
.env
= fault_injection_test_env_
.get();
// Each entry names the sync point just before and just after one sync step.
1131 std::vector
<std::pair
<std::string
, std::string
>> test_cases
= {
1132 {"ExternalSstFileIngestionJob::BeforeSyncIngestedFile",
1133 "ExternalSstFileIngestionJob::AfterSyncIngestedFile"},
1134 {"ExternalSstFileIngestionJob::BeforeSyncDir",
1135 "ExternalSstFileIngestionJob::AfterSyncDir"},
1136 {"ExternalSstFileIngestionJob::BeforeSyncGlobalSeqno",
1137 "ExternalSstFileIngestionJob::AfterSyncGlobalSeqno"}};
1139 for (size_t i
= 0; i
< test_cases
.size(); i
++) {
// NOTE(review): `no_sync` is only initialized in the visible code;
// presumably it is set inside the IsNotSupported() branches below - verify.
1140 bool no_sync
= false;
1141 SyncPoint::GetInstance()->SetCallBack(test_cases
[i
].first
, [&](void*) {
1142 fault_injection_test_env_
->SetFilesystemActive(false);
1144 SyncPoint::GetInstance()->SetCallBack(test_cases
[i
].second
, [&](void*) {
1145 fault_injection_test_env_
->SetFilesystemActive(true);
// The next two callbacks receive a Status* as the sync point argument and
// inspect whether it is NotSupported.
1148 SyncPoint::GetInstance()->SetCallBack(
1149 "ExternalSstFileIngestionJob::Prepare:Reopen", [&](void* s
) {
1150 Status
* status
= static_cast<Status
*>(s
);
1151 if (status
->IsNotSupported()) {
1157 SyncPoint::GetInstance()->SetCallBack(
1158 "ExternalSstFileIngestionJob::NewRandomRWFile", [&](void* s
) {
1159 Status
* status
= static_cast<Status
*>(s
);
1160 if (status
->IsNotSupported()) {
1165 SyncPoint::GetInstance()->EnableProcessing();
1167 DestroyAndReopen(options
);
1169 ASSERT_OK(Put("foo", "v1"));
// Build a one-key external file through the same fault-injection env.
1172 Options sst_file_writer_options
;
1173 sst_file_writer_options
.env
= fault_injection_test_env_
.get();
1174 std::unique_ptr
<SstFileWriter
> sst_file_writer(
1175 new SstFileWriter(EnvOptions(), sst_file_writer_options
));
// One file name per test case, distinguished by the loop index.
1176 std::string file_name
=
1177 sst_files_dir_
+ "sync_failure_test_" + std::to_string(i
) + ".sst";
1178 ASSERT_OK(sst_file_writer
->Open(file_name
));
1179 ASSERT_OK(sst_file_writer
->Put("bar", "v2"));
1180 ASSERT_OK(sst_file_writer
->Finish());
1182 IngestExternalFileOptions ingest_opt
;
1184 ingest_opt
.move_files
= true;
// A snapshot is held across the ingestion and released afterwards.
1186 const Snapshot
* snapshot
= db_
->GetSnapshot();
1188 ingest_opt
.write_global_seqno
= true;
1190 Status s
= db_
->IngestExternalFile({file_name
}, ingest_opt
);
1196 db_
->ReleaseSnapshot(snapshot
);
// Reset sync point state so callbacks do not leak into the next iteration.
1198 SyncPoint::GetInstance()->DisableProcessing();
1199 SyncPoint::GetInstance()->ClearAllCallBacks();
// Forces the Status seen at the
// "ExternalSstFileIngestionJob::Prepare:Reopen" sync point to NotSupported
// and asserts that ingesting a one-key file with move_files = true (while a
// snapshot is held) still succeeds.
// NOTE(review): the declaration of `options` and the lambda's closing lines
// are not visible in this listing - confirm against the original file.
1204 TEST_F(ExternalSSTFileBasicTest
, ReopenNotSupported
) {
1206 options
.create_if_missing
= true;
1209 SyncPoint::GetInstance()->SetCallBack(
1210 "ExternalSstFileIngestionJob::Prepare:Reopen", [&](void* arg
) {
// Overwrite the Status passed through the sync point.
1211 Status
* s
= static_cast<Status
*>(arg
);
1212 *s
= Status::NotSupported();
1214 SyncPoint::GetInstance()->EnableProcessing();
1216 DestroyAndReopen(options
);
// Build the one-key external file to ingest.
1218 Options sst_file_writer_options
;
1219 sst_file_writer_options
.env
= env_
;
1220 std::unique_ptr
<SstFileWriter
> sst_file_writer(
1221 new SstFileWriter(EnvOptions(), sst_file_writer_options
));
1222 std::string file_name
=
1223 sst_files_dir_
+ "reopen_not_supported_test_" + ".sst";
1224 ASSERT_OK(sst_file_writer
->Open(file_name
));
1225 ASSERT_OK(sst_file_writer
->Put("bar", "v2"));
1226 ASSERT_OK(sst_file_writer
->Finish());
1228 IngestExternalFileOptions ingest_opt
;
1229 ingest_opt
.move_files
= true;
1230 const Snapshot
* snapshot
= db_
->GetSnapshot();
// Ingestion is expected to succeed despite the injected NotSupported status.
1231 ASSERT_OK(db_
->IngestExternalFile({file_name
}, ingest_opt
));
1232 db_
->ReleaseSnapshot(snapshot
);
// Reset sync point state for subsequent tests.
1234 SyncPoint::GetInstance()->DisableProcessing();
1235 SyncPoint::GetInstance()->ClearAllCallBacks();
// Bounds the number of random reads caused by checksum verification with
// readahead. The DB runs on a SpecialEnv that counts random reads; the same
// 5000-key file (4000-byte values) is ingested once without checksum
// verification to obtain a baseline count, then again with
// verify_checksums_before_ingest and a 2 MiB
// verify_checksums_readahead_size. The extra reads must be > 0 and <= 40.
// NOTE(review): the declarations of `options` and `rnd` are not visible in
// this listing - confirm against the original file.
1239 TEST_F(ExternalSSTFileBasicTest
, VerifyChecksumReadahead
) {
1241 options
.create_if_missing
= true;
// SpecialEnv wraps env_ and provides the read counters used below.
1242 SpecialEnv
senv(env_
);
1243 options
.env
= &senv
;
1244 DestroyAndReopen(options
);
1246 Options sst_file_writer_options
;
1247 sst_file_writer_options
.env
= env_
;
1248 std::unique_ptr
<SstFileWriter
> sst_file_writer(
1249 new SstFileWriter(EnvOptions(), sst_file_writer_options
));
1250 std::string file_name
= sst_files_dir_
+ "verify_checksum_readahead_test.sst";
1251 ASSERT_OK(sst_file_writer
->Open(file_name
));
// 5000 keys share one 4000-byte random value.
1253 std::string value
= rnd
.RandomString(4000);
1254 for (int i
= 0; i
< 5000; i
++) {
1255 ASSERT_OK(sst_file_writer
->Put(DBTestBase::Key(i
), value
));
1257 ASSERT_OK(sst_file_writer
->Finish());
1259 // Ingest it once without verifying checksums to see the baseline
1261 IngestExternalFileOptions ingest_opt
;
// move_files stays false so the same external file can be ingested twice.
1262 ingest_opt
.move_files
= false;
1263 senv
.count_random_reads_
= true;
1264 senv
.random_read_bytes_counter_
= 0;
1265 ASSERT_OK(db_
->IngestExternalFile({file_name
}, ingest_opt
));
1267 auto base_num_reads
= senv
.random_read_counter_
.Read();
1268 // Make sure the counter is enabled.
1269 ASSERT_GT(base_num_reads
, 0);
1271 // Ingest again and observe the reads made for readahead.
1272 ingest_opt
.move_files
= false;
1273 ingest_opt
.verify_checksums_before_ingest
= true;
1274 ingest_opt
.verify_checksums_readahead_size
= size_t{2 * 1024 * 1024};
1276 senv
.count_random_reads_
= true;
1277 senv
.random_read_bytes_counter_
= 0;
1278 ASSERT_OK(db_
->IngestExternalFile({file_name
}, ingest_opt
));
1280 // Make sure the counter is enabled.
1281 ASSERT_GT(senv
.random_read_counter_
.Read() - base_num_reads
, 0);
1283 // The SST file is about 20MB. Readahead size is 2MB.
1284 // Give a conservative 15 reads for metadata blocks, the number
1285 // of random reads should be within 20 MB / 2MB + 15 = 25.
// NOTE(review): the comment above derives 25, but the bound asserted below
// is 40 - confirm which is intended.
1286 ASSERT_LE(senv
.random_read_counter_
.Read() - base_num_reads
, 40);
// Writes keys Key(5)..Key(24) into the DB, ingests an external file that
// contains only the range deletion [Key(0), Key(30)) with global sequence
// numbers enabled, and asserts that every previously written key now reads
// as NotFound.
// NOTE(review): the value argument of the initial Put, the declaration of
// `res`, and the loop/test closing braces are not visible in this listing -
// confirm against the original file.
1291 TEST_F(ExternalSSTFileBasicTest
, IngestRangeDeletionTombstoneWithGlobalSeqno
) {
1292 for (int i
= 5; i
< 25; i
++) {
1293 ASSERT_OK(db_
->Put(WriteOptions(), db_
->DefaultColumnFamily(), Key(i
),
1297 Options options
= CurrentOptions();
1298 options
.disable_auto_compactions
= true;
1300 SstFileWriter
sst_file_writer(EnvOptions(), options
);
1302 // file.sst (delete 0 => 30)
1303 std::string file
= sst_files_dir_
+ "file.sst";
1304 ASSERT_OK(sst_file_writer
.Open(file
));
1305 ASSERT_OK(sst_file_writer
.DeleteRange(Key(0), Key(30)));
1306 ExternalSstFileInfo file_info
;
1307 ASSERT_OK(sst_file_writer
.Finish(&file_info
));
// A file holding only a range tombstone reports zero point entries and empty
// smallest/largest point keys; its bounds appear in the range-del fields.
1308 ASSERT_EQ(file_info
.file_path
, file
);
1309 ASSERT_EQ(file_info
.num_entries
, 0);
1310 ASSERT_EQ(file_info
.smallest_key
, "");
1311 ASSERT_EQ(file_info
.largest_key
, "");
1312 ASSERT_EQ(file_info
.num_range_del_entries
, 1);
1313 ASSERT_EQ(file_info
.smallest_range_del_key
, Key(0));
1314 ASSERT_EQ(file_info
.largest_range_del_key
, Key(30));
// Ingest with global seqno allowed and written, without checksum
// verification.
1316 IngestExternalFileOptions ifo
;
1317 ifo
.move_files
= true;
1318 ifo
.snapshot_consistency
= true;
1319 ifo
.allow_global_seqno
= true;
1320 ifo
.write_global_seqno
= true;
1321 ifo
.verify_checksums_before_ingest
= false;
1322 ASSERT_OK(db_
->IngestExternalFile({file
}, ifo
));
// Every key written at the top must now be covered by the ingested tombstone.
1324 for (int i
= 5; i
< 25; i
++) {
1326 ASSERT_TRUE(db_
->Get(ReadOptions(), Key(i
), &res
).IsNotFound());
// Sets up range deletions in three places (per the comment below: the bottom
// level, L0 and the memtable), then ingests external files with various
// overlaps and after each ingestion asserts the latest sequence number and
// the number of table files at level 0, kNumLevels - 2 and the bottom level.
// Parameterized on (write_global_seqno, verify_checksums_before_ingest).
// NOTE(review): this listing has lost lines: `kNumLevels` and `file_id` have
// no visible declaration, the branches inside the setup loop are missing,
// and two calls are unterminated (embedded numbers 1376, 1399 absent).
// Confirm against the original file before editing.
1330 TEST_P(ExternalSSTFileBasicTest
, IngestionWithRangeDeletions
) {
1332 Options options
= CurrentOptions();
1333 options
.disable_auto_compactions
= true;
1334 options
.num_levels
= kNumLevels
;
1337 std::map
<std::string
, std::string
> true_data
;
1339 // prevent range deletions from being dropped due to becoming obsolete.
1340 const Snapshot
* snapshot
= db_
->GetSnapshot();
1342 // range del [0, 50) in L6 file, [50, 100) in L0 file, [100, 150) in memtable
1343 for (int i
= 0; i
< 3; i
++) {
// NOTE(review): the conditions guarding the Flush / MoveFilesToLevel calls
// below are not visible in this listing.
1345 db_
->Flush(FlushOptions());
1347 MoveFilesToLevel(kNumLevels
- 1);
1350 ASSERT_OK(db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(),
1351 Key(50 * i
), Key(50 * (i
+ 1))));
// Starting layout: one file in L0, none in the second-to-last level, one in
// the bottom level.
1353 ASSERT_EQ(1, NumTableFilesAtLevel(0));
1354 ASSERT_EQ(0, NumTableFilesAtLevel(kNumLevels
- 2));
1355 ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels
- 1));
1357 bool write_global_seqno
= std::get
<0>(GetParam());
1358 bool verify_checksums_before_ingest
= std::get
<1>(GetParam());
1359 // overlaps with L0 file but not memtable, so flush is skipped and file is
// Each assertion on the sequence number below pre-increments last_seqno when
// the ingestion is expected to consume one.
1361 SequenceNumber last_seqno
= dbfull()->GetLatestSequenceNumber();
1362 ASSERT_OK(GenerateAndAddExternalFile(
1363 options
, {60, 90}, {ValueType::kTypeValue
, ValueType::kTypeValue
},
1364 {{65, 70}, {70, 85}}, file_id
++, write_global_seqno
,
1365 verify_checksums_before_ingest
, &true_data
));
1366 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno
);
1367 ASSERT_EQ(2, NumTableFilesAtLevel(0));
1368 ASSERT_EQ(0, NumTableFilesAtLevel(kNumLevels
- 2));
1369 ASSERT_EQ(1, NumTableFilesAtLevel(options
.num_levels
- 1));
1371 // overlaps with L6 file but not memtable or L0 file, so flush is skipped and
1372 // file is ingested into L5
1373 ASSERT_OK(GenerateAndAddExternalFile(
1374 options
, {10, 40}, {ValueType::kTypeValue
, ValueType::kTypeValue
},
1375 file_id
++, write_global_seqno
, verify_checksums_before_ingest
,
1377 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno
);
1378 ASSERT_EQ(2, NumTableFilesAtLevel(0));
1379 ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels
- 2));
1380 ASSERT_EQ(1, NumTableFilesAtLevel(options
.num_levels
- 1));
1382 // overlaps with L5 file but not memtable or L0 file, so flush is skipped and
1383 // file is ingested into L4
1384 ASSERT_OK(GenerateAndAddExternalFile(
1385 options
, {}, {}, {{5, 15}}, file_id
++, write_global_seqno
,
1386 verify_checksums_before_ingest
, &true_data
));
1387 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno
);
1388 ASSERT_EQ(2, NumTableFilesAtLevel(0));
// NOTE(review): the next two assertions query the same level, since
// options.num_levels was set to kNumLevels above; the comment above says the
// file lands in L4, so one of them may have been meant for
// `options.num_levels - 3` - confirm.
1389 ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels
- 2));
1390 ASSERT_EQ(1, NumTableFilesAtLevel(options
.num_levels
- 2));
1391 ASSERT_EQ(1, NumTableFilesAtLevel(options
.num_levels
- 1));
1393 // ingested file overlaps with memtable, so flush is triggered before the file
1394 // is ingested such that the ingested data is considered newest. So L0 file
1395 // count increases by two.
1396 ASSERT_OK(GenerateAndAddExternalFile(
1397 options
, {100, 140}, {ValueType::kTypeValue
, ValueType::kTypeValue
},
1398 file_id
++, write_global_seqno
, verify_checksums_before_ingest
,
1400 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno
);
1401 ASSERT_EQ(4, NumTableFilesAtLevel(0));
1402 ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels
- 2));
1403 ASSERT_EQ(1, NumTableFilesAtLevel(options
.num_levels
- 1));
1405 // snapshot unneeded now that all range deletions are persisted
1406 db_
->ReleaseSnapshot(snapshot
);
1408 // overlaps with nothing, so places at bottom level and skips incrementing
1410 ASSERT_OK(GenerateAndAddExternalFile(
1411 options
, {151, 175}, {ValueType::kTypeValue
, ValueType::kTypeValue
},
1412 {{160, 200}}, file_id
++, write_global_seqno
,
1413 verify_checksums_before_ingest
, &true_data
));
// No increment here: the sequence number is expected to stay unchanged.
1414 ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno
);
1415 ASSERT_EQ(4, NumTableFilesAtLevel(0));
1416 ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels
- 2));
1417 ASSERT_EQ(2, NumTableFilesAtLevel(options
.num_levels
- 1));
// Builds two external files that each contain a single range deletion,
// [Key(300), Key(400)) and [Key(400), Key(500)), checks the metadata reported
// by SstFileWriter::Finish for both, and ingests them together through
// DeprecatedAddFile. The ingestion must succeed and leave the latest
// sequence number at 0.
// NOTE(review): the test's closing brace is not visible in this listing.
1420 TEST_F(ExternalSSTFileBasicTest
, AdjacentRangeDeletionTombstones
) {
1421 Options options
= CurrentOptions();
// One writer is reused for both files (Open is called again after Finish).
1422 SstFileWriter
sst_file_writer(EnvOptions(), options
);
1424 // file8.sst (delete 300 => 400)
1425 std::string file8
= sst_files_dir_
+ "file8.sst";
1426 ASSERT_OK(sst_file_writer
.Open(file8
));
1427 ASSERT_OK(sst_file_writer
.DeleteRange(Key(300), Key(400)));
1428 ExternalSstFileInfo file8_info
;
1429 Status s
= sst_file_writer
.Finish(&file8_info
);
1430 ASSERT_OK(s
) << s
.ToString();
// Tombstone-only file: no point entries, empty point-key bounds, and the
// range-del bounds equal to the DeleteRange arguments.
1431 ASSERT_EQ(file8_info
.file_path
, file8
);
1432 ASSERT_EQ(file8_info
.num_entries
, 0);
1433 ASSERT_EQ(file8_info
.smallest_key
, "");
1434 ASSERT_EQ(file8_info
.largest_key
, "");
1435 ASSERT_EQ(file8_info
.num_range_del_entries
, 1);
1436 ASSERT_EQ(file8_info
.smallest_range_del_key
, Key(300));
1437 ASSERT_EQ(file8_info
.largest_range_del_key
, Key(400));
1439 // file9.sst (delete 400 => 500)
1440 std::string file9
= sst_files_dir_
+ "file9.sst";
1441 ASSERT_OK(sst_file_writer
.Open(file9
));
1442 ASSERT_OK(sst_file_writer
.DeleteRange(Key(400), Key(500)));
1443 ExternalSstFileInfo file9_info
;
1444 s
= sst_file_writer
.Finish(&file9_info
);
1445 ASSERT_OK(s
) << s
.ToString();
1446 ASSERT_EQ(file9_info
.file_path
, file9
);
1447 ASSERT_EQ(file9_info
.num_entries
, 0);
1448 ASSERT_EQ(file9_info
.smallest_key
, "");
1449 ASSERT_EQ(file9_info
.largest_key
, "");
1450 ASSERT_EQ(file9_info
.num_range_del_entries
, 1);
1451 ASSERT_EQ(file9_info
.smallest_range_del_key
, Key(400));
1452 ASSERT_EQ(file9_info
.largest_range_del_key
, Key(500));
1454 // Range deletion tombstones are exclusive on their end key, so these SSTs
1455 // should not be considered as overlapping.
// DeprecatedAddFile (defined in the fixture) ingests with
// allow_global_seqno = false.
1456 s
= DeprecatedAddFile({file8
, file9
});
1457 ASSERT_OK(s
) << s
.ToString();
1458 ASSERT_EQ(db_
->GetLatestSequenceNumber(), 0U);
1459 DestroyAndRecreateExternalSSTFilesDir();
// Registers a callback on the
// "BlockBasedTableBuilder::WriteMaybeCompressedBlock:TamperWithChecksum"
// sync point, then generates and ingests an external file and inspects the
// resulting Status depending on verify_checksums_before_ingest.
// Parameterized on (write_global_seqno, verify_checksums_before_ingest).
// NOTE(review): this listing has lost lines: the statement that modifies
// `buf`, the callback argument of SetCallBack, the `do {` opener, the
// declaration of `file_id`, and both branches checking `s` are not visible.
// Confirm against the original file before editing.
1462 TEST_P(ExternalSSTFileBasicTest
, IngestFileWithBadBlockChecksum
) {
// Guards the callback so that it acts only once per generated file; it is
// reset at the end of each iteration.
1463 bool change_checksum_called
= false;
1464 const auto& change_checksum
= [&](void* arg
) {
1465 if (!change_checksum_called
) {
// The sync point argument is treated as a char buffer.
1466 char* buf
= reinterpret_cast<char*>(arg
);
1467 assert(nullptr != buf
);
1469 change_checksum_called
= true;
// Start from a clean sync point state before installing the callback.
1472 SyncPoint::GetInstance()->DisableProcessing();
1473 SyncPoint::GetInstance()->ClearAllCallBacks();
1474 SyncPoint::GetInstance()->SetCallBack(
1475 "BlockBasedTableBuilder::WriteMaybeCompressedBlock:TamperWithChecksum",
1477 SyncPoint::GetInstance()->EnableProcessing();
1479 bool write_global_seqno
= std::get
<0>(GetParam());
1480 bool verify_checksums_before_ingest
= std::get
<1>(GetParam());
1482 Options options
= CurrentOptions();
1483 DestroyAndReopen(options
);
1484 std::map
<std::string
, std::string
> true_data
;
// The Status is captured rather than asserted because the expected outcome
// depends on verify_checksums_before_ingest.
1485 Status s
= GenerateAndAddExternalFile(
1486 options
, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue
, file_id
++,
1487 write_global_seqno
, verify_checksums_before_ingest
, &true_data
);
1488 if (verify_checksums_before_ingest
) {
1493 change_checksum_called
= false;
// Repeated for each configuration produced by
// ChangeOptionsForFileIngestionTest().
1494 } while (ChangeOptionsForFileIngestionTest());
// Writes a 100-key external file, inverts the first byte of the file on disk
// through a RandomRWFile, and then ingests it with write_global_seqno and
// verify_checksums_before_ingest taken from the test parameters. The test is
// skipped when the Env does not support NewRandomRWFile.
// NOTE(review): this listing has lost lines: the declarations of `file_id`
// and `buf`, the `do {` opener, and the branches that check the ingestion
// Status are not visible. Confirm against the original file before editing.
1497 TEST_P(ExternalSSTFileBasicTest
, IngestFileWithFirstByteTampered
) {
1498 if (!random_rwfile_supported_
) {
1499 ROCKSDB_GTEST_SKIP("Test requires NewRandomRWFile support");
1502 SyncPoint::GetInstance()->DisableProcessing();
1504 EnvOptions env_options
;
1506 Options options
= CurrentOptions();
// The external file is named after the running file id.
1507 std::string file_path
= sst_files_dir_
+ std::to_string(file_id
++);
1508 SstFileWriter
sst_file_writer(env_options
, options
);
1509 Status s
= sst_file_writer
.Open(file_path
);
1511 for (int i
= 0; i
!= 100; ++i
) {
1512 std::string key
= Key(i
);
1513 std::string value
= Key(i
) + std::to_string(0);
1514 ASSERT_OK(sst_file_writer
.Put(key
, value
));
1516 ASSERT_OK(sst_file_writer
.Finish());
// The file must be larger than the 8-byte scratch buffer read below.
1519 uint64_t file_size
= 0;
1520 ASSERT_OK(env_
->GetFileSize(file_path
, &file_size
));
1521 ASSERT_GT(file_size
, 8);
1522 std::unique_ptr
<RandomRWFile
> rwfile
;
1523 ASSERT_OK(env_
->NewRandomRWFile(file_path
, &rwfile
, EnvOptions()));
1524 // Manually corrupt the file
1525 // We deterministically corrupt the first byte because we currently
1526 // cannot choose a random offset. The reason for this limitation is that
1527 // we do not checksum property block at present.
1528 const uint64_t offset
= 0;
1529 char scratch
[8] = {0};
// NOTE(review): the write-back below passes `buf`, so the corruption only
// reaches the file if `buf` refers to `scratch` after the Read - verify.
1531 ASSERT_OK(rwfile
->Read(offset
, sizeof(scratch
), &buf
, scratch
));
1532 scratch
[0] ^= 0xff; // invert every bit of the first byte
1533 ASSERT_OK(rwfile
->Write(offset
, buf
));
1536 IngestExternalFileOptions ifo
;
1537 ifo
.write_global_seqno
= std::get
<0>(GetParam());
1538 ifo
.verify_checksums_before_ingest
= std::get
<1>(GetParam());
1539 s
= db_
->IngestExternalFile({file_path
}, ifo
);
1540 if (ifo
.verify_checksums_before_ingest
) {
// Repeated for each configuration produced by
// ChangeOptionsForFileIngestionTest().
1545 } while (ChangeOptionsForFileIngestionTest());
// Captures the offset and size of the properties block through two
// BlockBasedTableBuilder::WritePropertiesBlock sync points while a 100-key
// external file is written, inverts one byte at a random position inside
// that block on disk, and ingests the file with
// verify_checksums_before_ingest = true. Bypassed when the test parameter
// disables checksum verification; skipped when the Env does not support
// NewRandomRWFile.
// NOTE(review): this listing has lost lines: the declarations of `file_id`,
// `offset` and `buf`, the `do {` opener, and the assertion on the ingestion
// Status (embedded number 1609) are not visible. Confirm against the
// original file before editing.
1548 TEST_P(ExternalSSTFileBasicTest
, IngestExternalFileWithCorruptedPropsBlock
) {
1549 bool verify_checksums_before_ingest
= std::get
<1>(GetParam());
1550 if (!verify_checksums_before_ingest
) {
1551 ROCKSDB_GTEST_BYPASS("Bypassing test when !verify_checksums_before_ingest");
1554 if (!random_rwfile_supported_
) {
1555 ROCKSDB_GTEST_SKIP("Test requires NewRandomRWFile support");
// Filled in by the sync point callbacks while the table is being built.
1558 uint64_t props_block_offset
= 0;
1559 size_t props_block_size
= 0;
1560 const auto& get_props_block_offset
= [&](void* arg
) {
1561 props_block_offset
= *reinterpret_cast<uint64_t*>(arg
);
1563 const auto& get_props_block_size
= [&](void* arg
) {
// NOTE(review): the argument is read as uint64_t and stored into a size_t.
1564 props_block_size
= *reinterpret_cast<uint64_t*>(arg
);
// Start from a clean sync point state before installing the callbacks.
1566 SyncPoint::GetInstance()->DisableProcessing();
1567 SyncPoint::GetInstance()->ClearAllCallBacks();
1568 SyncPoint::GetInstance()->SetCallBack(
1569 "BlockBasedTableBuilder::WritePropertiesBlock:GetPropsBlockOffset",
1570 get_props_block_offset
);
1571 SyncPoint::GetInstance()->SetCallBack(
1572 "BlockBasedTableBuilder::WritePropertiesBlock:GetPropsBlockSize",
1573 get_props_block_size
);
1574 SyncPoint::GetInstance()->EnableProcessing();
// Seeded from the wall clock, so the corrupted position differs between
// runs.
1576 Random64
rand(time(nullptr));
1578 std::string file_path
= sst_files_dir_
+ std::to_string(file_id
++);
1579 Options options
= CurrentOptions();
1580 SstFileWriter
sst_file_writer(EnvOptions(), options
);
1581 Status s
= sst_file_writer
.Open(file_path
);
1583 for (int i
= 0; i
!= 100; ++i
) {
1584 std::string key
= Key(i
);
1585 std::string value
= Key(i
) + std::to_string(0);
1586 ASSERT_OK(sst_file_writer
.Put(key
, value
));
1588 ASSERT_OK(sst_file_writer
.Finish());
1591 std::unique_ptr
<RandomRWFile
> rwfile
;
1592 ASSERT_OK(env_
->NewRandomRWFile(file_path
, &rwfile
, EnvOptions()));
1593 // Manually corrupt the file
// The block must be larger than the 8-byte scratch buffer so that the modulo
// below has a positive divisor.
1594 ASSERT_GT(props_block_size
, 8);
1596 props_block_offset
+ rand
.Next() % (props_block_size
- 8);
1597 char scratch
[8] = {0};
// NOTE(review): the write-back below passes `buf`, so the corruption only
// reaches the file if `buf` refers to `scratch` after the Read - verify.
1599 ASSERT_OK(rwfile
->Read(offset
, sizeof(scratch
), &buf
, scratch
));
1600 scratch
[0] ^= 0xff; // invert every bit of the first byte read
1601 ASSERT_OK(rwfile
->Write(offset
, buf
));
1605 IngestExternalFileOptions ifo
;
1606 ifo
.write_global_seqno
= std::get
<0>(GetParam());
1607 ifo
.verify_checksums_before_ingest
= true;
1608 s
= db_
->IngestExternalFile({file_path
}, ifo
);
// Repeated for each configuration produced by
// ChangeOptionsForFileIngestionTest().
1610 } while (ChangeOptionsForFileIngestionTest());
// Ingests two external files with an overlapping key in a single
// IngestExternalFile call: file1 holds "a"->"z" and "i"->"m", file2 holds
// "i"->"k". Afterwards Get("a") must return "z", Get("i") must return "k"
// (the value from the file listed later), iteration must see 2 keys, and
// level 0 must hold 2 table files.
// NOTE(review): this listing has lost lines: the scopes that separate the
// two `sst_file_writer` declarations, the declaration and increment of
// `total_keys`, and any cleanup of `iter` are not visible. Confirm against
// the original file before editing.
1613 TEST_F(ExternalSSTFileBasicTest
, OverlappingFiles
) {
1614 Options options
= CurrentOptions();
// Paths of the files to ingest, in ingestion order.
1616 std::vector
<std::string
> files
;
1618 SstFileWriter
sst_file_writer(EnvOptions(), options
);
1619 std::string file1
= sst_files_dir_
+ "file1.sst";
1620 ASSERT_OK(sst_file_writer
.Open(file1
));
1621 ASSERT_OK(sst_file_writer
.Put("a", "z"));
1622 ASSERT_OK(sst_file_writer
.Put("i", "m"));
1623 ExternalSstFileInfo file1_info
;
1624 ASSERT_OK(sst_file_writer
.Finish(&file1_info
));
1625 files
.push_back(std::move(file1
));
// Second file: overlaps file1 on key "i" with a different value.
1628 SstFileWriter
sst_file_writer(EnvOptions(), options
);
1629 std::string file2
= sst_files_dir_
+ "file2.sst";
1630 ASSERT_OK(sst_file_writer
.Open(file2
));
1631 ASSERT_OK(sst_file_writer
.Put("i", "k"));
1632 ExternalSstFileInfo file2_info
;
1633 ASSERT_OK(sst_file_writer
.Finish(&file2_info
));
1634 files
.push_back(std::move(file2
));
// Default ingestion options; both files go in with one call.
1637 IngestExternalFileOptions ifo
;
1638 ASSERT_OK(db_
->IngestExternalFile(files
, ifo
));
1639 ASSERT_EQ(Get("a"), "z");
1640 ASSERT_EQ(Get("i"), "k");
// Walk the whole DB to count the visible keys.
1643 Iterator
* iter
= db_
->NewIterator(ReadOptions());
1644 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
1645 ASSERT_OK(iter
->status());
1649 ASSERT_EQ(total_keys
, 2);
1651 ASSERT_EQ(2, NumTableFilesAtLevel(0));
// Regression test for the issue linked below: writes "k"->"a" three times
// through the normal write path, ingests an external file holding "k"->"b",
// waits for compaction, and asserts that Get("k") returns the ingested
// value "b". It also checks SstFileWriter::FileSize() before Open (0) and
// after a successful Finish (> 0).
// NOTE(review): the flushes mentioned in the comment below (embedded numbers
// 1660-1666 have gaps) and any check of the ingestion Status (1686) are not
// visible in this listing - confirm against the original file.
1654 TEST_F(ExternalSSTFileBasicTest
, IngestFileAfterDBPut
) {
1655 // Repro https://github.com/facebook/rocksdb/issues/6245.
1656 // Flush three files to L0. Ingest one more file to trigger L0->L1 compaction
1657 // via trivial move. The bug happened when L1 files were incorrectly sorted
1658 // resulting in an old value for "k" returned by `Get()`.
1659 Options options
= CurrentOptions();
1661 ASSERT_OK(Put("k", "a"));
1663 ASSERT_OK(Put("k", "a"));
1665 ASSERT_OK(Put("k", "a"));
1667 SstFileWriter
sst_file_writer(EnvOptions(), options
);
1669 // Current file size should be 0 after sst_file_writer init and before open a
1671 ASSERT_EQ(sst_file_writer
.FileSize(), 0);
1673 std::string file1
= sst_files_dir_
+ "file1.sst";
1674 ASSERT_OK(sst_file_writer
.Open(file1
));
// The ingested file carries a newer value for the same key.
1675 ASSERT_OK(sst_file_writer
.Put("k", "b"));
1677 ExternalSstFileInfo file1_info
;
1678 Status s
= sst_file_writer
.Finish(&file1_info
);
1679 ASSERT_OK(s
) << s
.ToString();
1681 // Current file size should be non-zero after a successful write.
1682 ASSERT_GT(sst_file_writer
.FileSize(), 0);
1684 IngestExternalFileOptions ifo
;
1685 s
= db_
->IngestExternalFile({file1
}, ifo
);
// Let any compaction triggered by the ingestion finish before reading.
1687 ASSERT_OK(dbfull()->TEST_WaitForCompact());
1689 ASSERT_EQ(Get("k"), "b");
// Ingests an external file with an explicit file temperature and checks how
// temperatures are reported through column-family metadata. The file
// (Key(1000)..Key(1099)) is ingested via IngestExternalFiles() with
// arg.file_temperature = Temperature::kWarm; the test then expects
// metadata.levels[6].files[0] to report kWarm and, after two further Put()s,
// metadata.levels[0].files[0] to report kUnknown. The same metadata
// expectations are asserted again in the section marked "reopen", and
// GetSstSizeHelper() is queried for several temperatures along the way.
1692 TEST_F(ExternalSSTFileBasicTest
, IngestWithTemperature
) {
1693 Options options
= CurrentOptions();
// NOTE(review): ioptions is built from `options` before the fields below are
// assigned and does not appear to be referenced again in this test - confirm
// it is still needed.
1694 const ImmutableCFOptions
ioptions(options
);
1695 options
.bottommost_temperature
= Temperature::kWarm
;
1696 SstFileWriter
sst_file_writer(EnvOptions(), options
);
1697 options
.level0_file_num_compaction_trigger
= 2;
// Query per-temperature SST sizes before the file is ingested.
1700 auto size
= GetSstSizeHelper(Temperature::kUnknown
);
1702 size
= GetSstSizeHelper(Temperature::kWarm
);
1704 size
= GetSstSizeHelper(Temperature::kHot
);
1707 // create file01.sst (1000 => 1099) and ingest it
1708 std::string file1
= sst_files_dir_
+ "file01.sst";
1709 ASSERT_OK(sst_file_writer
.Open(file1
));
1710 for (int k
= 1000; k
< 1100; k
++) {
1711 ASSERT_OK(sst_file_writer
.Put(Key(k
), Key(k
) + "_val"));
1713 ExternalSstFileInfo file1_info
;
1714 Status s
= sst_file_writer
.Finish(&file1_info
);
// The writer's reported file info must match what was written above.
1716 ASSERT_EQ(file1_info
.file_path
, file1
);
1717 ASSERT_EQ(file1_info
.num_entries
, 100);
1718 ASSERT_EQ(file1_info
.smallest_key
, Key(1000));
1719 ASSERT_EQ(file1_info
.largest_key
, Key(1099));
1721 std::vector
<std::string
> files
;
1722 std::vector
<std::string
> files_checksums
;
1723 std::vector
<std::string
> files_checksum_func_names
;
1724 Temperature file_temperature
= Temperature::kWarm
;
// Build the ingestion request for the default column family, tagging the
// file with file_temperature (kWarm).
1726 files
.push_back(file1
);
1727 IngestExternalFileOptions in_opts
;
1728 in_opts
.move_files
= false;
1729 in_opts
.snapshot_consistency
= true;
1730 in_opts
.allow_global_seqno
= false;
1731 in_opts
.allow_blocking_flush
= false;
1732 in_opts
.write_global_seqno
= true;
1733 in_opts
.verify_file_checksum
= false;
1734 IngestExternalFileArg arg
;
1735 arg
.column_family
= db_
->DefaultColumnFamily();
1736 arg
.external_files
= files
;
1737 arg
.options
= in_opts
;
1738 arg
.files_checksums
= files_checksums
;
1739 arg
.files_checksum_func_names
= files_checksum_func_names
;
1740 arg
.file_temperature
= file_temperature
;
1741 s
= db_
->IngestExternalFiles({arg
});
1744 // check the temperature of the file being ingested
1745 ColumnFamilyMetaData metadata
;
1746 db_
->GetColumnFamilyMetaData(&metadata
);
1747 ASSERT_EQ(1, metadata
.file_count
);
1748 ASSERT_EQ(Temperature::kWarm
, metadata
.levels
[6].files
[0].temperature
);
1749 size
= GetSstSizeHelper(Temperature::kUnknown
);
1751 size
= GetSstSizeHelper(Temperature::kWarm
);
1754 // non-bottommost file still has unknown temperature
1755 ASSERT_OK(Put("foo", "bar"));
1756 ASSERT_OK(Put("bar", "bar"));
1758 db_
->GetColumnFamilyMetaData(&metadata
);
1759 ASSERT_EQ(2, metadata
.file_count
);
1760 ASSERT_EQ(Temperature::kUnknown
, metadata
.levels
[0].files
[0].temperature
);
1761 size
= GetSstSizeHelper(Temperature::kUnknown
);
1763 size
= GetSstSizeHelper(Temperature::kWarm
);
1766 // reopen and check the information is persisted
1768 db_
->GetColumnFamilyMetaData(&metadata
);
1769 ASSERT_EQ(2, metadata
.file_count
);
1770 ASSERT_EQ(Temperature::kUnknown
, metadata
.levels
[0].files
[0].temperature
);
1771 ASSERT_EQ(Temperature::kWarm
, metadata
.levels
[6].files
[0].temperature
);
1772 size
= GetSstSizeHelper(Temperature::kUnknown
);
1774 size
= GetSstSizeHelper(Temperature::kWarm
);
1777 // check other non-exist temperatures
1778 size
= GetSstSizeHelper(Temperature::kHot
);
1780 size
= GetSstSizeHelper(Temperature::kCold
);
// NOTE(review): 22 does not match any temperature used above; presumably this
// probes an invalid temperature value and expects a reported size of 0 -
// confirm.
1783 ASSERT_TRUE(dbfull()->GetProperty(
1784 DB::Properties::kLiveSstFilesSizeAtTemperature
+ std::to_string(22),
1786 ASSERT_EQ(std::atoi(prop
.c_str()), 0);
// Checks that ingestion with ifo.fail_if_not_bottommost_level = true returns
// a TryAgain status in two configurations:
//   1. universal compaction, with a snapshot taken beforehand and
//      snapshot_consistency enabled;
//   2. level compaction with num_levels = 2, after keys a/c and b/d were
//      written and flushed separately and a forced bottommost compaction ran.
// The file being ingested contains the single key "b".
1789 TEST_F(ExternalSSTFileBasicTest
, FailIfNotBottommostLevel
) {
1790 Options options
= GetDefaultOptions();
// Build the external file once; both scenarios below ingest this same path.
1792 std::string file_path
= sst_files_dir_
+ std::to_string(1);
1793 SstFileWriter
sfw(EnvOptions(), options
);
1795 ASSERT_OK(sfw
.Open(file_path
));
1796 ASSERT_OK(sfw
.Put("b", "dontcare"));
1797 ASSERT_OK(sfw
.Finish());
1799 // Test universal compaction + ingest with snapshot consistency
1800 options
.create_if_missing
= true;
1801 options
.compaction_style
= CompactionStyle::kCompactionStyleUniversal
;
1802 DestroyAndReopen(options
);
// Take a snapshot before attempting ingestion; snapshot_guard is constructed
// from it (presumably so it is released on scope exit - verify).
1804 const Snapshot
* snapshot
= db_
->GetSnapshot();
1805 ManagedSnapshot
snapshot_guard(db_
, snapshot
);
1806 IngestExternalFileOptions ifo
;
1807 ifo
.fail_if_not_bottommost_level
= true;
1808 ifo
.snapshot_consistency
= true;
1809 const Status s
= db_
->IngestExternalFile({file_path
}, ifo
);
1810 ASSERT_TRUE(s
.IsTryAgain());
1813 // Test level compaction
1814 options
.compaction_style
= CompactionStyle::kCompactionStyleLevel
;
1815 options
.num_levels
= 2;
1816 DestroyAndReopen(options
);
// Two flushes whose key ranges ("a".."c" and "b".."d") both cover "b".
1817 ASSERT_OK(db_
->Put(WriteOptions(), "a", "dontcare"));
1818 ASSERT_OK(db_
->Put(WriteOptions(), "c", "dontcare"));
1819 ASSERT_OK(db_
->Flush(FlushOptions()));
1821 ASSERT_OK(db_
->Put(WriteOptions(), "b", "dontcare"));
1822 ASSERT_OK(db_
->Put(WriteOptions(), "d", "dontcare"));
1823 ASSERT_OK(db_
->Flush(FlushOptions()));
// Compact with no begin/end bounds (nullptr, nullptr), forcing bottommost
// level compaction.
1826 CompactRangeOptions cro
;
1827 cro
.bottommost_level_compaction
= BottommostLevelCompaction::kForce
;
1828 ASSERT_OK(db_
->CompactRange(cro
, nullptr, nullptr));
1830 IngestExternalFileOptions ifo
;
1831 ifo
.fail_if_not_bottommost_level
= true;
1832 const Status s
= db_
->IngestExternalFile({file_path
}, ifo
);
1833 ASSERT_TRUE(s
.IsTryAgain());
// Writes "k" through the DB, ingests an external file that also contains "k",
// and then expects db_->VerifyChecksum() to succeed.
1837 TEST_F(ExternalSSTFileBasicTest
, VerifyChecksum
) {
1838 const std::string kPutVal
= "put_val";
1839 const std::string kIngestedVal
= "ingested_val";
1841 ASSERT_OK(Put("k", kPutVal
, WriteOptions()));
// NOTE(review): sst_files_dir_ is built with a trailing '/' in the fixture
// constructor, so this path contains "//" - presumably harmless, but confirm.
1844 std::string external_file
= sst_files_dir_
+ "/file_to_ingest.sst";
1846 SstFileWriter sst_file_writer
{EnvOptions(), CurrentOptions()};
1848 ASSERT_OK(sst_file_writer
.Open(external_file
));
1849 ASSERT_OK(sst_file_writer
.Put("k", kIngestedVal
));
1850 ASSERT_OK(sst_file_writer
.Finish());
// Ingest into the default column family with default ingestion options.
1853 ASSERT_OK(db_
->IngestExternalFile(db_
->DefaultColumnFamily(), {external_file
},
1854 IngestExternalFileOptions()));
1856 ASSERT_OK(db_
->VerifyChecksum());
// Uses sync-point callbacks to count table opens that skip vs. pass unique-ID
// verification ("BlockBasedTable::Open::SkippedVerifyUniqueId" and
// "BlockBasedTable::Open::PassedVerifyUniqueId"). A further callback on
// "PropertyBlockBuilder::AddTableProperty:Start" blanks db_session_id in the
// table properties, which the in-body comment describes as emulating a file
// from an older SST writer. The (skipped, passed) counters are asserted at
// three points: (0, 2), then (2, 2) after a second file is ingested, then
// (1, 2) in the final re-open section.
1859 TEST_F(ExternalSSTFileBasicTest
, VerifySstUniqueId
) {
1860 const std::string kPutVal
= "put_val";
1861 const std::string kIngestedVal
= "ingested_val";
1863 ASSERT_OK(Put("k", kPutVal
, WriteOptions()));
// First external file, written and ingested before any callback is installed.
1866 std::string external_file
= sst_files_dir_
+ "/file_to_ingest.sst";
1868 SstFileWriter sst_file_writer
{EnvOptions(), CurrentOptions()};
1870 ASSERT_OK(sst_file_writer
.Open(external_file
));
1871 ASSERT_OK(sst_file_writer
.Put("k", kIngestedVal
));
1872 ASSERT_OK(sst_file_writer
.Finish());
1875 ASSERT_OK(db_
->IngestExternalFile(db_
->DefaultColumnFamily(), {external_file
},
1876 IngestExternalFileOptions()));
1878 // Test ingest file without session_id and db_id (for example generated by an
1879 // older version of sst_writer)
1880 SyncPoint::GetInstance()->SetCallBack(
1881 "PropertyBlockBuilder::AddTableProperty:Start", [&](void* props_vs
) {
1882 auto props
= static_cast<TableProperties
*>(props_vs
);
1883 // update table property session_id to a different one
1884 props
->db_session_id
= "";
// Counters incremented by the two verification sync points below.
1887 std::atomic_int skipped
= 0, passed
= 0;
1888 SyncPoint::GetInstance()->SetCallBack(
1889 "BlockBasedTable::Open::SkippedVerifyUniqueId",
1890 [&](void* /*arg*/) { skipped
++; });
1891 SyncPoint::GetInstance()->SetCallBack(
1892 "BlockBasedTable::Open::PassedVerifyUniqueId",
1893 [&](void* /*arg*/) { passed
++; });
1894 SyncPoint::GetInstance()->EnableProcessing();
// The test relies on verify_sst_unique_id_in_manifest being enabled in the
// current options.
1896 auto options
= CurrentOptions();
1897 ASSERT_TRUE(options
.verify_sst_unique_id_in_manifest
);
1899 ASSERT_EQ(skipped
, 0);
1900 ASSERT_EQ(passed
, 2); // one flushed + one ingested
// Second external file, written while the db_session_id-blanking callback is
// installed and sync-point processing is enabled.
1902 external_file
= sst_files_dir_
+ "/file_to_ingest2.sst";
1904 SstFileWriter sst_file_writer
{EnvOptions(), CurrentOptions()};
1906 ASSERT_OK(sst_file_writer
.Open(external_file
));
1907 ASSERT_OK(sst_file_writer
.Put("k", kIngestedVal
));
1908 ASSERT_OK(sst_file_writer
.Finish());
1911 ASSERT_OK(db_
->IngestExternalFile(db_
->DefaultColumnFamily(), {external_file
},
1912 IngestExternalFileOptions()));
1914 // Two table file opens skipping verification:
1915 // * ExternalSstFileIngestionJob::GetIngestedFileInfo
1916 // * TableCache::GetTableReader
1917 ASSERT_EQ(skipped
, 2);
1918 ASSERT_EQ(passed
, 2);
1920 // Check same after re-open (except no GetIngestedFileInfo)
1924 ASSERT_EQ(skipped
, 1);
1925 ASSERT_EQ(passed
, 2);
// Takes a snapshot from inside the "VersionSet::LogAndApply:WriteManifest"
// sync-point callback, which is armed right before an external file is
// ingested, and checks:
//   - reads through that snapshot return the pre-ingestion value of "k",
//     both inside the callback and after IngestExternalFile() returns;
//   - the latest sequence number after ingestion is snapshot seqno + 1;
//   - after Reopen() a plain read returns the ingested value;
//   - the next Put() advances the latest sequence number by exactly one.
1928 TEST_F(ExternalSSTFileBasicTest
, StableSnapshotWhileLoggingToManifest
) {
1929 const std::string kPutVal
= "put_val";
1930 const std::string kIngestedVal
= "ingested_val";
1932 ASSERT_OK(Put("k", kPutVal
, WriteOptions()));
// External file that overwrites "k" with kIngestedVal.
1935 std::string external_file
= sst_files_dir_
+ "/file_to_ingest.sst";
1937 SstFileWriter sst_file_writer
{EnvOptions(), CurrentOptions()};
1938 ASSERT_OK(sst_file_writer
.Open(external_file
));
1939 ASSERT_OK(sst_file_writer
.Put("k", kIngestedVal
));
1940 ASSERT_OK(sst_file_writer
.Finish());
// The callback stores the snapshot here so the test body can use it and
// release it after ingestion.
1943 const Snapshot
* snapshot
= nullptr;
1944 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1945 "VersionSet::LogAndApply:WriteManifest", [&](void* /* arg */) {
1946 // prevent background compaction jobs from calling this callback
1947 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
1948 snapshot
= db_
->GetSnapshot();
1949 ReadOptions read_opts
;
1950 read_opts
.snapshot
= snapshot
;
1952 ASSERT_OK(db_
->Get(read_opts
, "k", &value
));
1953 ASSERT_EQ(kPutVal
, value
);
1955 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
1957 ASSERT_OK(db_
->IngestExternalFile(db_
->DefaultColumnFamily(), {external_file
},
1958 IngestExternalFileOptions()));
1959 auto ingested_file_seqno
= db_
->GetLatestSequenceNumber();
// A non-null snapshot shows the callback ran during ingestion.
1960 ASSERT_NE(nullptr, snapshot
);
1961 // snapshot is taken before SST ingestion is done
1962 ASSERT_EQ(ingested_file_seqno
, snapshot
->GetSequenceNumber() + 1);
// Reading through the snapshot must still return the pre-ingestion value.
1964 ReadOptions read_opts
;
1965 read_opts
.snapshot
= snapshot
;
1967 ASSERT_OK(db_
->Get(read_opts
, "k", &value
));
1968 ASSERT_EQ(kPutVal
, value
);
1969 db_
->ReleaseSnapshot(snapshot
);
1971 // After reopen, sequence number should be up to date such that
1972 // ingested value is read
1973 Reopen(CurrentOptions());
1974 ASSERT_OK(db_
->Get(ReadOptions(), "k", &value
));
1975 ASSERT_EQ(kIngestedVal
, value
);
1977 // New write should get higher seqno compared to ingested file
1978 ASSERT_OK(Put("k", kPutVal
, WriteOptions()));
1979 ASSERT_EQ(db_
->GetLatestSequenceNumber(), ingested_file_seqno
+ 1);
// Instantiates the parameterized ExternalSSTFileBasicTest cases with all four
// combinations of the fixture's (bool, bool) parameter tuple. The order of
// the values determines the index of each generated test instance.
1982 INSTANTIATE_TEST_CASE_P(ExternalSSTFileBasicTest
, ExternalSSTFileBasicTest
,
1983 testing::Values(std::make_tuple(true, true),
1984 std::make_tuple(true, false),
1985 std::make_tuple(false, true),
1986 std::make_tuple(false, false)));
1988 #endif // ROCKSDB_LITE
1990 } // namespace ROCKSDB_NAMESPACE
// Test binary entry point: installs the stack trace handler, initializes
// GoogleTest with the command-line arguments, registers custom objects, and
// returns the result of running all tests.
1992 int main(int argc
, char** argv
) {
1993 ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
// InitGoogleTest receives &argc, so it may modify argc/argv before they are
// passed to RegisterCustomObjects.
1994 ::testing::InitGoogleTest(&argc
, argv
);
1995 RegisterCustomObjects(argc
, argv
);
1996 return RUN_ALL_TESTS();