]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | // This source code is licensed under both the GPLv2 (found in the | |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
5 | ||
6 | #include <cstdlib> | |
7 | #include <string> | |
8 | #include <unordered_map> | |
9 | ||
494da23a | 10 | #include "db/table_properties_collector.h" |
11fdf7f2 TL |
11 | #include "rocksdb/slice.h" |
12 | #include "table/block.h" | |
13 | #include "table/block_based_table_reader.h" | |
14 | #include "table/block_builder.h" | |
15 | #include "table/data_block_hash_index.h" | |
16 | #include "table/get_context.h" | |
494da23a | 17 | #include "table/table_builder.h" |
11fdf7f2 TL |
18 | #include "util/testharness.h" |
19 | #include "util/testutil.h" | |
20 | ||
21 | namespace rocksdb { | |
22 | ||
23 | bool SearchForOffset(DataBlockHashIndex& index, const char* data, | |
24 | uint16_t map_offset, const Slice& key, | |
25 | uint8_t& restart_point) { | |
26 | uint8_t entry = index.Lookup(data, map_offset, key); | |
27 | if (entry == kCollision) { | |
28 | return true; | |
29 | } | |
30 | ||
31 | if (entry == kNoEntry) { | |
32 | return false; | |
33 | } | |
34 | ||
35 | return entry == restart_point; | |
36 | } | |
37 | ||
38 | // Random KV generator similer to block_test | |
39 | static std::string RandomString(Random* rnd, int len) { | |
40 | std::string r; | |
41 | test::RandomString(rnd, len, &r); | |
42 | return r; | |
43 | } | |
44 | std::string GenerateKey(int primary_key, int secondary_key, int padding_size, | |
45 | Random* rnd) { | |
46 | char buf[50]; | |
47 | char* p = &buf[0]; | |
48 | snprintf(buf, sizeof(buf), "%6d%4d", primary_key, secondary_key); | |
49 | std::string k(p); | |
50 | if (padding_size) { | |
51 | k += RandomString(rnd, padding_size); | |
52 | } | |
53 | ||
54 | return k; | |
55 | } | |
56 | ||
57 | // Generate random key value pairs. | |
58 | // The generated key will be sorted. You can tune the parameters to generated | |
59 | // different kinds of test key/value pairs for different scenario. | |
60 | void GenerateRandomKVs(std::vector<std::string>* keys, | |
61 | std::vector<std::string>* values, const int from, | |
62 | const int len, const int step = 1, | |
63 | const int padding_size = 0, | |
64 | const int keys_share_prefix = 1) { | |
65 | Random rnd(302); | |
66 | ||
67 | // generate different prefix | |
68 | for (int i = from; i < from + len; i += step) { | |
69 | // generating keys that shares the prefix | |
70 | for (int j = 0; j < keys_share_prefix; ++j) { | |
71 | keys->emplace_back(GenerateKey(i, j, padding_size, &rnd)); | |
72 | ||
73 | // 100 bytes values | |
74 | values->emplace_back(RandomString(&rnd, 100)); | |
75 | } | |
76 | } | |
77 | } | |
78 | ||
79 | TEST(DataBlockHashIndex, DataBlockHashTestSmall) { | |
80 | DataBlockHashIndexBuilder builder; | |
81 | builder.Initialize(0.75 /*util_ratio*/); | |
82 | for (int j = 0; j < 5; j++) { | |
83 | for (uint8_t i = 0; i < 2 + j; i++) { | |
84 | std::string key("key" + std::to_string(i)); | |
85 | uint8_t restart_point = i; | |
86 | builder.Add(key, restart_point); | |
87 | } | |
88 | ||
89 | size_t estimated_size = builder.EstimateSize(); | |
90 | ||
91 | std::string buffer("fake"), buffer2; | |
92 | size_t original_size = buffer.size(); | |
93 | estimated_size += original_size; | |
94 | builder.Finish(buffer); | |
95 | ||
96 | ASSERT_EQ(buffer.size(), estimated_size); | |
97 | ||
98 | buffer2 = buffer; // test for the correctness of relative offset | |
99 | ||
100 | Slice s(buffer2); | |
101 | DataBlockHashIndex index; | |
102 | uint16_t map_offset; | |
103 | index.Initialize(s.data(), static_cast<uint16_t>(s.size()), &map_offset); | |
104 | ||
105 | // the additional hash map should start at the end of the buffer | |
106 | ASSERT_EQ(original_size, map_offset); | |
107 | for (uint8_t i = 0; i < 2; i++) { | |
108 | std::string key("key" + std::to_string(i)); | |
109 | uint8_t restart_point = i; | |
110 | ASSERT_TRUE( | |
111 | SearchForOffset(index, s.data(), map_offset, key, restart_point)); | |
112 | } | |
113 | builder.Reset(); | |
114 | } | |
115 | } | |
116 | ||
117 | TEST(DataBlockHashIndex, DataBlockHashTest) { | |
118 | // bucket_num = 200, #keys = 100. 50% utilization | |
119 | DataBlockHashIndexBuilder builder; | |
120 | builder.Initialize(0.75 /*util_ratio*/); | |
121 | ||
122 | for (uint8_t i = 0; i < 100; i++) { | |
123 | std::string key("key" + std::to_string(i)); | |
124 | uint8_t restart_point = i; | |
125 | builder.Add(key, restart_point); | |
126 | } | |
127 | ||
128 | size_t estimated_size = builder.EstimateSize(); | |
129 | ||
130 | std::string buffer("fake content"), buffer2; | |
131 | size_t original_size = buffer.size(); | |
132 | estimated_size += original_size; | |
133 | builder.Finish(buffer); | |
134 | ||
135 | ASSERT_EQ(buffer.size(), estimated_size); | |
136 | ||
137 | buffer2 = buffer; // test for the correctness of relative offset | |
138 | ||
139 | Slice s(buffer2); | |
140 | DataBlockHashIndex index; | |
141 | uint16_t map_offset; | |
142 | index.Initialize(s.data(), static_cast<uint16_t>(s.size()), &map_offset); | |
143 | ||
144 | // the additional hash map should start at the end of the buffer | |
145 | ASSERT_EQ(original_size, map_offset); | |
146 | for (uint8_t i = 0; i < 100; i++) { | |
147 | std::string key("key" + std::to_string(i)); | |
148 | uint8_t restart_point = i; | |
149 | ASSERT_TRUE( | |
150 | SearchForOffset(index, s.data(), map_offset, key, restart_point)); | |
151 | } | |
152 | } | |
153 | ||
154 | TEST(DataBlockHashIndex, DataBlockHashTestCollision) { | |
155 | // bucket_num = 2. There will be intense hash collisions | |
156 | DataBlockHashIndexBuilder builder; | |
157 | builder.Initialize(0.75 /*util_ratio*/); | |
158 | ||
159 | for (uint8_t i = 0; i < 100; i++) { | |
160 | std::string key("key" + std::to_string(i)); | |
161 | uint8_t restart_point = i; | |
162 | builder.Add(key, restart_point); | |
163 | } | |
164 | ||
165 | size_t estimated_size = builder.EstimateSize(); | |
166 | ||
167 | std::string buffer("some other fake content to take up space"), buffer2; | |
168 | size_t original_size = buffer.size(); | |
169 | estimated_size += original_size; | |
170 | builder.Finish(buffer); | |
171 | ||
172 | ASSERT_EQ(buffer.size(), estimated_size); | |
173 | ||
174 | buffer2 = buffer; // test for the correctness of relative offset | |
175 | ||
176 | Slice s(buffer2); | |
177 | DataBlockHashIndex index; | |
178 | uint16_t map_offset; | |
179 | index.Initialize(s.data(), static_cast<uint16_t>(s.size()), &map_offset); | |
180 | ||
181 | // the additional hash map should start at the end of the buffer | |
182 | ASSERT_EQ(original_size, map_offset); | |
183 | for (uint8_t i = 0; i < 100; i++) { | |
184 | std::string key("key" + std::to_string(i)); | |
185 | uint8_t restart_point = i; | |
186 | ASSERT_TRUE( | |
187 | SearchForOffset(index, s.data(), map_offset, key, restart_point)); | |
188 | } | |
189 | } | |
190 | ||
191 | TEST(DataBlockHashIndex, DataBlockHashTestLarge) { | |
192 | DataBlockHashIndexBuilder builder; | |
193 | builder.Initialize(0.75 /*util_ratio*/); | |
194 | std::unordered_map<std::string, uint8_t> m; | |
195 | ||
196 | for (uint8_t i = 0; i < 100; i++) { | |
197 | if (i % 2) { | |
198 | continue; // leave half of the keys out | |
199 | } | |
200 | std::string key = "key" + std::to_string(i); | |
201 | uint8_t restart_point = i; | |
202 | builder.Add(key, restart_point); | |
203 | m[key] = restart_point; | |
204 | } | |
205 | ||
206 | size_t estimated_size = builder.EstimateSize(); | |
207 | ||
208 | std::string buffer("filling stuff"), buffer2; | |
209 | size_t original_size = buffer.size(); | |
210 | estimated_size += original_size; | |
211 | builder.Finish(buffer); | |
212 | ||
213 | ASSERT_EQ(buffer.size(), estimated_size); | |
214 | ||
215 | buffer2 = buffer; // test for the correctness of relative offset | |
216 | ||
217 | Slice s(buffer2); | |
218 | DataBlockHashIndex index; | |
219 | uint16_t map_offset; | |
220 | index.Initialize(s.data(), static_cast<uint16_t>(s.size()), &map_offset); | |
221 | ||
222 | // the additional hash map should start at the end of the buffer | |
223 | ASSERT_EQ(original_size, map_offset); | |
224 | for (uint8_t i = 0; i < 100; i++) { | |
225 | std::string key = "key" + std::to_string(i); | |
226 | uint8_t restart_point = i; | |
227 | if (m.count(key)) { | |
228 | ASSERT_TRUE(m[key] == restart_point); | |
229 | ASSERT_TRUE( | |
230 | SearchForOffset(index, s.data(), map_offset, key, restart_point)); | |
231 | } else { | |
232 | // we allow false positve, so don't test the nonexisting keys. | |
233 | // when false positive happens, the search will continue to the | |
234 | // restart intervals to see if the key really exist. | |
235 | } | |
236 | } | |
237 | } | |
238 | ||
239 | TEST(DataBlockHashIndex, RestartIndexExceedMax) { | |
240 | DataBlockHashIndexBuilder builder; | |
241 | builder.Initialize(0.75 /*util_ratio*/); | |
242 | std::unordered_map<std::string, uint8_t> m; | |
243 | ||
244 | for (uint8_t i = 0; i <= 253; i++) { | |
245 | std::string key = "key" + std::to_string(i); | |
246 | uint8_t restart_point = i; | |
247 | builder.Add(key, restart_point); | |
248 | } | |
249 | ASSERT_TRUE(builder.Valid()); | |
250 | ||
251 | builder.Reset(); | |
252 | ||
253 | for (uint8_t i = 0; i <= 254; i++) { | |
254 | std::string key = "key" + std::to_string(i); | |
255 | uint8_t restart_point = i; | |
256 | builder.Add(key, restart_point); | |
257 | } | |
258 | ||
259 | ASSERT_FALSE(builder.Valid()); | |
260 | ||
261 | builder.Reset(); | |
262 | ASSERT_TRUE(builder.Valid()); | |
263 | } | |
264 | ||
265 | TEST(DataBlockHashIndex, BlockRestartIndexExceedMax) { | |
266 | Options options = Options(); | |
267 | ||
268 | BlockBuilder builder(1 /* block_restart_interval */, | |
269 | true /* use_delta_encoding */, | |
270 | false /* use_value_delta_encoding */, | |
271 | BlockBasedTableOptions::kDataBlockBinaryAndHash); | |
272 | ||
273 | // #restarts <= 253. HashIndex is valid | |
274 | for (int i = 0; i <= 253; i++) { | |
275 | std::string ukey = "key" + std::to_string(i); | |
276 | InternalKey ikey(ukey, 0, kTypeValue); | |
277 | builder.Add(ikey.Encode().ToString(), "value"); | |
278 | } | |
279 | ||
280 | { | |
281 | // read serialized contents of the block | |
282 | Slice rawblock = builder.Finish(); | |
283 | ||
284 | // create block reader | |
285 | BlockContents contents; | |
286 | contents.data = rawblock; | |
11fdf7f2 TL |
287 | Block reader(std::move(contents), kDisableGlobalSequenceNumber); |
288 | ||
289 | ASSERT_EQ(reader.IndexType(), | |
290 | BlockBasedTableOptions::kDataBlockBinaryAndHash); | |
291 | } | |
292 | ||
293 | builder.Reset(); | |
294 | ||
295 | // #restarts > 253. HashIndex is not used | |
296 | for (int i = 0; i <= 254; i++) { | |
297 | std::string ukey = "key" + std::to_string(i); | |
298 | InternalKey ikey(ukey, 0, kTypeValue); | |
299 | builder.Add(ikey.Encode().ToString(), "value"); | |
300 | } | |
301 | ||
302 | { | |
303 | // read serialized contents of the block | |
304 | Slice rawblock = builder.Finish(); | |
305 | ||
306 | // create block reader | |
307 | BlockContents contents; | |
308 | contents.data = rawblock; | |
11fdf7f2 TL |
309 | Block reader(std::move(contents), kDisableGlobalSequenceNumber); |
310 | ||
311 | ASSERT_EQ(reader.IndexType(), | |
312 | BlockBasedTableOptions::kDataBlockBinarySearch); | |
313 | } | |
314 | } | |
315 | ||
316 | TEST(DataBlockHashIndex, BlockSizeExceedMax) { | |
317 | Options options = Options(); | |
318 | std::string ukey(10, 'k'); | |
319 | InternalKey ikey(ukey, 0, kTypeValue); | |
320 | ||
321 | BlockBuilder builder(1 /* block_restart_interval */, | |
322 | false /* use_delta_encoding */, | |
323 | false /* use_value_delta_encoding */, | |
324 | BlockBasedTableOptions::kDataBlockBinaryAndHash); | |
325 | ||
326 | { | |
327 | // insert a large value. The block size plus HashIndex is 65536. | |
328 | std::string value(65502, 'v'); | |
329 | ||
330 | builder.Add(ikey.Encode().ToString(), value); | |
331 | ||
332 | // read serialized contents of the block | |
333 | Slice rawblock = builder.Finish(); | |
334 | ASSERT_LE(rawblock.size(), kMaxBlockSizeSupportedByHashIndex); | |
335 | std::cerr << "block size: " << rawblock.size() << std::endl; | |
336 | ||
337 | // create block reader | |
338 | BlockContents contents; | |
339 | contents.data = rawblock; | |
11fdf7f2 TL |
340 | Block reader(std::move(contents), kDisableGlobalSequenceNumber); |
341 | ||
342 | ASSERT_EQ(reader.IndexType(), | |
343 | BlockBasedTableOptions::kDataBlockBinaryAndHash); | |
344 | } | |
345 | ||
346 | builder.Reset(); | |
347 | ||
348 | { | |
349 | // insert a large value. The block size plus HashIndex would be 65537. | |
350 | // This excceed the max block size supported by HashIndex (65536). | |
351 | // So when build finishes HashIndex will not be created for the block. | |
352 | std::string value(65503, 'v'); | |
353 | ||
354 | builder.Add(ikey.Encode().ToString(), value); | |
355 | ||
356 | // read serialized contents of the block | |
357 | Slice rawblock = builder.Finish(); | |
358 | ASSERT_LE(rawblock.size(), kMaxBlockSizeSupportedByHashIndex); | |
359 | std::cerr << "block size: " << rawblock.size() << std::endl; | |
360 | ||
361 | // create block reader | |
362 | BlockContents contents; | |
363 | contents.data = rawblock; | |
11fdf7f2 TL |
364 | Block reader(std::move(contents), kDisableGlobalSequenceNumber); |
365 | ||
366 | // the index type have fallen back to binary when build finish. | |
367 | ASSERT_EQ(reader.IndexType(), | |
368 | BlockBasedTableOptions::kDataBlockBinarySearch); | |
369 | } | |
370 | } | |
371 | ||
372 | TEST(DataBlockHashIndex, BlockTestSingleKey) { | |
373 | Options options = Options(); | |
374 | ||
375 | BlockBuilder builder(16 /* block_restart_interval */, | |
376 | true /* use_delta_encoding */, | |
377 | false /* use_value_delta_encoding */, | |
378 | BlockBasedTableOptions::kDataBlockBinaryAndHash); | |
379 | ||
380 | std::string ukey("gopher"); | |
381 | std::string value("gold"); | |
382 | InternalKey ikey(ukey, 10, kTypeValue); | |
383 | builder.Add(ikey.Encode().ToString(), value /*value*/); | |
384 | ||
385 | // read serialized contents of the block | |
386 | Slice rawblock = builder.Finish(); | |
387 | ||
388 | // create block reader | |
389 | BlockContents contents; | |
390 | contents.data = rawblock; | |
11fdf7f2 TL |
391 | Block reader(std::move(contents), kDisableGlobalSequenceNumber); |
392 | ||
393 | const InternalKeyComparator icmp(BytewiseComparator()); | |
394 | auto iter = reader.NewIterator<DataBlockIter>(&icmp, icmp.user_comparator()); | |
395 | bool may_exist; | |
396 | // search in block for the key just inserted | |
397 | { | |
398 | InternalKey seek_ikey(ukey, 10, kValueTypeForSeek); | |
399 | may_exist = iter->SeekForGet(seek_ikey.Encode().ToString()); | |
400 | ASSERT_TRUE(may_exist); | |
401 | ASSERT_TRUE(iter->Valid()); | |
402 | ASSERT_EQ( | |
403 | options.comparator->Compare(iter->key(), ikey.Encode().ToString()), 0); | |
404 | ASSERT_EQ(iter->value(), value); | |
405 | } | |
406 | ||
407 | // search in block for the existing ukey, but with higher seqno | |
408 | { | |
409 | InternalKey seek_ikey(ukey, 20, kValueTypeForSeek); | |
410 | ||
411 | // HashIndex should be able to set the iter correctly | |
412 | may_exist = iter->SeekForGet(seek_ikey.Encode().ToString()); | |
413 | ASSERT_TRUE(may_exist); | |
414 | ASSERT_TRUE(iter->Valid()); | |
415 | ||
416 | // user key should match | |
417 | ASSERT_EQ(options.comparator->Compare(ExtractUserKey(iter->key()), ukey), | |
418 | 0); | |
419 | ||
420 | // seek_key seqno number should be greater than that of iter result | |
421 | ASSERT_GT(GetInternalKeySeqno(seek_ikey.Encode()), | |
422 | GetInternalKeySeqno(iter->key())); | |
423 | ||
424 | ASSERT_EQ(iter->value(), value); | |
425 | } | |
426 | ||
427 | // Search in block for the existing ukey, but with lower seqno | |
428 | // in this case, hash can find the only occurrence of the user_key, but | |
429 | // ParseNextDataKey() will skip it as it does not have a older seqno. | |
430 | // In this case, GetForSeek() is effective to locate the user_key, and | |
431 | // iter->Valid() == false indicates that we've reached to the end of | |
432 | // the block and the caller should continue searching the next block. | |
433 | { | |
434 | InternalKey seek_ikey(ukey, 5, kValueTypeForSeek); | |
435 | may_exist = iter->SeekForGet(seek_ikey.Encode().ToString()); | |
436 | ASSERT_TRUE(may_exist); | |
437 | ASSERT_FALSE(iter->Valid()); // should have reached to the end of block | |
438 | } | |
439 | ||
440 | delete iter; | |
441 | } | |
442 | ||
443 | TEST(DataBlockHashIndex, BlockTestLarge) { | |
444 | Random rnd(1019); | |
445 | Options options = Options(); | |
446 | std::vector<std::string> keys; | |
447 | std::vector<std::string> values; | |
448 | ||
449 | BlockBuilder builder(16 /* block_restart_interval */, | |
450 | true /* use_delta_encoding */, | |
451 | false /* use_value_delta_encoding */, | |
452 | BlockBasedTableOptions::kDataBlockBinaryAndHash); | |
453 | int num_records = 500; | |
454 | ||
455 | GenerateRandomKVs(&keys, &values, 0, num_records); | |
456 | ||
457 | // Generate keys. Adding a trailing "1" to indicate existent keys. | |
458 | // Later will Seeking for keys with a trailing "0" to test seeking | |
459 | // non-existent keys. | |
460 | for (int i = 0; i < num_records; i++) { | |
461 | std::string ukey(keys[i] + "1" /* existing key marker */); | |
462 | InternalKey ikey(ukey, 0, kTypeValue); | |
463 | builder.Add(ikey.Encode().ToString(), values[i]); | |
464 | } | |
465 | ||
466 | // read serialized contents of the block | |
467 | Slice rawblock = builder.Finish(); | |
468 | ||
469 | // create block reader | |
470 | BlockContents contents; | |
471 | contents.data = rawblock; | |
11fdf7f2 TL |
472 | Block reader(std::move(contents), kDisableGlobalSequenceNumber); |
473 | const InternalKeyComparator icmp(BytewiseComparator()); | |
474 | ||
475 | // random seek existent keys | |
476 | for (int i = 0; i < num_records; i++) { | |
477 | auto iter = | |
478 | reader.NewIterator<DataBlockIter>(&icmp, icmp.user_comparator()); | |
479 | // find a random key in the lookaside array | |
480 | int index = rnd.Uniform(num_records); | |
481 | std::string ukey(keys[index] + "1" /* existing key marker */); | |
482 | InternalKey ikey(ukey, 0, kTypeValue); | |
483 | ||
484 | // search in block for this key | |
485 | bool may_exist = iter->SeekForGet(ikey.Encode().ToString()); | |
486 | ASSERT_TRUE(may_exist); | |
487 | ASSERT_TRUE(iter->Valid()); | |
488 | ASSERT_EQ(values[index], iter->value()); | |
489 | ||
490 | delete iter; | |
491 | } | |
492 | ||
493 | // random seek non-existent user keys | |
494 | // In this case A), the user_key cannot be found in HashIndex. The key may | |
495 | // exist in the next block. So the iter is set invalidated to tell the | |
496 | // caller to search the next block. This test case belongs to this case A). | |
497 | // | |
498 | // Note that for non-existent keys, there is possibility of false positive, | |
499 | // i.e. the key is still hashed into some restart interval. | |
500 | // Two additional possible outcome: | |
501 | // B) linear seek the restart interval and not found, the iter stops at the | |
502 | // starting of the next restart interval. The key does not exist | |
503 | // anywhere. | |
504 | // C) linear seek the restart interval and not found, the iter stops at the | |
505 | // the end of the block, i.e. restarts_. The key may exist in the next | |
506 | // block. | |
507 | // So these combinations are possible when searching non-existent user_key: | |
508 | // | |
509 | // case# may_exist iter->Valid() | |
510 | // A true false | |
511 | // B false true | |
512 | // C true false | |
513 | ||
514 | for (int i = 0; i < num_records; i++) { | |
515 | auto iter = | |
516 | reader.NewIterator<DataBlockIter>(&icmp, icmp.user_comparator()); | |
517 | // find a random key in the lookaside array | |
518 | int index = rnd.Uniform(num_records); | |
519 | std::string ukey(keys[index] + "0" /* non-existing key marker */); | |
520 | InternalKey ikey(ukey, 0, kTypeValue); | |
521 | ||
522 | // search in block for this key | |
523 | bool may_exist = iter->SeekForGet(ikey.Encode().ToString()); | |
524 | if (!may_exist) { | |
525 | ASSERT_TRUE(iter->Valid()); | |
526 | } | |
527 | if (!iter->Valid()) { | |
528 | ASSERT_TRUE(may_exist); | |
529 | } | |
530 | ||
531 | delete iter; | |
532 | } | |
533 | } | |
534 | ||
535 | // helper routine for DataBlockHashIndex.BlockBoundary | |
536 | void TestBoundary(InternalKey& ik1, std::string& v1, InternalKey& ik2, | |
537 | std::string& v2, InternalKey& seek_ikey, | |
538 | GetContext& get_context, Options& options) { | |
494da23a TL |
539 | std::unique_ptr<WritableFileWriter> file_writer; |
540 | std::unique_ptr<RandomAccessFileReader> file_reader; | |
541 | std::unique_ptr<TableReader> table_reader; | |
11fdf7f2 TL |
542 | int level_ = -1; |
543 | ||
544 | std::vector<std::string> keys; | |
545 | const ImmutableCFOptions ioptions(options); | |
546 | const MutableCFOptions moptions(options); | |
547 | const InternalKeyComparator internal_comparator(options.comparator); | |
548 | ||
549 | EnvOptions soptions; | |
550 | ||
551 | soptions.use_mmap_reads = ioptions.allow_mmap_reads; | |
552 | file_writer.reset( | |
553 | test::GetWritableFileWriter(new test::StringSink(), "" /* don't care */)); | |
494da23a | 554 | std::unique_ptr<TableBuilder> builder; |
11fdf7f2 TL |
555 | std::vector<std::unique_ptr<IntTblPropCollectorFactory>> |
556 | int_tbl_prop_collector_factories; | |
557 | std::string column_family_name; | |
558 | builder.reset(ioptions.table_factory->NewTableBuilder( | |
559 | TableBuilderOptions(ioptions, moptions, internal_comparator, | |
560 | &int_tbl_prop_collector_factories, | |
494da23a TL |
561 | options.compression, options.sample_for_compression, |
562 | CompressionOptions(), false /* skip_filters */, | |
563 | column_family_name, level_), | |
11fdf7f2 TL |
564 | TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, |
565 | file_writer.get())); | |
566 | ||
567 | builder->Add(ik1.Encode().ToString(), v1); | |
568 | builder->Add(ik2.Encode().ToString(), v2); | |
569 | EXPECT_TRUE(builder->status().ok()); | |
570 | ||
571 | Status s = builder->Finish(); | |
572 | file_writer->Flush(); | |
573 | EXPECT_TRUE(s.ok()) << s.ToString(); | |
574 | ||
575 | EXPECT_EQ(static_cast<test::StringSink*>(file_writer->writable_file()) | |
576 | ->contents() | |
577 | .size(), | |
578 | builder->FileSize()); | |
579 | ||
580 | // Open the table | |
581 | file_reader.reset(test::GetRandomAccessFileReader(new test::StringSource( | |
582 | static_cast<test::StringSink*>(file_writer->writable_file())->contents(), | |
583 | 0 /*uniq_id*/, ioptions.allow_mmap_reads))); | |
584 | const bool kSkipFilters = true; | |
585 | const bool kImmortal = true; | |
586 | ioptions.table_factory->NewTableReader( | |
587 | TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions, | |
588 | internal_comparator, !kSkipFilters, !kImmortal, | |
589 | level_), | |
590 | std::move(file_reader), | |
591 | static_cast<test::StringSink*>(file_writer->writable_file()) | |
592 | ->contents() | |
593 | .size(), | |
594 | &table_reader); | |
595 | // Search using Get() | |
596 | ReadOptions ro; | |
597 | ||
598 | ASSERT_OK(table_reader->Get(ro, seek_ikey.Encode().ToString(), &get_context, | |
599 | moptions.prefix_extractor.get())); | |
600 | } | |
601 | ||
602 | TEST(DataBlockHashIndex, BlockBoundary) { | |
603 | BlockBasedTableOptions table_options; | |
604 | table_options.data_block_index_type = | |
605 | BlockBasedTableOptions::kDataBlockBinaryAndHash; | |
606 | table_options.block_restart_interval = 1; | |
607 | table_options.block_size = 4096; | |
608 | ||
609 | Options options; | |
610 | options.comparator = BytewiseComparator(); | |
611 | ||
612 | options.table_factory.reset(NewBlockBasedTableFactory(table_options)); | |
613 | ||
614 | // insert two large k/v pair. Given that the block_size is 4096, one k/v | |
615 | // pair will take up one block. | |
616 | // [ k1/v1 ][ k2/v2 ] | |
617 | // [ Block N ][ Block N+1 ] | |
618 | ||
619 | { | |
620 | // [ "aab"@100 ][ "axy"@10 ] | |
621 | // | Block N ][ Block N+1 ] | |
622 | // seek for "axy"@60 | |
623 | std::string uk1("aab"); | |
624 | InternalKey ik1(uk1, 100, kTypeValue); | |
625 | std::string v1(4100, '1'); // large value | |
626 | ||
627 | std::string uk2("axy"); | |
628 | InternalKey ik2(uk2, 10, kTypeValue); | |
629 | std::string v2(4100, '2'); // large value | |
630 | ||
631 | PinnableSlice value; | |
632 | std::string seek_ukey("axy"); | |
633 | InternalKey seek_ikey(seek_ukey, 60, kTypeValue); | |
634 | GetContext get_context(options.comparator, nullptr, nullptr, nullptr, | |
635 | GetContext::kNotFound, seek_ukey, &value, nullptr, | |
636 | nullptr, nullptr, nullptr); | |
637 | ||
638 | TestBoundary(ik1, v1, ik2, v2, seek_ikey, get_context, options); | |
639 | ASSERT_EQ(get_context.State(), GetContext::kFound); | |
640 | ASSERT_EQ(value, v2); | |
641 | value.Reset(); | |
642 | } | |
643 | ||
644 | { | |
645 | // [ "axy"@100 ][ "axy"@10 ] | |
646 | // | Block N ][ Block N+1 ] | |
647 | // seek for "axy"@60 | |
648 | std::string uk1("axy"); | |
649 | InternalKey ik1(uk1, 100, kTypeValue); | |
650 | std::string v1(4100, '1'); // large value | |
651 | ||
652 | std::string uk2("axy"); | |
653 | InternalKey ik2(uk2, 10, kTypeValue); | |
654 | std::string v2(4100, '2'); // large value | |
655 | ||
656 | PinnableSlice value; | |
657 | std::string seek_ukey("axy"); | |
658 | InternalKey seek_ikey(seek_ukey, 60, kTypeValue); | |
659 | GetContext get_context(options.comparator, nullptr, nullptr, nullptr, | |
660 | GetContext::kNotFound, seek_ukey, &value, nullptr, | |
661 | nullptr, nullptr, nullptr); | |
662 | ||
663 | TestBoundary(ik1, v1, ik2, v2, seek_ikey, get_context, options); | |
664 | ASSERT_EQ(get_context.State(), GetContext::kFound); | |
665 | ASSERT_EQ(value, v2); | |
666 | value.Reset(); | |
667 | } | |
668 | ||
669 | { | |
670 | // [ "axy"@100 ][ "axy"@10 ] | |
671 | // | Block N ][ Block N+1 ] | |
672 | // seek for "axy"@120 | |
673 | std::string uk1("axy"); | |
674 | InternalKey ik1(uk1, 100, kTypeValue); | |
675 | std::string v1(4100, '1'); // large value | |
676 | ||
677 | std::string uk2("axy"); | |
678 | InternalKey ik2(uk2, 10, kTypeValue); | |
679 | std::string v2(4100, '2'); // large value | |
680 | ||
681 | PinnableSlice value; | |
682 | std::string seek_ukey("axy"); | |
683 | InternalKey seek_ikey(seek_ukey, 120, kTypeValue); | |
684 | GetContext get_context(options.comparator, nullptr, nullptr, nullptr, | |
685 | GetContext::kNotFound, seek_ukey, &value, nullptr, | |
686 | nullptr, nullptr, nullptr); | |
687 | ||
688 | TestBoundary(ik1, v1, ik2, v2, seek_ikey, get_context, options); | |
689 | ASSERT_EQ(get_context.State(), GetContext::kFound); | |
690 | ASSERT_EQ(value, v1); | |
691 | value.Reset(); | |
692 | } | |
693 | ||
694 | { | |
695 | // [ "axy"@100 ][ "axy"@10 ] | |
696 | // | Block N ][ Block N+1 ] | |
697 | // seek for "axy"@5 | |
698 | std::string uk1("axy"); | |
699 | InternalKey ik1(uk1, 100, kTypeValue); | |
700 | std::string v1(4100, '1'); // large value | |
701 | ||
702 | std::string uk2("axy"); | |
703 | InternalKey ik2(uk2, 10, kTypeValue); | |
704 | std::string v2(4100, '2'); // large value | |
705 | ||
706 | PinnableSlice value; | |
707 | std::string seek_ukey("axy"); | |
708 | InternalKey seek_ikey(seek_ukey, 5, kTypeValue); | |
709 | GetContext get_context(options.comparator, nullptr, nullptr, nullptr, | |
710 | GetContext::kNotFound, seek_ukey, &value, nullptr, | |
711 | nullptr, nullptr, nullptr); | |
712 | ||
713 | TestBoundary(ik1, v1, ik2, v2, seek_ikey, get_context, options); | |
714 | ASSERT_EQ(get_context.State(), GetContext::kNotFound); | |
715 | value.Reset(); | |
716 | } | |
717 | } | |
718 | ||
719 | } // namespace rocksdb | |
720 | ||
721 | int main(int argc, char** argv) { | |
722 | ::testing::InitGoogleTest(&argc, argv); | |
723 | return RUN_ALL_TESTS(); | |
724 | } |