]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | // This source code is licensed under both the GPLv2 (found in the | |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
5 | ||
20effc67 TL |
6 | #include "table/block_based/data_block_hash_index.h" |
7 | ||
11fdf7f2 TL |
8 | #include <cstdlib> |
9 | #include <string> | |
10 | #include <unordered_map> | |
11 | ||
494da23a | 12 | #include "db/table_properties_collector.h" |
11fdf7f2 | 13 | #include "rocksdb/slice.h" |
f67539c2 TL |
14 | #include "table/block_based/block.h" |
15 | #include "table/block_based/block_based_table_reader.h" | |
16 | #include "table/block_based/block_builder.h" | |
11fdf7f2 | 17 | #include "table/get_context.h" |
494da23a | 18 | #include "table/table_builder.h" |
f67539c2 TL |
19 | #include "test_util/testharness.h" |
20 | #include "test_util/testutil.h" | |
20effc67 | 21 | #include "util/random.h" |
11fdf7f2 | 22 | |
f67539c2 | 23 | namespace ROCKSDB_NAMESPACE { |
11fdf7f2 TL |
24 | |
25 | bool SearchForOffset(DataBlockHashIndex& index, const char* data, | |
26 | uint16_t map_offset, const Slice& key, | |
27 | uint8_t& restart_point) { | |
28 | uint8_t entry = index.Lookup(data, map_offset, key); | |
29 | if (entry == kCollision) { | |
30 | return true; | |
31 | } | |
32 | ||
33 | if (entry == kNoEntry) { | |
34 | return false; | |
35 | } | |
36 | ||
37 | return entry == restart_point; | |
38 | } | |
39 | ||
11fdf7f2 TL |
40 | std::string GenerateKey(int primary_key, int secondary_key, int padding_size, |
41 | Random* rnd) { | |
42 | char buf[50]; | |
43 | char* p = &buf[0]; | |
44 | snprintf(buf, sizeof(buf), "%6d%4d", primary_key, secondary_key); | |
45 | std::string k(p); | |
46 | if (padding_size) { | |
20effc67 | 47 | k += rnd->RandomString(padding_size); |
11fdf7f2 TL |
48 | } |
49 | ||
50 | return k; | |
51 | } | |
52 | ||
53 | // Generate random key value pairs. | |
54 | // The generated key will be sorted. You can tune the parameters to generated | |
55 | // different kinds of test key/value pairs for different scenario. | |
56 | void GenerateRandomKVs(std::vector<std::string>* keys, | |
57 | std::vector<std::string>* values, const int from, | |
58 | const int len, const int step = 1, | |
59 | const int padding_size = 0, | |
60 | const int keys_share_prefix = 1) { | |
61 | Random rnd(302); | |
62 | ||
63 | // generate different prefix | |
64 | for (int i = from; i < from + len; i += step) { | |
65 | // generating keys that shares the prefix | |
66 | for (int j = 0; j < keys_share_prefix; ++j) { | |
67 | keys->emplace_back(GenerateKey(i, j, padding_size, &rnd)); | |
68 | ||
69 | // 100 bytes values | |
20effc67 | 70 | values->emplace_back(rnd.RandomString(100)); |
11fdf7f2 TL |
71 | } |
72 | } | |
73 | } | |
74 | ||
75 | TEST(DataBlockHashIndex, DataBlockHashTestSmall) { | |
76 | DataBlockHashIndexBuilder builder; | |
77 | builder.Initialize(0.75 /*util_ratio*/); | |
78 | for (int j = 0; j < 5; j++) { | |
79 | for (uint8_t i = 0; i < 2 + j; i++) { | |
80 | std::string key("key" + std::to_string(i)); | |
81 | uint8_t restart_point = i; | |
82 | builder.Add(key, restart_point); | |
83 | } | |
84 | ||
85 | size_t estimated_size = builder.EstimateSize(); | |
86 | ||
87 | std::string buffer("fake"), buffer2; | |
88 | size_t original_size = buffer.size(); | |
89 | estimated_size += original_size; | |
90 | builder.Finish(buffer); | |
91 | ||
92 | ASSERT_EQ(buffer.size(), estimated_size); | |
93 | ||
94 | buffer2 = buffer; // test for the correctness of relative offset | |
95 | ||
96 | Slice s(buffer2); | |
97 | DataBlockHashIndex index; | |
98 | uint16_t map_offset; | |
99 | index.Initialize(s.data(), static_cast<uint16_t>(s.size()), &map_offset); | |
100 | ||
101 | // the additional hash map should start at the end of the buffer | |
102 | ASSERT_EQ(original_size, map_offset); | |
103 | for (uint8_t i = 0; i < 2; i++) { | |
104 | std::string key("key" + std::to_string(i)); | |
105 | uint8_t restart_point = i; | |
106 | ASSERT_TRUE( | |
107 | SearchForOffset(index, s.data(), map_offset, key, restart_point)); | |
108 | } | |
109 | builder.Reset(); | |
110 | } | |
111 | } | |
112 | ||
113 | TEST(DataBlockHashIndex, DataBlockHashTest) { | |
114 | // bucket_num = 200, #keys = 100. 50% utilization | |
115 | DataBlockHashIndexBuilder builder; | |
116 | builder.Initialize(0.75 /*util_ratio*/); | |
117 | ||
118 | for (uint8_t i = 0; i < 100; i++) { | |
119 | std::string key("key" + std::to_string(i)); | |
120 | uint8_t restart_point = i; | |
121 | builder.Add(key, restart_point); | |
122 | } | |
123 | ||
124 | size_t estimated_size = builder.EstimateSize(); | |
125 | ||
126 | std::string buffer("fake content"), buffer2; | |
127 | size_t original_size = buffer.size(); | |
128 | estimated_size += original_size; | |
129 | builder.Finish(buffer); | |
130 | ||
131 | ASSERT_EQ(buffer.size(), estimated_size); | |
132 | ||
1e59de90 | 133 | buffer2 = buffer; // test for the correctness of relative offset |
11fdf7f2 TL |
134 | |
135 | Slice s(buffer2); | |
136 | DataBlockHashIndex index; | |
137 | uint16_t map_offset; | |
138 | index.Initialize(s.data(), static_cast<uint16_t>(s.size()), &map_offset); | |
139 | ||
140 | // the additional hash map should start at the end of the buffer | |
141 | ASSERT_EQ(original_size, map_offset); | |
142 | for (uint8_t i = 0; i < 100; i++) { | |
143 | std::string key("key" + std::to_string(i)); | |
144 | uint8_t restart_point = i; | |
145 | ASSERT_TRUE( | |
146 | SearchForOffset(index, s.data(), map_offset, key, restart_point)); | |
147 | } | |
148 | } | |
149 | ||
150 | TEST(DataBlockHashIndex, DataBlockHashTestCollision) { | |
151 | // bucket_num = 2. There will be intense hash collisions | |
152 | DataBlockHashIndexBuilder builder; | |
153 | builder.Initialize(0.75 /*util_ratio*/); | |
154 | ||
155 | for (uint8_t i = 0; i < 100; i++) { | |
156 | std::string key("key" + std::to_string(i)); | |
157 | uint8_t restart_point = i; | |
158 | builder.Add(key, restart_point); | |
159 | } | |
160 | ||
161 | size_t estimated_size = builder.EstimateSize(); | |
162 | ||
163 | std::string buffer("some other fake content to take up space"), buffer2; | |
164 | size_t original_size = buffer.size(); | |
165 | estimated_size += original_size; | |
166 | builder.Finish(buffer); | |
167 | ||
168 | ASSERT_EQ(buffer.size(), estimated_size); | |
169 | ||
1e59de90 | 170 | buffer2 = buffer; // test for the correctness of relative offset |
11fdf7f2 TL |
171 | |
172 | Slice s(buffer2); | |
173 | DataBlockHashIndex index; | |
174 | uint16_t map_offset; | |
175 | index.Initialize(s.data(), static_cast<uint16_t>(s.size()), &map_offset); | |
176 | ||
177 | // the additional hash map should start at the end of the buffer | |
178 | ASSERT_EQ(original_size, map_offset); | |
179 | for (uint8_t i = 0; i < 100; i++) { | |
180 | std::string key("key" + std::to_string(i)); | |
181 | uint8_t restart_point = i; | |
182 | ASSERT_TRUE( | |
183 | SearchForOffset(index, s.data(), map_offset, key, restart_point)); | |
184 | } | |
185 | } | |
186 | ||
187 | TEST(DataBlockHashIndex, DataBlockHashTestLarge) { | |
188 | DataBlockHashIndexBuilder builder; | |
189 | builder.Initialize(0.75 /*util_ratio*/); | |
190 | std::unordered_map<std::string, uint8_t> m; | |
191 | ||
192 | for (uint8_t i = 0; i < 100; i++) { | |
193 | if (i % 2) { | |
194 | continue; // leave half of the keys out | |
195 | } | |
196 | std::string key = "key" + std::to_string(i); | |
197 | uint8_t restart_point = i; | |
198 | builder.Add(key, restart_point); | |
199 | m[key] = restart_point; | |
200 | } | |
201 | ||
202 | size_t estimated_size = builder.EstimateSize(); | |
203 | ||
204 | std::string buffer("filling stuff"), buffer2; | |
205 | size_t original_size = buffer.size(); | |
206 | estimated_size += original_size; | |
207 | builder.Finish(buffer); | |
208 | ||
209 | ASSERT_EQ(buffer.size(), estimated_size); | |
210 | ||
1e59de90 | 211 | buffer2 = buffer; // test for the correctness of relative offset |
11fdf7f2 TL |
212 | |
213 | Slice s(buffer2); | |
214 | DataBlockHashIndex index; | |
215 | uint16_t map_offset; | |
216 | index.Initialize(s.data(), static_cast<uint16_t>(s.size()), &map_offset); | |
217 | ||
218 | // the additional hash map should start at the end of the buffer | |
219 | ASSERT_EQ(original_size, map_offset); | |
220 | for (uint8_t i = 0; i < 100; i++) { | |
221 | std::string key = "key" + std::to_string(i); | |
222 | uint8_t restart_point = i; | |
223 | if (m.count(key)) { | |
224 | ASSERT_TRUE(m[key] == restart_point); | |
225 | ASSERT_TRUE( | |
226 | SearchForOffset(index, s.data(), map_offset, key, restart_point)); | |
227 | } else { | |
228 | // we allow false positve, so don't test the nonexisting keys. | |
229 | // when false positive happens, the search will continue to the | |
230 | // restart intervals to see if the key really exist. | |
231 | } | |
232 | } | |
233 | } | |
234 | ||
235 | TEST(DataBlockHashIndex, RestartIndexExceedMax) { | |
236 | DataBlockHashIndexBuilder builder; | |
237 | builder.Initialize(0.75 /*util_ratio*/); | |
238 | std::unordered_map<std::string, uint8_t> m; | |
239 | ||
240 | for (uint8_t i = 0; i <= 253; i++) { | |
241 | std::string key = "key" + std::to_string(i); | |
242 | uint8_t restart_point = i; | |
243 | builder.Add(key, restart_point); | |
244 | } | |
245 | ASSERT_TRUE(builder.Valid()); | |
246 | ||
247 | builder.Reset(); | |
248 | ||
249 | for (uint8_t i = 0; i <= 254; i++) { | |
250 | std::string key = "key" + std::to_string(i); | |
251 | uint8_t restart_point = i; | |
252 | builder.Add(key, restart_point); | |
253 | } | |
254 | ||
255 | ASSERT_FALSE(builder.Valid()); | |
256 | ||
257 | builder.Reset(); | |
258 | ASSERT_TRUE(builder.Valid()); | |
259 | } | |
260 | ||
261 | TEST(DataBlockHashIndex, BlockRestartIndexExceedMax) { | |
262 | Options options = Options(); | |
263 | ||
264 | BlockBuilder builder(1 /* block_restart_interval */, | |
265 | true /* use_delta_encoding */, | |
266 | false /* use_value_delta_encoding */, | |
267 | BlockBasedTableOptions::kDataBlockBinaryAndHash); | |
268 | ||
269 | // #restarts <= 253. HashIndex is valid | |
270 | for (int i = 0; i <= 253; i++) { | |
271 | std::string ukey = "key" + std::to_string(i); | |
272 | InternalKey ikey(ukey, 0, kTypeValue); | |
273 | builder.Add(ikey.Encode().ToString(), "value"); | |
274 | } | |
275 | ||
276 | { | |
277 | // read serialized contents of the block | |
278 | Slice rawblock = builder.Finish(); | |
279 | ||
280 | // create block reader | |
281 | BlockContents contents; | |
282 | contents.data = rawblock; | |
20effc67 | 283 | Block reader(std::move(contents)); |
11fdf7f2 TL |
284 | |
285 | ASSERT_EQ(reader.IndexType(), | |
286 | BlockBasedTableOptions::kDataBlockBinaryAndHash); | |
287 | } | |
288 | ||
289 | builder.Reset(); | |
290 | ||
291 | // #restarts > 253. HashIndex is not used | |
292 | for (int i = 0; i <= 254; i++) { | |
293 | std::string ukey = "key" + std::to_string(i); | |
294 | InternalKey ikey(ukey, 0, kTypeValue); | |
295 | builder.Add(ikey.Encode().ToString(), "value"); | |
296 | } | |
297 | ||
298 | { | |
299 | // read serialized contents of the block | |
300 | Slice rawblock = builder.Finish(); | |
301 | ||
302 | // create block reader | |
303 | BlockContents contents; | |
304 | contents.data = rawblock; | |
20effc67 | 305 | Block reader(std::move(contents)); |
11fdf7f2 TL |
306 | |
307 | ASSERT_EQ(reader.IndexType(), | |
308 | BlockBasedTableOptions::kDataBlockBinarySearch); | |
309 | } | |
310 | } | |
311 | ||
312 | TEST(DataBlockHashIndex, BlockSizeExceedMax) { | |
313 | Options options = Options(); | |
314 | std::string ukey(10, 'k'); | |
315 | InternalKey ikey(ukey, 0, kTypeValue); | |
316 | ||
317 | BlockBuilder builder(1 /* block_restart_interval */, | |
318 | false /* use_delta_encoding */, | |
319 | false /* use_value_delta_encoding */, | |
320 | BlockBasedTableOptions::kDataBlockBinaryAndHash); | |
321 | ||
322 | { | |
323 | // insert a large value. The block size plus HashIndex is 65536. | |
324 | std::string value(65502, 'v'); | |
325 | ||
326 | builder.Add(ikey.Encode().ToString(), value); | |
327 | ||
328 | // read serialized contents of the block | |
329 | Slice rawblock = builder.Finish(); | |
330 | ASSERT_LE(rawblock.size(), kMaxBlockSizeSupportedByHashIndex); | |
331 | std::cerr << "block size: " << rawblock.size() << std::endl; | |
332 | ||
333 | // create block reader | |
334 | BlockContents contents; | |
335 | contents.data = rawblock; | |
20effc67 | 336 | Block reader(std::move(contents)); |
11fdf7f2 TL |
337 | |
338 | ASSERT_EQ(reader.IndexType(), | |
339 | BlockBasedTableOptions::kDataBlockBinaryAndHash); | |
340 | } | |
341 | ||
342 | builder.Reset(); | |
343 | ||
344 | { | |
345 | // insert a large value. The block size plus HashIndex would be 65537. | |
346 | // This excceed the max block size supported by HashIndex (65536). | |
347 | // So when build finishes HashIndex will not be created for the block. | |
348 | std::string value(65503, 'v'); | |
349 | ||
350 | builder.Add(ikey.Encode().ToString(), value); | |
351 | ||
352 | // read serialized contents of the block | |
353 | Slice rawblock = builder.Finish(); | |
354 | ASSERT_LE(rawblock.size(), kMaxBlockSizeSupportedByHashIndex); | |
355 | std::cerr << "block size: " << rawblock.size() << std::endl; | |
356 | ||
357 | // create block reader | |
358 | BlockContents contents; | |
359 | contents.data = rawblock; | |
20effc67 | 360 | Block reader(std::move(contents)); |
11fdf7f2 TL |
361 | |
362 | // the index type have fallen back to binary when build finish. | |
363 | ASSERT_EQ(reader.IndexType(), | |
364 | BlockBasedTableOptions::kDataBlockBinarySearch); | |
365 | } | |
366 | } | |
367 | ||
368 | TEST(DataBlockHashIndex, BlockTestSingleKey) { | |
369 | Options options = Options(); | |
370 | ||
371 | BlockBuilder builder(16 /* block_restart_interval */, | |
372 | true /* use_delta_encoding */, | |
373 | false /* use_value_delta_encoding */, | |
374 | BlockBasedTableOptions::kDataBlockBinaryAndHash); | |
375 | ||
376 | std::string ukey("gopher"); | |
377 | std::string value("gold"); | |
378 | InternalKey ikey(ukey, 10, kTypeValue); | |
379 | builder.Add(ikey.Encode().ToString(), value /*value*/); | |
380 | ||
381 | // read serialized contents of the block | |
382 | Slice rawblock = builder.Finish(); | |
383 | ||
384 | // create block reader | |
385 | BlockContents contents; | |
386 | contents.data = rawblock; | |
20effc67 | 387 | Block reader(std::move(contents)); |
11fdf7f2 TL |
388 | |
389 | const InternalKeyComparator icmp(BytewiseComparator()); | |
20effc67 TL |
390 | auto iter = reader.NewDataIterator(icmp.user_comparator(), |
391 | kDisableGlobalSequenceNumber); | |
11fdf7f2 TL |
392 | bool may_exist; |
393 | // search in block for the key just inserted | |
394 | { | |
395 | InternalKey seek_ikey(ukey, 10, kValueTypeForSeek); | |
396 | may_exist = iter->SeekForGet(seek_ikey.Encode().ToString()); | |
397 | ASSERT_TRUE(may_exist); | |
398 | ASSERT_TRUE(iter->Valid()); | |
399 | ASSERT_EQ( | |
400 | options.comparator->Compare(iter->key(), ikey.Encode().ToString()), 0); | |
401 | ASSERT_EQ(iter->value(), value); | |
402 | } | |
403 | ||
404 | // search in block for the existing ukey, but with higher seqno | |
405 | { | |
406 | InternalKey seek_ikey(ukey, 20, kValueTypeForSeek); | |
407 | ||
408 | // HashIndex should be able to set the iter correctly | |
409 | may_exist = iter->SeekForGet(seek_ikey.Encode().ToString()); | |
410 | ASSERT_TRUE(may_exist); | |
411 | ASSERT_TRUE(iter->Valid()); | |
412 | ||
413 | // user key should match | |
414 | ASSERT_EQ(options.comparator->Compare(ExtractUserKey(iter->key()), ukey), | |
415 | 0); | |
416 | ||
417 | // seek_key seqno number should be greater than that of iter result | |
418 | ASSERT_GT(GetInternalKeySeqno(seek_ikey.Encode()), | |
419 | GetInternalKeySeqno(iter->key())); | |
420 | ||
421 | ASSERT_EQ(iter->value(), value); | |
422 | } | |
423 | ||
424 | // Search in block for the existing ukey, but with lower seqno | |
425 | // in this case, hash can find the only occurrence of the user_key, but | |
426 | // ParseNextDataKey() will skip it as it does not have a older seqno. | |
427 | // In this case, GetForSeek() is effective to locate the user_key, and | |
428 | // iter->Valid() == false indicates that we've reached to the end of | |
429 | // the block and the caller should continue searching the next block. | |
430 | { | |
431 | InternalKey seek_ikey(ukey, 5, kValueTypeForSeek); | |
432 | may_exist = iter->SeekForGet(seek_ikey.Encode().ToString()); | |
433 | ASSERT_TRUE(may_exist); | |
434 | ASSERT_FALSE(iter->Valid()); // should have reached to the end of block | |
435 | } | |
436 | ||
437 | delete iter; | |
438 | } | |
439 | ||
440 | TEST(DataBlockHashIndex, BlockTestLarge) { | |
441 | Random rnd(1019); | |
442 | Options options = Options(); | |
443 | std::vector<std::string> keys; | |
444 | std::vector<std::string> values; | |
445 | ||
446 | BlockBuilder builder(16 /* block_restart_interval */, | |
447 | true /* use_delta_encoding */, | |
448 | false /* use_value_delta_encoding */, | |
449 | BlockBasedTableOptions::kDataBlockBinaryAndHash); | |
450 | int num_records = 500; | |
451 | ||
452 | GenerateRandomKVs(&keys, &values, 0, num_records); | |
453 | ||
454 | // Generate keys. Adding a trailing "1" to indicate existent keys. | |
455 | // Later will Seeking for keys with a trailing "0" to test seeking | |
456 | // non-existent keys. | |
457 | for (int i = 0; i < num_records; i++) { | |
458 | std::string ukey(keys[i] + "1" /* existing key marker */); | |
459 | InternalKey ikey(ukey, 0, kTypeValue); | |
460 | builder.Add(ikey.Encode().ToString(), values[i]); | |
461 | } | |
462 | ||
463 | // read serialized contents of the block | |
464 | Slice rawblock = builder.Finish(); | |
465 | ||
466 | // create block reader | |
467 | BlockContents contents; | |
468 | contents.data = rawblock; | |
20effc67 | 469 | Block reader(std::move(contents)); |
11fdf7f2 TL |
470 | const InternalKeyComparator icmp(BytewiseComparator()); |
471 | ||
472 | // random seek existent keys | |
473 | for (int i = 0; i < num_records; i++) { | |
20effc67 TL |
474 | auto iter = reader.NewDataIterator(icmp.user_comparator(), |
475 | kDisableGlobalSequenceNumber); | |
11fdf7f2 TL |
476 | // find a random key in the lookaside array |
477 | int index = rnd.Uniform(num_records); | |
478 | std::string ukey(keys[index] + "1" /* existing key marker */); | |
479 | InternalKey ikey(ukey, 0, kTypeValue); | |
480 | ||
481 | // search in block for this key | |
482 | bool may_exist = iter->SeekForGet(ikey.Encode().ToString()); | |
483 | ASSERT_TRUE(may_exist); | |
484 | ASSERT_TRUE(iter->Valid()); | |
485 | ASSERT_EQ(values[index], iter->value()); | |
486 | ||
487 | delete iter; | |
488 | } | |
489 | ||
490 | // random seek non-existent user keys | |
491 | // In this case A), the user_key cannot be found in HashIndex. The key may | |
492 | // exist in the next block. So the iter is set invalidated to tell the | |
493 | // caller to search the next block. This test case belongs to this case A). | |
494 | // | |
495 | // Note that for non-existent keys, there is possibility of false positive, | |
496 | // i.e. the key is still hashed into some restart interval. | |
497 | // Two additional possible outcome: | |
498 | // B) linear seek the restart interval and not found, the iter stops at the | |
499 | // starting of the next restart interval. The key does not exist | |
500 | // anywhere. | |
501 | // C) linear seek the restart interval and not found, the iter stops at the | |
502 | // the end of the block, i.e. restarts_. The key may exist in the next | |
503 | // block. | |
504 | // So these combinations are possible when searching non-existent user_key: | |
505 | // | |
506 | // case# may_exist iter->Valid() | |
507 | // A true false | |
508 | // B false true | |
509 | // C true false | |
510 | ||
511 | for (int i = 0; i < num_records; i++) { | |
20effc67 TL |
512 | auto iter = reader.NewDataIterator(icmp.user_comparator(), |
513 | kDisableGlobalSequenceNumber); | |
11fdf7f2 TL |
514 | // find a random key in the lookaside array |
515 | int index = rnd.Uniform(num_records); | |
516 | std::string ukey(keys[index] + "0" /* non-existing key marker */); | |
517 | InternalKey ikey(ukey, 0, kTypeValue); | |
518 | ||
519 | // search in block for this key | |
520 | bool may_exist = iter->SeekForGet(ikey.Encode().ToString()); | |
521 | if (!may_exist) { | |
522 | ASSERT_TRUE(iter->Valid()); | |
523 | } | |
524 | if (!iter->Valid()) { | |
525 | ASSERT_TRUE(may_exist); | |
526 | } | |
527 | ||
528 | delete iter; | |
529 | } | |
530 | } | |
531 | ||
532 | // helper routine for DataBlockHashIndex.BlockBoundary | |
533 | void TestBoundary(InternalKey& ik1, std::string& v1, InternalKey& ik2, | |
534 | std::string& v2, InternalKey& seek_ikey, | |
535 | GetContext& get_context, Options& options) { | |
494da23a TL |
536 | std::unique_ptr<WritableFileWriter> file_writer; |
537 | std::unique_ptr<RandomAccessFileReader> file_reader; | |
538 | std::unique_ptr<TableReader> table_reader; | |
11fdf7f2 TL |
539 | int level_ = -1; |
540 | ||
541 | std::vector<std::string> keys; | |
1e59de90 | 542 | const ImmutableOptions ioptions(options); |
11fdf7f2 TL |
543 | const MutableCFOptions moptions(options); |
544 | const InternalKeyComparator internal_comparator(options.comparator); | |
545 | ||
546 | EnvOptions soptions; | |
547 | ||
548 | soptions.use_mmap_reads = ioptions.allow_mmap_reads; | |
1e59de90 TL |
549 | test::StringSink* sink = new test::StringSink(); |
550 | std::unique_ptr<FSWritableFile> f(sink); | |
11fdf7f2 | 551 | file_writer.reset( |
1e59de90 | 552 | new WritableFileWriter(std::move(f), "" /* don't care */, FileOptions())); |
494da23a | 553 | std::unique_ptr<TableBuilder> builder; |
1e59de90 | 554 | IntTblPropCollectorFactories int_tbl_prop_collector_factories; |
11fdf7f2 TL |
555 | std::string column_family_name; |
556 | builder.reset(ioptions.table_factory->NewTableBuilder( | |
1e59de90 TL |
557 | TableBuilderOptions( |
558 | ioptions, moptions, internal_comparator, | |
559 | &int_tbl_prop_collector_factories, options.compression, | |
560 | CompressionOptions(), | |
561 | TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, | |
562 | column_family_name, level_), | |
11fdf7f2 TL |
563 | file_writer.get())); |
564 | ||
565 | builder->Add(ik1.Encode().ToString(), v1); | |
566 | builder->Add(ik2.Encode().ToString(), v2); | |
567 | EXPECT_TRUE(builder->status().ok()); | |
568 | ||
569 | Status s = builder->Finish(); | |
1e59de90 | 570 | ASSERT_OK(file_writer->Flush()); |
11fdf7f2 TL |
571 | EXPECT_TRUE(s.ok()) << s.ToString(); |
572 | ||
1e59de90 | 573 | EXPECT_EQ(sink->contents().size(), builder->FileSize()); |
11fdf7f2 TL |
574 | |
575 | // Open the table | |
1e59de90 TL |
576 | test::StringSource* source = new test::StringSource( |
577 | sink->contents(), 0 /*uniq_id*/, ioptions.allow_mmap_reads); | |
578 | std::unique_ptr<FSRandomAccessFile> file(source); | |
579 | file_reader.reset(new RandomAccessFileReader(std::move(file), "test")); | |
11fdf7f2 TL |
580 | const bool kSkipFilters = true; |
581 | const bool kImmortal = true; | |
1e59de90 TL |
582 | ASSERT_OK(ioptions.table_factory->NewTableReader( |
583 | TableReaderOptions(ioptions, moptions.prefix_extractor, soptions, | |
11fdf7f2 TL |
584 | internal_comparator, !kSkipFilters, !kImmortal, |
585 | level_), | |
1e59de90 | 586 | std::move(file_reader), sink->contents().size(), &table_reader)); |
11fdf7f2 TL |
587 | // Search using Get() |
588 | ReadOptions ro; | |
589 | ||
590 | ASSERT_OK(table_reader->Get(ro, seek_ikey.Encode().ToString(), &get_context, | |
591 | moptions.prefix_extractor.get())); | |
592 | } | |
593 | ||
594 | TEST(DataBlockHashIndex, BlockBoundary) { | |
595 | BlockBasedTableOptions table_options; | |
596 | table_options.data_block_index_type = | |
597 | BlockBasedTableOptions::kDataBlockBinaryAndHash; | |
598 | table_options.block_restart_interval = 1; | |
599 | table_options.block_size = 4096; | |
600 | ||
601 | Options options; | |
602 | options.comparator = BytewiseComparator(); | |
603 | ||
604 | options.table_factory.reset(NewBlockBasedTableFactory(table_options)); | |
605 | ||
606 | // insert two large k/v pair. Given that the block_size is 4096, one k/v | |
607 | // pair will take up one block. | |
608 | // [ k1/v1 ][ k2/v2 ] | |
609 | // [ Block N ][ Block N+1 ] | |
610 | ||
611 | { | |
612 | // [ "aab"@100 ][ "axy"@10 ] | |
613 | // | Block N ][ Block N+1 ] | |
614 | // seek for "axy"@60 | |
615 | std::string uk1("aab"); | |
616 | InternalKey ik1(uk1, 100, kTypeValue); | |
617 | std::string v1(4100, '1'); // large value | |
618 | ||
619 | std::string uk2("axy"); | |
620 | InternalKey ik2(uk2, 10, kTypeValue); | |
621 | std::string v2(4100, '2'); // large value | |
622 | ||
623 | PinnableSlice value; | |
624 | std::string seek_ukey("axy"); | |
625 | InternalKey seek_ikey(seek_ukey, 60, kTypeValue); | |
626 | GetContext get_context(options.comparator, nullptr, nullptr, nullptr, | |
627 | GetContext::kNotFound, seek_ukey, &value, nullptr, | |
1e59de90 | 628 | nullptr, nullptr, true, nullptr, nullptr); |
11fdf7f2 TL |
629 | |
630 | TestBoundary(ik1, v1, ik2, v2, seek_ikey, get_context, options); | |
631 | ASSERT_EQ(get_context.State(), GetContext::kFound); | |
632 | ASSERT_EQ(value, v2); | |
633 | value.Reset(); | |
634 | } | |
635 | ||
636 | { | |
637 | // [ "axy"@100 ][ "axy"@10 ] | |
638 | // | Block N ][ Block N+1 ] | |
639 | // seek for "axy"@60 | |
640 | std::string uk1("axy"); | |
641 | InternalKey ik1(uk1, 100, kTypeValue); | |
642 | std::string v1(4100, '1'); // large value | |
643 | ||
644 | std::string uk2("axy"); | |
645 | InternalKey ik2(uk2, 10, kTypeValue); | |
646 | std::string v2(4100, '2'); // large value | |
647 | ||
648 | PinnableSlice value; | |
649 | std::string seek_ukey("axy"); | |
650 | InternalKey seek_ikey(seek_ukey, 60, kTypeValue); | |
651 | GetContext get_context(options.comparator, nullptr, nullptr, nullptr, | |
652 | GetContext::kNotFound, seek_ukey, &value, nullptr, | |
1e59de90 | 653 | nullptr, nullptr, true, nullptr, nullptr); |
11fdf7f2 TL |
654 | |
655 | TestBoundary(ik1, v1, ik2, v2, seek_ikey, get_context, options); | |
656 | ASSERT_EQ(get_context.State(), GetContext::kFound); | |
657 | ASSERT_EQ(value, v2); | |
658 | value.Reset(); | |
659 | } | |
660 | ||
661 | { | |
662 | // [ "axy"@100 ][ "axy"@10 ] | |
663 | // | Block N ][ Block N+1 ] | |
664 | // seek for "axy"@120 | |
665 | std::string uk1("axy"); | |
666 | InternalKey ik1(uk1, 100, kTypeValue); | |
667 | std::string v1(4100, '1'); // large value | |
668 | ||
669 | std::string uk2("axy"); | |
670 | InternalKey ik2(uk2, 10, kTypeValue); | |
671 | std::string v2(4100, '2'); // large value | |
672 | ||
673 | PinnableSlice value; | |
674 | std::string seek_ukey("axy"); | |
675 | InternalKey seek_ikey(seek_ukey, 120, kTypeValue); | |
676 | GetContext get_context(options.comparator, nullptr, nullptr, nullptr, | |
677 | GetContext::kNotFound, seek_ukey, &value, nullptr, | |
1e59de90 | 678 | nullptr, nullptr, true, nullptr, nullptr); |
11fdf7f2 TL |
679 | |
680 | TestBoundary(ik1, v1, ik2, v2, seek_ikey, get_context, options); | |
681 | ASSERT_EQ(get_context.State(), GetContext::kFound); | |
682 | ASSERT_EQ(value, v1); | |
683 | value.Reset(); | |
684 | } | |
685 | ||
686 | { | |
687 | // [ "axy"@100 ][ "axy"@10 ] | |
688 | // | Block N ][ Block N+1 ] | |
689 | // seek for "axy"@5 | |
690 | std::string uk1("axy"); | |
691 | InternalKey ik1(uk1, 100, kTypeValue); | |
692 | std::string v1(4100, '1'); // large value | |
693 | ||
694 | std::string uk2("axy"); | |
695 | InternalKey ik2(uk2, 10, kTypeValue); | |
696 | std::string v2(4100, '2'); // large value | |
697 | ||
698 | PinnableSlice value; | |
699 | std::string seek_ukey("axy"); | |
700 | InternalKey seek_ikey(seek_ukey, 5, kTypeValue); | |
701 | GetContext get_context(options.comparator, nullptr, nullptr, nullptr, | |
702 | GetContext::kNotFound, seek_ukey, &value, nullptr, | |
1e59de90 | 703 | nullptr, nullptr, true, nullptr, nullptr); |
11fdf7f2 TL |
704 | |
705 | TestBoundary(ik1, v1, ik2, v2, seek_ikey, get_context, options); | |
706 | ASSERT_EQ(get_context.State(), GetContext::kNotFound); | |
707 | value.Reset(); | |
708 | } | |
709 | } | |
710 | ||
f67539c2 | 711 | } // namespace ROCKSDB_NAMESPACE |
11fdf7f2 TL |
712 | |
713 | int main(int argc, char** argv) { | |
1e59de90 | 714 | ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); |
11fdf7f2 TL |
715 | ::testing::InitGoogleTest(&argc, argv); |
716 | return RUN_ALL_TESTS(); | |
717 | } |