]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | // |
6 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |
7 | // Use of this source code is governed by a BSD-style license that can be | |
8 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
9 | ||
10 | #pragma once | |
11 | #include <memory> | |
1e59de90 | 12 | |
494da23a | 13 | #include "db/range_tombstone_fragmenter.h" |
1e59de90 TL |
14 | #if USE_COROUTINES |
15 | #include "folly/experimental/coro/Coroutine.h" | |
16 | #include "folly/experimental/coro/Task.h" | |
17 | #endif | |
11fdf7f2 | 18 | #include "rocksdb/slice_transform.h" |
1e59de90 | 19 | #include "rocksdb/table_reader_caller.h" |
f67539c2 | 20 | #include "table/get_context.h" |
7c673cae | 21 | #include "table/internal_iterator.h" |
f67539c2 | 22 | #include "table/multiget_context.h" |
7c673cae | 23 | |
f67539c2 | 24 | namespace ROCKSDB_NAMESPACE { |
7c673cae FG |
25 | |
26 | class Iterator; | |
27 | struct ParsedInternalKey; | |
28 | class Slice; | |
29 | class Arena; | |
30 | struct ReadOptions; | |
31 | struct TableProperties; | |
32 | class GetContext; | |
f67539c2 | 33 | class MultiGetContext; |
7c673cae | 34 | |
f67539c2 TL |
35 | // A Table (also referred to as SST) is a sorted map from strings to strings. |
36 | // Tables are immutable and persistent. A Table may be safely accessed from | |
37 | // multiple threads without external synchronization. Table readers are used | |
38 | // for reading various types of table formats supported by rocksdb including | |
39 | // BlockBasedTable, PlainTable and CuckooTable format. | |
7c673cae FG |
40 | class TableReader {
41 | public: | |
42 | virtual ~TableReader() {} | |
43 | ||
44 | // Returns a new iterator over the table contents. | |
45 | // The result of NewIterator() is initially invalid (caller must | |
46 | // call one of the Seek methods on the iterator before using it). | |
20effc67 TL |
47 | //
48 | // read_options: Must outlive the returned iterator. | |
7c673cae FG |
49 | // arena: If not null, the arena needs to be used to allocate the Iterator.
50 | // When destroying the iterator, the caller will not call "delete" | |
51 | // but Iterator::~Iterator() directly. The destructor needs to destroy | |
52 | // all the states but those allocated in arena. | |
53 | // skip_filters: disables checking the bloom filters even if they exist. This | |
54 | // option is effective only for block-based table format. | |
f67539c2 TL |
55 | // compaction_readahead_size: its value will only be used if caller =
56 | // kCompaction | |
57 | virtual InternalIterator* NewIterator( | |
20effc67 TL |
58 | const ReadOptions& read_options, const SliceTransform* prefix_extractor,
59 | Arena* arena, bool skip_filters, TableReaderCaller caller, | |
60 | size_t compaction_readahead_size = 0, | |
61 | bool allow_unprepared_value = false) = 0; | |
7c673cae | 62 | |
494da23a | 63 | virtual FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator(
11fdf7f2 | 64 | const ReadOptions& /*read_options*/) {
7c673cae FG |
65 | return nullptr;
66 | } | |
67 | ||
68 | // Given a key, return an approximate byte offset in the file where | |
69 | // the data for that key begins (or would begin if the key were | |
70 | // present in the file). The returned value is in terms of file | |
71 | // bytes, and so includes effects like compression of the underlying data. | |
72 | // E.g., the approximate offset of the last key in the table will | |
73 | // be close to the file length. | |
20effc67 TL |
74 | // TODO(peterd): Since this function is only used for approximate size
75 | // from beginning of file, reduce code duplication by removing this | |
76 | // function and letting ApproximateSize take optional start and end, so | |
77 | // that absolute start and end can be specified and optimized without | |
78 | // key / index work. | |
f67539c2 TL |
79 | virtual uint64_t ApproximateOffsetOf(const Slice& key,
80 | TableReaderCaller caller) = 0; | |
81 | ||
82 | // Given start and end keys, return the approximate data size in the file | |
83 | // between the keys. The returned value is in terms of file bytes, and so | |
20effc67 TL |
84 | // includes effects like compression of the underlying data and applicable
85 | // portions of metadata including filters and indexes. Nullptr for start or | |
86 | // end (or both) indicates absolute start or end of the table. NOTE(review): this signature takes start/end by const reference, so a null cannot be passed as written -- confirm the intended convention. | |
f67539c2 TL |
87 | virtual uint64_t ApproximateSize(const Slice& start, const Slice& end,
88 | TableReaderCaller caller) = 0; | |
7c673cae | 89 | |
1e59de90 TL |
90 | struct Anchor {
91 | Anchor(const Slice& _user_key, size_t _range_size) | |
92 | : user_key(_user_key.ToStringView()), range_size(_range_size) {} | |
93 | std::string user_key; | |
94 | size_t range_size; | |
95 | }; | |
96 | ||
97 | // Intended to return approximately 128 anchor keys. | |
98 | // The last one tends to be the largest key. This default implementation returns Status::NotSupported. | |
99 | virtual Status ApproximateKeyAnchors(const ReadOptions& /*read_options*/, | |
100 | std::vector<Anchor>& /*anchors*/) { | |
101 | return Status::NotSupported("ApproximateKeyAnchors() not supported."); | |
102 | } | |
103 | ||
7c673cae FG |
104 | // Set up the table for Compaction. Might change some parameters with
105 | // posix_fadvise | |
106 | virtual void SetupForCompaction() = 0; | |
107 | ||
108 | virtual std::shared_ptr<const TableProperties> GetTableProperties() const = 0; | |
109 | ||
110 | // Prepare work that can be done before the real Get(). The default implementation does nothing. | |
11fdf7f2 | 111 | virtual void Prepare(const Slice& /*target*/) {}
7c673cae FG |
112 |
113 | // Report an approximation of how much memory has been used. | |
114 | virtual size_t ApproximateMemoryUsage() const = 0; | |
115 | ||
116 | // Calls get_context->SaveValue() repeatedly, starting with | |
117 | // the entry found after a call to Seek(key), until it returns false. | |
118 | // May not make such a call if filter policy says that key is not present. | |
119 | // | |
120 | // get_context->MarkKeyMayExist needs to be called when it is configured to be | |
121 | // memory only and the key is not found in the block cache. | |
122 | // | |
123 | // readOptions: the options for the read | |
124 | // key: the key to search for | |
125 | // skip_filters: disables checking the bloom filters even if they exist. This | |
126 | // option is effective only for block-based table format. | |
127 | virtual Status Get(const ReadOptions& readOptions, const Slice& key, | |
11fdf7f2 TL |
128 | GetContext* get_context,
129 | const SliceTransform* prefix_extractor, | |
130 | bool skip_filters = false) = 0; | |
7c673cae | 131 | |
1e59de90 TL |
132 | // Use bloom filters in the table file, if present, to filter out keys. The
133 | // mget_range will be updated to skip keys that get a negative result from | |
134 | // the filter lookup. This default implementation returns Status::NotSupported(). | |
135 | virtual Status MultiGetFilter(const ReadOptions& /*readOptions*/, | |
136 | const SliceTransform* /*prefix_extractor*/, | |
137 | MultiGetContext::Range* /*mget_range*/) { | |
138 | return Status::NotSupported(); | |
139 | } | |
140 | ||
f67539c2 TL |
141 | virtual void MultiGet(const ReadOptions& readOptions,
142 | const MultiGetContext::Range* mget_range, | |
143 | const SliceTransform* prefix_extractor, | |
144 | bool skip_filters = false) { | |
145 | for (auto iter = mget_range->begin(); iter != mget_range->end(); ++iter) { | |
146 | *iter->s = Get(readOptions, iter->ikey, iter->get_context, | |
147 | prefix_extractor, skip_filters); | |
148 | } | |
149 | } | |
150 | ||
1e59de90 TL |
151 | #if USE_COROUTINES
152 | virtual folly::coro::Task<void> MultiGetCoroutine( | |
153 | const ReadOptions& readOptions, const MultiGetContext::Range* mget_range, | |
154 | const SliceTransform* prefix_extractor, bool skip_filters = false) { | |
155 | MultiGet(readOptions, mget_range, prefix_extractor, skip_filters); | |
156 | co_return; | |
157 | } | |
158 | #endif // USE_COROUTINES | |
159 | ||
7c673cae FG |
160 | // Prefetch data corresponding to a given range of keys
161 | // Typically this functionality is required for table implementations that | |
162 | // persist the data on a non-volatile storage medium like disk/SSD | |
163 | virtual Status Prefetch(const Slice* begin = nullptr, | |
164 | const Slice* end = nullptr) { | |
1e59de90 TL |
165 | (void)begin;
166 | (void)end; | |
7c673cae FG |
167 | // Default implementation is NOOP.
168 | // The child class should implement functionality when applicable | |
169 | return Status::OK(); | |
170 | } | |
171 | ||
172 | // Converts the db file to a human-readable form. This default implementation returns Status::NotSupported. | |
f67539c2 | 173 | virtual Status DumpTable(WritableFile* /*out_file*/) {
7c673cae FG |
174 | return Status::NotSupported("DumpTable() not supported");
175 | } | |
176 | ||
11fdf7f2 | 177 | // Checks whether there is corruption in this db file. This default implementation returns Status::NotSupported.
f67539c2 TL |
178 | virtual Status VerifyChecksum(const ReadOptions& /*read_options*/,
179 | TableReaderCaller /*caller*/) {
11fdf7f2 TL |
180 | return Status::NotSupported("VerifyChecksum() not supported");
181 | } | |
7c673cae FG |
182 | }; | |
183 | ||
f67539c2 | 184 | } // namespace ROCKSDB_NAMESPACE |