]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/table/table_reader.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / table / table_reader.h
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5//
6// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7// Use of this source code is governed by a BSD-style license that can be
8// found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10#pragma once
11#include <memory>
1e59de90 12
494da23a 13#include "db/range_tombstone_fragmenter.h"
1e59de90
TL
14#if USE_COROUTINES
15#include "folly/experimental/coro/Coroutine.h"
16#include "folly/experimental/coro/Task.h"
17#endif
11fdf7f2 18#include "rocksdb/slice_transform.h"
1e59de90 19#include "rocksdb/table_reader_caller.h"
f67539c2 20#include "table/get_context.h"
7c673cae 21#include "table/internal_iterator.h"
f67539c2 22#include "table/multiget_context.h"
7c673cae 23
f67539c2 24namespace ROCKSDB_NAMESPACE {
7c673cae
FG
25
26class Iterator;
27struct ParsedInternalKey;
28class Slice;
29class Arena;
30struct ReadOptions;
31struct TableProperties;
32class GetContext;
f67539c2 33class MultiGetContext;
7c673cae 34
f67539c2
TL
35// A Table (also referred to as SST) is a sorted map from strings to strings.
36// Tables are immutable and persistent. A Table may be safely accessed from
37// multiple threads without external synchronization. Table readers are used
38// for reading various types of table formats supported by rocksdb including
39// BlockBasedTable, PlainTable and CuckooTable format.
7c673cae
FG
40class TableReader {
41 public:
42 virtual ~TableReader() {}
43
44 // Returns a new iterator over the table contents.
45 // The result of NewIterator() is initially invalid (caller must
46 // call one of the Seek methods on the iterator before using it).
20effc67
TL
47 //
48 // read_options: Must outlive the returned iterator.
7c673cae
FG
49 // arena: If not null, the arena needs to be used to allocate the Iterator.
50 // When destroying the iterator, the caller will not call "delete"
51 // but Iterator::~Iterator() directly. The destructor needs to destroy
52 // all the states but those allocated in arena.
53 // skip_filters: disables checking the bloom filters even if they exist. This
54 // option is effective only for block-based table format.
f67539c2
TL
55 // compaction_readahead_size: its value will only be used if caller =
56 // kCompaction
57 virtual InternalIterator* NewIterator(
20effc67
TL
58 const ReadOptions& read_options, const SliceTransform* prefix_extractor,
59 Arena* arena, bool skip_filters, TableReaderCaller caller,
60 size_t compaction_readahead_size = 0,
61 bool allow_unprepared_value = false) = 0;
7c673cae 62
494da23a 63 virtual FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator(
11fdf7f2 64 const ReadOptions& /*read_options*/) {
7c673cae
FG
65 return nullptr;
66 }
67
68 // Given a key, return an approximate byte offset in the file where
69 // the data for that key begins (or would begin if the key were
70 // present in the file). The returned value is in terms of file
71 // bytes, and so includes effects like compression of the underlying data.
72 // E.g., the approximate offset of the last key in the table will
73 // be close to the file length.
20effc67
TL
74 // TODO(peterd): Since this function is only used for approximate size
75 // from beginning of file, reduce code duplication by removing this
76 // function and letting ApproximateSize take optional start and end, so
77 // that absolute start and end can be specified and optimized without
78 // key / index work.
f67539c2
TL
79 virtual uint64_t ApproximateOffsetOf(const Slice& key,
80 TableReaderCaller caller) = 0;
81
82 // Given start and end keys, return the approximate data size in the file
83 // between the keys. The returned value is in terms of file bytes, and so
20effc67
TL
84 // includes effects like compression of the underlying data and applicable
85 // portions of metadata including filters and indexes. Nullptr for start or
86 // end (or both) indicates absolute start or end of the table.
f67539c2
TL
87 virtual uint64_t ApproximateSize(const Slice& start, const Slice& end,
88 TableReaderCaller caller) = 0;
7c673cae 89
1e59de90
TL
90 struct Anchor {
91 Anchor(const Slice& _user_key, size_t _range_size)
92 : user_key(_user_key.ToStringView()), range_size(_range_size) {}
93 std::string user_key;
94 size_t range_size;
95 };
96
97 // Now try to return approximately 128 anchor keys.
98 // The last one tends to be the largest key.
99 virtual Status ApproximateKeyAnchors(const ReadOptions& /*read_options*/,
100 std::vector<Anchor>& /*anchors*/) {
101 return Status::NotSupported("ApproximateKeyAnchors() not supported.");
102 }
103
7c673cae
FG
104 // Set up the table for Compaction. Might change some parameters with
105 // posix_fadvise
106 virtual void SetupForCompaction() = 0;
107
108 virtual std::shared_ptr<const TableProperties> GetTableProperties() const = 0;
109
110 // Prepare work that can be done before the real Get()
11fdf7f2 111 virtual void Prepare(const Slice& /*target*/) {}
7c673cae
FG
112
113 // Report an approximation of how much memory has been used.
114 virtual size_t ApproximateMemoryUsage() const = 0;
115
116 // Calls get_context->SaveValue() repeatedly, starting with
117 // the entry found after a call to Seek(key), until it returns false.
118 // May not make such a call if filter policy says that key is not present.
119 //
120 // get_context->MarkKeyMayExist needs to be called when it is configured to be
121 // memory only and the key is not found in the block cache.
122 //
123 // readOptions is the options for the read
124 // key is the key to search for
125 // skip_filters: disables checking the bloom filters even if they exist. This
126 // option is effective only for block-based table format.
127 virtual Status Get(const ReadOptions& readOptions, const Slice& key,
11fdf7f2
TL
128 GetContext* get_context,
129 const SliceTransform* prefix_extractor,
130 bool skip_filters = false) = 0;
7c673cae 131
1e59de90
TL
132 // Use bloom filters in the table file, if present, to filter out keys. The
133 // mget_range will be updated to skip keys that get a negative result from
134 // the filter lookup.
135 virtual Status MultiGetFilter(const ReadOptions& /*readOptions*/,
136 const SliceTransform* /*prefix_extractor*/,
137 MultiGetContext::Range* /*mget_range*/) {
138 return Status::NotSupported();
139 }
140
f67539c2
TL
141 virtual void MultiGet(const ReadOptions& readOptions,
142 const MultiGetContext::Range* mget_range,
143 const SliceTransform* prefix_extractor,
144 bool skip_filters = false) {
145 for (auto iter = mget_range->begin(); iter != mget_range->end(); ++iter) {
146 *iter->s = Get(readOptions, iter->ikey, iter->get_context,
147 prefix_extractor, skip_filters);
148 }
149 }
150
1e59de90
TL
151#if USE_COROUTINES
152 virtual folly::coro::Task<void> MultiGetCoroutine(
153 const ReadOptions& readOptions, const MultiGetContext::Range* mget_range,
154 const SliceTransform* prefix_extractor, bool skip_filters = false) {
155 MultiGet(readOptions, mget_range, prefix_extractor, skip_filters);
156 co_return;
157 }
158#endif // USE_COROUTINES
159
7c673cae
FG
160 // Prefetch data corresponding to a give range of keys
161 // Typically this functionality is required for table implementations that
162 // persists the data on a non volatile storage medium like disk/SSD
163 virtual Status Prefetch(const Slice* begin = nullptr,
164 const Slice* end = nullptr) {
1e59de90
TL
165 (void)begin;
166 (void)end;
7c673cae
FG
167 // Default implementation is NOOP.
168 // The child class should implement functionality when applicable
169 return Status::OK();
170 }
171
172 // convert db file to a human readable form
f67539c2 173 virtual Status DumpTable(WritableFile* /*out_file*/) {
7c673cae
FG
174 return Status::NotSupported("DumpTable() not supported");
175 }
176
11fdf7f2 177 // check whether there is corruption in this db file
f67539c2
TL
178 virtual Status VerifyChecksum(const ReadOptions& /*read_options*/,
179 TableReaderCaller /*caller*/) {
11fdf7f2
TL
180 return Status::NotSupported("VerifyChecksum() not supported");
181 }
7c673cae
FG
182};
183
f67539c2 184} // namespace ROCKSDB_NAMESPACE