]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/table/table_reader.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / rocksdb / table / table_reader.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #pragma once
11 #include <memory>
12 #include "db/range_tombstone_fragmenter.h"
13 #include "rocksdb/slice_transform.h"
14 #include "table/get_context.h"
15 #include "table/internal_iterator.h"
16 #include "table/multiget_context.h"
17 #include "table/table_reader_caller.h"
18
19 namespace ROCKSDB_NAMESPACE {
20
21 class Iterator;
22 struct ParsedInternalKey;
23 class Slice;
24 class Arena;
25 struct ReadOptions;
26 struct TableProperties;
27 class GetContext;
28 class MultiGetContext;
29
30 // A Table (also referred to as SST) is a sorted map from strings to strings.
31 // Tables are immutable and persistent. A Table may be safely accessed from
32 // multiple threads without external synchronization. Table readers are used
33 // for reading various types of table formats supported by rocksdb including
34 // BlockBasedTable, PlainTable and CuckooTable format.
35 class TableReader {
36 public:
37 virtual ~TableReader() {}
38
39 // Returns a new iterator over the table contents.
40 // The result of NewIterator() is initially invalid (caller must
41 // call one of the Seek methods on the iterator before using it).
42 //
43 // read_options: Must outlive the returned iterator.
44 // arena: If not null, the arena needs to be used to allocate the Iterator.
45 // When destroying the iterator, the caller will not call "delete"
46 // but Iterator::~Iterator() directly. The destructor needs to destroy
47 // all the states but those allocated in arena.
48 // skip_filters: disables checking the bloom filters even if they exist. This
49 // option is effective only for block-based table format.
50 // compaction_readahead_size: its value will only be used if caller =
51 // kCompaction
52 virtual InternalIterator* NewIterator(
53 const ReadOptions& read_options, const SliceTransform* prefix_extractor,
54 Arena* arena, bool skip_filters, TableReaderCaller caller,
55 size_t compaction_readahead_size = 0,
56 bool allow_unprepared_value = false) = 0;
57
58 virtual FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator(
59 const ReadOptions& /*read_options*/) {
60 return nullptr;
61 }
62
63 // Given a key, return an approximate byte offset in the file where
64 // the data for that key begins (or would begin if the key were
65 // present in the file). The returned value is in terms of file
66 // bytes, and so includes effects like compression of the underlying data.
67 // E.g., the approximate offset of the last key in the table will
68 // be close to the file length.
69 // TODO(peterd): Since this function is only used for approximate size
70 // from beginning of file, reduce code duplication by removing this
71 // function and letting ApproximateSize take optional start and end, so
72 // that absolute start and end can be specified and optimized without
73 // key / index work.
74 virtual uint64_t ApproximateOffsetOf(const Slice& key,
75 TableReaderCaller caller) = 0;
76
77 // Given start and end keys, return the approximate data size in the file
78 // between the keys. The returned value is in terms of file bytes, and so
79 // includes effects like compression of the underlying data and applicable
80 // portions of metadata including filters and indexes. Nullptr for start or
81 // end (or both) indicates absolute start or end of the table.
82 virtual uint64_t ApproximateSize(const Slice& start, const Slice& end,
83 TableReaderCaller caller) = 0;
84
85 // Set up the table for Compaction. Might change some parameters with
86 // posix_fadvise
87 virtual void SetupForCompaction() = 0;
88
89 virtual std::shared_ptr<const TableProperties> GetTableProperties() const = 0;
90
91 // Prepare work that can be done before the real Get()
92 virtual void Prepare(const Slice& /*target*/) {}
93
94 // Report an approximation of how much memory has been used.
95 virtual size_t ApproximateMemoryUsage() const = 0;
96
97 // Calls get_context->SaveValue() repeatedly, starting with
98 // the entry found after a call to Seek(key), until it returns false.
99 // May not make such a call if filter policy says that key is not present.
100 //
101 // get_context->MarkKeyMayExist needs to be called when it is configured to be
102 // memory only and the key is not found in the block cache.
103 //
104 // readOptions is the options for the read
105 // key is the key to search for
106 // skip_filters: disables checking the bloom filters even if they exist. This
107 // option is effective only for block-based table format.
108 virtual Status Get(const ReadOptions& readOptions, const Slice& key,
109 GetContext* get_context,
110 const SliceTransform* prefix_extractor,
111 bool skip_filters = false) = 0;
112
113 virtual void MultiGet(const ReadOptions& readOptions,
114 const MultiGetContext::Range* mget_range,
115 const SliceTransform* prefix_extractor,
116 bool skip_filters = false) {
117 for (auto iter = mget_range->begin(); iter != mget_range->end(); ++iter) {
118 *iter->s = Get(readOptions, iter->ikey, iter->get_context,
119 prefix_extractor, skip_filters);
120 }
121 }
122
123 // Prefetch data corresponding to a give range of keys
124 // Typically this functionality is required for table implementations that
125 // persists the data on a non volatile storage medium like disk/SSD
126 virtual Status Prefetch(const Slice* begin = nullptr,
127 const Slice* end = nullptr) {
128 (void) begin;
129 (void) end;
130 // Default implementation is NOOP.
131 // The child class should implement functionality when applicable
132 return Status::OK();
133 }
134
135 // convert db file to a human readable form
136 virtual Status DumpTable(WritableFile* /*out_file*/) {
137 return Status::NotSupported("DumpTable() not supported");
138 }
139
140 // check whether there is corruption in this db file
141 virtual Status VerifyChecksum(const ReadOptions& /*read_options*/,
142 TableReaderCaller /*caller*/) {
143 return Status::NotSupported("VerifyChecksum() not supported");
144 }
145 };
146
147 } // namespace ROCKSDB_NAMESPACE