]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/include/rocksdb/table_properties.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / include / rocksdb / table_properties.h
CommitLineData
7c673cae
FG
1// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file. See the AUTHORS file for names of contributors.
f67539c2 4// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
7c673cae
FG
5#pragma once
6
7#include <stdint.h>
1e59de90 8
7c673cae 9#include <map>
1e59de90 10#include <memory>
11fdf7f2 11#include <string>
1e59de90
TL
12
13#include "rocksdb/customizable.h"
7c673cae
FG
14#include "rocksdb/status.h"
15#include "rocksdb/types.h"
16
f67539c2 17namespace ROCKSDB_NAMESPACE {
7c673cae
FG
18
// -- Table Properties
// Besides the basic table properties, a table may also carry user-collected
// properties. The values of user-collected properties are stored as raw
// bytes, so it is up to the user to interpret them.
//
// Note: a prefix seek/scan over `UserCollectedProperties` can be written
// along these lines:
//
//   UserCollectedProperties props = ...;
//   for (auto pos = props.lower_bound(prefix);
//        pos != props.end() &&
//        pos->first.compare(0, prefix.size(), prefix) == 0;
//        ++pos) {
//     ...
//   }
using UserCollectedProperties = std::map<std::string, std::string>;
7c673cae
FG
34
// Human-readable names of the table properties, as used in the property
// block. The constants are only declared here (no in-class initializers), so
// their values are defined out of line.
// NOTE(review): the grouping comments below are inferred from the constant
// names only; confirm against the definitions before relying on them.
struct TablePropertiesNames {
  // DB / file identity.
  static const std::string kDbId;
  static const std::string kDbSessionId;
  static const std::string kDbHostId;
  static const std::string kOriginalFileNumber;

  // Data, index and filter block sizes and layout.
  static const std::string kDataSize;
  static const std::string kIndexSize;
  static const std::string kIndexPartitions;
  static const std::string kTopLevelIndexSize;
  static const std::string kIndexKeyIsUserKey;
  static const std::string kIndexValueIsDeltaEncoded;
  static const std::string kFilterSize;

  // Raw sizes and entry counts.
  static const std::string kRawKeySize;
  static const std::string kRawValueSize;
  static const std::string kNumDataBlocks;
  static const std::string kNumEntries;
  static const std::string kNumFilterEntries;
  static const std::string kDeletedKeys;
  static const std::string kMergeOperands;
  static const std::string kNumRangeDeletions;

  // Format and configuration used to build the table.
  static const std::string kFormatVersion;
  static const std::string kFixedKeyLen;
  static const std::string kFilterPolicy;
  static const std::string kColumnFamilyName;
  static const std::string kColumnFamilyId;
  static const std::string kComparator;
  static const std::string kMergeOperator;
  static const std::string kPrefixExtractorName;
  static const std::string kPropertyCollectors;
  static const std::string kCompression;
  static const std::string kCompressionOptions;

  // Timestamps.
  static const std::string kCreationTime;
  static const std::string kOldestKeyTime;
  static const std::string kFileCreationTime;

  // Compression estimates and sequence-number/time mapping.
  static const std::string kSlowCompressionEstimatedDataSize;
  static const std::string kFastCompressionEstimatedDataSize;
  static const std::string kSequenceNumberTimeMapping;
};
74
7c673cae
FG
75// `TablePropertiesCollector` provides the mechanism for users to collect
76// their own properties that they are interested in. This class is essentially
77// a collection of callback functions that will be invoked during table
11fdf7f2 78// building. It is constructed with TablePropertiesCollectorFactory. The methods
7c673cae 79// don't need to be thread-safe, as we will create exactly one
1e59de90
TL
80// TablePropertiesCollector object per table and then call it sequentially.
81//
82// Statuses from these callbacks are currently logged when not OK, but
83// otherwise ignored by RocksDB.
84//
85// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
86// because RocksDB is not exception-safe. This could cause undefined behavior
87// including data loss, unreported corruption, deadlocks, and more.
7c673cae
FG
88class TablePropertiesCollector {
89 public:
90 virtual ~TablePropertiesCollector() {}
91
92 // DEPRECATE User defined collector should implement AddUserKey(), though
93 // this old function still works for backward compatible reason.
94 // Add() will be called when a new key/value pair is inserted into the table.
95 // @params key the user key that is inserted into the table.
96 // @params value the value that is inserted into the table.
97 virtual Status Add(const Slice& /*key*/, const Slice& /*value*/) {
98 return Status::InvalidArgument(
99 "TablePropertiesCollector::Add() deprecated.");
100 }
101
102 // AddUserKey() will be called when a new key/value pair is inserted into the
103 // table.
104 // @params key the user key that is inserted into the table.
105 // @params value the value that is inserted into the table.
106 virtual Status AddUserKey(const Slice& key, const Slice& value,
107 EntryType /*type*/, SequenceNumber /*seq*/,
108 uint64_t /*file_size*/) {
109 // For backwards-compatibility.
110 return Add(key, value);
111 }
112
494da23a 113 // Called after each new block is cut
1e59de90
TL
114 virtual void BlockAdd(uint64_t /* block_uncomp_bytes */,
115 uint64_t /* block_compressed_bytes_fast */,
116 uint64_t /* block_compressed_bytes_slow */) {
494da23a
TL
117 // Nothing to do here. Callback registers can override.
118 return;
119 }
120
7c673cae
FG
121 // Finish() will be called when a table has already been built and is ready
122 // for writing the properties block.
123 // @params properties User will add their collected statistics to
124 // `properties`.
125 virtual Status Finish(UserCollectedProperties* properties) = 0;
126
127 // Return the human-readable properties, where the key is property name and
128 // the value is the human-readable form of value.
129 virtual UserCollectedProperties GetReadableProperties() const = 0;
130
131 // The name of the properties collector can be used for debugging purpose.
132 virtual const char* Name() const = 0;
133
134 // EXPERIMENTAL Return whether the output file should be further compacted
135 virtual bool NeedCompact() const { return false; }
136};
137
138// Constructs TablePropertiesCollector. Internals create a new
139// TablePropertiesCollector for each new table
1e59de90
TL
140//
141// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
142// because RocksDB is not exception-safe. This could cause undefined behavior
143// including data loss, unreported corruption, deadlocks, and more.
144class TablePropertiesCollectorFactory : public Customizable {
7c673cae
FG
145 public:
146 struct Context {
147 uint32_t column_family_id;
1e59de90
TL
148 // The level at creating the SST file (i.e, table), of which the
149 // properties are being collected.
150 int level_at_creation = kUnknownLevelAtCreation;
7c673cae 151 static const uint32_t kUnknownColumnFamily;
1e59de90 152 static const int kUnknownLevelAtCreation = -1;
7c673cae
FG
153 };
154
1e59de90
TL
155 ~TablePropertiesCollectorFactory() override {}
156 static const char* Type() { return "TablePropertiesCollectorFactory"; }
157 static Status CreateFromString(
158 const ConfigOptions& options, const std::string& value,
159 std::shared_ptr<TablePropertiesCollectorFactory>* result);
160
7c673cae
FG
161 // has to be thread-safe
162 virtual TablePropertiesCollector* CreateTablePropertiesCollector(
163 TablePropertiesCollectorFactory::Context context) = 0;
164
165 // The name of the properties collector can be used for debugging purpose.
1e59de90 166 const char* Name() const override = 0;
20effc67
TL
167
168 // Can be overridden by sub-classes to return the Name, followed by
169 // configuration info that will // be logged to the info log when the
170 // DB is opened
171 virtual std::string ToString() const { return Name(); }
7c673cae
FG
172};
173
174// TableProperties contains a bunch of read-only properties of its associated
175// table.
176struct TableProperties {
177 public:
1e59de90
TL
178 // the file number at creation time, or 0 for unknown. When known,
179 // combining with db_session_id must uniquely identify an SST file.
180 uint64_t orig_file_number = 0;
7c673cae
FG
181 // the total size of all data blocks.
182 uint64_t data_size = 0;
183 // the size of index block.
184 uint64_t index_size = 0;
11fdf7f2
TL
185 // Total number of index partitions if kTwoLevelIndexSearch is used
186 uint64_t index_partitions = 0;
187 // Size of the top-level index if kTwoLevelIndexSearch is used
188 uint64_t top_level_index_size = 0;
189 // Whether the index key is user key. Otherwise it includes 8 byte of sequence
190 // number added by internal key format.
191 uint64_t index_key_is_user_key = 0;
192 // Whether delta encoding is used to encode the index values.
193 uint64_t index_value_is_delta_encoded = 0;
7c673cae
FG
194 // the size of filter block.
195 uint64_t filter_size = 0;
1e59de90 196 // total raw (uncompressed, undelineated) key size
7c673cae 197 uint64_t raw_key_size = 0;
1e59de90 198 // total raw (uncompressed, undelineated) value size
7c673cae
FG
199 uint64_t raw_value_size = 0;
200 // the number of blocks in this table
201 uint64_t num_data_blocks = 0;
202 // the number of entries in this table
203 uint64_t num_entries = 0;
1e59de90
TL
204 // the number of unique entries (keys or prefixes) added to filters
205 uint64_t num_filter_entries = 0;
494da23a
TL
206 // the number of deletions in the table
207 uint64_t num_deletions = 0;
208 // the number of merge operands in the table
209 uint64_t num_merge_operands = 0;
11fdf7f2
TL
210 // the number of range deletions in this table
211 uint64_t num_range_deletions = 0;
7c673cae
FG
212 // format version, reserved for backward compatibility
213 uint64_t format_version = 0;
214 // If 0, key is variable length. Otherwise number of bytes for each key.
215 uint64_t fixed_key_len = 0;
216 // ID of column family for this SST file, corresponding to the CF identified
217 // by column_family_name.
f67539c2
TL
218 uint64_t column_family_id = ROCKSDB_NAMESPACE::
219 TablePropertiesCollectorFactory::Context::kUnknownColumnFamily;
220 // Timestamp of the latest key. 0 means unknown.
221 // TODO(sagar0): Should be changed to latest_key_time ... but don't know the
222 // full implications of backward compatibility. Hence retaining for now.
11fdf7f2 223 uint64_t creation_time = 0;
1e59de90 224
11fdf7f2
TL
225 // Timestamp of the earliest key. 0 means unknown.
226 uint64_t oldest_key_time = 0;
f67539c2
TL
227 // Actual SST file creation time. 0 means unknown.
228 uint64_t file_creation_time = 0;
1e59de90
TL
229 // Estimated size of data blocks if compressed using a relatively slower
230 // compression algorithm (see `ColumnFamilyOptions::sample_for_compression`).
231 // 0 means unknown.
232 uint64_t slow_compression_estimated_data_size = 0;
233 // Estimated size of data blocks if compressed using a relatively faster
234 // compression algorithm (see `ColumnFamilyOptions::sample_for_compression`).
235 // 0 means unknown.
236 uint64_t fast_compression_estimated_data_size = 0;
237 // Offset of the value of the property "external sst file global seqno" in the
238 // file if the property exists.
239 // 0 means not exists.
240 uint64_t external_sst_file_global_seqno_offset = 0;
7c673cae 241
20effc67
TL
242 // DB identity
243 // db_id is an identifier generated the first time the DB is created
244 // If DB identity is unset or unassigned, `db_id` will be an empty string.
245 std::string db_id;
246
247 // DB session identity
248 // db_session_id is an identifier that gets reset every time the DB is opened
249 // If DB session identity is unset or unassigned, `db_session_id` will be an
250 // empty string.
251 std::string db_session_id;
252
253 // Location of the machine hosting the DB instance
254 // db_host_id identifies the location of the host in some form
255 // (hostname by default, but can also be any string of the user's choosing).
256 // It can potentially change whenever the DB is opened
257 std::string db_host_id;
258
7c673cae
FG
259 // Name of the column family with which this SST file is associated.
260 // If column family is unknown, `column_family_name` will be an empty string.
261 std::string column_family_name;
262
263 // The name of the filter policy used in this table.
264 // If no filter policy is used, `filter_policy_name` will be an empty string.
265 std::string filter_policy_name;
266
267 // The name of the comparator used in this table.
268 std::string comparator_name;
269
270 // The name of the merge operator used in this table.
271 // If no merge operator is used, `merge_operator_name` will be "nullptr".
272 std::string merge_operator_name;
273
274 // The name of the prefix extractor used in this table
275 // If no prefix extractor is used, `prefix_extractor_name` will be "nullptr".
276 std::string prefix_extractor_name;
277
278 // The names of the property collectors factories used in this table
279 // separated by commas
280 // {collector_name[1]},{collector_name[2]},{collector_name[3]} ..
281 std::string property_collectors_names;
282
283 // The compression algo used to compress the SST files.
284 std::string compression_name;
285
494da23a
TL
286 // Compression options used to compress the SST files.
287 std::string compression_options;
288
1e59de90
TL
289 // Sequence number to time mapping, delta encoded.
290 std::string seqno_to_time_mapping;
291
7c673cae
FG
292 // user collected properties
293 UserCollectedProperties user_collected_properties;
294 UserCollectedProperties readable_properties;
295
7c673cae
FG
296 // convert this object to a human readable form
297 // @prop_delim: delimiter for each property.
298 std::string ToString(const std::string& prop_delim = "; ",
299 const std::string& kv_delim = "=") const;
300
301 // Aggregate the numerical member variables of the specified
302 // TableProperties.
303 void Add(const TableProperties& tp);
1e59de90
TL
304
305 // Subset of properties that make sense when added together
306 // between tables. Keys match field names in this class instead
307 // of using full property names.
308 std::map<std::string, uint64_t> GetAggregatablePropertiesAsMap() const;
309
310 // Return the approximated memory usage of this TableProperties object,
311 // including memory used by the string properties and UserCollectedProperties
312 std::size_t ApproximateMemoryUsage() const;
7c673cae
FG
313};
314
315// Extra properties
316// Below is a list of non-basic properties that are collected by database
317// itself. Especially some properties regarding to the internal keys (which
318// is unknown to `table`).
494da23a
TL
319//
320// DEPRECATED: these properties now belong as TableProperties members. Please
321// use TableProperties::num_deletions and TableProperties::num_merge_operands,
322// respectively.
7c673cae
FG
323extern uint64_t GetDeletedKeys(const UserCollectedProperties& props);
324extern uint64_t GetMergeOperands(const UserCollectedProperties& props,
325 bool* property_present);
326
f67539c2 327} // namespace ROCKSDB_NAMESPACE