]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / rocksdb / utilities / write_batch_with_index / write_batch_with_index_internal.h
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5#pragma once
6
7#ifndef ROCKSDB_LITE
8
9#include <limits>
10#include <string>
11#include <vector>
12
13#include "options/db_options.h"
14#include "port/port.h"
15#include "rocksdb/comparator.h"
16#include "rocksdb/iterator.h"
17#include "rocksdb/slice.h"
18#include "rocksdb/status.h"
19#include "rocksdb/utilities/write_batch_with_index.h"
20
f67539c2 21namespace ROCKSDB_NAMESPACE {
7c673cae
FG
22
23class MergeContext;
24struct Options;
25
26// Key used by skip list, as the binary searchable index of WriteBatchWithIndex.
27struct WriteBatchIndexEntry {
28 WriteBatchIndexEntry(size_t o, uint32_t c, size_t ko, size_t ksz)
29 : offset(o),
30 column_family(c),
31 key_offset(ko),
32 key_size(ksz),
33 search_key(nullptr) {}
11fdf7f2
TL
34 // Create a dummy entry as the search key. This index entry won't be backed
35 // by an entry from the write batch, but a pointer to the search key. Or a
36 // special flag of offset can indicate we are seek to first.
37 // @_search_key: the search key
38 // @_column_family: column family
39 // @is_forward_direction: true for Seek(). False for SeekForPrev()
40 // @is_seek_to_first: true if we seek to the beginning of the column family
41 // _search_key should be null in this case.
42 WriteBatchIndexEntry(const Slice* _search_key, uint32_t _column_family,
43 bool is_forward_direction, bool is_seek_to_first)
44 // For SeekForPrev(), we need to make the dummy entry larger than any
45 // entry who has the same search key. Otherwise, we'll miss those entries.
46 : offset(is_forward_direction ? 0 : port::kMaxSizet),
47 column_family(_column_family),
7c673cae 48 key_offset(0),
11fdf7f2
TL
49 key_size(is_seek_to_first ? kFlagMinInCf : 0),
50 search_key(_search_key) {
51 assert(_search_key != nullptr || is_seek_to_first);
52 }
53
54 // If this flag appears in the key_size, it indicates a
55 // key that is smaller than any other entry for the same column family.
56 static const size_t kFlagMinInCf = port::kMaxSizet;
7c673cae 57
11fdf7f2
TL
58 bool is_min_in_cf() const {
59 assert(key_size != kFlagMinInCf ||
60 (key_offset == 0 && search_key == nullptr));
61 return key_size == kFlagMinInCf;
62 }
7c673cae 63
11fdf7f2
TL
64 // offset of an entry in write batch's string buffer. If this is a dummy
65 // lookup key, in which case search_key != nullptr, offset is set to either
66 // 0 or max, only for comparison purpose. Because when entries have the same
67 // key, the entry with larger offset is larger, offset = 0 will make a seek
68 // key small or equal than all the entries with the seek key, so that Seek()
69 // will find all the entries of the same key. Similarly, offset = MAX will
70 // make the entry just larger than all entries with the search key so
71 // SeekForPrev() will see all the keys with the same key.
72 size_t offset;
73 uint32_t column_family; // c1olumn family of the entry.
7c673cae 74 size_t key_offset; // offset of the key in write batch's string buffer.
11fdf7f2
TL
75 size_t key_size; // size of the key. kFlagMinInCf indicates
76 // that this is a dummy look up entry for
77 // SeekToFirst() to the beginning of the column
78 // family. We use the flag here to save a boolean
79 // in the struct.
7c673cae
FG
80
81 const Slice* search_key; // if not null, instead of reading keys from
82 // write batch, use it to compare. This is used
83 // for lookup key.
84};
85
86class ReadableWriteBatch : public WriteBatch {
87 public:
88 explicit ReadableWriteBatch(size_t reserved_bytes = 0, size_t max_bytes = 0)
89 : WriteBatch(reserved_bytes, max_bytes) {}
90 // Retrieve some information from a write entry in the write batch, given
91 // the start offset of the write entry.
92 Status GetEntryFromDataOffset(size_t data_offset, WriteType* type, Slice* Key,
93 Slice* value, Slice* blob, Slice* xid) const;
94};
95
96class WriteBatchEntryComparator {
97 public:
98 WriteBatchEntryComparator(const Comparator* _default_comparator,
99 const ReadableWriteBatch* write_batch)
100 : default_comparator_(_default_comparator), write_batch_(write_batch) {}
101 // Compare a and b. Return a negative value if a is less than b, 0 if they
102 // are equal, and a positive value if a is greater than b
103 int operator()(const WriteBatchIndexEntry* entry1,
104 const WriteBatchIndexEntry* entry2) const;
105
106 int CompareKey(uint32_t column_family, const Slice& key1,
107 const Slice& key2) const;
108
109 void SetComparatorForCF(uint32_t column_family_id,
110 const Comparator* comparator) {
111 if (column_family_id >= cf_comparators_.size()) {
112 cf_comparators_.resize(column_family_id + 1, nullptr);
113 }
114 cf_comparators_[column_family_id] = comparator;
115 }
116
117 const Comparator* default_comparator() { return default_comparator_; }
118
119 private:
120 const Comparator* default_comparator_;
121 std::vector<const Comparator*> cf_comparators_;
122 const ReadableWriteBatch* write_batch_;
123};
124
125class WriteBatchWithIndexInternal {
126 public:
127 enum Result { kFound, kDeleted, kNotFound, kMergeInProgress, kError };
128
129 // If batch contains a value for key, store it in *value and return kFound.
130 // If batch contains a deletion for key, return Deleted.
131 // If batch contains Merge operations as the most recent entry for a key,
132 // and the merge process does not stop (not reaching a value or delete),
133 // prepend the current merge operands to *operands,
134 // and return kMergeInProgress
135 // If batch does not contain this key, return kNotFound
136 // Else, return kError on error with error Status stored in *s.
137 static WriteBatchWithIndexInternal::Result GetFromBatch(
138 const ImmutableDBOptions& ioptions, WriteBatchWithIndex* batch,
139 ColumnFamilyHandle* column_family, const Slice& key,
140 MergeContext* merge_context, WriteBatchEntryComparator* cmp,
141 std::string* value, bool overwrite_key, Status* s);
142};
143
f67539c2 144} // namespace ROCKSDB_NAMESPACE
7c673cae 145#endif // !ROCKSDB_LITE