]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/utilities/redis/redis_list_iterator.h
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / rocksdb / utilities / redis / redis_list_iterator.h
1 // Copyright 2013 Facebook
2 /**
3 * RedisListIterator:
4 * An abstraction over the "list" concept (e.g.: for redis lists).
5 * Provides functionality to read, traverse, edit, and write these lists.
6 *
7 * Upon construction, the RedisListIterator is given a block of list data.
8 * Internally, it stores a pointer to the data and a pointer to current item.
9 * It also stores a "result" list that will be mutated over time.
10 *
11 * Traversal and mutation are done by "forward iteration".
12 * The Push() and Skip() methods will advance the iterator to the next item.
13 * However, Push() will also "write the current item to the result".
14 * Skip() will simply move to next item, causing current item to be dropped.
15 *
16 * Upon completion, the result (accessible by WriteResult()) will be saved.
17 * All "skipped" items will be gone; all "pushed" items will remain.
18 *
19 * @throws Any of the operations may throw a RedisListException if an invalid
20 * operation is performed or if the data is found to be corrupt.
21 *
22 * @notes By default, if WriteResult() is called part-way through iteration,
23 * it will automatically advance the iterator to the end, and Keep()
24 * all items that haven't been traversed yet. This may be subject
25 * to review.
26 *
27 * @notes Can access the "current" item via GetCurrent(), and other
28 * list-specific information such as Length().
29 *
30 * @notes The internal representation is due to change at any time. Presently,
31 * the list is represented as follows:
32 * - 32-bit integer header: the number of items in the list
33 * - For each item:
34 * - 32-bit int (n): the number of bytes representing this item
35 * - n bytes of data: the actual data.
36 *
37 * @author Deon Nicholas (dnicholas@fb.com)
38 */
39
40 #pragma once
41 #ifndef ROCKSDB_LITE
42
43 #include <string>
44
45 #include "redis_list_exception.h"
46 #include "rocksdb/slice.h"
47 #include "util/coding.h"
48
49 namespace rocksdb {
50
51 /// An abstraction over the "list" concept.
52 /// All operations may throw a RedisListException
53 class RedisListIterator {
54 public:
55 /// Construct a redis-list-iterator based on data.
56 /// If the data is non-empty, it must formatted according to @notes above.
57 ///
58 /// If the data is valid, we can assume the following invariant(s):
59 /// a) length_, num_bytes_ are set correctly.
60 /// b) cur_byte_ always refers to the start of the current element,
61 /// just before the bytes that specify element length.
62 /// c) cur_elem_ is always the index of the current element.
63 /// d) cur_elem_length_ is always the number of bytes in current element,
64 /// excluding the 4-byte header itself.
65 /// e) result_ will always contain data_[0..cur_byte_) and a header
66 /// f) Whenever corrupt data is encountered or an invalid operation is
67 /// attempted, a RedisListException will immediately be thrown.
68 explicit RedisListIterator(const std::string& list_data)
69 : data_(list_data.data()),
70 num_bytes_(static_cast<uint32_t>(list_data.size())),
71 cur_byte_(0),
72 cur_elem_(0),
73 cur_elem_length_(0),
74 length_(0),
75 result_() {
76 // Initialize the result_ (reserve enough space for header)
77 InitializeResult();
78
79 // Parse the data only if it is not empty.
80 if (num_bytes_ == 0) {
81 return;
82 }
83
84 // If non-empty, but less than 4 bytes, data must be corrupt
85 if (num_bytes_ < sizeof(length_)) {
86 ThrowError("Corrupt header."); // Will break control flow
87 }
88
89 // Good. The first bytes specify the number of elements
90 length_ = DecodeFixed32(data_);
91 cur_byte_ = sizeof(length_);
92
93 // If we have at least one element, point to that element.
94 // Also, read the first integer of the element (specifying the size),
95 // if possible.
96 if (length_ > 0) {
97 if (cur_byte_ + sizeof(cur_elem_length_) <= num_bytes_) {
98 cur_elem_length_ = DecodeFixed32(data_+cur_byte_);
99 } else {
100 ThrowError("Corrupt data for first element.");
101 }
102 }
103
104 // At this point, we are fully set-up.
105 // The invariants described in the header should now be true.
106 }
107
108 /// Reserve some space for the result_.
109 /// Equivalent to result_.reserve(bytes).
110 void Reserve(int bytes) {
111 result_.reserve(bytes);
112 }
113
114 /// Go to next element in data file.
115 /// Also writes the current element to result_.
116 RedisListIterator& Push() {
117 WriteCurrentElement();
118 MoveNext();
119 return *this;
120 }
121
122 /// Go to next element in data file.
123 /// Drops/skips the current element. It will not be written to result_.
124 RedisListIterator& Skip() {
125 MoveNext();
126 --length_; // One less item
127 --cur_elem_; // We moved one forward, but index did not change
128 return *this;
129 }
130
131 /// Insert elem into the result_ (just BEFORE the current element / byte)
132 /// Note: if Done() (i.e.: iterator points to end), this will append elem.
133 void InsertElement(const Slice& elem) {
134 // Ensure we are in a valid state
135 CheckErrors();
136
137 const int kOrigSize = static_cast<int>(result_.size());
138 result_.resize(kOrigSize + SizeOf(elem));
139 EncodeFixed32(result_.data() + kOrigSize,
140 static_cast<uint32_t>(elem.size()));
141 memcpy(result_.data() + kOrigSize + sizeof(uint32_t), elem.data(),
142 elem.size());
143 ++length_;
144 ++cur_elem_;
145 }
146
147 /// Access the current element, and save the result into *curElem
148 void GetCurrent(Slice* curElem) {
149 // Ensure we are in a valid state
150 CheckErrors();
151
152 // Ensure that we are not past the last element.
153 if (Done()) {
154 ThrowError("Invalid dereferencing.");
155 }
156
157 // Dereference the element
158 *curElem = Slice(data_+cur_byte_+sizeof(cur_elem_length_),
159 cur_elem_length_);
160 }
161
162 // Number of elements
163 int Length() const {
164 return length_;
165 }
166
167 // Number of bytes in the final representation (i.e: WriteResult().size())
168 int Size() const {
169 // result_ holds the currently written data
170 // data_[cur_byte..num_bytes-1] is the remainder of the data
171 return static_cast<int>(result_.size() + (num_bytes_ - cur_byte_));
172 }
173
174 // Reached the end?
175 bool Done() const {
176 return cur_byte_ >= num_bytes_ || cur_elem_ >= length_;
177 }
178
179 /// Returns a string representing the final, edited, data.
180 /// Assumes that all bytes of data_ in the range [0,cur_byte_) have been read
181 /// and that result_ contains this data.
182 /// The rest of the data must still be written.
183 /// So, this method ADVANCES THE ITERATOR TO THE END before writing.
184 Slice WriteResult() {
185 CheckErrors();
186
187 // The header should currently be filled with dummy data (0's)
188 // Correctly update the header.
189 // Note, this is safe since result_ is a vector (guaranteed contiguous)
190 EncodeFixed32(&result_[0],length_);
191
192 // Append the remainder of the data to the result.
193 result_.insert(result_.end(),data_+cur_byte_, data_ +num_bytes_);
194
195 // Seek to end of file
196 cur_byte_ = num_bytes_;
197 cur_elem_ = length_;
198 cur_elem_length_ = 0;
199
200 // Return the result
201 return Slice(result_.data(),result_.size());
202 }
203
204 public: // Static public functions
205
206 /// An upper-bound on the amount of bytes needed to store this element.
207 /// This is used to hide representation information from the client.
208 /// E.G. This can be used to compute the bytes we want to Reserve().
209 static uint32_t SizeOf(const Slice& elem) {
210 // [Integer Length . Data]
211 return static_cast<uint32_t>(sizeof(uint32_t) + elem.size());
212 }
213
214 private: // Private functions
215
216 /// Initializes the result_ string.
217 /// It will fill the first few bytes with 0's so that there is
218 /// enough space for header information when we need to write later.
219 /// Currently, "header information" means: the length (number of elements)
220 /// Assumes that result_ is empty to begin with
221 void InitializeResult() {
222 assert(result_.empty()); // Should always be true.
223 result_.resize(sizeof(uint32_t),0); // Put a block of 0's as the header
224 }
225
226 /// Go to the next element (used in Push() and Skip())
227 void MoveNext() {
228 CheckErrors();
229
230 // Check to make sure we are not already in a finished state
231 if (Done()) {
232 ThrowError("Attempting to iterate past end of list.");
233 }
234
235 // Move forward one element.
236 cur_byte_ += sizeof(cur_elem_length_) + cur_elem_length_;
237 ++cur_elem_;
238
239 // If we are at the end, finish
240 if (Done()) {
241 cur_elem_length_ = 0;
242 return;
243 }
244
245 // Otherwise, we should be able to read the new element's length
246 if (cur_byte_ + sizeof(cur_elem_length_) > num_bytes_) {
247 ThrowError("Corrupt element data.");
248 }
249
250 // Set the new element's length
251 cur_elem_length_ = DecodeFixed32(data_+cur_byte_);
252
253 return;
254 }
255
256 /// Append the current element (pointed to by cur_byte_) to result_
257 /// Assumes result_ has already been reserved appropriately.
258 void WriteCurrentElement() {
259 // First verify that the iterator is still valid.
260 CheckErrors();
261 if (Done()) {
262 ThrowError("Attempting to write invalid element.");
263 }
264
265 // Append the cur element.
266 result_.insert(result_.end(),
267 data_+cur_byte_,
268 data_+cur_byte_+ sizeof(uint32_t) + cur_elem_length_);
269 }
270
271 /// Will ThrowError() if necessary.
272 /// Checks for common/ubiquitous errors that can arise after most operations.
273 /// This method should be called before any reading operation.
274 /// If this function succeeds, then we are guaranteed to be in a valid state.
275 /// Other member functions should check for errors and ThrowError() also
276 /// if an error occurs that is specific to it even while in a valid state.
277 void CheckErrors() {
278 // Check if any crazy thing has happened recently
279 if ((cur_elem_ > length_) || // Bad index
280 (cur_byte_ > num_bytes_) || // No more bytes
281 (cur_byte_ + cur_elem_length_ > num_bytes_) || // Item too large
282 (cur_byte_ == num_bytes_ && cur_elem_ != length_) || // Too many items
283 (cur_elem_ == length_ && cur_byte_ != num_bytes_)) { // Too many bytes
284 ThrowError("Corrupt data.");
285 }
286 }
287
288 /// Will throw an exception based on the passed-in message.
289 /// This function is guaranteed to STOP THE CONTROL-FLOW.
290 /// (i.e.: you do not have to call "return" after calling ThrowError)
291 void ThrowError(const char* const /*msg*/ = nullptr) {
292 // TODO: For now we ignore the msg parameter. This can be expanded later.
293 throw RedisListException();
294 }
295
296 private:
297 const char* const data_; // A pointer to the data (the first byte)
298 const uint32_t num_bytes_; // The number of bytes in this list
299
300 uint32_t cur_byte_; // The current byte being read
301 uint32_t cur_elem_; // The current element being read
302 uint32_t cur_elem_length_; // The number of bytes in current element
303
304 uint32_t length_; // The number of elements in this list
305 std::vector<char> result_; // The output data
306 };
307
308 } // namespace rocksdb
309 #endif // ROCKSDB_LITE