]> git.proxmox.com Git - ceph.git/blob - ceph/src/common/bit_vector.hpp
import ceph nautilus 14.2.2
[ceph.git] / ceph / src / common / bit_vector.hpp
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2014 Red Hat <contact@redhat.com>
7 *
8 * LGPL2.1 (see COPYING-LGPL2.1) or later
9 */
10
11 #ifndef BIT_VECTOR_HPP
12 #define BIT_VECTOR_HPP
13
14 #include "common/Formatter.h"
15 #include "include/ceph_assert.h"
16 #include "include/encoding.h"
17 #include <memory>
18 #include <utility>
19 #include <vector>
20
21 namespace ceph {
22
23 template <uint8_t _bit_count>
24 class BitVector
25 {
26 private:
27 static const uint8_t BITS_PER_BYTE = 8;
28 static const uint32_t ELEMENTS_PER_BLOCK = BITS_PER_BYTE / _bit_count;
29 static const uint8_t MASK = static_cast<uint8_t>((1 << _bit_count) - 1);
30
31 // must be power of 2
32 BOOST_STATIC_ASSERT((_bit_count != 0) && !(_bit_count & (_bit_count - 1)));
33 BOOST_STATIC_ASSERT(_bit_count <= BITS_PER_BYTE);
34
35 template <typename DataIterator>
36 class ReferenceImpl {
37 protected:
38 DataIterator m_data_iterator;
39 uint64_t m_shift;
40
41 ReferenceImpl(const DataIterator& data_iterator, uint64_t shift)
42 : m_data_iterator(data_iterator), m_shift(shift) {
43 }
44 ReferenceImpl(DataIterator&& data_iterator, uint64_t shift)
45 : m_data_iterator(std::move(data_iterator)), m_shift(shift) {
46 }
47
48 public:
49 inline operator uint8_t() const {
50 return (*m_data_iterator >> m_shift) & MASK;
51 }
52 };
53
54 public:
55
56 class ConstReference : public ReferenceImpl<bufferlist::const_iterator> {
57 private:
58 friend class BitVector;
59
60 ConstReference(const bufferlist::const_iterator& data_iterator,
61 uint64_t shift)
62 : ReferenceImpl<bufferlist::const_iterator>(data_iterator, shift) {
63 }
64 ConstReference(bufferlist::const_iterator&& data_iterator, uint64_t shift)
65 : ReferenceImpl<bufferlist::const_iterator>(std::move(data_iterator),
66 shift) {
67 }
68 };
69
70 class Reference : public ReferenceImpl<bufferlist::iterator> {
71 public:
72 Reference& operator=(uint8_t v);
73
74 private:
75 friend class BitVector;
76
77 Reference(const bufferlist::iterator& data_iterator, uint64_t shift)
78 : ReferenceImpl<bufferlist::iterator>(data_iterator, shift) {
79 }
80 Reference(bufferlist::iterator&& data_iterator, uint64_t shift)
81 : ReferenceImpl<bufferlist::iterator>(std::move(data_iterator), shift) {
82 }
83 };
84
85 public:
86 template <typename BitVectorT, typename DataIterator>
87 class IteratorImpl {
88 private:
89 friend class BitVector;
90
91 uint64_t m_offset = 0;
92 BitVectorT *m_bit_vector;
93
94 // cached derived values
95 uint64_t m_index = 0;
96 uint64_t m_shift = 0;
97 DataIterator m_data_iterator;
98
99 IteratorImpl(BitVectorT *bit_vector, uint64_t offset)
100 : m_bit_vector(bit_vector),
101 m_data_iterator(bit_vector->m_data.begin()) {
102 *this += offset;
103 }
104
105 public:
106 inline IteratorImpl& operator++() {
107 ++m_offset;
108
109 uint64_t index;
110 compute_index(m_offset, &index, &m_shift);
111
112 ceph_assert(index == m_index || index == m_index + 1);
113 if (index > m_index) {
114 m_index = index;
115 ++m_data_iterator;
116 }
117 return *this;
118 }
119 inline IteratorImpl& operator+=(uint64_t offset) {
120 m_offset += offset;
121 compute_index(m_offset, &m_index, &m_shift);
122 if (m_offset < m_bit_vector->size()) {
123 m_data_iterator.seek(m_index);
124 } else {
125 m_data_iterator = m_bit_vector->m_data.end();
126 }
127 return *this;
128 }
129
130 inline IteratorImpl operator++(int) {
131 IteratorImpl iterator_impl(*this);
132 ++iterator_impl;
133 return iterator_impl;
134 }
135 inline IteratorImpl operator+(uint64_t offset) {
136 IteratorImpl iterator_impl(*this);
137 iterator_impl += offset;
138 return iterator_impl;
139 }
140
141 inline bool operator==(const IteratorImpl& rhs) const {
142 return (m_offset == rhs.m_offset && m_bit_vector == rhs.m_bit_vector);
143 }
144 inline bool operator!=(const IteratorImpl& rhs) const {
145 return (m_offset != rhs.m_offset || m_bit_vector != rhs.m_bit_vector);
146 }
147
148 inline ConstReference operator*() const {
149 return ConstReference(m_data_iterator, m_shift);
150 }
151 inline Reference operator*() {
152 return Reference(m_data_iterator, m_shift);
153 }
154 };
155
156 typedef IteratorImpl<const BitVector,
157 bufferlist::const_iterator> ConstIterator;
158 typedef IteratorImpl<BitVector, bufferlist::iterator> Iterator;
159
160 static const uint32_t BLOCK_SIZE;
161 static const uint8_t BIT_COUNT = _bit_count;
162
163 BitVector();
164
165 inline ConstIterator begin() const {
166 return ConstIterator(this, 0);
167 }
168 inline ConstIterator end() const {
169 return ConstIterator(this, m_size);
170 }
171 inline Iterator begin() {
172 return Iterator(this, 0);
173 }
174 inline Iterator end() {
175 return Iterator(this, m_size);
176 }
177
178 void set_crc_enabled(bool enabled) {
179 m_crc_enabled = enabled;
180 }
181 void clear();
182
183 void resize(uint64_t elements);
184 uint64_t size() const;
185
186 const bufferlist& get_data() const;
187
188 Reference operator[](uint64_t offset);
189 ConstReference operator[](uint64_t offset) const;
190
191 void encode_header(bufferlist& bl) const;
192 void decode_header(bufferlist::const_iterator& it);
193 uint64_t get_header_length() const;
194
195 void encode_data(bufferlist& bl, uint64_t data_byte_offset,
196 uint64_t byte_length) const;
197 void decode_data(bufferlist::const_iterator& it, uint64_t data_byte_offset);
198 void get_data_extents(uint64_t offset, uint64_t length,
199 uint64_t *data_byte_offset,
200 uint64_t *object_byte_offset,
201 uint64_t *byte_length) const;
202
203 void encode_footer(bufferlist& bl) const;
204 void decode_footer(bufferlist::const_iterator& it);
205 uint64_t get_footer_offset() const;
206
207 void decode_header_crc(bufferlist::const_iterator& it);
208 void get_header_crc_extents(uint64_t *byte_offset,
209 uint64_t *byte_length) const;
210
211 void encode_data_crcs(bufferlist& bl, uint64_t offset,
212 uint64_t length) const;
213 void decode_data_crcs(bufferlist::const_iterator& it, uint64_t offset);
214 void get_data_crcs_extents(uint64_t offset, uint64_t length,
215 uint64_t *byte_offset,
216 uint64_t *byte_length) const;
217
218 void encode(bufferlist& bl) const;
219 void decode(bufferlist::const_iterator& it);
220 void dump(Formatter *f) const;
221
222 bool operator==(const BitVector &b) const;
223
224 static void generate_test_instances(std::list<BitVector *> &o);
225 private:
226 struct NoInitAllocator : public std::allocator<__u32> {
227 NoInitAllocator() {}
228 NoInitAllocator(const std::allocator<__u32>& alloc)
229 : std::allocator<__u32>(alloc) {
230 }
231
232 template <class U, class... Args>
233 void construct(U* p, Args&&... args) const {
234 }
235 };
236
237 bufferlist m_data;
238 uint64_t m_size;
239 bool m_crc_enabled;
240
241 mutable __u32 m_header_crc;
242 mutable std::vector<__u32, NoInitAllocator> m_data_crcs;
243
244 void resize(uint64_t elements, bool zero);
245
246 static void compute_index(uint64_t offset, uint64_t *index, uint64_t *shift);
247
248 };
249
250 template <uint8_t _b>
251 const uint32_t BitVector<_b>::BLOCK_SIZE = 4096;
252
253 template <uint8_t _b>
254 BitVector<_b>::BitVector() : m_size(0), m_crc_enabled(true), m_header_crc(0)
255 {
256 }
257
258 template <uint8_t _b>
259 void BitVector<_b>::clear() {
260 m_data.clear();
261 m_data_crcs.clear();
262 m_size = 0;
263 m_header_crc = 0;
264 }
265
266 template <uint8_t _b>
267 void BitVector<_b>::resize(uint64_t size) {
268 resize(size, true);
269 }
270
271 template <uint8_t _b>
272 void BitVector<_b>::resize(uint64_t size, bool zero) {
273 uint64_t buffer_size = (size + ELEMENTS_PER_BLOCK - 1) / ELEMENTS_PER_BLOCK;
274 if (buffer_size > m_data.length()) {
275 if (zero) {
276 m_data.append_zero(buffer_size - m_data.length());
277 } else {
278 m_data.append(std::move(buffer::ptr(buffer_size - m_data.length())));
279 }
280 } else if (buffer_size < m_data.length()) {
281 bufferlist bl;
282 bl.substr_of(m_data, 0, buffer_size);
283 bl.swap(m_data);
284 }
285 m_size = size;
286
287 uint64_t block_count = (buffer_size + BLOCK_SIZE - 1) / BLOCK_SIZE;
288 m_data_crcs.resize(block_count);
289 }
290
291 template <uint8_t _b>
292 uint64_t BitVector<_b>::size() const {
293 return m_size;
294 }
295
296 template <uint8_t _b>
297 const bufferlist& BitVector<_b>::get_data() const {
298 return m_data;
299 }
300
301 template <uint8_t _b>
302 void BitVector<_b>::compute_index(uint64_t offset, uint64_t *index, uint64_t *shift) {
303 *index = offset / ELEMENTS_PER_BLOCK;
304 *shift = ((ELEMENTS_PER_BLOCK - 1) - (offset % ELEMENTS_PER_BLOCK)) * _b;
305 }
306
307 template <uint8_t _b>
308 void BitVector<_b>::encode_header(bufferlist& bl) const {
309 bufferlist header_bl;
310 ENCODE_START(1, 1, header_bl);
311 encode(m_size, header_bl);
312 ENCODE_FINISH(header_bl);
313 m_header_crc = header_bl.crc32c(0);
314
315 encode(header_bl, bl);
316 }
317
318 template <uint8_t _b>
319 void BitVector<_b>::decode_header(bufferlist::const_iterator& it) {
320 using ceph::decode;
321 bufferlist header_bl;
322 decode(header_bl, it);
323
324 auto header_it = header_bl.cbegin();
325 uint64_t size;
326 DECODE_START(1, header_it);
327 decode(size, header_it);
328 DECODE_FINISH(header_it);
329
330 resize(size, false);
331 m_header_crc = header_bl.crc32c(0);
332 }
333
334 template <uint8_t _b>
335 uint64_t BitVector<_b>::get_header_length() const {
336 // 4 byte bl length, 6 byte encoding header, 8 byte size
337 return 18;
338 }
339
340 template <uint8_t _b>
341 void BitVector<_b>::encode_data(bufferlist& bl, uint64_t data_byte_offset,
342 uint64_t byte_length) const {
343 ceph_assert(data_byte_offset % BLOCK_SIZE == 0);
344 ceph_assert(data_byte_offset + byte_length == m_data.length() ||
345 byte_length % BLOCK_SIZE == 0);
346
347 uint64_t end_offset = data_byte_offset + byte_length;
348 while (data_byte_offset < end_offset) {
349 uint64_t len = std::min<uint64_t>(BLOCK_SIZE,
350 end_offset - data_byte_offset);
351
352 bufferlist bit;
353 bit.substr_of(m_data, data_byte_offset, len);
354 m_data_crcs[data_byte_offset / BLOCK_SIZE] = bit.crc32c(0);
355
356 bl.claim_append(bit);
357 data_byte_offset += BLOCK_SIZE;
358 }
359 }
360
361 template <uint8_t _b>
362 void BitVector<_b>::decode_data(bufferlist::const_iterator& it,
363 uint64_t data_byte_offset) {
364 ceph_assert(data_byte_offset % BLOCK_SIZE == 0);
365 if (it.end()) {
366 return;
367 }
368
369 uint64_t end_offset = data_byte_offset + it.get_remaining();
370 if (end_offset > m_data.length()) {
371 throw buffer::end_of_buffer();
372 }
373
374 bufferlist data;
375 if (data_byte_offset > 0) {
376 data.substr_of(m_data, 0, data_byte_offset);
377 }
378
379 while (data_byte_offset < end_offset) {
380 uint64_t len = std::min<uint64_t>(BLOCK_SIZE, end_offset - data_byte_offset);
381
382 bufferptr ptr;
383 it.copy_deep(len, ptr);
384
385 bufferlist bit;
386 bit.append(ptr);
387 if (m_crc_enabled &&
388 m_data_crcs[data_byte_offset / BLOCK_SIZE] != bit.crc32c(0)) {
389 throw buffer::malformed_input("invalid data block CRC");
390 }
391 data.append(bit);
392 data_byte_offset += bit.length();
393 }
394
395 if (m_data.length() > end_offset) {
396 bufferlist tail;
397 tail.substr_of(m_data, end_offset, m_data.length() - end_offset);
398 data.append(tail);
399 }
400 ceph_assert(data.length() == m_data.length());
401 data.swap(m_data);
402 }
403
404 template <uint8_t _b>
405 void BitVector<_b>::get_data_extents(uint64_t offset, uint64_t length,
406 uint64_t *data_byte_offset,
407 uint64_t *object_byte_offset,
408 uint64_t *byte_length) const {
409 // read BLOCK_SIZE-aligned chunks
410 ceph_assert(length > 0 && offset + length <= m_size);
411 uint64_t shift;
412 compute_index(offset, data_byte_offset, &shift);
413 *data_byte_offset -= (*data_byte_offset % BLOCK_SIZE);
414
415 uint64_t end_offset;
416 compute_index(offset + length - 1, &end_offset, &shift);
417 end_offset += (BLOCK_SIZE - (end_offset % BLOCK_SIZE));
418 ceph_assert(*data_byte_offset <= end_offset);
419
420 *object_byte_offset = get_header_length() + *data_byte_offset;
421 *byte_length = end_offset - *data_byte_offset;
422 if (*data_byte_offset + *byte_length > m_data.length()) {
423 *byte_length = m_data.length() - *data_byte_offset;
424 }
425 }
426
427 template <uint8_t _b>
428 void BitVector<_b>::encode_footer(bufferlist& bl) const {
429 using ceph::encode;
430 bufferlist footer_bl;
431 if (m_crc_enabled) {
432 encode(m_header_crc, footer_bl);
433
434 __u32 size = m_data_crcs.size();
435 encode(size, footer_bl);
436 encode_data_crcs(footer_bl, 0, m_size);
437 }
438 encode(footer_bl, bl);
439 }
440
441 template <uint8_t _b>
442 void BitVector<_b>::decode_footer(bufferlist::const_iterator& it) {
443 using ceph::decode;
444 bufferlist footer_bl;
445 decode(footer_bl, it);
446
447 m_crc_enabled = (footer_bl.length() > 0);
448 if (m_crc_enabled) {
449 auto footer_it = footer_bl.cbegin();
450 decode_header_crc(footer_it);
451
452 __u32 data_src_size;
453 decode(data_src_size, footer_it);
454 decode_data_crcs(footer_it, 0);
455
456 uint64_t block_count = (m_data.length() + BLOCK_SIZE - 1) / BLOCK_SIZE;
457 if (m_data_crcs.size() != block_count) {
458 throw buffer::malformed_input("invalid data block CRCs");
459 }
460 }
461 }
462
463 template <uint8_t _b>
464 uint64_t BitVector<_b>::get_footer_offset() const {
465 return get_header_length() + m_data.length();
466 }
467
468 template <uint8_t _b>
469 void BitVector<_b>::decode_header_crc(bufferlist::const_iterator& it) {
470 if (it.get_remaining() > 0) {
471 __u32 header_crc;
472 ceph::decode(header_crc, it);
473 if (m_header_crc != header_crc) {
474 throw buffer::malformed_input("incorrect header CRC");
475 }
476 }
477 }
478
479 template <uint8_t _b>
480 void BitVector<_b>::get_header_crc_extents(uint64_t *byte_offset,
481 uint64_t *byte_length) const {
482 // footer is prefixed with a bufferlist length
483 *byte_offset = get_footer_offset() + sizeof(__u32);
484 *byte_length = sizeof(__u32);
485 }
486
487 template <uint8_t _b>
488 void BitVector<_b>::encode_data_crcs(bufferlist& bl, uint64_t offset,
489 uint64_t length) const {
490 if (length == 0) {
491 return;
492 }
493
494 uint64_t index;
495 uint64_t shift;
496 compute_index(offset, &index, &shift);
497 uint64_t crc_index = index / BLOCK_SIZE;
498
499 compute_index(offset + length - 1, &index, &shift);
500 uint64_t end_crc_index = index / BLOCK_SIZE;
501 while (crc_index <= end_crc_index) {
502 __u32 crc = m_data_crcs[crc_index++];
503 ceph::encode(crc, bl);
504 }
505 }
506
507 template <uint8_t _b>
508 void BitVector<_b>::decode_data_crcs(bufferlist::const_iterator& it,
509 uint64_t offset) {
510 if (it.end()) {
511 return;
512 }
513
514 uint64_t index;
515 uint64_t shift;
516 compute_index(offset, &index, &shift);
517
518 uint64_t crc_index = index / BLOCK_SIZE;
519 uint64_t remaining = it.get_remaining() / sizeof(__u32);
520 while (remaining > 0) {
521 __u32 crc;
522 ceph::decode(crc, it);
523 m_data_crcs[crc_index++] = crc;
524 --remaining;
525 }
526 }
527
528 template <uint8_t _b>
529 void BitVector<_b>::get_data_crcs_extents(uint64_t offset, uint64_t length,
530 uint64_t *byte_offset,
531 uint64_t *byte_length) const {
532 // data CRCs immediately follow the header CRC
533 get_header_crc_extents(byte_offset, byte_length);
534 *byte_offset += *byte_length;
535
536 // skip past data CRC vector size
537 *byte_offset += sizeof(__u32);
538
539 // CRCs are computed over BLOCK_SIZE chunks
540 ceph_assert(length > 0 && offset + length <= m_size);
541 uint64_t index;
542 uint64_t shift;
543 compute_index(offset, &index, &shift);
544 uint64_t start_byte_offset =
545 *byte_offset + ((index / BLOCK_SIZE) * sizeof(__u32));
546
547 compute_index(offset + length, &index, &shift);
548 uint64_t end_byte_offset =
549 *byte_offset + (((index / BLOCK_SIZE) + 1) * sizeof(__u32));
550 ceph_assert(start_byte_offset < end_byte_offset);
551
552 *byte_offset = start_byte_offset;
553 *byte_length = end_byte_offset - start_byte_offset;
554 }
555
556 template <uint8_t _b>
557 void BitVector<_b>::encode(bufferlist& bl) const {
558 encode_header(bl);
559 encode_data(bl, 0, m_data.length());
560 encode_footer(bl);
561 }
562
563 template <uint8_t _b>
564 void BitVector<_b>::decode(bufferlist::const_iterator& it) {
565 decode_header(it);
566
567 bufferlist data_bl;
568 if (m_data.length() > 0) {
569 it.copy(m_data.length(), data_bl);
570 }
571
572 decode_footer(it);
573
574 auto data_it = data_bl.cbegin();
575 decode_data(data_it, 0);
576 }
577
578 template <uint8_t _b>
579 void BitVector<_b>::dump(Formatter *f) const {
580 f->dump_unsigned("size", m_size);
581 f->open_array_section("bit_table");
582 for (unsigned i = 0; i < m_data.length(); ++i) {
583 f->dump_format("byte", "0x%02hhX", m_data[i]);
584 }
585 f->close_section();
586 }
587
588 template <uint8_t _b>
589 bool BitVector<_b>::operator==(const BitVector &b) const {
590 return (this->m_size == b.m_size && this->m_data == b.m_data);
591 }
592
593 template <uint8_t _b>
594 typename BitVector<_b>::Reference BitVector<_b>::operator[](uint64_t offset) {
595 uint64_t index;
596 uint64_t shift;
597 compute_index(offset, &index, &shift);
598
599 bufferlist::iterator data_iterator(m_data.begin());
600 data_iterator.seek(index);
601 return Reference(std::move(data_iterator), shift);
602 }
603
604 template <uint8_t _b>
605 typename BitVector<_b>::ConstReference BitVector<_b>::operator[](uint64_t offset) const {
606 uint64_t index;
607 uint64_t shift;
608 compute_index(offset, &index, &shift);
609
610 bufferlist::const_iterator data_iterator(m_data.begin());
611 data_iterator.seek(index);
612 return ConstReference(std::move(data_iterator), shift);
613 }
614
615 template <uint8_t _b>
616 typename BitVector<_b>::Reference& BitVector<_b>::Reference::operator=(uint8_t v) {
617 uint8_t mask = MASK << this->m_shift;
618 char packed_value = (*this->m_data_iterator & ~mask) |
619 ((v << this->m_shift) & mask);
620 bufferlist::iterator it(this->m_data_iterator);
621 it.copy_in(1, &packed_value, true);
622 return *this;
623 }
624
625 template <uint8_t _b>
626 void BitVector<_b>::generate_test_instances(std::list<BitVector *> &o) {
627 o.push_back(new BitVector());
628
629 BitVector *b = new BitVector();
630 const uint64_t radix = 1 << b->BIT_COUNT;
631 const uint64_t size = 1024;
632
633 b->resize(size, false);
634 for (uint64_t i = 0; i < size; ++i) {
635 (*b)[i] = rand() % radix;
636 }
637 o.push_back(b);
638 }
639
640
641 WRITE_CLASS_ENCODER(ceph::BitVector<2>)
642
643 template <uint8_t _b>
644 inline std::ostream& operator<<(std::ostream& out, const ceph::BitVector<_b> &b)
645 {
646 out << "ceph::BitVector<" << _b << ">(size=" << b.size() << ", data="
647 << b.get_data() << ")";
648 return out;
649 }
650 }
651
652 #endif // BIT_VECTOR_HPP