1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
17 #if defined(__linux__) || defined(__FreeBSD__)
23 # define _XOPEN_SOURCE 600
29 #if defined(__linux__) // For malloc(2).
38 # include <sys/mman.h>
47 #include <type_traits>
51 #include "buffer_fwd.h"
54 # include "include/assert.h"
59 #include "inline_memory.h"
62 #define CEPH_BUFFER_API __attribute__ ((visibility ("default")))
64 #define CEPH_BUFFER_API
69 class XioDispatchHook
;
75 namespace buffer CEPH_BUFFER_API
{
80 struct error
: public std::exception
{
81 const char *what() const throw () override
;
83 struct bad_alloc
: public error
{
84 const char *what() const throw () override
;
86 struct end_of_buffer
: public error
{
87 const char *what() const throw () override
;
89 struct malformed_input
: public error
{
90 explicit malformed_input(const std::string
& w
) {
91 snprintf(buf
, sizeof(buf
), "buffer::malformed_input: %s", w
.c_str());
93 const char *what() const throw () override
;
97 struct error_code
: public malformed_input
{
98 explicit error_code(int error
);
103 /// total bytes allocated
104 int get_total_alloc();
106 /// history total bytes allocated
107 uint64_t get_history_alloc_bytes();
109 /// total num allocated
110 uint64_t get_history_alloc_num();
112 /// enable/disable alloc tracking
113 void track_alloc(bool b
);
115 /// count of cached crc hits (matching input)
116 int get_cached_crc();
117 /// count of cached crc hits (mismatching input, required adjustment)
118 int get_cached_crc_adjusted();
119 /// count of crc cache misses
120 int get_missed_crc();
121 /// enable/disable tracking of cached crcs
122 void track_cached_crc(bool b
);
124 /// count of calls to buffer::ptr::c_str()
125 int get_c_str_accesses();
126 /// enable/disable tracking of buffer::ptr::c_str() calls
127 void track_c_str(bool b
);
130 * an abstract raw buffer, with a reference count.
135 class raw_mmap_pages
;
136 class raw_posix_aligned
;
137 class raw_hack_aligned
;
140 class raw_unshareable
; // diagnostic, unshareable char buffer
142 class raw_claim_buffer
;
146 class xio_msg_buffer
;
151 raw
* copy(const char *c
, unsigned len
);
152 raw
* create(unsigned len
);
153 raw
* claim_char(unsigned len
, char *buf
);
154 raw
* create_malloc(unsigned len
);
155 raw
* claim_malloc(unsigned len
, char *buf
);
156 raw
* create_static(unsigned len
, char *buf
);
157 raw
* create_aligned(unsigned len
, unsigned align
);
158 raw
* create_page_aligned(unsigned len
);
159 raw
* create_zero_copy(unsigned len
, int fd
, int64_t *offset
);
160 raw
* create_unshareable(unsigned len
);
161 raw
* create_static(unsigned len
, char *buf
);
162 raw
* claim_buffer(unsigned len
, char *buf
, deleter del
);
164 #if defined(HAVE_XIO)
165 raw
* create_msg(unsigned len
, char *buf
, XioDispatchHook
*m_hook
);
169 * a buffer pointer. references (a subsequence of) a raw buffer.
171 class CEPH_BUFFER_API ptr
{
179 const ptr
*bp
; ///< parent ptr
180 const char *start
; ///< starting pointer into bp->c_str()
181 const char *pos
; ///< pointer into bp->c_str()
182 const char *end_ptr
; ///< pointer to bp->end_c_str()
183 bool deep
; ///< if true, do not allow shallow ptr copies
185 iterator(const ptr
*p
, size_t offset
, bool d
)
187 start(p
->c_str() + offset
),
189 end_ptr(p
->end_c_str()),
195 const char *get_pos_add(size_t n
) {
199 throw end_of_buffer();
203 ptr
get_ptr(size_t len
) {
205 return buffer::copy(get_pos_add(len
), len
);
207 size_t off
= pos
- bp
->c_str();
210 throw end_of_buffer();
211 return ptr(*bp
, off
, len
);
214 ptr
get_preceding_ptr(size_t len
) {
216 return buffer::copy(get_pos() - len
, len
);
218 size_t off
= pos
- bp
->c_str();
219 return ptr(*bp
, off
- len
, len
);
223 void advance(size_t len
) {
226 throw end_of_buffer();
229 const char *get_pos() {
232 const char *get_end() {
236 size_t get_offset() {
241 return pos
== end_ptr
;
245 ptr() : _raw(0), _off(0), _len(0) {}
246 // cppcheck-suppress noExplicitConstructor
248 // cppcheck-suppress noExplicitConstructor
250 ptr(const char *d
, unsigned l
);
252 ptr(ptr
&& p
) noexcept
;
253 ptr(const ptr
& p
, unsigned o
, unsigned l
);
254 ptr
& operator= (const ptr
& p
);
255 ptr
& operator= (ptr
&& p
) noexcept
;
260 bool have_raw() const { return _raw
? true:false; }
263 void swap(ptr
& other
);
264 ptr
& make_shareable();
266 iterator
begin(size_t offset
=0) const {
267 return iterator(this, offset
, false);
269 iterator
begin_deep(size_t offset
=0) const {
270 return iterator(this, offset
, true);
274 bool at_buffer_head() const { return _off
== 0; }
275 bool at_buffer_tail() const;
277 bool is_aligned(unsigned align
) const {
278 return ((long)c_str() & (align
-1)) == 0;
280 bool is_page_aligned() const { return is_aligned(CEPH_PAGE_SIZE
); }
281 bool is_n_align_sized(unsigned align
) const
283 return (length() % align
) == 0;
285 bool is_n_page_sized() const { return is_n_align_sized(CEPH_PAGE_SIZE
); }
286 bool is_partial() const {
287 return have_raw() && (start() > 0 || end() < raw_length());
291 raw
*get_raw() const { return _raw
; }
292 const char *c_str() const;
294 const char *end_c_str() const;
296 unsigned length() const { return _len
; }
297 unsigned offset() const { return _off
; }
298 unsigned start() const { return _off
; }
299 unsigned end() const { return _off
+ _len
; }
300 unsigned unused_tail_length() const;
301 const char& operator[](unsigned n
) const;
302 char& operator[](unsigned n
);
304 const char *raw_c_str() const;
305 unsigned raw_length() const;
306 int raw_nref() const;
308 void copy_out(unsigned o
, unsigned l
, char *dest
) const;
310 bool can_zero_copy() const;
311 int zero_copy_to_fd(int fd
, int64_t *offset
) const;
315 int cmp(const ptr
& o
) const;
316 bool is_zero() const;
319 void set_offset(unsigned o
) {
320 assert(raw_length() >= o
);
323 void set_length(unsigned l
) {
324 assert(raw_length() >= l
);
328 unsigned append(char c
);
329 unsigned append(const char *p
, unsigned l
);
330 void copy_in(unsigned o
, unsigned l
, const char *src
);
331 void copy_in(unsigned o
, unsigned l
, const char *src
, bool crc_reset
);
333 void zero(bool crc_reset
);
334 void zero(unsigned o
, unsigned l
);
335 void zero(unsigned o
, unsigned l
, bool crc_reset
);
341 * list - the useful bit!
344 class CEPH_BUFFER_API list
{
346 std::list
<ptr
> _buffers
;
348 unsigned _memcopy_count
; //the total of memcopy using rebuild().
349 ptr append_buffer
; // where i put small appends.
355 template <bool is_const
>
356 class CEPH_BUFFER_API iterator_impl
357 : public std::iterator
<std::forward_iterator_tag
, char> {
359 typedef typename
std::conditional
<is_const
,
362 typedef typename
std::conditional
<is_const
,
363 const std::list
<ptr
>,
364 std::list
<ptr
> >::type list_t
;
365 typedef typename
std::conditional
<is_const
,
366 typename
std::list
<ptr
>::const_iterator
,
367 typename
std::list
<ptr
>::iterator
>::type list_iter_t
;
369 list_t
* ls
; // meh.. just here to avoid an extra pointer dereference..
370 unsigned off
; // in bl
372 unsigned p_off
; // in *p
373 friend class iterator_impl
<true>;
376 // constructor. position.
378 : bl(0), ls(0), off(0), p_off(0) {}
379 iterator_impl(bl_t
*l
, unsigned o
=0);
380 iterator_impl(bl_t
*l
, unsigned o
, list_iter_t ip
, unsigned po
)
381 : bl(l
), ls(&bl
->_buffers
), off(o
), p(ip
), p_off(po
) {}
382 iterator_impl(const list::iterator
& i
);
384 /// get current iterator offset in buffer::list
385 unsigned get_off() const { return off
; }
387 /// get number of bytes remaining from iterator position to the end of the buffer::list
388 unsigned get_remaining() const { return bl
->length() - off
; }
390 /// true if iterator is at the end of the buffer::list
392 return p
== ls
->end();
393 //return off == bl->length();
397 void seek(unsigned o
);
398 char operator*() const;
399 iterator_impl
& operator++();
400 ptr
get_current_ptr() const;
402 bl_t
& get_bl() const { return *bl
; }
405 // note that these all _append_ to dest!
406 void copy(unsigned len
, char *dest
);
407 // deprecated, use copy_deep()
408 void copy(unsigned len
, ptr
&dest
) __attribute__((deprecated
));
409 void copy_deep(unsigned len
, ptr
&dest
);
410 void copy_shallow(unsigned len
, ptr
&dest
);
411 void copy(unsigned len
, list
&dest
);
412 void copy(unsigned len
, std::string
&dest
);
413 void copy_all(list
&dest
);
415 // get a pointer to the current iterator position, return the
416 // number of bytes we can read from that position (up to want),
417 // and advance the iterator by that amount.
418 size_t get_ptr_and_advance(size_t want
, const char **p
);
420 /// calculate crc from iterator position
421 uint32_t crc32c(size_t length
, uint32_t crc
);
423 friend bool operator==(const iterator_impl
& lhs
,
424 const iterator_impl
& rhs
) {
425 return &lhs
.get_bl() == &rhs
.get_bl() && lhs
.get_off() == rhs
.get_off();
427 friend bool operator!=(const iterator_impl
& lhs
,
428 const iterator_impl
& rhs
) {
429 return &lhs
.get_bl() != &rhs
.get_bl() || lhs
.get_off() != rhs
.get_off();
434 typedef iterator_impl
<true> const_iterator
;
436 class CEPH_BUFFER_API iterator
: public iterator_impl
<false> {
438 iterator() = default;
439 iterator(bl_t
*l
, unsigned o
=0);
440 iterator(bl_t
*l
, unsigned o
, list_iter_t ip
, unsigned po
);
443 void seek(unsigned o
);
445 iterator
& operator++();
446 ptr
get_current_ptr();
449 void copy(unsigned len
, char *dest
);
450 // deprecated, use copy_deep()
451 void copy(unsigned len
, ptr
&dest
) __attribute__((deprecated
));
452 void copy_deep(unsigned len
, ptr
&dest
);
453 void copy_shallow(unsigned len
, ptr
&dest
);
454 void copy(unsigned len
, list
&dest
);
455 void copy(unsigned len
, std::string
&dest
);
456 void copy_all(list
&dest
);
459 void copy_in(unsigned len
, const char *src
);
460 void copy_in(unsigned len
, const char *src
, bool crc_reset
);
461 void copy_in(unsigned len
, const list
& otherl
);
463 bool operator==(const iterator
& rhs
) const {
464 return bl
== rhs
.bl
&& off
== rhs
.off
;
466 bool operator!=(const iterator
& rhs
) const {
467 return bl
!= rhs
.bl
|| off
!= rhs
.off
;
471 class contiguous_appender
{
477 /// running count of bytes appended that are not reflected by @pos
478 size_t out_of_band_offset
= 0;
480 contiguous_appender(bufferlist
*l
, size_t len
, bool d
)
483 size_t unused
= pbl
->append_buffer
.unused_tail_length();
485 // note: if len < the normal append_buffer size it *might*
486 // be better to allocate a normal-sized append_buffer and
487 // use part of it. however, that optimizes for the case of
488 // old-style types including new-style types. and in most
489 // such cases, this won't be the very first thing encoded to
490 // the list, so append_buffer will already be allocated.
491 // OTOH if everything is new-style, we *should* allocate
492 // only what we need and conserve memory.
493 bp
= buffer::create(len
);
496 pos
= pbl
->append_buffer
.end_c_str();
500 void flush_and_continue() {
502 // we allocated a new buffer
503 size_t l
= pos
- bp
.c_str();
504 pbl
->append(bufferptr(bp
, 0, l
));
505 bp
.set_length(bp
.length() - l
);
506 bp
.set_offset(bp
.offset() + l
);
508 // we are using pbl's append_buffer
509 size_t l
= pos
- pbl
->append_buffer
.end_c_str();
511 pbl
->append_buffer
.set_length(pbl
->append_buffer
.length() + l
);
512 pbl
->append(pbl
->append_buffer
, pbl
->append_buffer
.end() - l
, l
);
513 pos
= pbl
->append_buffer
.end_c_str();
521 ~contiguous_appender() {
523 // we allocated a new buffer
524 bp
.set_length(pos
- bp
.c_str());
525 pbl
->append(std::move(bp
));
527 // we are using pbl's append_buffer
528 size_t l
= pos
- pbl
->append_buffer
.end_c_str();
530 pbl
->append_buffer
.set_length(pbl
->append_buffer
.length() + l
);
531 pbl
->append(pbl
->append_buffer
, pbl
->append_buffer
.end() - l
, l
);
536 size_t get_out_of_band_offset() const {
537 return out_of_band_offset
;
539 void append(const char *p
, size_t l
) {
540 maybe_inline_memcpy(pos
, p
, l
, 16);
543 char *get_pos_add(size_t len
) {
552 void append(const bufferptr
& p
) {
557 append(p
.c_str(), p
.length());
559 flush_and_continue();
561 out_of_band_offset
+= p
.length();
564 void append(const bufferlist
& l
) {
569 for (const auto &p
: l
._buffers
) {
570 append(p
.c_str(), p
.length());
573 flush_and_continue();
575 out_of_band_offset
+= l
.length();
579 size_t get_logical_offset() {
581 return out_of_band_offset
+ (pos
- bp
.c_str());
583 return out_of_band_offset
+ (pos
- pbl
->append_buffer
.end_c_str());
588 contiguous_appender
get_contiguous_appender(size_t len
, bool deep
=false) {
589 return contiguous_appender(this, len
, deep
);
592 class page_aligned_appender
{
599 page_aligned_appender(list
*l
, unsigned min_pages
)
601 min_alloc(min_pages
* CEPH_PAGE_SIZE
),
602 pos(nullptr), end(nullptr) {}
607 ~page_aligned_appender() {
612 if (pos
&& pos
!= buffer
.c_str()) {
613 size_t len
= pos
- buffer
.c_str();
614 pbl
->append(buffer
, 0, len
);
615 buffer
.set_length(buffer
.length() - len
);
616 buffer
.set_offset(buffer
.offset() + len
);
620 void append(const char *buf
, size_t len
) {
623 size_t alloc
= (len
+ CEPH_PAGE_SIZE
- 1) & CEPH_PAGE_MASK
;
624 if (alloc
< min_alloc
) {
627 buffer
= create_page_aligned(alloc
);
628 pos
= buffer
.c_str();
629 end
= buffer
.end_c_str();
632 if (l
> (size_t)(end
- pos
)) {
640 pbl
->append(buffer
, 0, buffer
.length());
647 page_aligned_appender
get_page_aligned_appender(unsigned min_pages
=1) {
648 return page_aligned_appender(this, min_pages
);
652 mutable iterator last_p
;
653 int zero_copy_to_fd(int fd
) const;
657 list() : _len(0), _memcopy_count(0), last_p(this) {}
658 // cppcheck-suppress noExplicitConstructor
659 list(unsigned prealloc
) : _len(0), _memcopy_count(0), last_p(this) {
663 list(const list
& other
) : _buffers(other
._buffers
), _len(other
._len
),
664 _memcopy_count(other
._memcopy_count
), last_p(this) {
668 list
& operator= (const list
& other
) {
669 if (this != &other
) {
670 _buffers
= other
._buffers
;
677 list
& operator= (list
&& other
) {
678 _buffers
= std::move(other
._buffers
);
680 _memcopy_count
= other
._memcopy_count
;
682 append_buffer
.swap(other
.append_buffer
);
687 unsigned get_num_buffers() const { return _buffers
.size(); }
688 const ptr
& front() const { return _buffers
.front(); }
689 const ptr
& back() const { return _buffers
.back(); }
691 unsigned get_memcopy_count() const {return _memcopy_count
; }
692 const std::list
<ptr
>& buffers() const { return _buffers
; }
693 void swap(list
& other
);
694 unsigned length() const {
696 // DEBUG: verify _len
698 for (std::list
<ptr
>::const_iterator it
= _buffers
.begin();
699 it
!= _buffers
.end();
701 len
+= (*it
).length();
708 bool contents_equal(buffer::list
& other
);
709 bool contents_equal(const buffer::list
& other
) const;
711 bool can_zero_copy() const;
712 bool is_provided_buffer(const char *dst
) const;
713 bool is_aligned(unsigned align
) const;
714 bool is_page_aligned() const;
715 bool is_n_align_sized(unsigned align
) const;
716 bool is_n_page_sized() const;
717 bool is_aligned_size_and_memory(unsigned align_size
,
718 unsigned align_memory
) const;
720 bool is_zero() const;
728 append_buffer
= ptr();
730 void push_front(ptr
& bp
) {
731 if (bp
.length() == 0)
733 _buffers
.push_front(bp
);
736 void push_front(ptr
&& bp
) {
737 if (bp
.length() == 0)
740 _buffers
.push_front(std::move(bp
));
742 void push_front(raw
*r
) {
745 void push_back(const ptr
& bp
) {
746 if (bp
.length() == 0)
748 _buffers
.push_back(bp
);
751 void push_back(ptr
&& bp
) {
752 if (bp
.length() == 0)
755 _buffers
.push_back(std::move(bp
));
757 void push_back(raw
*r
) {
762 void zero(unsigned o
, unsigned l
);
764 bool is_contiguous() const;
766 void rebuild(ptr
& nb
);
767 bool rebuild_aligned(unsigned align
);
768 bool rebuild_aligned_size_and_memory(unsigned align_size
,
769 unsigned align_memory
);
770 bool rebuild_page_aligned();
772 void reserve(size_t prealloc
) {
773 if (append_buffer
.unused_tail_length() < prealloc
) {
774 append_buffer
= buffer::create(prealloc
);
775 append_buffer
.set_length(0); // unused, so far.
779 // assignment-op with move semantics
780 const static unsigned int CLAIM_DEFAULT
= 0;
781 const static unsigned int CLAIM_ALLOW_NONSHAREABLE
= 1;
783 void claim(list
& bl
, unsigned int flags
= CLAIM_DEFAULT
);
784 void claim_append(list
& bl
, unsigned int flags
= CLAIM_DEFAULT
);
785 void claim_prepend(list
& bl
, unsigned int flags
= CLAIM_DEFAULT
);
787 // clone non-shareable buffers (make shareable)
788 void make_shareable() {
789 std::list
<buffer::ptr
>::iterator pb
;
790 for (pb
= _buffers
.begin(); pb
!= _buffers
.end(); ++pb
) {
791 (void) pb
->make_shareable();
795 // copy with explicit volatile-sharing semantics
796 void share(const list
& bl
)
800 std::list
<buffer::ptr
>::const_iterator pb
;
801 for (pb
= bl
._buffers
.begin(); pb
!= bl
._buffers
.end(); ++pb
) {
808 return iterator(this, 0);
811 return iterator(this, _len
, _buffers
.end(), 0);
814 const_iterator
begin() const {
815 return const_iterator(this, 0);
817 const_iterator
end() const {
818 return const_iterator(this, _len
, _buffers
.end(), 0);
822 // **** WARNING: these are horribly inefficient for large bufferlists. ****
823 void copy(unsigned off
, unsigned len
, char *dest
) const;
824 void copy(unsigned off
, unsigned len
, list
&dest
) const;
825 void copy(unsigned off
, unsigned len
, std::string
& dest
) const;
826 void copy_in(unsigned off
, unsigned len
, const char *src
);
827 void copy_in(unsigned off
, unsigned len
, const char *src
, bool crc_reset
);
828 void copy_in(unsigned off
, unsigned len
, const list
& src
);
831 void append(const char *data
, unsigned len
);
832 void append(const std::string
& s
) {
833 append(s
.data(), s
.length());
835 void append(const ptr
& bp
);
836 void append(ptr
&& bp
);
837 void append(const ptr
& bp
, unsigned off
, unsigned len
);
838 void append(const list
& bl
);
839 void append(std::istream
& in
);
840 void append_zero(unsigned len
);
841 void prepend_zero(unsigned len
);
846 const char& operator[](unsigned n
) const;
848 std::string
to_str() const;
850 void substr_of(const list
& other
, unsigned off
, unsigned len
);
852 /// return a pointer to a contiguous extent of the buffer,
853 /// reallocating as needed
854 char *get_contiguous(unsigned off
, ///< offset
855 unsigned len
); ///< length
858 void splice(unsigned off
, unsigned len
, list
*claim_by
=0 /*, bufferlist& replace_with */);
859 void write(int off
, int len
, std::ostream
& out
) const;
861 void encode_base64(list
& o
);
862 void decode_base64(list
& o
);
864 void write_stream(std::ostream
&out
) const;
865 void hexdump(std::ostream
&out
, bool trailing_newline
= true) const;
866 int read_file(const char *fn
, std::string
*error
);
867 ssize_t
read_fd(int fd
, size_t len
);
868 int read_fd_zero_copy(int fd
, size_t len
);
869 int write_file(const char *fn
, int mode
=0644);
870 int write_fd(int fd
) const;
871 int write_fd(int fd
, uint64_t offset
) const;
872 int write_fd_zero_copy(int fd
) const;
873 template<typename VectorT
>
874 void prepare_iov(VectorT
*piov
) const {
875 assert(_buffers
.size() <= IOV_MAX
);
876 piov
->resize(_buffers
.size());
878 for (auto& p
: _buffers
) {
879 (*piov
)[n
].iov_base
= (void *)p
.c_str();
880 (*piov
)[n
].iov_len
= p
.length();
884 uint32_t crc32c(uint32_t crc
) const;
885 void invalidate_crc();
889 * efficient hash of one or more bufferlists
897 // cppcheck-suppress noExplicitConstructor
898 hash(uint32_t init
) : crc(init
) { }
900 void update(buffer::list
& bl
) {
901 crc
= bl
.crc32c(crc
);
909 inline bool operator>(bufferlist
& l
, bufferlist
& r
) {
910 for (unsigned p
= 0; ; p
++) {
911 if (l
.length() > p
&& r
.length() == p
) return true;
912 if (l
.length() == p
) return false;
913 if (l
[p
] > r
[p
]) return true;
914 if (l
[p
] < r
[p
]) return false;
917 inline bool operator>=(bufferlist
& l
, bufferlist
& r
) {
918 for (unsigned p
= 0; ; p
++) {
919 if (l
.length() > p
&& r
.length() == p
) return true;
920 if (r
.length() == p
&& l
.length() == p
) return true;
921 if (l
.length() == p
&& r
.length() > p
) return false;
922 if (l
[p
] > r
[p
]) return true;
923 if (l
[p
] < r
[p
]) return false;
927 inline bool operator==(const bufferlist
&l
, const bufferlist
&r
) {
928 if (l
.length() != r
.length())
930 for (unsigned p
= 0; p
< l
.length(); p
++) {
936 inline bool operator<(bufferlist
& l
, bufferlist
& r
) {
939 inline bool operator<=(bufferlist
& l
, bufferlist
& r
) {
944 std::ostream
& operator<<(std::ostream
& out
, const buffer::ptr
& bp
);
946 std::ostream
& operator<<(std::ostream
& out
, const raw
&r
);
948 std::ostream
& operator<<(std::ostream
& out
, const buffer::list
& bl
);
950 std::ostream
& operator<<(std::ostream
& out
, const buffer::error
& e
);
952 inline bufferhash
& operator<<(bufferhash
& l
, bufferlist
&r
) {
959 #if defined(HAVE_XIO)
960 xio_reg_mem
* get_xio_mp(const buffer::ptr
& bp
);