1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
22 #include "include/ceph_assert.h"
23 #include "include/types.h"
24 #include "include/buffer_raw.h"
25 #include "include/compat.h"
26 #include "include/mempool.h"
28 #include "common/environment.h"
29 #include "common/errno.h"
30 #include "common/error_code.h"
31 #include "common/safe_io.h"
32 #include "common/strtol.h"
33 #include "common/likely.h"
34 #include "common/valgrind.h"
35 #include "common/deleter.h"
36 #include "common/RWLock.h"
37 #include "common/error_code.h"
38 #include "include/spinlock.h"
39 #include "include/scope_guard.h"
48 #define CEPH_BUFFER_ALLOC_UNIT 4096u
49 #define CEPH_BUFFER_APPEND_SIZE (CEPH_BUFFER_ALLOC_UNIT - sizeof(raw_combined))
52 static ceph::spinlock debug_lock
;
53 # define bdout { std::lock_guard<ceph::spinlock> lg(debug_lock); std::cout
54 # define bendl std::endl; }
56 # define bdout if (0) { std::cout
57 # define bendl std::endl; }
60 static ceph::atomic
<unsigned> buffer_cached_crc
{ 0 };
61 static ceph::atomic
<unsigned> buffer_cached_crc_adjusted
{ 0 };
62 static ceph::atomic
<unsigned> buffer_missed_crc
{ 0 };
64 static bool buffer_track_crc
= get_env_bool("CEPH_BUFFER_TRACK");
66 void buffer::track_cached_crc(bool b
) {
69 int buffer::get_cached_crc() {
70 return buffer_cached_crc
;
72 int buffer::get_cached_crc_adjusted() {
73 return buffer_cached_crc_adjusted
;
76 int buffer::get_missed_crc() {
77 return buffer_missed_crc
;
81 * raw_combined is always placed within a single allocation along
82 * with the data buffer. the data goes at the beginning, and
83 * raw_combined at the end.
85 class buffer::raw_combined
: public buffer::raw
{
88 raw_combined(char *dataptr
, unsigned l
, unsigned align
,
90 : raw(dataptr
, l
, mempool
),
93 raw
* clone_empty() override
{
94 return create(len
, alignment
).release();
97 static ceph::unique_leakable_ptr
<buffer::raw
>
100 int mempool
= mempool::mempool_buffer_anon
)
103 align
= sizeof(size_t);
104 size_t rawlen
= round_up_to(sizeof(buffer::raw_combined
),
105 alignof(buffer::raw_combined
));
106 size_t datalen
= round_up_to(len
, alignof(buffer::raw_combined
));
109 char *ptr
= (char *) valloc(rawlen
+ datalen
);
112 int r
= ::posix_memalign((void**)(void*)&ptr
, align
, rawlen
+ datalen
);
119 // actual data first, since it has presumably larger alignment restriction
120 // then put the raw_combined at the end
121 return ceph::unique_leakable_ptr
<buffer::raw
>(
122 new (ptr
+ datalen
) raw_combined(ptr
, len
, align
, mempool
));
125 static void operator delete(void *ptr
) {
126 raw_combined
*raw
= (raw_combined
*)ptr
;
127 aligned_free((void *)raw
->data
);
131 class buffer::raw_malloc
: public buffer::raw
{
133 MEMPOOL_CLASS_HELPERS();
135 explicit raw_malloc(unsigned l
) : raw(l
) {
137 data
= (char *)malloc(len
);
143 bdout
<< "raw_malloc " << this << " alloc " << (void *)data
<< " " << l
<< bendl
;
145 raw_malloc(unsigned l
, char *b
) : raw(b
, l
) {
146 bdout
<< "raw_malloc " << this << " alloc " << (void *)data
<< " " << l
<< bendl
;
148 ~raw_malloc() override
{
150 bdout
<< "raw_malloc " << this << " free " << (void *)data
<< " " << bendl
;
152 raw
* clone_empty() override
{
153 return new raw_malloc(len
);
158 class buffer::raw_posix_aligned
: public buffer::raw
{
161 MEMPOOL_CLASS_HELPERS();
163 raw_posix_aligned(unsigned l
, unsigned _align
) : raw(l
) {
165 ceph_assert((align
>= sizeof(void *)) && (align
& (align
- 1)) == 0);
167 data
= (char *) valloc(len
);
169 int r
= ::posix_memalign((void**)(void*)&data
, align
, len
);
175 bdout
<< "raw_posix_aligned " << this << " alloc " << (void *)data
176 << " l=" << l
<< ", align=" << align
<< bendl
;
178 ~raw_posix_aligned() override
{
180 bdout
<< "raw_posix_aligned " << this << " free " << (void *)data
<< bendl
;
182 raw
* clone_empty() override
{
183 return new raw_posix_aligned(len
, align
);
189 class buffer::raw_hack_aligned
: public buffer::raw
{
193 raw_hack_aligned(unsigned l
, unsigned _align
) : raw(l
) {
195 realdata
= new char[len
+align
-1];
196 unsigned off
= ((uintptr_t)realdata
) & (align
-1);
198 data
= realdata
+ align
- off
;
201 //cout << "hack aligned " << (unsigned)data
202 //<< " in raw " << (unsigned)realdata
203 //<< " off " << off << std::endl;
204 ceph_assert(((uintptr_t)data
& (align
-1)) == 0);
206 ~raw_hack_aligned() {
210 return new raw_hack_aligned(len
, align
);
216 * primitive buffer types
218 class buffer::raw_char
: public buffer::raw
{
220 MEMPOOL_CLASS_HELPERS();
222 explicit raw_char(unsigned l
) : raw(l
) {
224 data
= new char[len
];
227 bdout
<< "raw_char " << this << " alloc " << (void *)data
<< " " << l
<< bendl
;
229 raw_char(unsigned l
, char *b
) : raw(b
, l
) {
230 bdout
<< "raw_char " << this << " alloc " << (void *)data
<< " " << l
<< bendl
;
232 ~raw_char() override
{
234 bdout
<< "raw_char " << this << " free " << (void *)data
<< bendl
;
236 raw
* clone_empty() override
{
237 return new raw_char(len
);
241 class buffer::raw_claimed_char
: public buffer::raw
{
243 MEMPOOL_CLASS_HELPERS();
245 explicit raw_claimed_char(unsigned l
, char *b
) : raw(b
, l
) {
246 bdout
<< "raw_claimed_char " << this << " alloc " << (void *)data
247 << " " << l
<< bendl
;
249 ~raw_claimed_char() override
{
250 bdout
<< "raw_claimed_char " << this << " free " << (void *)data
253 raw
* clone_empty() override
{
254 return new raw_char(len
);
258 class buffer::raw_static
: public buffer::raw
{
260 MEMPOOL_CLASS_HELPERS();
262 raw_static(const char *d
, unsigned l
) : raw((char*)d
, l
) { }
263 ~raw_static() override
{}
264 raw
* clone_empty() override
{
265 return new buffer::raw_char(len
);
269 class buffer::raw_claim_buffer
: public buffer::raw
{
272 raw_claim_buffer(const char *b
, unsigned l
, deleter d
)
273 : raw((char*)b
, l
), del(std::move(d
)) { }
274 ~raw_claim_buffer() override
{}
275 raw
* clone_empty() override
{
276 return new buffer::raw_char(len
);
280 ceph::unique_leakable_ptr
<buffer::raw
> buffer::copy(const char *c
, unsigned len
) {
281 auto r
= buffer::create_aligned(len
, sizeof(size_t));
282 memcpy(r
->get_data(), c
, len
);
286 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create(unsigned len
) {
287 return buffer::create_aligned(len
, sizeof(size_t));
289 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create(unsigned len
, char c
) {
290 auto ret
= buffer::create_aligned(len
, sizeof(size_t));
291 memset(ret
->get_data(), c
, len
);
294 ceph::unique_leakable_ptr
<buffer::raw
>
295 buffer::create_in_mempool(unsigned len
, int mempool
) {
296 return buffer::create_aligned_in_mempool(len
, sizeof(size_t), mempool
);
298 ceph::unique_leakable_ptr
<buffer::raw
>
299 buffer::claim_char(unsigned len
, char *buf
) {
300 return ceph::unique_leakable_ptr
<buffer::raw
>(
301 new raw_claimed_char(len
, buf
));
303 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_malloc(unsigned len
) {
304 return ceph::unique_leakable_ptr
<buffer::raw
>(new raw_malloc(len
));
306 ceph::unique_leakable_ptr
<buffer::raw
>
307 buffer::claim_malloc(unsigned len
, char *buf
) {
308 return ceph::unique_leakable_ptr
<buffer::raw
>(new raw_malloc(len
, buf
));
310 ceph::unique_leakable_ptr
<buffer::raw
>
311 buffer::create_static(unsigned len
, char *buf
) {
312 return ceph::unique_leakable_ptr
<buffer::raw
>(new raw_static(buf
, len
));
314 ceph::unique_leakable_ptr
<buffer::raw
>
315 buffer::claim_buffer(unsigned len
, char *buf
, deleter del
) {
316 return ceph::unique_leakable_ptr
<buffer::raw
>(
317 new raw_claim_buffer(buf
, len
, std::move(del
)));
320 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_aligned_in_mempool(
321 unsigned len
, unsigned align
, int mempool
)
323 // If alignment is a page multiple, use a separate buffer::raw to
324 // avoid fragmenting the heap.
326 // Somewhat unexpectedly, I see consistently better performance
327 // from raw_combined than from raw even when the allocation size is
328 // a page multiple (but alignment is not).
330 // I also see better performance from a separate buffer::raw once the
332 if ((align
& ~CEPH_PAGE_MASK
) == 0 ||
333 len
>= CEPH_PAGE_SIZE
* 2) {
335 return ceph::unique_leakable_ptr
<buffer::raw
>(new raw_posix_aligned(len
, align
));
337 return ceph::unique_leakable_ptr
<buffer::raw
>(new raw_hack_aligned(len
, align
));
340 return raw_combined::create(len
, align
, mempool
);
342 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_aligned(
343 unsigned len
, unsigned align
) {
344 return create_aligned_in_mempool(len
, align
,
345 mempool::mempool_buffer_anon
);
348 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_page_aligned(unsigned len
) {
349 return create_aligned(len
, CEPH_PAGE_SIZE
);
351 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_small_page_aligned(unsigned len
) {
352 if (len
< CEPH_PAGE_SIZE
) {
353 return create_aligned(len
, CEPH_BUFFER_ALLOC_UNIT
);
355 return create_aligned(len
, CEPH_PAGE_SIZE
);
359 buffer::ptr::ptr(ceph::unique_leakable_ptr
<raw
> r
)
362 _len(_raw
->get_len())
364 _raw
->nref
.store(1, std::memory_order_release
);
365 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
367 buffer::ptr::ptr(unsigned l
) : _off(0), _len(l
)
369 _raw
= buffer::create(l
).release();
370 _raw
->nref
.store(1, std::memory_order_release
);
371 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
373 buffer::ptr::ptr(const char *d
, unsigned l
) : _off(0), _len(l
) // ditto.
375 _raw
= buffer::copy(d
, l
).release();
376 _raw
->nref
.store(1, std::memory_order_release
);
377 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
379 buffer::ptr::ptr(const ptr
& p
) : _raw(p
._raw
), _off(p
._off
), _len(p
._len
)
383 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
386 buffer::ptr::ptr(ptr
&& p
) noexcept
: _raw(p
._raw
), _off(p
._off
), _len(p
._len
)
391 buffer::ptr::ptr(const ptr
& p
, unsigned o
, unsigned l
)
392 : _raw(p
._raw
), _off(p
._off
+ o
), _len(l
)
394 ceph_assert(o
+l
<= p
._len
);
397 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
399 buffer::ptr::ptr(const ptr
& p
, ceph::unique_leakable_ptr
<raw
> r
)
404 _raw
->nref
.store(1, std::memory_order_release
);
405 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
407 buffer::ptr
& buffer::ptr::operator= (const ptr
& p
)
411 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
413 buffer::raw
*raw
= p
._raw
;
424 buffer::ptr
& buffer::ptr::operator= (ptr
&& p
) noexcept
427 buffer::raw
*raw
= p
._raw
;
440 ceph::unique_leakable_ptr
<buffer::raw
> buffer::ptr::clone()
442 return _raw
->clone();
445 void buffer::ptr::swap(ptr
& other
) noexcept
458 void buffer::ptr::release()
460 // BE CAREFUL: this is called also for hypercombined ptr_node. After
461 // freeing underlying raw, `*this` can become inaccessible as well!
463 // cache the pointer to avoid unnecessary reloads and repeated
465 if (auto* const cached_raw
= std::exchange(_raw
, nullptr);
467 bdout
<< "ptr " << this << " release " << cached_raw
<< bendl
;
468 // Optimize the common case where a particular `buffer::raw` has
469 // only a single reference. Together with initializing the `nref` of
470 // a freshly fabricated one to `1` through std::atomic's ctor
471 // (which doesn't impose a memory barrier on the strongly-ordered
472 // x86), this allows avoiding all atomic operations in such a case.
473 const bool last_one
= \
474 (1 == cached_raw
->nref
.load(std::memory_order_acquire
));
475 if (likely(last_one
) || --cached_raw
->nref
== 0) {
476 bdout
<< "deleting raw " << static_cast<void*>(cached_raw
)
477 << " len " << cached_raw
->get_len() << bendl
;
478 ANNOTATE_HAPPENS_AFTER(&cached_raw
->nref
);
479 ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&cached_raw
->nref
);
480 delete cached_raw
; // dealloc old (if any)
482 ANNOTATE_HAPPENS_BEFORE(&cached_raw
->nref
);
487 int buffer::ptr::get_mempool() const {
489 return _raw
->mempool
;
491 return mempool::mempool_buffer_anon
;
494 void buffer::ptr::reassign_to_mempool(int pool
) {
496 _raw
->reassign_to_mempool(pool
);
499 void buffer::ptr::try_assign_to_mempool(int pool
) {
501 _raw
->try_assign_to_mempool(pool
);
505 const char *buffer::ptr::c_str() const {
507 return _raw
->get_data() + _off
;
509 char *buffer::ptr::c_str() {
511 return _raw
->get_data() + _off
;
513 const char *buffer::ptr::end_c_str() const {
515 return _raw
->get_data() + _off
+ _len
;
517 char *buffer::ptr::end_c_str() {
519 return _raw
->get_data() + _off
+ _len
;
522 unsigned buffer::ptr::unused_tail_length() const
524 return _raw
? _raw
->get_len() - (_off
+ _len
) : 0;
526 const char& buffer::ptr::operator[](unsigned n
) const
529 ceph_assert(n
< _len
);
530 return _raw
->get_data()[_off
+ n
];
532 char& buffer::ptr::operator[](unsigned n
)
535 ceph_assert(n
< _len
);
536 return _raw
->get_data()[_off
+ n
];
539 const char *buffer::ptr::raw_c_str() const { ceph_assert(_raw
); return _raw
->get_data(); }
540 unsigned buffer::ptr::raw_length() const { ceph_assert(_raw
); return _raw
->get_len(); }
541 int buffer::ptr::raw_nref() const { ceph_assert(_raw
); return _raw
->nref
; }
543 void buffer::ptr::copy_out(unsigned o
, unsigned l
, char *dest
) const {
546 throw end_of_buffer();
547 char* src
= _raw
->get_data() + _off
+ o
;
548 maybe_inline_memcpy(dest
, src
, l
, 8);
551 unsigned buffer::ptr::wasted() const
553 return _raw
->get_len() - _len
;
556 int buffer::ptr::cmp(const ptr
& o
) const
558 int l
= _len
< o
._len
? _len
: o
._len
;
560 int r
= memcmp(c_str(), o
.c_str(), l
);
571 bool buffer::ptr::is_zero() const
573 return mem_is_zero(c_str(), _len
);
576 unsigned buffer::ptr::append(char c
)
579 ceph_assert(1 <= unused_tail_length());
580 char* ptr
= _raw
->get_data() + _off
+ _len
;
586 unsigned buffer::ptr::append(const char *p
, unsigned l
)
589 ceph_assert(l
<= unused_tail_length());
590 char* c
= _raw
->get_data() + _off
+ _len
;
591 maybe_inline_memcpy(c
, p
, l
, 32);
596 unsigned buffer::ptr::append_zeros(unsigned l
)
599 ceph_assert(l
<= unused_tail_length());
600 char* c
= _raw
->get_data() + _off
+ _len
;
601 // FIPS zeroization audit 20191115: this memset is not security related.
607 void buffer::ptr::copy_in(unsigned o
, unsigned l
, const char *src
, bool crc_reset
)
610 ceph_assert(o
<= _len
);
611 ceph_assert(o
+l
<= _len
);
612 char* dest
= _raw
->get_data() + _off
+ o
;
614 _raw
->invalidate_crc();
615 maybe_inline_memcpy(dest
, src
, l
, 64);
618 void buffer::ptr::zero(bool crc_reset
)
621 _raw
->invalidate_crc();
622 // FIPS zeroization audit 20191115: this memset is not security related.
623 memset(c_str(), 0, _len
);
626 void buffer::ptr::zero(unsigned o
, unsigned l
, bool crc_reset
)
628 ceph_assert(o
+l
<= _len
);
630 _raw
->invalidate_crc();
631 // FIPS zeroization audit 20191115: this memset is not security related.
632 memset(c_str()+o
, 0, l
);
636 buffer::ptr::iterator_impl
<B
>& buffer::ptr::iterator_impl
<B
>::operator +=(size_t len
) {
639 throw end_of_buffer();
643 template buffer::ptr::iterator_impl
<false>&
644 buffer::ptr::iterator_impl
<false>::operator +=(size_t len
);
645 template buffer::ptr::iterator_impl
<true>&
646 buffer::ptr::iterator_impl
<true>::operator +=(size_t len
);
648 // -- buffer::list::iterator --
650 buffer::list::iterator operator=(const buffer::list::iterator& other)
652 if (this != &other) {
662 template<bool is_const
>
663 buffer::list::iterator_impl
<is_const
>::iterator_impl(bl_t
*l
, unsigned o
)
664 : bl(l
), ls(&bl
->_buffers
), p(ls
->begin()), off(0), p_off(0)
669 template<bool is_const
>
670 buffer::list::iterator_impl
<is_const
>::iterator_impl(const buffer::list::iterator
& i
)
671 : iterator_impl
<is_const
>(i
.bl
, i
.off
, i
.p
, i
.p_off
) {}
673 template<bool is_const
>
674 auto buffer::list::iterator_impl
<is_const
>::operator +=(unsigned o
)
677 //cout << this << " advance " << o << " from " << off
678 // << " (p_off " << p_off << " in " << p->length() << ")"
682 while (p
!= ls
->end()) {
683 if (p_off
>= p
->length()) {
685 p_off
-= p
->length();
688 // somewhere in this buffer!
692 if (p
== ls
->end() && p_off
) {
693 throw end_of_buffer();
699 template<bool is_const
>
700 void buffer::list::iterator_impl
<is_const
>::seek(unsigned o
)
707 template<bool is_const
>
708 char buffer::list::iterator_impl
<is_const
>::operator*() const
711 throw end_of_buffer();
715 template<bool is_const
>
716 buffer::list::iterator_impl
<is_const
>&
717 buffer::list::iterator_impl
<is_const
>::operator++()
720 throw end_of_buffer();
725 template<bool is_const
>
726 buffer::ptr
buffer::list::iterator_impl
<is_const
>::get_current_ptr() const
729 throw end_of_buffer();
730 return ptr(*p
, p_off
, p
->length() - p_off
);
733 template<bool is_const
>
734 bool buffer::list::iterator_impl
<is_const
>::is_pointing_same_raw(
735 const ptr
& other
) const
738 throw end_of_buffer();
739 return p
->_raw
== other
._raw
;
743 // note that these all _append_ to dest!
744 template<bool is_const
>
745 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, char *dest
)
747 if (p
== ls
->end()) seek(off
);
750 throw end_of_buffer();
752 unsigned howmuch
= p
->length() - p_off
;
753 if (len
< howmuch
) howmuch
= len
;
754 p
->copy_out(p_off
, howmuch
, dest
);
762 template<bool is_const
>
763 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, ptr
&dest
)
765 copy_deep(len
, dest
);
768 template<bool is_const
>
769 void buffer::list::iterator_impl
<is_const
>::copy_deep(unsigned len
, ptr
&dest
)
775 throw end_of_buffer();
777 copy(len
, dest
.c_str());
779 template<bool is_const
>
780 void buffer::list::iterator_impl
<is_const
>::copy_shallow(unsigned len
,
787 throw end_of_buffer();
788 unsigned howmuch
= p
->length() - p_off
;
791 copy(len
, dest
.c_str());
793 dest
= ptr(*p
, p_off
, len
);
798 template<bool is_const
>
799 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, list
&dest
)
805 throw end_of_buffer();
807 unsigned howmuch
= p
->length() - p_off
;
810 dest
.append(*p
, p_off
, howmuch
);
817 template<bool is_const
>
818 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, std::string
&dest
)
824 throw end_of_buffer();
826 unsigned howmuch
= p
->length() - p_off
;
827 const char *c_str
= p
->c_str();
830 dest
.append(c_str
+ p_off
, howmuch
);
837 template<bool is_const
>
838 void buffer::list::iterator_impl
<is_const
>::copy_all(list
&dest
)
846 unsigned howmuch
= p
->length() - p_off
;
847 const char *c_str
= p
->c_str();
848 dest
.append(c_str
+ p_off
, howmuch
);
854 template<bool is_const
>
855 size_t buffer::list::iterator_impl
<is_const
>::get_ptr_and_advance(
856 size_t want
, const char **data
)
858 if (p
== ls
->end()) {
860 if (p
== ls
->end()) {
864 *data
= p
->c_str() + p_off
;
865 size_t l
= std::min
<size_t>(p
->length() - p_off
, want
);
867 if (p_off
== p
->length()) {
875 template<bool is_const
>
876 uint32_t buffer::list::iterator_impl
<is_const
>::crc32c(
877 size_t length
, uint32_t crc
)
879 length
= std::min
<size_t>(length
, get_remaining());
882 size_t l
= get_ptr_and_advance(length
, &p
);
883 crc
= ceph_crc32c(crc
, (unsigned char*)p
, l
);
889 // explicitly instantiate only the iterator types we need, so we can hide the
890 // details in this compilation unit without introducing unnecessary link time
892 template class buffer::list::iterator_impl
<true>;
893 template class buffer::list::iterator_impl
<false>;
895 buffer::list::iterator::iterator(bl_t
*l
, unsigned o
)
896 : iterator_impl(l
, o
)
899 buffer::list::iterator::iterator(bl_t
*l
, unsigned o
, list_iter_t ip
, unsigned po
)
900 : iterator_impl(l
, o
, ip
, po
)
904 void buffer::list::iterator::copy_in(unsigned len
, const char *src
, bool crc_reset
)
911 throw end_of_buffer();
913 unsigned howmuch
= p
->length() - p_off
;
916 p
->copy_in(p_off
, howmuch
, src
, crc_reset
);
924 void buffer::list::iterator::copy_in(unsigned len
, const list
& otherl
)
929 for (const auto& node
: otherl
._buffers
) {
930 unsigned l
= node
.length();
933 copy_in(l
, node
.c_str());
940 // -- buffer::list --
942 void buffer::list::swap(list
& other
) noexcept
944 std::swap(_len
, other
._len
);
945 std::swap(_num
, other
._num
);
946 std::swap(_carriage
, other
._carriage
);
947 _buffers
.swap(other
._buffers
);
950 bool buffer::list::contents_equal(const ceph::buffer::list
& other
) const
952 if (length() != other
.length())
955 // buffer-wise comparison
957 auto a
= std::cbegin(_buffers
);
958 auto b
= std::cbegin(other
._buffers
);
959 unsigned aoff
= 0, boff
= 0;
960 while (a
!= std::cend(_buffers
)) {
961 unsigned len
= a
->length() - aoff
;
962 if (len
> b
->length() - boff
)
963 len
= b
->length() - boff
;
964 if (memcmp(a
->c_str() + aoff
, b
->c_str() + boff
, len
) != 0)
967 if (aoff
== a
->length()) {
972 if (boff
== b
->length()) {
980 // byte-wise comparison
982 bufferlist::const_iterator me
= begin();
983 bufferlist::const_iterator him
= other
.begin();
994 bool buffer::list::contents_equal(const void* const other
,
997 if (this->length() != length
) {
1001 const auto* other_buf
= reinterpret_cast<const char*>(other
);
1002 for (const auto& bp
: buffers()) {
1003 assert(bp
.length() <= length
);
1004 if (std::memcmp(bp
.c_str(), other_buf
, bp
.length()) != 0) {
1007 length
-= bp
.length();
1008 other_buf
+= bp
.length();
1015 bool buffer::list::is_provided_buffer(const char* const dst
) const
1017 if (_buffers
.empty()) {
1020 return (is_contiguous() && (_buffers
.front().c_str() == dst
));
1023 bool buffer::list::is_aligned(const unsigned align
) const
1025 for (const auto& node
: _buffers
) {
1026 if (!node
.is_aligned(align
)) {
1033 bool buffer::list::is_n_align_sized(const unsigned align
) const
1035 for (const auto& node
: _buffers
) {
1036 if (!node
.is_n_align_sized(align
)) {
1043 bool buffer::list::is_aligned_size_and_memory(
1044 const unsigned align_size
,
1045 const unsigned align_memory
) const
1047 for (const auto& node
: _buffers
) {
1048 if (!node
.is_aligned(align_memory
) || !node
.is_n_align_sized(align_size
)) {
1055 bool buffer::list::is_zero() const {
1056 for (const auto& node
: _buffers
) {
1057 if (!node
.is_zero()) {
1064 void buffer::list::zero()
1066 for (auto& node
: _buffers
) {
1071 void buffer::list::zero(const unsigned o
, const unsigned l
)
1073 ceph_assert(o
+l
<= _len
);
1075 for (auto& node
: _buffers
) {
1076 if (p
+ node
.length() > o
) {
1077 if (p
>= o
&& p
+node
.length() <= o
+l
) {
1078 // 'o'------------- l -----------|
1079 // 'p'-- node.length() --|
1081 } else if (p
>= o
) {
1082 // 'o'------------- l -----------|
1083 // 'p'------- node.length() -------|
1084 node
.zero(0, o
+l
-p
);
1085 } else if (p
+ node
.length() <= o
+l
) {
1086 // 'o'------------- l -----------|
1087 // 'p'------- node.length() -------|
1088 node
.zero(o
-p
, node
.length()-(o
-p
));
1090 // 'o'----------- l -----------|
1091 // 'p'---------- node.length() ----------|
1102 bool buffer::list::is_contiguous() const
1107 bool buffer::list::is_n_page_sized() const
1109 return is_n_align_sized(CEPH_PAGE_SIZE
);
1112 bool buffer::list::is_page_aligned() const
1114 return is_aligned(CEPH_PAGE_SIZE
);
1117 int buffer::list::get_mempool() const
1119 if (_buffers
.empty()) {
1120 return mempool::mempool_buffer_anon
;
1122 return _buffers
.back().get_mempool();
1125 void buffer::list::reassign_to_mempool(int pool
)
1127 for (auto& p
: _buffers
) {
1128 p
._raw
->reassign_to_mempool(pool
);
1132 void buffer::list::try_assign_to_mempool(int pool
)
1134 for (auto& p
: _buffers
) {
1135 p
._raw
->try_assign_to_mempool(pool
);
1139 uint64_t buffer::list::get_wasted_space() const
1142 return _buffers
.back().wasted();
1144 std::vector
<const raw
*> raw_vec
;
1145 raw_vec
.reserve(_num
);
1146 for (const auto& p
: _buffers
)
1147 raw_vec
.push_back(p
._raw
);
1148 std::sort(raw_vec
.begin(), raw_vec
.end());
1151 const raw
*last
= nullptr;
1152 for (const auto r
: raw_vec
) {
1156 total
+= r
->get_len();
1158 // If multiple buffers are sharing the same raw buffer and they overlap
1159 // with each other, the wasted space will be underestimated.
1160 if (total
<= length())
1162 return total
- length();
1165 void buffer::list::rebuild()
1168 _carriage
= &always_empty_bptr
;
1169 _buffers
.clear_and_dispose();
1173 if ((_len
& ~CEPH_PAGE_MASK
) == 0)
1174 rebuild(ptr_node::create(buffer::create_page_aligned(_len
)));
1176 rebuild(ptr_node::create(buffer::create(_len
)));
1179 void buffer::list::rebuild(
1180 std::unique_ptr
<buffer::ptr_node
, buffer::ptr_node::disposer
> nb
)
1183 int mempool
= _buffers
.front().get_mempool();
1184 nb
->reassign_to_mempool(mempool
);
1185 for (auto& node
: _buffers
) {
1186 nb
->copy_in(pos
, node
.length(), node
.c_str(), false);
1187 pos
+= node
.length();
1189 _buffers
.clear_and_dispose();
1190 if (likely(nb
->length())) {
1191 _carriage
= nb
.get();
1192 _buffers
.push_back(*nb
.release());
1195 _carriage
= &always_empty_bptr
;
1201 bool buffer::list::rebuild_aligned(unsigned align
)
1203 return rebuild_aligned_size_and_memory(align
, align
);
1206 bool buffer::list::rebuild_aligned_size_and_memory(unsigned align_size
,
1207 unsigned align_memory
,
1208 unsigned max_buffers
)
1210 bool had_to_rebuild
= false;
1212 if (max_buffers
&& _num
> max_buffers
&& _len
> (max_buffers
* align_size
)) {
1213 align_size
= round_up_to(round_up_to(_len
, max_buffers
) / max_buffers
, align_size
);
1215 auto p
= std::begin(_buffers
);
1216 auto p_prev
= _buffers
.before_begin();
1217 while (p
!= std::end(_buffers
)) {
1218 // keep anything that's already memory-aligned and size-aligned
1219 if (p
->is_aligned(align_memory
) && p
->is_n_align_sized(align_size
)) {
1220 /*cout << " segment " << (void*)p->c_str()
1221 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1222 << " length " << p->length()
1223 << " " << (p->length() & (align - 1)) << " ok" << std::endl;
1229 // consolidate unaligned items, until we get something that is sized+aligned
1231 unsigned offset
= 0;
1233 /*cout << " segment " << (void*)p->c_str()
1234 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1235 << " length " << p->length() << " " << (p->length() & (align - 1))
1236 << " overall offset " << offset << " " << (offset & (align - 1))
1237 << " not ok" << std::endl;
1239 offset
+= p
->length();
1240 // no need to reallocate, relinking is enough thankfully to bi::list.
1241 auto p_after
= _buffers
.erase_after(p_prev
);
1243 unaligned
._buffers
.push_back(*p
);
1244 unaligned
._len
+= p
->length();
1245 unaligned
._num
+= 1;
1247 } while (p
!= std::end(_buffers
) &&
1248 (!p
->is_aligned(align_memory
) ||
1249 !p
->is_n_align_sized(align_size
) ||
1250 (offset
% align_size
)));
1251 if (!(unaligned
.is_contiguous() && unaligned
._buffers
.front().is_aligned(align_memory
))) {
1254 buffer::create_aligned(unaligned
._len
, align_memory
)));
1255 had_to_rebuild
= true;
1257 _buffers
.insert_after(p_prev
, *ptr_node::create(unaligned
._buffers
.front()).release());
1261 return had_to_rebuild
;
1264 bool buffer::list::rebuild_page_aligned()
1266 return rebuild_aligned(CEPH_PAGE_SIZE
);
1269 void buffer::list::reserve(size_t prealloc
)
1271 if (get_append_buffer_unused_tail_length() < prealloc
) {
1272 auto ptr
= ptr_node::create(buffer::create_small_page_aligned(prealloc
));
1273 ptr
->set_length(0); // unused, so far.
1274 _carriage
= ptr
.get();
1275 _buffers
.push_back(*ptr
.release());
1280 void buffer::list::claim_append(list
& bl
)
1282 // steal the other guy's buffers
1285 _buffers
.splice_back(bl
._buffers
);
1289 void buffer::list::append(char c
)
1291 // put what we can into the existing append_buffer.
1292 unsigned gap
= get_append_buffer_unused_tail_length();
1294 // make a new buffer!
1295 auto buf
= ptr_node::create(
1296 raw_combined::create(CEPH_BUFFER_APPEND_SIZE
, 0, get_mempool()));
1297 buf
->set_length(0); // unused, so far.
1298 _carriage
= buf
.get();
1299 _buffers
.push_back(*buf
.release());
1301 } else if (unlikely(_carriage
!= &_buffers
.back())) {
1302 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1303 _carriage
= bptr
.get();
1304 _buffers
.push_back(*bptr
.release());
1307 _carriage
->append(c
);
// Definition of the static ptr_node member. Elsewhere in this file
// rebuild() stores its address in `_carriage`
// (`_carriage = &always_empty_bptr`).
// NOTE(review): presumably a shared "no append buffer" sentinel -- confirm
// against the class declaration.
1311 buffer::ptr_node
buffer::list::always_empty_bptr
;
1313 buffer::ptr_node
& buffer::list::refill_append_space(const unsigned len
)
1315 // make a new buffer. fill out a complete page, factoring in the
1316 // raw_combined overhead.
1317 size_t need
= round_up_to(len
, sizeof(size_t)) + sizeof(raw_combined
);
1318 size_t alen
= round_up_to(need
, CEPH_BUFFER_ALLOC_UNIT
) -
1319 sizeof(raw_combined
);
1321 ptr_node::create(raw_combined::create(alen
, 0, get_mempool()));
1322 new_back
->set_length(0); // unused, so far.
1323 _carriage
= new_back
.get();
1324 _buffers
.push_back(*new_back
.release());
1326 return _buffers
.back();
1329 void buffer::list::append(const char *data
, unsigned len
)
1333 const unsigned free_in_last
= get_append_buffer_unused_tail_length();
1334 const unsigned first_round
= std::min(len
, free_in_last
);
1336 // _buffers and carriage can desynchronize when 1) a new ptr
1337 // we don't own has been added into the _buffers 2) _buffers
1338 // has been emptied as a result of std::move or stolen by
1340 if (unlikely(_carriage
!= &_buffers
.back())) {
1341 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1342 _carriage
= bptr
.get();
1343 _buffers
.push_back(*bptr
.release());
1346 _carriage
->append(data
, first_round
);
1349 const unsigned second_round
= len
- first_round
;
1351 auto& new_back
= refill_append_space(second_round
);
1352 new_back
.append(data
+ first_round
, second_round
);
1356 buffer::list::reserve_t
buffer::list::obtain_contiguous_space(
1359 // note: if len < the normal append_buffer size it *might*
1360 // be better to allocate a normal-sized append_buffer and
1361 // use part of it. however, that optimizes for the case of
1362 // old-style types including new-style types. and in most
1363 // such cases, this won't be the very first thing encoded to
1364 // the list, so append_buffer will already be allocated.
1365 // OTOH if everything is new-style, we *should* allocate
1366 // only what we need and conserve memory.
1367 if (unlikely(get_append_buffer_unused_tail_length() < len
)) {
1369 buffer::ptr_node::create(buffer::create(len
)).release();
1370 new_back
->set_length(0); // unused, so far.
1371 _buffers
.push_back(*new_back
);
1373 _carriage
= new_back
;
1374 return { new_back
->c_str(), &new_back
->_len
, &_len
};
1376 ceph_assert(!_buffers
.empty());
1377 if (unlikely(_carriage
!= &_buffers
.back())) {
1378 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1379 _carriage
= bptr
.get();
1380 _buffers
.push_back(*bptr
.release());
1383 return { _carriage
->end_c_str(), &_carriage
->_len
, &_len
};
1387 void buffer::list::append(const ptr
& bp
)
1392 void buffer::list::append(ptr
&& bp
)
1394 push_back(std::move(bp
));
1397 void buffer::list::append(const ptr
& bp
, unsigned off
, unsigned len
)
1399 ceph_assert(len
+off
<= bp
.length());
1400 if (!_buffers
.empty()) {
1401 ptr
&l
= _buffers
.back();
1402 if (l
._raw
== bp
._raw
&& l
.end() == bp
.start() + off
) {
1403 // yay contiguous with tail bp!
1404 l
.set_length(l
.length()+len
);
1409 // add new item to list
1410 _buffers
.push_back(*ptr_node::create(bp
, off
, len
).release());
1415 void buffer::list::append(const list
& bl
)
1419 for (const auto& node
: bl
._buffers
) {
1420 _buffers
.push_back(*ptr_node::create(node
).release());
1424 void buffer::list::append(std::istream
& in
)
1429 append(s
.c_str(), s
.length());
// Reserve `len` bytes at the tail of the list and return a contiguous_filler
// that points at the start of the reserved region.
//  - If the unused tail of the append buffer is smaller than len, a fresh back
//    buffer is obtained from refill_append_space(len), its length is set to
//    len, and the filler points at its beginning.
//  - Otherwise, when _carriage is not the last node of _buffers, a zero-length
//    ptr_node positioned at the end of _carriage's current range is created,
//    becomes the new _carriage and is pushed onto the back.
//  - The visible tail then grows _carriage by len and returns a filler that
//    points len bytes before the new end.
// NOTE(review): original lines 1436-1438 and 1449-1450 are absent from this
// listing (braces and possibly bookkeeping).
1435 buffer::list::contiguous_filler
buffer::list::append_hole(const unsigned len
)
1439 if (unlikely(get_append_buffer_unused_tail_length() < len
)) {
1440 // make a new append_buffer. fill out a complete page, factoring in
1441 // the raw_combined overhead.
1442 auto& new_back
= refill_append_space(len
);
1443 new_back
.set_length(len
);
1444 return { new_back
.c_str() };
1445 } else if (unlikely(_carriage
!= &_buffers
.back())) {
1446 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1447 _carriage
= bptr
.get();
1448 _buffers
.push_back(*bptr
.release());
1451 _carriage
->set_length(_carriage
->length() + len
);
1452 return { _carriage
->end_c_str() - len
};
// Create a ptr_node of `len` bytes and push it onto the front of _buffers.
// NOTE(review): original lines 1458-1460 are absent from this listing, so the
// zero-filling implied by the name and any length bookkeeping are not visible.
1455 void buffer::list::prepend_zero(unsigned len
)
1457 auto bp
= ptr_node::create(len
);
1461 _buffers
.push_front(*bp
.release());
// Append `len` zero bytes in up to two rounds.
//  - first_round = min(len, unused tail of the append buffer). Before writing,
//    if _carriage is not the last node, a zero-length ptr_node at the end of
//    _carriage's range is created, made the new _carriage and pushed back;
//    then first_round zeros are appended through _carriage.
//  - second_round = len - first_round is placed in a buffer obtained from
//    refill_append_space(), whose length is set to second_round and which is
//    then zeroed with zero(false).
// NOTE(review): original lines 1465-1467, 1470, 1475-1476, 1478-1479, 1481 and
// 1485 onwards are absent; the guards around each round are not visible.
1464 void buffer::list::append_zero(unsigned len
)
1468 const unsigned free_in_last
= get_append_buffer_unused_tail_length();
1469 const unsigned first_round
= std::min(len
, free_in_last
);
1471 if (unlikely(_carriage
!= &_buffers
.back())) {
1472 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1473 _carriage
= bptr
.get();
1474 _buffers
.push_back(*bptr
.release());
1477 _carriage
->append_zeros(first_round
);
1480 const unsigned second_round
= len
- first_round
;
1482 auto& new_back
= refill_append_space(second_round
);
1483 new_back
.set_length(second_round
);
1484 new_back
.zero(false);
// Read-only element access by byte index n across the segments of the list.
// Visible behaviour: an end_of_buffer exception is thrown on some path, and
// the nodes of _buffers are walked while n is compared against each node's
// length.
// NOTE(review): original lines 1493-1494, 1496 and 1499 onwards are absent,
// so the throw condition and the per-node arithmetic are not visible.
1492 const char& buffer::list::operator[](unsigned n
) const
1495 throw end_of_buffer();
1497 for (const auto& node
: _buffers
) {
1498 if (n
>= node
.length()) {
// Returns a char* to the list contents as one contiguous region.
//  - an empty list yields 0;
//  - an iterator over _buffers is inspected to decide whether the data is
//    already contiguous; the final visible statement returns the first
//    segment's c_str().
// NOTE(review): original lines 1516-1517 and 1519-1520 are absent, so what is
// done when more than one segment exists is not visible here.
1508 * return a contiguous ptr to whole bufferlist contents.
1510 char *buffer::list::c_str()
1512 if (_buffers
.empty())
1513 return 0; // no buffers
1515 auto iter
= std::cbegin(_buffers
);
1518 if (iter
!= std::cend(_buffers
)) {
1521 return _buffers
.front().c_str(); // good, we're already contiguous.
1524 string
buffer::list::to_str() const {
1526 s
.reserve(length());
1527 for (const auto& node
: _buffers
) {
1528 if (node
.length()) {
1529 s
.append(node
.c_str(), node
.length());
// Make this list refer to the byte range [off, off + len) of `other`.
//  - throws end_of_buffer when off + len exceeds other.length();
//  - walks other's segments, subtracting each fully skipped segment's length
//    from off, until the segment containing the start is reached;
//  - asserts that a segment was found unless len is 0;
//  - if the remaining range ends inside the current segment, a ptr_node for
//    (segment, off, len) is pushed; otherwise a ptr_node for the rest of the
//    segment (length() - off bytes) is pushed.
// NOTE(review): original lines 1539-1542, 1545, 1548-1549, 1551-1553,
// 1557-1562 and 1566 onwards are absent (iterator advance, loop structure and
// bookkeeping), so the exact iteration cannot be confirmed from here.
1535 void buffer::list::substr_of(const list
& other
, unsigned off
, unsigned len
)
1537 if (off
+ len
> other
.length())
1538 throw end_of_buffer();
1543 auto curbuf
= std::cbegin(other
._buffers
);
1544 while (off
> 0 && off
>= curbuf
->length()) {
1546 //cout << "skipping over " << *curbuf << std::endl;
1547 off
-= (*curbuf
).length();
1550 ceph_assert(len
== 0 || curbuf
!= std::cend(other
._buffers
));
1554 if (off
+ len
< curbuf
->length()) {
1555 //cout << "copying partial of " << *curbuf << std::endl;
1556 _buffers
.push_back(*ptr_node::create(*curbuf
, off
, len
).release());
1563 //cout << "copying end (all?) of " << *curbuf << std::endl;
1564 unsigned howmuch
= curbuf
->length() - off
;
1565 _buffers
.push_back(*ptr_node::create(*curbuf
, off
, howmuch
).release());
// Remove `len` bytes starting at `off`; removed data is appended to claim_by
// in the places where claim_by->append() is called below.
//  - throws end_of_buffer when off is at or past length(); asserts len > 0;
//  - walks _buffers with curbuf and its predecessor curbuf_prev, subtracting
//    the length of each skipped segment from off;
//  - a reference to the part of the segment in front of off is inserted after
//    curbuf_prev so that it is kept;
//  - when off + len ends inside the current segment, the segment's offset is
//    advanced and its length reduced by that amount;
//  - otherwise the rest of the segment is dropped: _len is reduced by the
//    segment length, and the node is either relinked (when it is _carriage:
//    erased, emptied, pushed to the back) or erased and disposed.
// NOTE(review): many original lines are absent from this listing (for example
// 1576-1579, 1585-1586, 1589, 1599-1603, 1609-1614, 1624-1626, 1641-1646);
// loop structure, the null check on claim_by and other bookkeeping are not
// visible here.
1575 void buffer::list::splice(unsigned off
, unsigned len
, list
*claim_by
/*, bufferlist& replace_with */)
1580 if (off
>= length())
1581 throw end_of_buffer();
1583 ceph_assert(len
> 0);
1584 //cout << "splice off " << off << " len " << len << " ... mylen = " << length() << std::endl;
1587 auto curbuf
= std::begin(_buffers
);
1588 auto curbuf_prev
= _buffers
.before_begin();
1590 ceph_assert(curbuf
!= std::end(_buffers
));
1591 if (off
>= (*curbuf
).length()) {
1593 //cout << "off = " << off << " skipping over " << *curbuf << std::endl;
1594 off
-= (*curbuf
).length();
1595 curbuf_prev
= curbuf
++;
1597 // somewhere in this buffer!
1598 //cout << "off = " << off << " somewhere in " << *curbuf << std::endl;
1604 // add a reference to the front bit, insert it before curbuf (which
1606 //cout << "keeping front " << off << " of " << *curbuf << std::endl;
1607 _buffers
.insert_after(curbuf_prev
,
1608 *ptr_node::create(*curbuf
, 0, off
).release());
1615 // partial or the last (appendable) one?
1616 if (const auto to_drop
= off
+ len
; to_drop
< curbuf
->length()) {
1617 //cout << "keeping end of " << *curbuf << ", losing first " << off+len << std::endl;
1619 claim_by
->append(*curbuf
, off
, len
);
1620 curbuf
->set_offset(to_drop
+ curbuf
->offset()); // ignore beginning big
1621 curbuf
->set_length(curbuf
->length() - to_drop
);
1623 //cout << " now " << *curbuf << std::endl;
1627 // hose though the end
1628 unsigned howmuch
= curbuf
->length() - off
;
1629 //cout << "discarding " << howmuch << " of " << *curbuf << std::endl;
1631 claim_by
->append(*curbuf
, off
, howmuch
);
1632 _len
-= curbuf
->length();
1633 if (curbuf
== _carriage
) {
1634 // no need to reallocate, shrinking and relinking is enough.
1635 curbuf
= _buffers
.erase_after(curbuf_prev
);
1636 _carriage
->set_offset(_carriage
->offset() + _carriage
->length());
1637 _carriage
->set_length(0);
1638 _buffers
.push_back(*_carriage
);
1640 curbuf
= _buffers
.erase_after_and_dispose(curbuf_prev
);
1647 // splice in *replace (implement me later?)
1650 void buffer::list::write(int off
, int len
, std::ostream
& out
) const
1653 s
.substr_of(*this, off
, len
);
1654 for (const auto& node
: s
._buffers
) {
1655 if (node
.length()) {
1656 out
.write(node
.c_str(), node
.length());
// Armor (base64-encode, per the function name) this list's bytes into `o`.
// A bufferptr of length() * 4 / 3 + 3 bytes is allocated, ceph_armor() is run
// over the contiguous [c_str(), c_str() + length()) range into it, and the
// bufferptr is moved onto the back of o.
// NOTE(review): original line 1665 is absent from this listing, so how the
// result `l` of ceph_armor() is used is not visible.
1661 void buffer::list::encode_base64(buffer::list
& o
)
1663 bufferptr
bp(length() * 4 / 3 + 3);
1664 int l
= ceph_armor(bp
.c_str(), bp
.c_str() + bp
.length(), c_str(), c_str() + length());
1666 o
.push_back(std::move(bp
));
// Un-armor the bytes of `e` and append the result to this list.
// A bufferptr of 4 + e.length() * 3 / 4 bytes is allocated and filled by
// ceph_unarmor() from e's contiguous bytes. On the failure path a
// buffer::malformed_input carrying "decode_base64: decoding failed:\n" (plus
// anything added on lines not shown) is thrown. Afterwards the result length
// is asserted to be within the bufferptr and the bufferptr is pushed back.
// NOTE(review): original lines 1673, 1676, 1678 and 1680 are absent, so the
// failure condition and the use of `l` between the assert and push_back are
// not visible.
1669 void buffer::list::decode_base64(buffer::list
& e
)
1671 bufferptr
bp(4 + ((e
.length() * 3) / 4));
1672 int l
= ceph_unarmor(bp
.c_str(), bp
.c_str() + bp
.length(), e
.c_str(), e
.c_str() + e
.length());
1674 std::ostringstream oss
;
1675 oss
<< "decode_base64: decoding failed:\n";
1677 throw buffer::malformed_input(oss
.str().c_str());
1679 ceph_assert(l
<= (int)bp
.length());
1681 push_back(std::move(bp
));
// Read up to `len` bytes starting at offset `off` from the file `fn` into
// this list. Visible steps:
//  - open fn read-only (O_CLOEXEC | O_BINARY), retrying on EINTR via
//    TEMP_FAILURE_RETRY; an open failure builds a "can't open ..." message;
//  - fstat the descriptor; a failure builds a "stat error" message and closes
//    the descriptor;
//  - an offset beyond st_size builds a "size < offset" message and closes;
//  - len is clamped to st_size - off;
//  - seek to off with lseek64 and check the resulting position;
//  - read via read_fd(fd, len); a read error or a short read builds the
//    corresponding message; the descriptor is closed on each visible path.
// NOTE(review): the lines that store the messages into *error and the return
// statements are absent from this listing, so return values are not
// documented here.
1684 ssize_t
buffer::list::pread_file(const char *fn
, uint64_t off
, uint64_t len
, std::string
*error
)
1686 int fd
= TEMP_FAILURE_RETRY(::open(fn
, O_RDONLY
|O_CLOEXEC
|O_BINARY
));
1689 std::ostringstream oss
;
1690 oss
<< "can't open " << fn
<< ": " << cpp_strerror(err
);
1696 // FIPS zeroization audit 20191115: this memset is not security related.
1697 memset(&st
, 0, sizeof(st
));
1698 if (::fstat(fd
, &st
) < 0) {
1700 std::ostringstream oss
;
1701 oss
<< "bufferlist::read_file(" << fn
<< "): stat error: "
1702 << cpp_strerror(err
);
1704 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1708 if (off
> (uint64_t)st
.st_size
) {
1709 std::ostringstream oss
;
1710 oss
<< "bufferlist::read_file(" << fn
<< "): read error: size < offset";
1712 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1716 if (len
> st
.st_size
- off
) {
1717 len
= st
.st_size
- off
;
1719 ssize_t ret
= lseek64(fd
, off
, SEEK_SET
);
1720 if (ret
!= (ssize_t
)off
) {
1724 ret
= read_fd(fd
, len
);
1726 std::ostringstream oss
;
1727 oss
<< "bufferlist::read_file(" << fn
<< "): read error:"
1728 << cpp_strerror(ret
);
1730 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1732 } else if (ret
!= (ssize_t
)len
) {
1734 // Perhaps the file changed between stat() and read()?
1735 std::ostringstream oss
;
1736 oss
<< "bufferlist::read_file(" << fn
<< "): warning: got premature EOF.";
1738 // not actually an error, but weird
1740 VOID_TEMP_FAILURE_RETRY(::close(fd
));
// Read the whole file `fn` into this list. Visible steps:
//  - open fn read-only (O_CLOEXEC | O_BINARY) with EINTR retry; a failure
//    builds a "can't open ..." message;
//  - fstat the descriptor; a failure builds a "stat error" message and closes
//    the descriptor;
//  - read st_size bytes via read_fd(); a read error or a result different
//    from st_size builds the corresponding message; the descriptor is closed
//    on each visible path.
// NOTE(review): the lines that store the messages into *error and the return
// statements are absent from this listing, so return values are not
// documented here.
1744 int buffer::list::read_file(const char *fn
, std::string
*error
)
1746 int fd
= TEMP_FAILURE_RETRY(::open(fn
, O_RDONLY
|O_CLOEXEC
|O_BINARY
));
1749 std::ostringstream oss
;
1750 oss
<< "can't open " << fn
<< ": " << cpp_strerror(err
);
1756 // FIPS zeroization audit 20191115: this memset is not security related.
1757 memset(&st
, 0, sizeof(st
));
1758 if (::fstat(fd
, &st
) < 0) {
1760 std::ostringstream oss
;
1761 oss
<< "bufferlist::read_file(" << fn
<< "): stat error: "
1762 << cpp_strerror(err
);
1764 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1768 ssize_t ret
= read_fd(fd
, st
.st_size
);
1770 std::ostringstream oss
;
1771 oss
<< "bufferlist::read_file(" << fn
<< "): read error:"
1772 << cpp_strerror(ret
);
1774 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1777 else if (ret
!= st
.st_size
) {
1779 // Perhaps the file changed between stat() and read()?
1780 std::ostringstream oss
;
1781 oss
<< "bufferlist::read_file(" << fn
<< "): warning: got premature EOF.";
1783 // not actually an error, but weird
1785 VOID_TEMP_FAILURE_RETRY(::close(fd
));
// Read up to `len` bytes from descriptor fd into a newly created buffer of
// len bytes using safe_read(), set the buffer's length to the result and
// move it onto the back of this list.
// NOTE(review): original lines 1793 and 1796-1797 are absent from this
// listing, so the guard around set_length()/push_back() and the returned
// value are not visible.
1789 ssize_t
buffer::list::read_fd(int fd
, size_t len
)
1791 auto bp
= ptr_node::create(buffer::create(len
));
1792 ssize_t ret
= safe_read(fd
, (void*)bp
->c_str(), len
);
1794 bp
->set_length(ret
);
1795 push_back(std::move(bp
));
// Same shape as read_fd(), but the data is obtained with safe_recv(): up to
// `len` bytes are received into a newly created buffer whose length is then
// set to the result, and the buffer is moved onto the back of this list.
// NOTE(review): original lines 1804 and 1807-1808 are absent from this
// listing, so the guard around set_length()/push_back() and the returned
// value are not visible.
1800 ssize_t
buffer::list::recv_fd(int fd
, size_t len
)
1802 auto bp
= ptr_node::create(buffer::create(len
));
1803 ssize_t ret
= safe_recv(fd
, (void*)bp
->c_str(), len
);
1805 bp
->set_length(ret
);
1806 push_back(std::move(bp
));
// Write the list contents to the file `fn`, created/truncated with permission
// bits `mode` (O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC | O_BINARY).
// Diagnostics are printed to cerr for an open failure, a write_fd() failure
// (after which the descriptor is closed) and a failing close().
// NOTE(review): the failure conditions and return statements are on lines
// absent from this listing, so return values are not documented here.
1811 int buffer::list::write_file(const char *fn
, int mode
)
1813 int fd
= TEMP_FAILURE_RETRY(::open(fn
, O_WRONLY
|O_CREAT
|O_TRUNC
|O_CLOEXEC
|O_BINARY
, mode
));
1816 cerr
<< "bufferlist::write_file(" << fn
<< "): failed to open file: "
1817 << cpp_strerror(err
) << std::endl
;
1820 int ret
= write_fd(fd
);
1822 cerr
<< "bufferlist::write_fd(" << fn
<< "): write_fd error: "
1823 << cpp_strerror(ret
) << std::endl
;
1824 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1827 if (TEMP_FAILURE_RETRY(::close(fd
))) {
1829 cerr
<< "bufferlist::write_file(" << fn
<< "): close error: "
1830 << cpp_strerror(err
) << std::endl
;
// File-local helper that writes `bytes` bytes described by the iovec array
// `vec` (veclen entries) to fd at `offset`.
// Visible pieces: a pwritev() call, an alternative path that seeks with
// lseek64() and then calls writev(), a break once no bytes remain, and
// partial-write recovery that either drains a fully written vec[0] from the
// result count or advances vec[0]'s base and shrinks its length.
// NOTE(review): the loop structure, the selection between pwritev and
// lseek64+writev, error handling and return statements are on lines absent
// from this listing.
1836 static int do_writev(int fd
, struct iovec
*vec
, uint64_t offset
, unsigned veclen
, unsigned bytes
)
1841 r
= ::pwritev(fd
, vec
, veclen
, offset
);
1843 r
= ::lseek64(fd
, offset
, SEEK_SET
);
1847 r
= ::writev(fd
, vec
, veclen
);
1857 if (bytes
== 0) break;
1860 if (vec
[0].iov_len
<= (size_t)r
) {
1861 // drain this whole item
1862 r
-= vec
[0].iov_len
;
1866 vec
[0].iov_base
= (char *)vec
[0].iov_base
+ r
;
1867 vec
[0].iov_len
-= r
;
// Write the whole list to descriptor fd using writev().
// Non-empty segments are gathered into an iovec array while their sizes are
// summed into `bytes`; when the array holds IOV_MAX entries or the last
// segment has been reached, the batch is written. If writev() reports fewer
// bytes than requested, fully written entries are skipped and the first
// remaining entry is trimmed before continuing.
// NOTE(review): declarations, iterator advance, the retry loop, error
// handling and return statements are on lines absent from this listing.
1876 int buffer::list::write_fd(int fd
) const
1883 auto p
= std::cbegin(_buffers
);
1884 while (p
!= std::cend(_buffers
)) {
1885 if (p
->length() > 0) {
1886 iov
[iovlen
].iov_base
= (void *)p
->c_str();
1887 iov
[iovlen
].iov_len
= p
->length();
1888 bytes
+= p
->length();
1893 if (iovlen
== IOV_MAX
||
1894 p
== _buffers
.end()) {
1899 wrote
= ::writev(fd
, start
, num
);
1906 if (wrote
< bytes
) {
1907 // partial write, recover!
1908 while ((size_t)wrote
>= start
[0].iov_len
) {
1909 wrote
-= start
[0].iov_len
;
1910 bytes
-= start
[0].iov_len
;
1915 start
[0].iov_len
-= wrote
;
1916 start
[0].iov_base
= (char *)start
[0].iov_base
+ wrote
;
1928 int buffer::list::send_fd(int fd
) const {
1929 return buffer::list::write_fd(fd
);
// Write the whole list to descriptor fd starting at file offset `offset`.
// Segments are processed in batches of at most IOV_MAX: each batch fills an
// iovec array (base = segment data, len = segment length) while summing the
// sizes into `bytes`, and is handed to do_writev() together with the offset.
// NOTE(review): the outer loop, the iov/bytes declarations, the offset advance
// and the handling of do_writev()'s result are on lines absent from this
// listing.
1932 int buffer::list::write_fd(int fd
, uint64_t offset
) const
1936 auto p
= std::cbegin(_buffers
);
1937 uint64_t left_pbrs
= get_num_buffers();
1940 unsigned iovlen
= 0;
1941 uint64_t size
= std::min
<uint64_t>(left_pbrs
, IOV_MAX
);
1944 iov
[iovlen
].iov_base
= (void *)p
->c_str();
1945 iov
[iovlen
].iov_len
= p
->length();
1947 bytes
+= p
->length();
1952 int r
= do_writev(fd
, iov
, offset
, iovlen
, bytes
);
// Alternative write_fd(fd) that does not use writev (see the comment below):
// each segment is written with ::write(), looping while `written` is smaller
// than the segment length.
// NOTE(review): presumably selected by a preprocessor guard that is not
// visible in this listing; the outer loop over segments, the update of
// `written`, error handling and return statements are also absent.
1960 int buffer::list::write_fd(int fd
) const
1962 // There's no writev on Windows. WriteFileGather may be an option,
1963 // but it has strict requirements in terms of buffer size and alignment.
1964 auto p
= std::cbegin(_buffers
);
1965 uint64_t left_pbrs
= get_num_buffers();
1968 while (written
< p
->length()) {
1969 int r
= ::write(fd
, p
->c_str(), p
->length() - written
);
// Alternative send_fd(fd) that does not use writev (see the comment below):
// each segment is sent with ::send(..., 0), looping while `written` is smaller
// than the segment length; a failure path returns -ceph_sock_errno().
// NOTE(review): presumably selected by a preprocessor guard that is not
// visible in this listing; the outer loop over segments, the update of
// `written` and the failure condition are also absent.
1983 int buffer::list::send_fd(int fd
) const
1985 // There's no writev on Windows. WriteFileGather may be an option,
1986 // but it has strict requirements in terms of buffer size and alignment.
1987 auto p
= std::cbegin(_buffers
);
1988 uint64_t left_pbrs
= get_num_buffers();
1991 while (written
< p
->length()) {
1992 int r
= ::send(fd
, p
->c_str(), p
->length() - written
, 0);
1994 return -ceph_sock_errno();
// Alternative write_fd(fd, offset): positions the descriptor at `offset` with
// lseek64() and then delegates to the single-argument write_fd().
// NOTE(review): original lines 2009-2011 are absent from this listing, so the
// handling of a failed seek is not visible.
2006 int buffer::list::write_fd(int fd
, uint64_t offset
) const
2008 int r
= ::lseek64(fd
, offset
, SEEK_SET
);
2012 return write_fd(fd
);
// Compute the crc32c of the list contents, seeded with `crc`, using the
// per-raw-buffer crc cache where possible.
// For every non-empty segment the raw buffer is asked (get_crc) for a cached
// (initial, result) pair covering the segment's [offset, offset + length):
//  - a cached pair whose initial value equals the running crc is the
//    cache-hit case;
//  - a cached pair with a different initial value is converted using the
//    adjustment described in the comment below (crc of length() zero bytes
//    seeded with ccrc.first ^ crc, XORed into ccrc.second);
//  - otherwise the crc is computed over the segment bytes and stored with
//    set_crc() as (initial, result).
// When buffer_track_crc is set, the local hit/adjust/miss counters are added
// to the file-level statistics counters.
// NOTE(review): several original lines (for example 2029-2032, 2042-2045,
// 2049-2052 and 2060 onwards) are absent from this listing, including the
// counter increments, the hit-case assignment and the return statement.
2016 __u32
buffer::list::crc32c(__u32 crc
) const
2018 int cache_misses
= 0;
2020 int cache_adjusts
= 0;
2022 for (const auto& node
: _buffers
) {
2023 if (node
.length()) {
2024 raw
* const r
= node
._raw
;
2025 pair
<size_t, size_t> ofs(node
.offset(), node
.offset() + node
.length());
2026 pair
<uint32_t, uint32_t> ccrc
;
2027 if (r
->get_crc(ofs
, &ccrc
)) {
2028 if (ccrc
.first
== crc
) {
2033 /* If we have cached crc32c(buf, v) for initial value v,
2034 * we can convert this to a different initial value v' by:
2035 * crc32c(buf, v') = crc32c(buf, v) ^ adjustment
2036 * where adjustment = crc32c(0*len(buf), v ^ v')
2038 * http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
2039 * note, u for our crc32c implementation is 0
2041 crc
= ccrc
.second
^ ceph_crc32c(ccrc
.first
^ crc
, NULL
, node
.length());
2046 uint32_t base
= crc
;
2047 crc
= ceph_crc32c(crc
, (unsigned char*)node
.c_str(), node
.length());
2048 r
->set_crc(ofs
, make_pair(base
, crc
));
2053 if (buffer_track_crc
) {
2055 buffer_cached_crc_adjusted
+= cache_adjusts
;
2057 buffer_cached_crc
+= cache_hits
;
2059 buffer_missed_crc
+= cache_misses
;
// Drop cached crc values: invalidate_crc() is called on the raw buffer behind
// the nodes of _buffers.
// NOTE(review): original line 2068 is absent from this listing; it presumably
// guards the call (e.g. against a node without a raw buffer) - confirm.
2065 void buffer::list::invalidate_crc()
2067 for (const auto& node
: _buffers
) {
2069 node
._raw
->invalidate_crc();
2075 * Binary write all contents to a C++ stream
2077 void buffer::list::write_stream(std::ostream
&out
) const
2079 for (const auto& node
: _buffers
) {
2080 if (node
.length() > 0) {
2081 out
.write(node
.c_str(), node
.length());
// Print the list contents to `out` in a layout modelled on `hexdump -C`.
// Visible behaviour:
//  - the stream's format flags are saved on entry and restored at the end;
//  - the data is walked in rows of `per` bytes; for rows other than the last,
//    a row is compared byte by byte against the previous row's character to
//    detect repeated rows (was_same / did_star track the collapsing);
//  - each printed row starts with the 8-wide hex offset, followed by the hex
//    value of every byte (masked with 0xff), padding for a short final row,
//    and a character column where letters, digits, space and punctuation are
//    printed as themselves;
//  - with trailing_newline set, a newline and the total length in hex follow.
// NOTE(review): many original lines are absent from this listing (the value of
// `per`, fill-character setup, the '*' output for collapsed rows, the
// non-printable replacement and separators), so the exact output format is
// not fully documented here.
2087 void buffer::list::hexdump(std::ostream
&out
, bool trailing_newline
) const
2092 std::ios_base::fmtflags original_flags
= out
.flags();
2094 // do our best to match the output of hexdump -C, for better
2097 out
.setf(std::ios::right
);
2101 char last_row_char
= '\0';
2102 bool was_same
= false, did_star
= false;
2103 for (unsigned o
=0; o
<length(); o
+= per
) {
2105 last_row_char
= (*this)[o
];
2108 if (o
+ per
< length()) {
2109 bool row_is_same
= true;
2110 for (unsigned i
=0; i
<per
&& o
+i
<length(); i
++) {
2111 char current_char
= (*this)[o
+i
];
2112 if (current_char
!= last_row_char
) {
2114 last_row_char
= current_char
;
2118 row_is_same
= false;
2138 out
<< std::hex
<< std::setw(8) << o
<< " ";
2141 for (i
=0; i
<per
&& o
+i
<length(); i
++) {
2144 out
<< " " << std::setw(2) << ((unsigned)(*this)[o
+i
] & 0xff);
2146 for (; i
<per
; i
++) {
2153 for (i
=0; i
<per
&& o
+i
<length(); i
++) {
2154 char c
= (*this)[o
+i
];
2155 if (isupper(c
) || islower(c
) || isdigit(c
) || c
== ' ' || ispunct(c
))
2160 out
<< '|' << std::dec
;
2162 if (trailing_newline
) {
2163 out
<< "\n" << std::hex
<< std::setw(8) << length();
2167 out
.flags(original_flags
);
2171 buffer::list
buffer::list::static_from_mem(char* c
, size_t l
) {
2173 bl
.push_back(ptr_node::create(create_static(l
, c
)));
2177 buffer::list
buffer::list::static_from_cstring(char* c
) {
2178 return static_from_mem(c
, std::strlen(c
));
2181 buffer::list
buffer::list::static_from_string(string
& s
) {
2182 // C++14 just has string::data return a char* from a non-const
2184 return static_from_mem(const_cast<char*>(s
.data()), s
.length());
2185 // But the way buffer::list mostly doesn't work in a sane way with
2186 // const makes me generally sad.
// buffer::raw is not a standard layout type, so the byte offset of a member
// is computed by hand: take the member's address relative to a fake object
// placed at address 1024 and subtract that base again.
#define BUF_OFFSETOF(type, field)					\
  (reinterpret_cast<std::uintptr_t>(&(((type*)1024)->field)) - 1024u)
// Decide whether `delete_this` is "hypercombined", i.e. whether its address
// equals the address of the bptr_storage member of the raw buffer it points
// to (computed with BUF_OFFSETOF, so no dereference of _raw is needed).
// In that case an always-on assertion fires, because hypercombining is
// currently disabled, and the node's destructor is invoked explicitly.
// NOTE(review): original lines 2205-2209 are absent from this listing, so the
// returned values are not visible.
2193 bool buffer::ptr_node::dispose_if_hypercombined(
2194 buffer::ptr_node
* const delete_this
)
2196 // in case _raw is nullptr
2197 const std::uintptr_t bptr
=
2198 (reinterpret_cast<std::uintptr_t>(delete_this
->_raw
) +
2199 BUF_OFFSETOF(buffer::raw
, bptr_storage
));
2200 const bool is_hypercombined
=
2201 reinterpret_cast<std::uintptr_t>(delete_this
) == bptr
;
2202 if (is_hypercombined
) {
2203 ceph_assert_always("hypercombining is currently disabled" == nullptr);
2204 delete_this
->~ptr_node();
2211 std::unique_ptr
<buffer::ptr_node
, buffer::ptr_node::disposer
>
2212 buffer::ptr_node::create_hypercombined(ceph::unique_leakable_ptr
<buffer::raw
> r
)
2214 // FIXME: we don't currently hypercombine buffers due to crashes
2215 // observed in the rados suite. After fixing we'll use placement
2216 // new to create ptr_node on buffer::raw::bptr_storage.
2217 return std::unique_ptr
<buffer::ptr_node
, buffer::ptr_node::disposer
>(
2218 new ptr_node(std::move(r
)));
2221 buffer::ptr_node
* buffer::ptr_node::cloner::operator()(
2222 const buffer::ptr_node
& clone_this
)
2224 return new ptr_node(clone_this
);
2227 std::ostream
& buffer::operator<<(std::ostream
& out
, const buffer::raw
&r
) {
2228 return out
<< "buffer::raw("
2229 << (void*)r
.get_data() << " len " << r
.get_len()
2230 << " nref " << r
.nref
.load() << ")";
// Stream a description of a ptr. Two output forms are visible:
//  - "buffer::ptr(off~len <addr> in raw <raw addr> len <raw len> nref <n>)"
//  - "buffer:ptr(off~len no raw)" for the case without a raw buffer.
// NOTE(review): original lines 2234, 2240 and 2242-2243 are absent from this
// listing, so the condition choosing between the two forms and the return
// statement are not visible.
2233 std::ostream
& buffer::operator<<(std::ostream
& out
, const buffer::ptr
& bp
) {
2235 out
<< "buffer::ptr(" << bp
.offset() << "~" << bp
.length()
2236 << " " << (void*)bp
.c_str()
2237 << " in raw " << (void*)bp
.raw_c_str()
2238 << " len " << bp
.raw_length()
2239 << " nref " << bp
.raw_nref() << ")";
2241 out
<< "buffer:ptr(" << bp
.offset() << "~" << bp
.length() << " no raw)";
// Stream a description of a list: a header with the total length, then one
// tab-indented entry per segment; the comparison against the last segment
// distinguishes the final entry from the others.
// NOTE(review): original lines 2247 and 2251-2256 are absent from this
// listing, so the separator, the closing text and the return statement are
// not visible.
2245 std::ostream
& buffer::operator<<(std::ostream
& out
, const buffer::list
& bl
) {
2246 out
<< "buffer::list(len=" << bl
.length() << ",\n";
2248 for (const auto& node
: bl
.buffers()) {
2249 out
<< "\t" << node
;
2250 if (&node
!= &bl
.buffers().back()) {
// Mempool object-factory definitions, one per raw buffer subclass named in
// the first macro argument; the second argument is the factory name and the
// visible third arguments use the buffer_meta pool.
// NOTE(review): the continuation lines of several invocations (original lines
// 2259, 2264, 2266) are absent from this listing, so their pool argument is
// not visible.
2258 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_malloc
, buffer_raw_malloc
,
2260 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_posix_aligned
,
2261 buffer_raw_posix_aligned
, buffer_meta
);
2262 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_char
, buffer_raw_char
, buffer_meta
);
2263 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_claimed_char
, buffer_raw_claimed_char
,
2265 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_static
, buffer_raw_static
,
2269 void ceph::buffer::list::page_aligned_appender::_refill(size_t len
) {
2270 const size_t alloc
= \
2271 std::max((size_t)min_alloc
, (len
+ CEPH_PAGE_SIZE
- 1) & CEPH_PAGE_MASK
);
2273 ptr_node::create(buffer::create_page_aligned(alloc
));
2274 new_back
->set_length(0); // unused, so far.
2275 bl
.push_back(std::move(new_back
));
2278 namespace ceph::buffer
{
2279 inline namespace v15_2_0
{
// Error category for buffer errors, derived from ceph::converting_category.
// It declares the category name, message lookup in both the buffer-taking
// and the std::string-returning form, the mapping to a default
// error_condition, an equivalence test against an error_condition (the base
// class overloads of equivalent are kept visible with the using-declaration),
// and from_code().
// The surrounding pragmas silence -Wnon-virtual-dtor for GCC and clang while
// the class is declared.
// NOTE(review): original lines 2286, 2292, 2295 and 2297 are absent from this
// listing (access specifier, declaration tails and the closing of the class).
2281 #pragma GCC diagnostic push
2282 #pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
2283 #pragma clang diagnostic push
2284 #pragma clang diagnostic ignored "-Wnon-virtual-dtor"
2285 class buffer_error_category
: public ceph::converting_category
{
2287 buffer_error_category(){}
2288 const char* name() const noexcept override
;
2289 const char* message(int ev
, char*, std::size_t) const noexcept override
;
2290 std::string
message(int ev
) const override
;
2291 boost::system::error_condition
default_error_condition(int ev
) const noexcept
2293 using ceph::converting_category::equivalent
;
2294 bool equivalent(int ev
, const boost::system::error_condition
& c
) const
2296 int from_code(int ev
) const noexcept override
;
2298 #pragma GCC diagnostic pop
2299 #pragma clang diagnostic pop
// Returns the name of this error category as a C string.
// NOTE(review): the body (original lines 2302-2303) is absent from this
// listing, so the actual name is not visible here.
2301 const char* buffer_error_category::name() const noexcept
{
// Map an error value to a static, human-readable message. The two unnamed
// parameters (buffer pointer and size) are not used by the visible code.
// Visible mapping: bad_alloc -> "Bad allocation", end_of_buffer ->
// "End of buffer", malformed_input -> "Malformed input"; the final statement
// yields "Unknown error".
// NOTE(review): original lines 2305 and 2308-2310 are absent from this
// listing (the return type line and whatever precedes the switch).
2306 buffer_error_category::message(int ev
, char*, std::size_t) const noexcept
{
2307 using ceph::buffer::errc
;
2311 switch (static_cast<errc
>(ev
)) {
2312 case errc::bad_alloc
:
2313 return "Bad allocation";
2315 case errc::end_of_buffer
:
2316 return "End of buffer";
2318 case errc::malformed_input
:
2319 return "Malformed input";
2322 return "Unknown error";
2325 std::string
buffer_error_category::message(int ev
) const {
2326 return message(ev
, nullptr, 0);
2329 boost::system::error_condition
2330 buffer_error_category::default_error_condition(int ev
)const noexcept
{
2331 using ceph::buffer::errc
;
2332 switch (static_cast<errc
>(ev
)) {
2333 case errc::bad_alloc
:
2334 return boost::system::errc::not_enough_memory
;
2335 case errc::end_of_buffer
:
2336 case errc::malformed_input
:
2337 return boost::system::errc::io_error
;
2339 return { ev
, *this };
2342 bool buffer_error_category::equivalent(int ev
, const boost::system::error_condition
& c
) const noexcept
{
2343 return default_error_condition(ev
) == c
;
// Convert a buffer error value to an int, switching over bad_alloc,
// end_of_buffer and malformed_input.
// NOTE(review): every return statement of this function is on lines absent
// from this listing (original 2350-2351, 2353-2354, 2356 onwards), so the
// values produced for each case are not visible here.
2346 int buffer_error_category::from_code(int ev
) const noexcept
{
2347 using ceph::buffer::errc
;
2348 switch (static_cast<errc
>(ev
)) {
2349 case errc::bad_alloc
:
2352 case errc::end_of_buffer
:
2355 case errc::malformed_input
:
2361 const boost::system::error_category
& buffer_category() noexcept
{
2362 static const buffer_error_category c
;