1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 *
 */
22 #include "include/ceph_assert.h"
23 #include "include/types.h"
24 #include "include/buffer_raw.h"
25 #include "include/compat.h"
26 #include "include/mempool.h"
28 #include "common/environment.h"
29 #include "common/errno.h"
30 #include "common/error_code.h"
31 #include "common/safe_io.h"
32 #include "common/strtol.h"
33 #include "common/likely.h"
34 #include "common/valgrind.h"
35 #include "common/deleter.h"
36 #include "common/error_code.h"
37 #include "include/intarith.h"
38 #include "include/spinlock.h"
39 #include "include/scope_guard.h"
48 #define CEPH_BUFFER_ALLOC_UNIT 4096u
49 #define CEPH_BUFFER_APPEND_SIZE (CEPH_BUFFER_ALLOC_UNIT - sizeof(raw_combined))
51 // 256K is the maximum "small" object size in tcmalloc above which allocations come from
52 // the central heap. For now let's keep this below that threshold.
53 #define CEPH_BUFFER_ALLOC_UNIT_MAX std::size_t { 256*1024 }
// Debug logging helpers for buffer refcount tracing. When enabled,
// bdout/bendl bracket a std::cout statement and serialize it under a
// spinlock; when disabled they compile down to a dead `if (0)` block.
// NOTE(review): the two conflicting `#define bdout` lines show the
// enclosing preprocessor conditional was lost from this view; restored
// here with the assumed guard name BUFFER_DEBUG — confirm upstream.
#ifdef BUFFER_DEBUG
static ceph::spinlock debug_lock;
# define bdout { std::lock_guard<ceph::spinlock> lg(debug_lock); std::cout
# define bendl std::endl; }
#else
# define bdout if (0) { std::cout
# define bendl std::endl; }
#endif
64 static ceph::atomic
<unsigned> buffer_cached_crc
{ 0 };
65 static ceph::atomic
<unsigned> buffer_cached_crc_adjusted
{ 0 };
66 static ceph::atomic
<unsigned> buffer_missed_crc
{ 0 };
68 static bool buffer_track_crc
= get_env_bool("CEPH_BUFFER_TRACK");
70 void buffer::track_cached_crc(bool b
) {
73 int buffer::get_cached_crc() {
74 return buffer_cached_crc
;
76 int buffer::get_cached_crc_adjusted() {
77 return buffer_cached_crc_adjusted
;
80 int buffer::get_missed_crc() {
81 return buffer_missed_crc
;
85 * raw_combined is always placed within a single allocation along
86 * with the data buffer. the data goes at the beginning, and
87 * raw_combined at the end.
89 class buffer::raw_combined
: public buffer::raw
{
91 raw_combined(char *dataptr
, unsigned l
, int mempool
)
92 : raw(dataptr
, l
, mempool
) {
95 static ceph::unique_leakable_ptr
<buffer::raw
>
98 int mempool
= mempool::mempool_buffer_anon
)
100 // posix_memalign() requires a multiple of sizeof(void *)
101 align
= std::max
<unsigned>(align
, sizeof(void *));
102 size_t rawlen
= round_up_to(sizeof(buffer::raw_combined
),
103 alignof(buffer::raw_combined
));
104 size_t datalen
= round_up_to(len
, alignof(buffer::raw_combined
));
107 char *ptr
= (char *) valloc(rawlen
+ datalen
);
110 int r
= ::posix_memalign((void**)(void*)&ptr
, align
, rawlen
+ datalen
);
117 // actual data first, since it has presumably larger alignment restriction
118 // then put the raw_combined at the end
119 return ceph::unique_leakable_ptr
<buffer::raw
>(
120 new (ptr
+ datalen
) raw_combined(ptr
, len
, mempool
));
123 static void operator delete(void *ptr
) {
124 raw_combined
*raw
= (raw_combined
*)ptr
;
125 aligned_free((void *)raw
->data
);
129 class buffer::raw_malloc
: public buffer::raw
{
131 MEMPOOL_CLASS_HELPERS();
133 explicit raw_malloc(unsigned l
) : raw(l
) {
135 data
= (char *)malloc(len
);
141 bdout
<< "raw_malloc " << this << " alloc " << (void *)data
<< " " << l
<< bendl
;
143 raw_malloc(unsigned l
, char *b
) : raw(b
, l
) {
144 bdout
<< "raw_malloc " << this << " alloc " << (void *)data
<< " " << l
<< bendl
;
146 ~raw_malloc() override
{
148 bdout
<< "raw_malloc " << this << " free " << (void *)data
<< " " << bendl
;
153 class buffer::raw_posix_aligned
: public buffer::raw
{
155 MEMPOOL_CLASS_HELPERS();
157 raw_posix_aligned(unsigned l
, unsigned align
) : raw(l
) {
158 // posix_memalign() requires a multiple of sizeof(void *)
159 align
= std::max
<unsigned>(align
, sizeof(void *));
161 data
= (char *) valloc(len
);
163 int r
= ::posix_memalign((void**)(void*)&data
, align
, len
);
169 bdout
<< "raw_posix_aligned " << this << " alloc " << (void *)data
170 << " l=" << l
<< ", align=" << align
<< bendl
;
172 ~raw_posix_aligned() override
{
174 bdout
<< "raw_posix_aligned " << this << " free " << (void *)data
<< bendl
;
180 class buffer::raw_hack_aligned
: public buffer::raw
{
183 raw_hack_aligned(unsigned l
, unsigned align
) : raw(l
) {
184 realdata
= new char[len
+align
-1];
185 unsigned off
= ((uintptr_t)realdata
) & (align
-1);
187 data
= realdata
+ align
- off
;
190 //cout << "hack aligned " << (unsigned)data
191 //<< " in raw " << (unsigned)realdata
192 //<< " off " << off << std::endl;
193 ceph_assert(((uintptr_t)data
& (align
-1)) == 0);
195 ~raw_hack_aligned() {
202 * primitive buffer types
204 class buffer::raw_claimed_char
: public buffer::raw
{
206 MEMPOOL_CLASS_HELPERS();
208 explicit raw_claimed_char(unsigned l
, char *b
) : raw(b
, l
) {
209 bdout
<< "raw_claimed_char " << this << " alloc " << (void *)data
210 << " " << l
<< bendl
;
212 ~raw_claimed_char() override
{
213 bdout
<< "raw_claimed_char " << this << " free " << (void *)data
218 class buffer::raw_static
: public buffer::raw
{
220 MEMPOOL_CLASS_HELPERS();
222 raw_static(const char *d
, unsigned l
) : raw((char*)d
, l
) { }
223 ~raw_static() override
{}
226 class buffer::raw_claim_buffer
: public buffer::raw
{
229 raw_claim_buffer(const char *b
, unsigned l
, deleter d
)
230 : raw((char*)b
, l
), del(std::move(d
)) { }
231 ~raw_claim_buffer() override
{}
234 ceph::unique_leakable_ptr
<buffer::raw
> buffer::copy(const char *c
, unsigned len
) {
235 auto r
= buffer::create_aligned(len
, sizeof(size_t));
236 memcpy(r
->get_data(), c
, len
);
240 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create(unsigned len
) {
241 return buffer::create_aligned(len
, sizeof(size_t));
243 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create(unsigned len
, char c
) {
244 auto ret
= buffer::create_aligned(len
, sizeof(size_t));
245 memset(ret
->get_data(), c
, len
);
248 ceph::unique_leakable_ptr
<buffer::raw
>
249 buffer::create_in_mempool(unsigned len
, int mempool
) {
250 return buffer::create_aligned_in_mempool(len
, sizeof(size_t), mempool
);
252 ceph::unique_leakable_ptr
<buffer::raw
>
253 buffer::claim_char(unsigned len
, char *buf
) {
254 return ceph::unique_leakable_ptr
<buffer::raw
>(
255 new raw_claimed_char(len
, buf
));
257 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_malloc(unsigned len
) {
258 return ceph::unique_leakable_ptr
<buffer::raw
>(new raw_malloc(len
));
260 ceph::unique_leakable_ptr
<buffer::raw
>
261 buffer::claim_malloc(unsigned len
, char *buf
) {
262 return ceph::unique_leakable_ptr
<buffer::raw
>(new raw_malloc(len
, buf
));
264 ceph::unique_leakable_ptr
<buffer::raw
>
265 buffer::create_static(unsigned len
, char *buf
) {
266 return ceph::unique_leakable_ptr
<buffer::raw
>(new raw_static(buf
, len
));
268 ceph::unique_leakable_ptr
<buffer::raw
>
269 buffer::claim_buffer(unsigned len
, char *buf
, deleter del
) {
270 return ceph::unique_leakable_ptr
<buffer::raw
>(
271 new raw_claim_buffer(buf
, len
, std::move(del
)));
274 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_aligned_in_mempool(
275 unsigned len
, unsigned align
, int mempool
)
277 // If alignment is a page multiple, use a separate buffer::raw to
278 // avoid fragmenting the heap.
280 // Somewhat unexpectedly, I see consistently better performance
281 // from raw_combined than from raw even when the allocation size is
282 // a page multiple (but alignment is not).
284 // I also see better performance from a separate buffer::raw once the
286 if ((align
& ~CEPH_PAGE_MASK
) == 0 ||
287 len
>= CEPH_PAGE_SIZE
* 2) {
289 return ceph::unique_leakable_ptr
<buffer::raw
>(new raw_posix_aligned(len
, align
));
291 return ceph::unique_leakable_ptr
<buffer::raw
>(new raw_hack_aligned(len
, align
));
294 return raw_combined::create(len
, align
, mempool
);
296 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_aligned(
297 unsigned len
, unsigned align
) {
298 return create_aligned_in_mempool(len
, align
,
299 mempool::mempool_buffer_anon
);
302 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_page_aligned(unsigned len
) {
303 return create_aligned(len
, CEPH_PAGE_SIZE
);
305 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_small_page_aligned(unsigned len
) {
306 if (len
< CEPH_PAGE_SIZE
) {
307 return create_aligned(len
, CEPH_BUFFER_ALLOC_UNIT
);
309 return create_aligned(len
, CEPH_PAGE_SIZE
);
313 buffer::ptr::ptr(ceph::unique_leakable_ptr
<raw
> r
)
316 _len(_raw
->get_len())
318 _raw
->nref
.store(1, std::memory_order_release
);
319 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
321 buffer::ptr::ptr(unsigned l
) : _off(0), _len(l
)
323 _raw
= buffer::create(l
).release();
324 _raw
->nref
.store(1, std::memory_order_release
);
325 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
327 buffer::ptr::ptr(const char *d
, unsigned l
) : _off(0), _len(l
) // ditto.
329 _raw
= buffer::copy(d
, l
).release();
330 _raw
->nref
.store(1, std::memory_order_release
);
331 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
333 buffer::ptr::ptr(const ptr
& p
) : _raw(p
._raw
), _off(p
._off
), _len(p
._len
)
337 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
340 buffer::ptr::ptr(ptr
&& p
) noexcept
: _raw(p
._raw
), _off(p
._off
), _len(p
._len
)
345 buffer::ptr::ptr(const ptr
& p
, unsigned o
, unsigned l
)
346 : _raw(p
._raw
), _off(p
._off
+ o
), _len(l
)
348 ceph_assert(o
+l
<= p
._len
);
351 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
353 buffer::ptr::ptr(const ptr
& p
, ceph::unique_leakable_ptr
<raw
> r
)
358 _raw
->nref
.store(1, std::memory_order_release
);
359 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
361 buffer::ptr
& buffer::ptr::operator= (const ptr
& p
)
365 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
367 buffer::raw
*raw
= p
._raw
;
378 buffer::ptr
& buffer::ptr::operator= (ptr
&& p
) noexcept
381 buffer::raw
*raw
= p
._raw
;
394 void buffer::ptr::swap(ptr
& other
) noexcept
407 void buffer::ptr::release()
409 // BE CAREFUL: this is called also for hypercombined ptr_node. After
410 // freeing underlying raw, `*this` can become inaccessible as well!
412 // cache the pointer to avoid unncecessary reloads and repeated
414 if (auto* const cached_raw
= std::exchange(_raw
, nullptr);
416 bdout
<< "ptr " << this << " release " << cached_raw
<< bendl
;
417 // optimize the common case where a particular `buffer::raw` has
418 // only a single reference. Altogether with initializing `nref` of
419 // freshly fabricated one with `1` through the std::atomic's ctor
420 // (which doesn't impose a memory barrier on the strongly-ordered
421 // x86), this allows to avoid all atomical operations in such case.
422 const bool last_one
= \
423 (1 == cached_raw
->nref
.load(std::memory_order_acquire
));
424 if (likely(last_one
) || --cached_raw
->nref
== 0) {
425 bdout
<< "deleting raw " << static_cast<void*>(cached_raw
)
426 << " len " << cached_raw
->get_len() << bendl
;
427 ANNOTATE_HAPPENS_AFTER(&cached_raw
->nref
);
428 ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&cached_raw
->nref
);
429 delete cached_raw
; // dealloc old (if any)
431 ANNOTATE_HAPPENS_BEFORE(&cached_raw
->nref
);
436 int buffer::ptr::get_mempool() const {
438 return _raw
->mempool
;
440 return mempool::mempool_buffer_anon
;
443 void buffer::ptr::reassign_to_mempool(int pool
) {
445 _raw
->reassign_to_mempool(pool
);
448 void buffer::ptr::try_assign_to_mempool(int pool
) {
450 _raw
->try_assign_to_mempool(pool
);
454 const char *buffer::ptr::c_str() const {
456 return _raw
->get_data() + _off
;
458 char *buffer::ptr::c_str() {
460 return _raw
->get_data() + _off
;
462 const char *buffer::ptr::end_c_str() const {
464 return _raw
->get_data() + _off
+ _len
;
466 char *buffer::ptr::end_c_str() {
468 return _raw
->get_data() + _off
+ _len
;
471 unsigned buffer::ptr::unused_tail_length() const
473 return _raw
? _raw
->get_len() - (_off
+ _len
) : 0;
475 const char& buffer::ptr::operator[](unsigned n
) const
478 ceph_assert(n
< _len
);
479 return _raw
->get_data()[_off
+ n
];
481 char& buffer::ptr::operator[](unsigned n
)
484 ceph_assert(n
< _len
);
485 return _raw
->get_data()[_off
+ n
];
488 const char *buffer::ptr::raw_c_str() const { ceph_assert(_raw
); return _raw
->get_data(); }
489 unsigned buffer::ptr::raw_length() const { ceph_assert(_raw
); return _raw
->get_len(); }
490 int buffer::ptr::raw_nref() const { ceph_assert(_raw
); return _raw
->nref
; }
492 void buffer::ptr::copy_out(unsigned o
, unsigned l
, char *dest
) const {
495 throw end_of_buffer();
496 char* src
= _raw
->get_data() + _off
+ o
;
497 maybe_inline_memcpy(dest
, src
, l
, 8);
500 unsigned buffer::ptr::wasted() const
502 return _raw
->get_len() - _len
;
505 int buffer::ptr::cmp(const ptr
& o
) const
507 int l
= _len
< o
._len
? _len
: o
._len
;
509 int r
= memcmp(c_str(), o
.c_str(), l
);
520 bool buffer::ptr::is_zero() const
522 return mem_is_zero(c_str(), _len
);
525 unsigned buffer::ptr::append(char c
)
528 ceph_assert(1 <= unused_tail_length());
529 char* ptr
= _raw
->get_data() + _off
+ _len
;
535 unsigned buffer::ptr::append(const char *p
, unsigned l
)
538 ceph_assert(l
<= unused_tail_length());
539 char* c
= _raw
->get_data() + _off
+ _len
;
540 maybe_inline_memcpy(c
, p
, l
, 32);
545 unsigned buffer::ptr::append_zeros(unsigned l
)
548 ceph_assert(l
<= unused_tail_length());
549 char* c
= _raw
->get_data() + _off
+ _len
;
550 // FIPS zeroization audit 20191115: this memset is not security related.
556 void buffer::ptr::copy_in(unsigned o
, unsigned l
, const char *src
, bool crc_reset
)
559 ceph_assert(o
<= _len
);
560 ceph_assert(o
+l
<= _len
);
561 char* dest
= _raw
->get_data() + _off
+ o
;
563 _raw
->invalidate_crc();
564 maybe_inline_memcpy(dest
, src
, l
, 64);
567 void buffer::ptr::zero(bool crc_reset
)
570 _raw
->invalidate_crc();
571 // FIPS zeroization audit 20191115: this memset is not security related.
572 memset(c_str(), 0, _len
);
575 void buffer::ptr::zero(unsigned o
, unsigned l
, bool crc_reset
)
577 ceph_assert(o
+l
<= _len
);
579 _raw
->invalidate_crc();
580 // FIPS zeroization audit 20191115: this memset is not security related.
581 memset(c_str()+o
, 0, l
);
585 buffer::ptr::iterator_impl
<B
>& buffer::ptr::iterator_impl
<B
>::operator +=(size_t len
) {
588 throw end_of_buffer();
592 template buffer::ptr::iterator_impl
<false>&
593 buffer::ptr::iterator_impl
<false>::operator +=(size_t len
);
594 template buffer::ptr::iterator_impl
<true>&
595 buffer::ptr::iterator_impl
<true>::operator +=(size_t len
);
597 // -- buffer::list::iterator --
599 buffer::list::iterator operator=(const buffer::list::iterator& other)
601 if (this != &other) {
611 template<bool is_const
>
612 buffer::list::iterator_impl
<is_const
>::iterator_impl(bl_t
*l
, unsigned o
)
613 : bl(l
), ls(&bl
->_buffers
), p(ls
->begin()), off(0), p_off(0)
618 template<bool is_const
>
619 buffer::list::iterator_impl
<is_const
>::iterator_impl(const buffer::list::iterator
& i
)
620 : iterator_impl
<is_const
>(i
.bl
, i
.off
, i
.p
, i
.p_off
) {}
622 template<bool is_const
>
623 auto buffer::list::iterator_impl
<is_const
>::operator +=(unsigned o
)
626 //cout << this << " advance " << o << " from " << off
627 // << " (p_off " << p_off << " in " << p->length() << ")"
631 while (p
!= ls
->end()) {
632 if (p_off
>= p
->length()) {
634 p_off
-= p
->length();
637 // somewhere in this buffer!
641 if (p
== ls
->end() && p_off
) {
642 throw end_of_buffer();
648 template<bool is_const
>
649 void buffer::list::iterator_impl
<is_const
>::seek(unsigned o
)
656 template<bool is_const
>
657 char buffer::list::iterator_impl
<is_const
>::operator*() const
660 throw end_of_buffer();
664 template<bool is_const
>
665 buffer::list::iterator_impl
<is_const
>&
666 buffer::list::iterator_impl
<is_const
>::operator++()
669 throw end_of_buffer();
674 template<bool is_const
>
675 buffer::ptr
buffer::list::iterator_impl
<is_const
>::get_current_ptr() const
678 throw end_of_buffer();
679 return ptr(*p
, p_off
, p
->length() - p_off
);
682 template<bool is_const
>
683 bool buffer::list::iterator_impl
<is_const
>::is_pointing_same_raw(
684 const ptr
& other
) const
687 throw end_of_buffer();
688 return p
->_raw
== other
._raw
;
692 // note that these all _append_ to dest!
693 template<bool is_const
>
694 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, char *dest
)
696 if (p
== ls
->end()) seek(off
);
699 throw end_of_buffer();
701 unsigned howmuch
= p
->length() - p_off
;
702 if (len
< howmuch
) howmuch
= len
;
703 p
->copy_out(p_off
, howmuch
, dest
);
711 template<bool is_const
>
712 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, ptr
&dest
)
714 copy_deep(len
, dest
);
717 template<bool is_const
>
718 void buffer::list::iterator_impl
<is_const
>::copy_deep(unsigned len
, ptr
&dest
)
724 throw end_of_buffer();
726 copy(len
, dest
.c_str());
728 template<bool is_const
>
729 void buffer::list::iterator_impl
<is_const
>::copy_shallow(unsigned len
,
736 throw end_of_buffer();
737 unsigned howmuch
= p
->length() - p_off
;
740 copy(len
, dest
.c_str());
742 dest
= ptr(*p
, p_off
, len
);
747 template<bool is_const
>
748 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, list
&dest
)
754 throw end_of_buffer();
756 unsigned howmuch
= p
->length() - p_off
;
759 dest
.append(*p
, p_off
, howmuch
);
766 template<bool is_const
>
767 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, std::string
&dest
)
773 throw end_of_buffer();
775 unsigned howmuch
= p
->length() - p_off
;
776 const char *c_str
= p
->c_str();
779 dest
.append(c_str
+ p_off
, howmuch
);
786 template<bool is_const
>
787 void buffer::list::iterator_impl
<is_const
>::copy_all(list
&dest
)
795 unsigned howmuch
= p
->length() - p_off
;
796 const char *c_str
= p
->c_str();
797 dest
.append(c_str
+ p_off
, howmuch
);
803 template<bool is_const
>
804 size_t buffer::list::iterator_impl
<is_const
>::get_ptr_and_advance(
805 size_t want
, const char **data
)
807 if (p
== ls
->end()) {
809 if (p
== ls
->end()) {
813 *data
= p
->c_str() + p_off
;
814 size_t l
= std::min
<size_t>(p
->length() - p_off
, want
);
816 if (p_off
== p
->length()) {
824 template<bool is_const
>
825 uint32_t buffer::list::iterator_impl
<is_const
>::crc32c(
826 size_t length
, uint32_t crc
)
828 length
= std::min
<size_t>(length
, get_remaining());
831 size_t l
= get_ptr_and_advance(length
, &p
);
832 crc
= ceph_crc32c(crc
, (unsigned char*)p
, l
);
838 // explicitly instantiate only the iterator types we need, so we can hide the
839 // details in this compilation unit without introducing unnecessary link time
841 template class buffer::list::iterator_impl
<true>;
842 template class buffer::list::iterator_impl
<false>;
844 buffer::list::iterator::iterator(bl_t
*l
, unsigned o
)
845 : iterator_impl(l
, o
)
848 buffer::list::iterator::iterator(bl_t
*l
, unsigned o
, list_iter_t ip
, unsigned po
)
849 : iterator_impl(l
, o
, ip
, po
)
853 void buffer::list::iterator::copy_in(unsigned len
, const char *src
, bool crc_reset
)
860 throw end_of_buffer();
862 unsigned howmuch
= p
->length() - p_off
;
865 p
->copy_in(p_off
, howmuch
, src
, crc_reset
);
873 void buffer::list::iterator::copy_in(unsigned len
, const list
& otherl
)
878 for (const auto& node
: otherl
._buffers
) {
879 unsigned l
= node
.length();
882 copy_in(l
, node
.c_str());
889 // -- buffer::list --
891 void buffer::list::swap(list
& other
) noexcept
893 std::swap(_len
, other
._len
);
894 std::swap(_num
, other
._num
);
895 std::swap(_carriage
, other
._carriage
);
896 _buffers
.swap(other
._buffers
);
899 bool buffer::list::contents_equal(const ceph::buffer::list
& other
) const
901 if (length() != other
.length())
904 // buffer-wise comparison
906 auto a
= std::cbegin(_buffers
);
907 auto b
= std::cbegin(other
._buffers
);
908 unsigned aoff
= 0, boff
= 0;
909 while (a
!= std::cend(_buffers
)) {
910 unsigned len
= a
->length() - aoff
;
911 if (len
> b
->length() - boff
)
912 len
= b
->length() - boff
;
913 if (memcmp(a
->c_str() + aoff
, b
->c_str() + boff
, len
) != 0)
916 if (aoff
== a
->length()) {
921 if (boff
== b
->length()) {
929 // byte-wise comparison
931 bufferlist::const_iterator me
= begin();
932 bufferlist::const_iterator him
= other
.begin();
943 bool buffer::list::contents_equal(const void* const other
,
946 if (this->length() != length
) {
950 const auto* other_buf
= reinterpret_cast<const char*>(other
);
951 for (const auto& bp
: buffers()) {
952 assert(bp
.length() <= length
);
953 if (std::memcmp(bp
.c_str(), other_buf
, bp
.length()) != 0) {
956 length
-= bp
.length();
957 other_buf
+= bp
.length();
964 bool buffer::list::is_provided_buffer(const char* const dst
) const
966 if (_buffers
.empty()) {
969 return (is_contiguous() && (_buffers
.front().c_str() == dst
));
972 bool buffer::list::is_aligned(const unsigned align
) const
974 for (const auto& node
: _buffers
) {
975 if (!node
.is_aligned(align
)) {
982 bool buffer::list::is_n_align_sized(const unsigned align
) const
984 for (const auto& node
: _buffers
) {
985 if (!node
.is_n_align_sized(align
)) {
992 bool buffer::list::is_aligned_size_and_memory(
993 const unsigned align_size
,
994 const unsigned align_memory
) const
996 for (const auto& node
: _buffers
) {
997 if (!node
.is_aligned(align_memory
) || !node
.is_n_align_sized(align_size
)) {
1004 bool buffer::list::is_zero() const {
1005 for (const auto& node
: _buffers
) {
1006 if (!node
.is_zero()) {
1013 void buffer::list::zero()
1015 for (auto& node
: _buffers
) {
1020 void buffer::list::zero(const unsigned o
, const unsigned l
)
1022 ceph_assert(o
+l
<= _len
);
1024 for (auto& node
: _buffers
) {
1025 if (p
+ node
.length() > o
) {
1026 if (p
>= o
&& p
+node
.length() <= o
+l
) {
1027 // 'o'------------- l -----------|
1028 // 'p'-- node.length() --|
1030 } else if (p
>= o
) {
1031 // 'o'------------- l -----------|
1032 // 'p'------- node.length() -------|
1033 node
.zero(0, o
+l
-p
);
1034 } else if (p
+ node
.length() <= o
+l
) {
1035 // 'o'------------- l -----------|
1036 // 'p'------- node.length() -------|
1037 node
.zero(o
-p
, node
.length()-(o
-p
));
1039 // 'o'----------- l -----------|
1040 // 'p'---------- node.length() ----------|
1051 bool buffer::list::is_contiguous() const
1056 bool buffer::list::is_n_page_sized() const
1058 return is_n_align_sized(CEPH_PAGE_SIZE
);
1061 bool buffer::list::is_page_aligned() const
1063 return is_aligned(CEPH_PAGE_SIZE
);
1066 int buffer::list::get_mempool() const
1068 if (_buffers
.empty()) {
1069 return mempool::mempool_buffer_anon
;
1071 return _buffers
.back().get_mempool();
1074 void buffer::list::reassign_to_mempool(int pool
)
1076 for (auto& p
: _buffers
) {
1077 p
._raw
->reassign_to_mempool(pool
);
1081 void buffer::list::try_assign_to_mempool(int pool
)
1083 for (auto& p
: _buffers
) {
1084 p
._raw
->try_assign_to_mempool(pool
);
1088 uint64_t buffer::list::get_wasted_space() const
1091 return _buffers
.back().wasted();
1093 std::vector
<const raw
*> raw_vec
;
1094 raw_vec
.reserve(_num
);
1095 for (const auto& p
: _buffers
)
1096 raw_vec
.push_back(p
._raw
);
1097 std::sort(raw_vec
.begin(), raw_vec
.end());
1100 const raw
*last
= nullptr;
1101 for (const auto r
: raw_vec
) {
1105 total
+= r
->get_len();
1107 // If multiple buffers are sharing the same raw buffer and they overlap
1108 // with each other, the wasted space will be underestimated.
1109 if (total
<= length())
1111 return total
- length();
1114 void buffer::list::rebuild()
1117 _carriage
= &always_empty_bptr
;
1118 _buffers
.clear_and_dispose();
1122 if ((_len
& ~CEPH_PAGE_MASK
) == 0)
1123 rebuild(ptr_node::create(buffer::create_page_aligned(_len
)));
1125 rebuild(ptr_node::create(buffer::create(_len
)));
1128 void buffer::list::rebuild(
1129 std::unique_ptr
<buffer::ptr_node
, buffer::ptr_node::disposer
> nb
)
1132 int mempool
= _buffers
.front().get_mempool();
1133 nb
->reassign_to_mempool(mempool
);
1134 for (auto& node
: _buffers
) {
1135 nb
->copy_in(pos
, node
.length(), node
.c_str(), false);
1136 pos
+= node
.length();
1138 _buffers
.clear_and_dispose();
1139 if (likely(nb
->length())) {
1140 _carriage
= nb
.get();
1141 _buffers
.push_back(*nb
.release());
1144 _carriage
= &always_empty_bptr
;
1150 bool buffer::list::rebuild_aligned(unsigned align
)
1152 return rebuild_aligned_size_and_memory(align
, align
);
1155 bool buffer::list::rebuild_aligned_size_and_memory(unsigned align_size
,
1156 unsigned align_memory
,
1157 unsigned max_buffers
)
1159 bool had_to_rebuild
= false;
1161 if (max_buffers
&& _num
> max_buffers
&& _len
> (max_buffers
* align_size
)) {
1162 align_size
= round_up_to(round_up_to(_len
, max_buffers
) / max_buffers
, align_size
);
1164 auto p
= std::begin(_buffers
);
1165 auto p_prev
= _buffers
.before_begin();
1166 while (p
!= std::end(_buffers
)) {
1167 // keep anything that's already align and sized aligned
1168 if (p
->is_aligned(align_memory
) && p
->is_n_align_sized(align_size
)) {
1169 /*cout << " segment " << (void*)p->c_str()
1170 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1171 << " length " << p->length()
1172 << " " << (p->length() & (align - 1)) << " ok" << std::endl;
1178 // consolidate unaligned items, until we get something that is sized+aligned
1180 unsigned offset
= 0;
1182 /*cout << " segment " << (void*)p->c_str()
1183 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1184 << " length " << p->length() << " " << (p->length() & (align - 1))
1185 << " overall offset " << offset << " " << (offset & (align - 1))
1186 << " not ok" << std::endl;
1188 offset
+= p
->length();
1189 // no need to reallocate, relinking is enough thankfully to bi::list.
1190 auto p_after
= _buffers
.erase_after(p_prev
);
1192 unaligned
._buffers
.push_back(*p
);
1193 unaligned
._len
+= p
->length();
1194 unaligned
._num
+= 1;
1196 } while (p
!= std::end(_buffers
) &&
1197 (!p
->is_aligned(align_memory
) ||
1198 !p
->is_n_align_sized(align_size
) ||
1199 (offset
% align_size
)));
1200 if (!(unaligned
.is_contiguous() && unaligned
._buffers
.front().is_aligned(align_memory
))) {
1203 buffer::create_aligned(unaligned
._len
, align_memory
)));
1204 had_to_rebuild
= true;
1206 if (unaligned
.get_num_buffers()) {
1207 _buffers
.insert_after(p_prev
, *ptr_node::create(unaligned
._buffers
.front()).release());
1210 // a bufferlist containing only 0-length bptrs is rebuilt as empty
1214 return had_to_rebuild
;
1217 bool buffer::list::rebuild_page_aligned()
1219 return rebuild_aligned(CEPH_PAGE_SIZE
);
1222 void buffer::list::reserve(size_t prealloc
)
1224 if (get_append_buffer_unused_tail_length() < prealloc
) {
1225 auto ptr
= ptr_node::create(buffer::create_small_page_aligned(prealloc
));
1226 ptr
->set_length(0); // unused, so far.
1227 _carriage
= ptr
.get();
1228 _buffers
.push_back(*ptr
.release());
1233 void buffer::list::claim_append(list
& bl
)
1236 assert(_len
+ bl
._len
>= _len
);
1237 // steal the other guy's buffers
1240 _buffers
.splice_back(bl
._buffers
);
1244 void buffer::list::append(char c
)
1246 // put what we can into the existing append_buffer.
1247 unsigned gap
= get_append_buffer_unused_tail_length();
1249 // make a new buffer!
1250 auto buf
= ptr_node::create(
1251 raw_combined::create(CEPH_BUFFER_APPEND_SIZE
, 0, get_mempool()));
1252 buf
->set_length(0); // unused, so far.
1253 _carriage
= buf
.get();
1254 _buffers
.push_back(*buf
.release());
1256 } else if (unlikely(_carriage
!= &_buffers
.back())) {
1257 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1258 _carriage
= bptr
.get();
1259 _buffers
.push_back(*bptr
.release());
1262 _carriage
->append(c
);
1266 buffer::ptr_node
buffer::list::always_empty_bptr
;
1268 buffer::ptr_node
& buffer::list::refill_append_space(const unsigned len
)
1270 // make a new buffer. fill out a complete page, factoring in the
1271 // raw_combined overhead.
1272 size_t need
= round_up_to(len
, sizeof(size_t)) + sizeof(raw_combined
);
1273 size_t alen
= round_up_to(need
, CEPH_BUFFER_ALLOC_UNIT
);
1274 if (_carriage
== &_buffers
.back()) {
1275 size_t nlen
= round_up_to(_carriage
->raw_length(), CEPH_BUFFER_ALLOC_UNIT
) * 2;
1276 nlen
= std::min(nlen
, CEPH_BUFFER_ALLOC_UNIT_MAX
);
1277 alen
= std::max(alen
, nlen
);
1279 alen
-= sizeof(raw_combined
);
1282 ptr_node::create(raw_combined::create(alen
, 0, get_mempool()));
1283 new_back
->set_length(0); // unused, so far.
1284 _carriage
= new_back
.get();
1285 _buffers
.push_back(*new_back
.release());
1287 return _buffers
.back();
1290 void buffer::list::append(const char *data
, unsigned len
)
1294 const unsigned free_in_last
= get_append_buffer_unused_tail_length();
1295 const unsigned first_round
= std::min(len
, free_in_last
);
1297 // _buffers and carriage can desynchronize when 1) a new ptr
1298 // we don't own has been added into the _buffers 2) _buffers
1299 // has been emptied as as a result of std::move or stolen by
1301 if (unlikely(_carriage
!= &_buffers
.back())) {
1302 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1303 _carriage
= bptr
.get();
1304 _buffers
.push_back(*bptr
.release());
1307 _carriage
->append(data
, first_round
);
1310 const unsigned second_round
= len
- first_round
;
1312 auto& new_back
= refill_append_space(second_round
);
1313 new_back
.append(data
+ first_round
, second_round
);
1317 buffer::list::reserve_t
buffer::list::obtain_contiguous_space(
1320 // note: if len < the normal append_buffer size it *might*
1321 // be better to allocate a normal-sized append_buffer and
1322 // use part of it. however, that optimizes for the case of
1323 // old-style types including new-style types. and in most
1324 // such cases, this won't be the very first thing encoded to
1325 // the list, so append_buffer will already be allocated.
1326 // OTOH if everything is new-style, we *should* allocate
1327 // only what we need and conserve memory.
1328 if (unlikely(get_append_buffer_unused_tail_length() < len
)) {
1330 buffer::ptr_node::create(buffer::create(len
)).release();
1331 new_back
->set_length(0); // unused, so far.
1332 _buffers
.push_back(*new_back
);
1334 _carriage
= new_back
;
1335 return { new_back
->c_str(), &new_back
->_len
, &_len
};
1337 ceph_assert(!_buffers
.empty());
1338 if (unlikely(_carriage
!= &_buffers
.back())) {
1339 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1340 _carriage
= bptr
.get();
1341 _buffers
.push_back(*bptr
.release());
1344 return { _carriage
->end_c_str(), &_carriage
->_len
, &_len
};
1348 void buffer::list::append(const ptr
& bp
)
1353 void buffer::list::append(ptr
&& bp
)
1355 push_back(std::move(bp
));
1358 void buffer::list::append(const ptr
& bp
, unsigned off
, unsigned len
)
1360 ceph_assert(len
+off
<= bp
.length());
1361 if (!_buffers
.empty()) {
1362 ptr
&l
= _buffers
.back();
1363 if (l
._raw
== bp
._raw
&& l
.end() == bp
.start() + off
) {
1364 // yay contiguous with tail bp!
1365 l
.set_length(l
.length()+len
);
1370 // add new item to list
1371 _buffers
.push_back(*ptr_node::create(bp
, off
, len
).release());
1376 void buffer::list::append(const list
& bl
)
1380 for (const auto& node
: bl
._buffers
) {
1381 _buffers
.push_back(*ptr_node::create(node
).release());
1385 void buffer::list::append(std::istream
& in
)
1390 append(s
.c_str(), s
.length());
1396 buffer::list::contiguous_filler
buffer::list::append_hole(const unsigned len
)
1400 if (unlikely(get_append_buffer_unused_tail_length() < len
)) {
1401 // make a new append_buffer. fill out a complete page, factoring in
1402 // the raw_combined overhead.
1403 auto& new_back
= refill_append_space(len
);
1404 new_back
.set_length(len
);
1405 return { new_back
.c_str() };
1406 } else if (unlikely(_carriage
!= &_buffers
.back())) {
1407 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1408 _carriage
= bptr
.get();
1409 _buffers
.push_back(*bptr
.release());
1412 _carriage
->set_length(_carriage
->length() + len
);
1413 return { _carriage
->end_c_str() - len
};
1416 void buffer::list::prepend_zero(unsigned len
)
1418 auto bp
= ptr_node::create(len
);
1422 _buffers
.push_front(*bp
.release());
1425 void buffer::list::append_zero(unsigned len
)
1429 const unsigned free_in_last
= get_append_buffer_unused_tail_length();
1430 const unsigned first_round
= std::min(len
, free_in_last
);
1432 if (unlikely(_carriage
!= &_buffers
.back())) {
1433 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1434 _carriage
= bptr
.get();
1435 _buffers
.push_back(*bptr
.release());
1438 _carriage
->append_zeros(first_round
);
1441 const unsigned second_round
= len
- first_round
;
1443 auto& new_back
= refill_append_space(second_round
);
1444 new_back
.set_length(second_round
);
1445 new_back
.zero(false);
1453 const char& buffer::list::operator[](unsigned n
) const
1456 throw end_of_buffer();
1458 for (const auto& node
: _buffers
) {
1459 if (n
>= node
.length()) {
1469 * return a contiguous ptr to whole bufferlist contents.
1471 char *buffer::list::c_str()
1473 if (const auto len
= length(); len
== 0) {
1474 return nullptr; // no non-empty buffers
1475 } else if (len
!= _buffers
.front().length()) {
1478 // there are two *main* scenarios that hit this branch:
1479 // 1. bufferlist with single, non-empty buffer;
1480 // 2. bufferlist with single, non-empty buffer followed by
1481 // empty buffer. splice() tries to not waste our appendable
1482 // space; to carry it an empty bptr is added at the end.
1483 // we account for these and don't rebuild unnecessarily
1485 return _buffers
.front().c_str();
1488 string
buffer::list::to_str() const {
1490 s
.reserve(length());
1491 for (const auto& node
: _buffers
) {
1492 if (node
.length()) {
1493 s
.append(node
.c_str(), node
.length());
1499 void buffer::list::substr_of(const list
& other
, unsigned off
, unsigned len
)
1501 if (off
+ len
> other
.length())
1502 throw end_of_buffer();
1507 auto curbuf
= std::cbegin(other
._buffers
);
1508 while (off
> 0 && off
>= curbuf
->length()) {
1510 //cout << "skipping over " << *curbuf << std::endl;
1511 off
-= (*curbuf
).length();
1514 ceph_assert(len
== 0 || curbuf
!= std::cend(other
._buffers
));
1518 if (off
+ len
< curbuf
->length()) {
1519 //cout << "copying partial of " << *curbuf << std::endl;
1520 _buffers
.push_back(*ptr_node::create(*curbuf
, off
, len
).release());
1527 //cout << "copying end (all?) of " << *curbuf << std::endl;
1528 unsigned howmuch
= curbuf
->length() - off
;
1529 _buffers
.push_back(*ptr_node::create(*curbuf
, off
, howmuch
).release());
1539 void buffer::list::splice(unsigned off
, unsigned len
, list
*claim_by
/*, bufferlist& replace_with */)
1544 if (off
>= length())
1545 throw end_of_buffer();
1547 ceph_assert(len
> 0);
1548 //cout << "splice off " << off << " len " << len << " ... mylen = " << length() << std::endl;
1551 auto curbuf
= std::begin(_buffers
);
1552 auto curbuf_prev
= _buffers
.before_begin();
1554 ceph_assert(curbuf
!= std::end(_buffers
));
1555 if (off
>= (*curbuf
).length()) {
1557 //cout << "off = " << off << " skipping over " << *curbuf << std::endl;
1558 off
-= (*curbuf
).length();
1559 curbuf_prev
= curbuf
++;
1561 // somewhere in this buffer!
1562 //cout << "off = " << off << " somewhere in " << *curbuf << std::endl;
1568 // add a reference to the front bit, insert it before curbuf (which
1570 //cout << "keeping front " << off << " of " << *curbuf << std::endl;
1571 _buffers
.insert_after(curbuf_prev
,
1572 *ptr_node::create(*curbuf
, 0, off
).release());
1579 // partial or the last (appendable) one?
1580 if (const auto to_drop
= off
+ len
; to_drop
< curbuf
->length()) {
1581 //cout << "keeping end of " << *curbuf << ", losing first " << off+len << std::endl;
1583 claim_by
->append(*curbuf
, off
, len
);
1584 curbuf
->set_offset(to_drop
+ curbuf
->offset()); // ignore beginning big
1585 curbuf
->set_length(curbuf
->length() - to_drop
);
1587 //cout << " now " << *curbuf << std::endl;
1591 // hose though the end
1592 unsigned howmuch
= curbuf
->length() - off
;
1593 //cout << "discarding " << howmuch << " of " << *curbuf << std::endl;
1595 claim_by
->append(*curbuf
, off
, howmuch
);
1596 _len
-= curbuf
->length();
1597 if (curbuf
== _carriage
) {
1598 // no need to reallocate, shrinking and relinking is enough.
1599 curbuf
= _buffers
.erase_after(curbuf_prev
);
1600 _carriage
->set_offset(_carriage
->offset() + _carriage
->length());
1601 _carriage
->set_length(0);
1602 _buffers
.push_back(*_carriage
);
1604 curbuf
= _buffers
.erase_after_and_dispose(curbuf_prev
);
1611 // splice in *replace (implement me later?)
1614 void buffer::list::write(int off
, int len
, std::ostream
& out
) const
1617 s
.substr_of(*this, off
, len
);
1618 for (const auto& node
: s
._buffers
) {
1619 if (node
.length()) {
1620 out
.write(node
.c_str(), node
.length());
1625 void buffer::list::encode_base64(buffer::list
& o
)
1627 bufferptr
bp(length() * 4 / 3 + 3);
1628 int l
= ceph_armor(bp
.c_str(), bp
.c_str() + bp
.length(), c_str(), c_str() + length());
1630 o
.push_back(std::move(bp
));
1633 void buffer::list::decode_base64(buffer::list
& e
)
1635 bufferptr
bp(4 + ((e
.length() * 3) / 4));
1636 int l
= ceph_unarmor(bp
.c_str(), bp
.c_str() + bp
.length(), e
.c_str(), e
.c_str() + e
.length());
1638 std::ostringstream oss
;
1639 oss
<< "decode_base64: decoding failed:\n";
1641 throw buffer::malformed_input(oss
.str().c_str());
1643 ceph_assert(l
<= (int)bp
.length());
1645 push_back(std::move(bp
));
1648 ssize_t
buffer::list::pread_file(const char *fn
, uint64_t off
, uint64_t len
, std::string
*error
)
1650 int fd
= TEMP_FAILURE_RETRY(::open(fn
, O_RDONLY
|O_CLOEXEC
|O_BINARY
));
1653 std::ostringstream oss
;
1654 oss
<< "can't open " << fn
<< ": " << cpp_strerror(err
);
1660 // FIPS zeroization audit 20191115: this memset is not security related.
1661 memset(&st
, 0, sizeof(st
));
1662 if (::fstat(fd
, &st
) < 0) {
1664 std::ostringstream oss
;
1665 oss
<< "bufferlist::read_file(" << fn
<< "): stat error: "
1666 << cpp_strerror(err
);
1668 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1672 if (off
> (uint64_t)st
.st_size
) {
1673 std::ostringstream oss
;
1674 oss
<< "bufferlist::read_file(" << fn
<< "): read error: size < offset";
1676 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1680 if (len
> st
.st_size
- off
) {
1681 len
= st
.st_size
- off
;
1683 ssize_t ret
= lseek64(fd
, off
, SEEK_SET
);
1684 if (ret
!= (ssize_t
)off
) {
1688 ret
= read_fd(fd
, len
);
1690 std::ostringstream oss
;
1691 oss
<< "bufferlist::read_file(" << fn
<< "): read error:"
1692 << cpp_strerror(ret
);
1694 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1696 } else if (ret
!= (ssize_t
)len
) {
1698 // Perhaps the file changed between stat() and read()?
1699 std::ostringstream oss
;
1700 oss
<< "bufferlist::read_file(" << fn
<< "): warning: got premature EOF.";
1702 // not actually an error, but weird
1704 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1708 int buffer::list::read_file(const char *fn
, std::string
*error
)
1710 int fd
= TEMP_FAILURE_RETRY(::open(fn
, O_RDONLY
|O_CLOEXEC
|O_BINARY
));
1713 std::ostringstream oss
;
1714 oss
<< "can't open " << fn
<< ": " << cpp_strerror(err
);
1720 // FIPS zeroization audit 20191115: this memset is not security related.
1721 memset(&st
, 0, sizeof(st
));
1722 if (::fstat(fd
, &st
) < 0) {
1724 std::ostringstream oss
;
1725 oss
<< "bufferlist::read_file(" << fn
<< "): stat error: "
1726 << cpp_strerror(err
);
1728 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1732 ssize_t ret
= read_fd(fd
, st
.st_size
);
1734 std::ostringstream oss
;
1735 oss
<< "bufferlist::read_file(" << fn
<< "): read error:"
1736 << cpp_strerror(ret
);
1738 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1741 else if (ret
!= st
.st_size
) {
1743 // Perhaps the file changed between stat() and read()?
1744 std::ostringstream oss
;
1745 oss
<< "bufferlist::read_file(" << fn
<< "): warning: got premature EOF.";
1747 // not actually an error, but weird
1749 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1753 ssize_t
buffer::list::read_fd(int fd
, size_t len
)
1755 auto bp
= ptr_node::create(buffer::create(len
));
1756 ssize_t ret
= safe_read(fd
, (void*)bp
->c_str(), len
);
1758 bp
->set_length(ret
);
1759 push_back(std::move(bp
));
1764 ssize_t
buffer::list::recv_fd(int fd
, size_t len
)
1766 auto bp
= ptr_node::create(buffer::create(len
));
1767 ssize_t ret
= safe_recv(fd
, (void*)bp
->c_str(), len
);
1769 bp
->set_length(ret
);
1770 push_back(std::move(bp
));
1775 int buffer::list::write_file(const char *fn
, int mode
)
1777 int fd
= TEMP_FAILURE_RETRY(::open(fn
, O_WRONLY
|O_CREAT
|O_TRUNC
|O_CLOEXEC
|O_BINARY
, mode
));
1780 cerr
<< "bufferlist::write_file(" << fn
<< "): failed to open file: "
1781 << cpp_strerror(err
) << std::endl
;
1784 int ret
= write_fd(fd
);
1786 cerr
<< "bufferlist::write_fd(" << fn
<< "): write_fd error: "
1787 << cpp_strerror(ret
) << std::endl
;
1788 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1791 if (TEMP_FAILURE_RETRY(::close(fd
))) {
1793 cerr
<< "bufferlist::write_file(" << fn
<< "): close error: "
1794 << cpp_strerror(err
) << std::endl
;
1800 static int do_writev(int fd
, struct iovec
*vec
, uint64_t offset
, unsigned veclen
, unsigned bytes
)
1805 r
= ::pwritev(fd
, vec
, veclen
, offset
);
1807 r
= ::lseek64(fd
, offset
, SEEK_SET
);
1811 r
= ::writev(fd
, vec
, veclen
);
1821 if (bytes
== 0) break;
1824 if (vec
[0].iov_len
<= (size_t)r
) {
1825 // drain this whole item
1826 r
-= vec
[0].iov_len
;
1830 vec
[0].iov_base
= (char *)vec
[0].iov_base
+ r
;
1831 vec
[0].iov_len
-= r
;
1840 int buffer::list::write_fd(int fd
) const
1847 auto p
= std::cbegin(_buffers
);
1848 while (p
!= std::cend(_buffers
)) {
1849 if (p
->length() > 0) {
1850 iov
[iovlen
].iov_base
= (void *)p
->c_str();
1851 iov
[iovlen
].iov_len
= p
->length();
1852 bytes
+= p
->length();
1857 if (iovlen
== IOV_MAX
||
1858 p
== _buffers
.end()) {
1863 wrote
= ::writev(fd
, start
, num
);
1870 if (wrote
< bytes
) {
1871 // partial write, recover!
1872 while ((size_t)wrote
>= start
[0].iov_len
) {
1873 wrote
-= start
[0].iov_len
;
1874 bytes
-= start
[0].iov_len
;
1879 start
[0].iov_len
-= wrote
;
1880 start
[0].iov_base
= (char *)start
[0].iov_base
+ wrote
;
1892 int buffer::list::send_fd(int fd
) const {
1893 return buffer::list::write_fd(fd
);
1896 int buffer::list::write_fd(int fd
, uint64_t offset
) const
1900 auto p
= std::cbegin(_buffers
);
1901 uint64_t left_pbrs
= get_num_buffers();
1904 unsigned iovlen
= 0;
1905 uint64_t size
= std::min
<uint64_t>(left_pbrs
, IOV_MAX
);
1908 iov
[iovlen
].iov_base
= (void *)p
->c_str();
1909 iov
[iovlen
].iov_len
= p
->length();
1911 bytes
+= p
->length();
1916 int r
= do_writev(fd
, iov
, offset
, iovlen
, bytes
);
1924 int buffer::list::write_fd(int fd
) const
1926 // There's no writev on Windows. WriteFileGather may be an option,
1927 // but it has strict requirements in terms of buffer size and alignment.
1928 auto p
= std::cbegin(_buffers
);
1929 uint64_t left_pbrs
= get_num_buffers();
1932 while (written
< p
->length()) {
1933 int r
= ::write(fd
, p
->c_str(), p
->length() - written
);
1947 int buffer::list::send_fd(int fd
) const
1949 // There's no writev on Windows. WriteFileGather may be an option,
1950 // but it has strict requirements in terms of buffer size and alignment.
1951 auto p
= std::cbegin(_buffers
);
1952 uint64_t left_pbrs
= get_num_buffers();
1955 while (written
< p
->length()) {
1956 int r
= ::send(fd
, p
->c_str(), p
->length() - written
, 0);
1958 return -ceph_sock_errno();
1970 int buffer::list::write_fd(int fd
, uint64_t offset
) const
1972 int r
= ::lseek64(fd
, offset
, SEEK_SET
);
1976 return write_fd(fd
);
1980 buffer::list::iov_vec_t
buffer::list::prepare_iovs() const
1984 iov_vec_t iovs
{_num
/ IOV_MAX
+ 1};
1985 auto it
= iovs
.begin();
1986 for (auto& bp
: _buffers
) {
1990 size_t nr_iov_created
= std::distance(iovs
.begin(), it
);
1992 std::min(_num
- IOV_MAX
* nr_iov_created
, (size_t)IOV_MAX
));
1994 it
->iov
[index
].iov_base
= (void*)bp
.c_str();
1995 it
->iov
[index
].iov_len
= bp
.length();
1997 it
->length
+= bp
.length();
1998 if (++index
== IOV_MAX
) {
1999 // continue with a new vector<iov> if we have more buf
2007 __u32
buffer::list::crc32c(__u32 crc
) const
2009 int cache_misses
= 0;
2011 int cache_adjusts
= 0;
2013 for (const auto& node
: _buffers
) {
2014 if (node
.length()) {
2015 raw
* const r
= node
._raw
;
2016 pair
<size_t, size_t> ofs(node
.offset(), node
.offset() + node
.length());
2017 pair
<uint32_t, uint32_t> ccrc
;
2018 if (r
->get_crc(ofs
, &ccrc
)) {
2019 if (ccrc
.first
== crc
) {
2024 /* If we have cached crc32c(buf, v) for initial value v,
2025 * we can convert this to a different initial value v' by:
2026 * crc32c(buf, v') = crc32c(buf, v) ^ adjustment
2027 * where adjustment = crc32c(0*len(buf), v ^ v')
2029 * http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
2030 * note, u for our crc32c implementation is 0
2032 crc
= ccrc
.second
^ ceph_crc32c(ccrc
.first
^ crc
, NULL
, node
.length());
2037 uint32_t base
= crc
;
2038 crc
= ceph_crc32c(crc
, (unsigned char*)node
.c_str(), node
.length());
2039 r
->set_crc(ofs
, make_pair(base
, crc
));
2044 if (buffer_track_crc
) {
2046 buffer_cached_crc_adjusted
+= cache_adjusts
;
2048 buffer_cached_crc
+= cache_hits
;
2050 buffer_missed_crc
+= cache_misses
;
2056 void buffer::list::invalidate_crc()
2058 for (const auto& node
: _buffers
) {
2060 node
._raw
->invalidate_crc();
2066 * Binary write all contents to a C++ stream
2068 void buffer::list::write_stream(std::ostream
&out
) const
2070 for (const auto& node
: _buffers
) {
2071 if (node
.length() > 0) {
2072 out
.write(node
.c_str(), node
.length());
2078 void buffer::list::hexdump(std::ostream
&out
, bool trailing_newline
) const
2083 std::ios_base::fmtflags original_flags
= out
.flags();
2085 // do our best to match the output of hexdump -C, for better
2088 out
.setf(std::ios::right
);
2092 char last_row_char
= '\0';
2093 bool was_same
= false, did_star
= false;
2094 for (unsigned o
=0; o
<length(); o
+= per
) {
2096 last_row_char
= (*this)[o
];
2099 if (o
+ per
< length()) {
2100 bool row_is_same
= true;
2101 for (unsigned i
=0; i
<per
&& o
+i
<length(); i
++) {
2102 char current_char
= (*this)[o
+i
];
2103 if (current_char
!= last_row_char
) {
2105 last_row_char
= current_char
;
2109 row_is_same
= false;
2129 out
<< std::hex
<< std::setw(8) << o
<< " ";
2132 for (i
=0; i
<per
&& o
+i
<length(); i
++) {
2135 out
<< " " << std::setw(2) << ((unsigned)(*this)[o
+i
] & 0xff);
2137 for (; i
<per
; i
++) {
2144 for (i
=0; i
<per
&& o
+i
<length(); i
++) {
2145 char c
= (*this)[o
+i
];
2146 if (isupper(c
) || islower(c
) || isdigit(c
) || c
== ' ' || ispunct(c
))
2151 out
<< '|' << std::dec
;
2153 if (trailing_newline
) {
2154 out
<< "\n" << std::hex
<< std::setw(8) << length();
2158 out
.flags(original_flags
);
2162 buffer::list
buffer::list::static_from_mem(char* c
, size_t l
) {
2164 bl
.push_back(ptr_node::create(create_static(l
, c
)));
2168 buffer::list
buffer::list::static_from_cstring(char* c
) {
2169 return static_from_mem(c
, std::strlen(c
));
2172 buffer::list
buffer::list::static_from_string(string
& s
) {
2173 // C++14 just has string::data return a char* from a non-const
2175 return static_from_mem(const_cast<char*>(s
.data()), s
.length());
2176 // But the way buffer::list mostly doesn't work in a sane way with
2177 // const makes me generally sad.
2180 // buffer::raw is not a standard layout type.
2181 #define BUF_OFFSETOF(type, field) \
2182 (reinterpret_cast<std::uintptr_t>(&(((type*)1024)->field)) - 1024u)
2184 bool buffer::ptr_node::dispose_if_hypercombined(
2185 buffer::ptr_node
* const delete_this
)
2187 // in case _raw is nullptr
2188 const std::uintptr_t bptr
=
2189 (reinterpret_cast<std::uintptr_t>(delete_this
->_raw
) +
2190 BUF_OFFSETOF(buffer::raw
, bptr_storage
));
2191 const bool is_hypercombined
=
2192 reinterpret_cast<std::uintptr_t>(delete_this
) == bptr
;
2193 if (is_hypercombined
) {
2194 ceph_assert_always("hypercombining is currently disabled" == nullptr);
2195 delete_this
->~ptr_node();
2202 std::unique_ptr
<buffer::ptr_node
, buffer::ptr_node::disposer
>
2203 buffer::ptr_node::create_hypercombined(ceph::unique_leakable_ptr
<buffer::raw
> r
)
2205 // FIXME: we don't currently hypercombine buffers due to crashes
2206 // observed in the rados suite. After fixing we'll use placement
2207 // new to create ptr_node on buffer::raw::bptr_storage.
2208 return std::unique_ptr
<buffer::ptr_node
, buffer::ptr_node::disposer
>(
2209 new ptr_node(std::move(r
)));
2212 buffer::ptr_node
* buffer::ptr_node::cloner::operator()(
2213 const buffer::ptr_node
& clone_this
)
2215 return new ptr_node(clone_this
);
2218 std::ostream
& buffer::operator<<(std::ostream
& out
, const buffer::raw
&r
) {
2219 return out
<< "buffer::raw("
2220 << (void*)r
.get_data() << " len " << r
.get_len()
2221 << " nref " << r
.nref
.load() << ")";
2224 std::ostream
& buffer::operator<<(std::ostream
& out
, const buffer::ptr
& bp
) {
2226 out
<< "buffer::ptr(" << bp
.offset() << "~" << bp
.length()
2227 << " " << (void*)bp
.c_str()
2228 << " in raw " << (void*)bp
.raw_c_str()
2229 << " len " << bp
.raw_length()
2230 << " nref " << bp
.raw_nref() << ")";
2232 out
<< "buffer:ptr(" << bp
.offset() << "~" << bp
.length() << " no raw)";
2236 std::ostream
& buffer::operator<<(std::ostream
& out
, const buffer::list
& bl
) {
2237 out
<< "buffer::list(len=" << bl
.length() << ",\n";
2239 for (const auto& node
: bl
.buffers()) {
2240 out
<< "\t" << node
;
2241 if (&node
!= &bl
.buffers().back()) {
2249 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_malloc
, buffer_raw_malloc
,
2251 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_posix_aligned
,
2252 buffer_raw_posix_aligned
, buffer_meta
);
2253 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_claimed_char
, buffer_raw_claimed_char
,
2255 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_static
, buffer_raw_static
,
2259 void ceph::buffer::list::page_aligned_appender::_refill(size_t len
) {
2260 const unsigned alloc
=
2262 shift_round_up(static_cast<unsigned>(len
),
2263 static_cast<unsigned>(CEPH_PAGE_SHIFT
)));
2265 ptr_node::create(buffer::create_page_aligned(alloc
));
2266 new_back
->set_length(0); // unused, so far.
2267 bl
.push_back(std::move(new_back
));
2270 namespace ceph::buffer
{
2271 inline namespace v15_2_0
{
2273 #pragma GCC diagnostic push
2274 #pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
2275 #pragma clang diagnostic push
2276 #pragma clang diagnostic ignored "-Wnon-virtual-dtor"
2277 class buffer_error_category
: public ceph::converting_category
{
2279 buffer_error_category(){}
2280 const char* name() const noexcept override
;
2281 const char* message(int ev
, char*, std::size_t) const noexcept override
;
2282 std::string
message(int ev
) const override
;
2283 boost::system::error_condition
default_error_condition(int ev
) const noexcept
2285 using ceph::converting_category::equivalent
;
2286 bool equivalent(int ev
, const boost::system::error_condition
& c
) const
2288 int from_code(int ev
) const noexcept override
;
2290 #pragma GCC diagnostic pop
2291 #pragma clang diagnostic pop
2293 const char* buffer_error_category::name() const noexcept
{
2298 buffer_error_category::message(int ev
, char*, std::size_t) const noexcept
{
2299 using ceph::buffer::errc
;
2303 switch (static_cast<errc
>(ev
)) {
2304 case errc::bad_alloc
:
2305 return "Bad allocation";
2307 case errc::end_of_buffer
:
2308 return "End of buffer";
2310 case errc::malformed_input
:
2311 return "Malformed input";
2314 return "Unknown error";
2317 std::string
buffer_error_category::message(int ev
) const {
2318 return message(ev
, nullptr, 0);
2321 boost::system::error_condition
2322 buffer_error_category::default_error_condition(int ev
)const noexcept
{
2323 using ceph::buffer::errc
;
2324 switch (static_cast<errc
>(ev
)) {
2325 case errc::bad_alloc
:
2326 return boost::system::errc::not_enough_memory
;
2327 case errc::end_of_buffer
:
2328 case errc::malformed_input
:
2329 return boost::system::errc::io_error
;
2331 return { ev
, *this };
2334 bool buffer_error_category::equivalent(int ev
, const boost::system::error_condition
& c
) const noexcept
{
2335 return default_error_condition(ev
) == c
;
2338 int buffer_error_category::from_code(int ev
) const noexcept
{
2339 using ceph::buffer::errc
;
2340 switch (static_cast<errc
>(ev
)) {
2341 case errc::bad_alloc
:
2344 case errc::end_of_buffer
:
2347 case errc::malformed_input
:
2353 const boost::system::error_category
& buffer_category() noexcept
{
2354 static const buffer_error_category c
;