1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
21 #include "include/ceph_assert.h"
22 #include "include/types.h"
23 #include "include/buffer_raw.h"
24 #include "include/compat.h"
25 #include "include/mempool.h"
27 #include "common/environment.h"
28 #include "common/errno.h"
29 #include "common/safe_io.h"
30 #include "common/strtol.h"
31 #include "common/likely.h"
32 #include "common/valgrind.h"
33 #include "common/deleter.h"
34 #include "common/RWLock.h"
35 #include "include/spinlock.h"
36 #include "include/scope_guard.h"
39 #include "msg/xio/XioMsg.h"
44 #define CEPH_BUFFER_ALLOC_UNIT 4096u
45 #define CEPH_BUFFER_APPEND_SIZE (CEPH_BUFFER_ALLOC_UNIT - sizeof(raw_combined))
48 static ceph::spinlock debug_lock
;
49 # define bdout { std::lock_guard<ceph::spinlock> lg(debug_lock); std::cout
50 # define bendl std::endl; }
52 # define bdout if (0) { std::cout
53 # define bendl std::endl; }
56 static std::atomic
<unsigned> buffer_cached_crc
{ 0 };
57 static std::atomic
<unsigned> buffer_cached_crc_adjusted
{ 0 };
58 static std::atomic
<unsigned> buffer_missed_crc
{ 0 };
60 static bool buffer_track_crc
= get_env_bool("CEPH_BUFFER_TRACK");
62 void buffer::track_cached_crc(bool b
) {
65 int buffer::get_cached_crc() {
66 return buffer_cached_crc
;
68 int buffer::get_cached_crc_adjusted() {
69 return buffer_cached_crc_adjusted
;
72 int buffer::get_missed_crc() {
73 return buffer_missed_crc
;
76 const char * buffer::error::what() const throw () {
77 return "buffer::exception";
79 const char * buffer::bad_alloc::what() const throw () {
80 return "buffer::bad_alloc";
82 const char * buffer::end_of_buffer::what() const throw () {
83 return "buffer::end_of_buffer";
85 const char * buffer::malformed_input::what() const throw () {
88 buffer::error_code::error_code(int error
) :
89 buffer::malformed_input(cpp_strerror(error
).c_str()), code(error
) {}
92 * raw_combined is always placed within a single allocation along
93 * with the data buffer. the data goes at the beginning, and
94 * raw_combined at the end.
96 class buffer::raw_combined
: public buffer::raw
{
99 raw_combined(char *dataptr
, unsigned l
, unsigned align
,
101 : raw(dataptr
, l
, mempool
),
104 raw
* clone_empty() override
{
105 return create(len
, alignment
);
108 static raw_combined
*create(unsigned len
,
110 int mempool
= mempool::mempool_buffer_anon
) {
112 align
= sizeof(size_t);
113 size_t rawlen
= round_up_to(sizeof(buffer::raw_combined
),
114 alignof(buffer::raw_combined
));
115 size_t datalen
= round_up_to(len
, alignof(buffer::raw_combined
));
118 char *ptr
= (char *) valloc(rawlen
+ datalen
);
121 int r
= ::posix_memalign((void**)(void*)&ptr
, align
, rawlen
+ datalen
);
128 // actual data first, since it has presumably larger alignment restriction
129 // then put the raw_combined at the end
130 return new (ptr
+ datalen
) raw_combined(ptr
, len
, align
, mempool
);
133 static void operator delete(void *ptr
) {
134 raw_combined
*raw
= (raw_combined
*)ptr
;
135 ::free((void *)raw
->data
);
139 class buffer::raw_malloc
: public buffer::raw
{
141 MEMPOOL_CLASS_HELPERS();
143 explicit raw_malloc(unsigned l
) : raw(l
) {
145 data
= (char *)malloc(len
);
151 bdout
<< "raw_malloc " << this << " alloc " << (void *)data
<< " " << l
<< bendl
;
153 raw_malloc(unsigned l
, char *b
) : raw(b
, l
) {
154 bdout
<< "raw_malloc " << this << " alloc " << (void *)data
<< " " << l
<< bendl
;
156 ~raw_malloc() override
{
158 bdout
<< "raw_malloc " << this << " free " << (void *)data
<< " " << bendl
;
160 raw
* clone_empty() override
{
161 return new raw_malloc(len
);
166 class buffer::raw_posix_aligned
: public buffer::raw
{
169 MEMPOOL_CLASS_HELPERS();
171 raw_posix_aligned(unsigned l
, unsigned _align
) : raw(l
) {
173 ceph_assert((align
>= sizeof(void *)) && (align
& (align
- 1)) == 0);
175 data
= (char *) valloc(len
);
177 int r
= ::posix_memalign((void**)(void*)&data
, align
, len
);
183 bdout
<< "raw_posix_aligned " << this << " alloc " << (void *)data
184 << " l=" << l
<< ", align=" << align
<< bendl
;
186 ~raw_posix_aligned() override
{
188 bdout
<< "raw_posix_aligned " << this << " free " << (void *)data
<< bendl
;
190 raw
* clone_empty() override
{
191 return new raw_posix_aligned(len
, align
);
197 class buffer::raw_hack_aligned
: public buffer::raw
{
201 raw_hack_aligned(unsigned l
, unsigned _align
) : raw(l
) {
203 realdata
= new char[len
+align
-1];
204 unsigned off
= ((unsigned)realdata
) & (align
-1);
206 data
= realdata
+ align
- off
;
209 //cout << "hack aligned " << (unsigned)data
210 //<< " in raw " << (unsigned)realdata
211 //<< " off " << off << std::endl;
212 ceph_assert(((unsigned)data
& (align
-1)) == 0);
214 ~raw_hack_aligned() {
218 return new raw_hack_aligned(len
, align
);
224 * primitive buffer types
226 class buffer::raw_char
: public buffer::raw
{
228 MEMPOOL_CLASS_HELPERS();
230 explicit raw_char(unsigned l
) : raw(l
) {
232 data
= new char[len
];
235 bdout
<< "raw_char " << this << " alloc " << (void *)data
<< " " << l
<< bendl
;
237 raw_char(unsigned l
, char *b
) : raw(b
, l
) {
238 bdout
<< "raw_char " << this << " alloc " << (void *)data
<< " " << l
<< bendl
;
240 ~raw_char() override
{
242 bdout
<< "raw_char " << this << " free " << (void *)data
<< bendl
;
244 raw
* clone_empty() override
{
245 return new raw_char(len
);
249 class buffer::raw_claimed_char
: public buffer::raw
{
251 MEMPOOL_CLASS_HELPERS();
253 explicit raw_claimed_char(unsigned l
, char *b
) : raw(b
, l
) {
254 bdout
<< "raw_claimed_char " << this << " alloc " << (void *)data
255 << " " << l
<< bendl
;
257 ~raw_claimed_char() override
{
258 bdout
<< "raw_claimed_char " << this << " free " << (void *)data
261 raw
* clone_empty() override
{
262 return new raw_char(len
);
266 class buffer::raw_unshareable
: public buffer::raw
{
268 MEMPOOL_CLASS_HELPERS();
270 explicit raw_unshareable(unsigned l
) : raw(l
) {
272 data
= new char[len
];
276 raw_unshareable(unsigned l
, char *b
) : raw(b
, l
) {
278 raw
* clone_empty() override
{
279 return new raw_char(len
);
281 bool is_shareable() const override
{
282 return false; // !shareable, will force make_shareable()
284 ~raw_unshareable() override
{
289 class buffer::raw_static
: public buffer::raw
{
291 MEMPOOL_CLASS_HELPERS();
293 raw_static(const char *d
, unsigned l
) : raw((char*)d
, l
) { }
294 ~raw_static() override
{}
295 raw
* clone_empty() override
{
296 return new buffer::raw_char(len
);
300 class buffer::raw_claim_buffer
: public buffer::raw
{
303 raw_claim_buffer(const char *b
, unsigned l
, deleter d
)
304 : raw((char*)b
, l
), del(std::move(d
)) { }
305 ~raw_claim_buffer() override
{}
306 raw
* clone_empty() override
{
307 return new buffer::raw_char(len
);
311 #if defined(HAVE_XIO)
312 class buffer::xio_msg_buffer
: public buffer::raw
{
314 XioDispatchHook
* m_hook
;
316 xio_msg_buffer(XioDispatchHook
* _m_hook
, const char *d
,
318 raw((char*)d
, l
), m_hook(_m_hook
->get()) {}
320 bool is_shareable() const override
{ return false; }
321 static void operator delete(void *p
)
323 xio_msg_buffer
*buf
= static_cast<xio_msg_buffer
*>(p
);
324 // return hook ref (counts against pool); it appears illegal
325 // to do this in our dtor, because this fires after that
329 return new buffer::raw_char(len
);
333 class buffer::xio_mempool
: public buffer::raw
{
335 struct xio_reg_mem
*mp
;
336 xio_mempool(struct xio_reg_mem
*_mp
, unsigned l
) :
337 raw((char*)_mp
->addr
, l
), mp(_mp
)
341 return new buffer::raw_char(len
);
345 struct xio_reg_mem
* get_xio_mp(const buffer::ptr
& bp
)
347 buffer::xio_mempool
*mb
= dynamic_cast<buffer::xio_mempool
*>(bp
.get_raw());
354 buffer::raw
* buffer::create_msg(
355 unsigned len
, char *buf
, XioDispatchHook
* m_hook
) {
356 XioPool
& pool
= m_hook
->get_pool();
358 static_cast<buffer::raw
*>(pool
.alloc(sizeof(xio_msg_buffer
)));
359 new (bp
) xio_msg_buffer(m_hook
, buf
, len
);
362 #endif /* HAVE_XIO */
364 ceph::unique_leakable_ptr
<buffer::raw
> buffer::copy(const char *c
, unsigned len
) {
365 auto r
= buffer::create_aligned(len
, sizeof(size_t));
366 memcpy(r
->data
, c
, len
);
370 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create(unsigned len
) {
371 return buffer::create_aligned(len
, sizeof(size_t));
373 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_in_mempool(unsigned len
, int mempool
) {
374 return buffer::create_aligned_in_mempool(len
, sizeof(size_t), mempool
);
376 buffer::raw
* buffer::claim_char(unsigned len
, char *buf
) {
377 return new raw_claimed_char(len
, buf
);
379 buffer::raw
* buffer::create_malloc(unsigned len
) {
380 return new raw_malloc(len
);
382 buffer::raw
* buffer::claim_malloc(unsigned len
, char *buf
) {
383 return new raw_malloc(len
, buf
);
385 buffer::raw
* buffer::create_static(unsigned len
, char *buf
) {
386 return new raw_static(buf
, len
);
388 buffer::raw
* buffer::claim_buffer(unsigned len
, char *buf
, deleter del
) {
389 return new raw_claim_buffer(buf
, len
, std::move(del
));
392 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_aligned_in_mempool(
393 unsigned len
, unsigned align
, int mempool
) {
394 // If alignment is a page multiple, use a separate buffer::raw to
395 // avoid fragmenting the heap.
397 // Somewhat unexpectedly, I see consistently better performance
398 // from raw_combined than from raw even when the allocation size is
399 // a page multiple (but alignment is not).
401 // I also see better performance from a separate buffer::raw once the
403 if ((align
& ~CEPH_PAGE_MASK
) == 0 ||
404 len
>= CEPH_PAGE_SIZE
* 2) {
406 return ceph::unique_leakable_ptr
<buffer::raw
>(new raw_posix_aligned(len
, align
));
408 return ceph::unique_leakable_ptr
<buffer::raw
>(new raw_hack_aligned(len
, align
));
411 return ceph::unique_leakable_ptr
<buffer::raw
>(
412 raw_combined::create(len
, align
, mempool
));
414 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_aligned(
415 unsigned len
, unsigned align
) {
416 return create_aligned_in_mempool(len
, align
,
417 mempool::mempool_buffer_anon
);
420 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_page_aligned(unsigned len
) {
421 return create_aligned(len
, CEPH_PAGE_SIZE
);
423 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_small_page_aligned(unsigned len
) {
424 if (len
< CEPH_PAGE_SIZE
) {
425 return create_aligned(len
, CEPH_BUFFER_ALLOC_UNIT
);
427 return create_aligned(len
, CEPH_PAGE_SIZE
);
430 buffer::raw
* buffer::create_unshareable(unsigned len
) {
431 return new raw_unshareable(len
);
434 buffer::ptr::ptr(raw
* r
) : _raw(r
), _off(0), _len(r
->len
) // no lock needed; this is an unref raw.
437 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
439 buffer::ptr::ptr(ceph::unique_leakable_ptr
<raw
> r
)
444 _raw
->nref
.store(1, std::memory_order_release
);
445 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
447 buffer::ptr::ptr(unsigned l
) : _off(0), _len(l
)
449 _raw
= buffer::create(l
).release();
450 _raw
->nref
.store(1, std::memory_order_release
);
451 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
453 buffer::ptr::ptr(const char *d
, unsigned l
) : _off(0), _len(l
) // ditto.
455 _raw
= buffer::copy(d
, l
).release();
456 _raw
->nref
.store(1, std::memory_order_release
);
457 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
459 buffer::ptr::ptr(const ptr
& p
) : _raw(p
._raw
), _off(p
._off
), _len(p
._len
)
463 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
466 buffer::ptr::ptr(ptr
&& p
) noexcept
: _raw(p
._raw
), _off(p
._off
), _len(p
._len
)
471 buffer::ptr::ptr(const ptr
& p
, unsigned o
, unsigned l
)
472 : _raw(p
._raw
), _off(p
._off
+ o
), _len(l
)
474 ceph_assert(o
+l
<= p
._len
);
477 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
479 buffer::ptr::ptr(const ptr
& p
, ceph::unique_leakable_ptr
<raw
> r
)
484 _raw
->nref
.store(1, std::memory_order_release
);
485 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
487 buffer::ptr
& buffer::ptr::operator= (const ptr
& p
)
491 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
493 buffer::raw
*raw
= p
._raw
;
504 buffer::ptr
& buffer::ptr::operator= (ptr
&& p
) noexcept
507 buffer::raw
*raw
= p
._raw
;
520 ceph::unique_leakable_ptr
<buffer::raw
> buffer::ptr::clone()
522 return _raw
->clone();
525 void buffer::ptr::swap(ptr
& other
) noexcept
538 void buffer::ptr::release()
541 bdout
<< "ptr " << this << " release " << _raw
<< bendl
;
542 const bool last_one
= (1 == _raw
->nref
.load(std::memory_order_acquire
));
543 if (likely(last_one
) || --_raw
->nref
== 0) {
544 // BE CAREFUL: this is called also for hypercombined ptr_node. After
545 // freeing underlying raw, `*this` can become inaccessible as well!
546 const auto* delete_raw
= _raw
;
548 //cout << "hosing raw " << (void*)_raw << " len " << _raw->len << std::endl;
549 ANNOTATE_HAPPENS_AFTER(&_raw
->nref
);
550 ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&_raw
->nref
);
551 delete delete_raw
; // dealloc old (if any)
553 ANNOTATE_HAPPENS_BEFORE(&_raw
->nref
);
559 int buffer::ptr::get_mempool() const {
561 return _raw
->mempool
;
563 return mempool::mempool_buffer_anon
;
566 void buffer::ptr::reassign_to_mempool(int pool
) {
568 _raw
->reassign_to_mempool(pool
);
571 void buffer::ptr::try_assign_to_mempool(int pool
) {
573 _raw
->try_assign_to_mempool(pool
);
577 const char *buffer::ptr::c_str() const {
579 return _raw
->get_data() + _off
;
581 char *buffer::ptr::c_str() {
583 return _raw
->get_data() + _off
;
585 const char *buffer::ptr::end_c_str() const {
587 return _raw
->get_data() + _off
+ _len
;
589 char *buffer::ptr::end_c_str() {
591 return _raw
->get_data() + _off
+ _len
;
594 unsigned buffer::ptr::unused_tail_length() const
597 return _raw
->len
- (_off
+_len
);
601 const char& buffer::ptr::operator[](unsigned n
) const
604 ceph_assert(n
< _len
);
605 return _raw
->get_data()[_off
+ n
];
607 char& buffer::ptr::operator[](unsigned n
)
610 ceph_assert(n
< _len
);
611 return _raw
->get_data()[_off
+ n
];
614 const char *buffer::ptr::raw_c_str() const { ceph_assert(_raw
); return _raw
->data
; }
615 unsigned buffer::ptr::raw_length() const { ceph_assert(_raw
); return _raw
->len
; }
616 int buffer::ptr::raw_nref() const { ceph_assert(_raw
); return _raw
->nref
; }
618 void buffer::ptr::copy_out(unsigned o
, unsigned l
, char *dest
) const {
621 throw end_of_buffer();
622 char* src
= _raw
->data
+ _off
+ o
;
623 maybe_inline_memcpy(dest
, src
, l
, 8);
626 unsigned buffer::ptr::wasted() const
628 return _raw
->len
- _len
;
631 int buffer::ptr::cmp(const ptr
& o
) const
633 int l
= _len
< o
._len
? _len
: o
._len
;
635 int r
= memcmp(c_str(), o
.c_str(), l
);
646 bool buffer::ptr::is_zero() const
648 return mem_is_zero(c_str(), _len
);
651 unsigned buffer::ptr::append(char c
)
654 ceph_assert(1 <= unused_tail_length());
655 char* ptr
= _raw
->data
+ _off
+ _len
;
661 unsigned buffer::ptr::append(const char *p
, unsigned l
)
664 ceph_assert(l
<= unused_tail_length());
665 char* c
= _raw
->data
+ _off
+ _len
;
666 maybe_inline_memcpy(c
, p
, l
, 32);
671 unsigned buffer::ptr::append_zeros(unsigned l
)
674 ceph_assert(l
<= unused_tail_length());
675 char* c
= _raw
->data
+ _off
+ _len
;
676 // FIPS zeroization audit 20191115: this memset is not security related.
682 void buffer::ptr::copy_in(unsigned o
, unsigned l
, const char *src
, bool crc_reset
)
685 ceph_assert(o
<= _len
);
686 ceph_assert(o
+l
<= _len
);
687 char* dest
= _raw
->data
+ _off
+ o
;
689 _raw
->invalidate_crc();
690 maybe_inline_memcpy(dest
, src
, l
, 64);
693 void buffer::ptr::zero(bool crc_reset
)
696 _raw
->invalidate_crc();
697 // FIPS zeroization audit 20191115: this memset is not security related.
698 memset(c_str(), 0, _len
);
701 void buffer::ptr::zero(unsigned o
, unsigned l
, bool crc_reset
)
703 ceph_assert(o
+l
<= _len
);
705 _raw
->invalidate_crc();
706 // FIPS zeroization audit 20191115: this memset is not security related.
707 memset(c_str()+o
, 0, l
);
710 // -- buffer::list::iterator --
712 buffer::list::iterator operator=(const buffer::list::iterator& other)
714 if (this != &other) {
724 template<bool is_const
>
725 buffer::list::iterator_impl
<is_const
>::iterator_impl(bl_t
*l
, unsigned o
)
726 : bl(l
), ls(&bl
->_buffers
), p(ls
->begin()), off(0), p_off(0)
731 template<bool is_const
>
732 buffer::list::iterator_impl
<is_const
>::iterator_impl(const buffer::list::iterator
& i
)
733 : iterator_impl
<is_const
>(i
.bl
, i
.off
, i
.p
, i
.p_off
) {}
735 template<bool is_const
>
736 void buffer::list::iterator_impl
<is_const
>::advance(unsigned o
)
738 //cout << this << " advance " << o << " from " << off
739 // << " (p_off " << p_off << " in " << p->length() << ")"
743 while (p
!= ls
->end()) {
744 if (p_off
>= p
->length()) {
746 p_off
-= p
->length();
749 // somewhere in this buffer!
753 if (p
== ls
->end() && p_off
) {
754 throw end_of_buffer();
759 template<bool is_const
>
760 void buffer::list::iterator_impl
<is_const
>::seek(unsigned o
)
767 template<bool is_const
>
768 char buffer::list::iterator_impl
<is_const
>::operator*() const
771 throw end_of_buffer();
775 template<bool is_const
>
776 buffer::list::iterator_impl
<is_const
>&
777 buffer::list::iterator_impl
<is_const
>::operator++()
780 throw end_of_buffer();
785 template<bool is_const
>
786 buffer::ptr
buffer::list::iterator_impl
<is_const
>::get_current_ptr() const
789 throw end_of_buffer();
790 return ptr(*p
, p_off
, p
->length() - p_off
);
793 template<bool is_const
>
794 bool buffer::list::iterator_impl
<is_const
>::is_pointing_same_raw(
795 const ptr
& other
) const
798 throw end_of_buffer();
799 return p
->get_raw() == other
.get_raw();
803 // note that these all _append_ to dest!
804 template<bool is_const
>
805 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, char *dest
)
807 if (p
== ls
->end()) seek(off
);
810 throw end_of_buffer();
812 unsigned howmuch
= p
->length() - p_off
;
813 if (len
< howmuch
) howmuch
= len
;
814 p
->copy_out(p_off
, howmuch
, dest
);
822 template<bool is_const
>
823 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, ptr
&dest
)
825 copy_deep(len
, dest
);
828 template<bool is_const
>
829 void buffer::list::iterator_impl
<is_const
>::copy_deep(unsigned len
, ptr
&dest
)
835 throw end_of_buffer();
837 copy(len
, dest
.c_str());
839 template<bool is_const
>
840 void buffer::list::iterator_impl
<is_const
>::copy_shallow(unsigned len
,
847 throw end_of_buffer();
848 unsigned howmuch
= p
->length() - p_off
;
851 copy(len
, dest
.c_str());
853 dest
= ptr(*p
, p_off
, len
);
858 template<bool is_const
>
859 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, list
&dest
)
865 throw end_of_buffer();
867 unsigned howmuch
= p
->length() - p_off
;
870 dest
.append(*p
, p_off
, howmuch
);
877 template<bool is_const
>
878 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, std::string
&dest
)
884 throw end_of_buffer();
886 unsigned howmuch
= p
->length() - p_off
;
887 const char *c_str
= p
->c_str();
890 dest
.append(c_str
+ p_off
, howmuch
);
897 template<bool is_const
>
898 void buffer::list::iterator_impl
<is_const
>::copy_all(list
&dest
)
906 unsigned howmuch
= p
->length() - p_off
;
907 const char *c_str
= p
->c_str();
908 dest
.append(c_str
+ p_off
, howmuch
);
914 template<bool is_const
>
915 size_t buffer::list::iterator_impl
<is_const
>::get_ptr_and_advance(
916 size_t want
, const char **data
)
918 if (p
== ls
->end()) {
920 if (p
== ls
->end()) {
924 *data
= p
->c_str() + p_off
;
925 size_t l
= std::min
<size_t>(p
->length() - p_off
, want
);
927 if (p_off
== p
->length()) {
935 template<bool is_const
>
936 uint32_t buffer::list::iterator_impl
<is_const
>::crc32c(
937 size_t length
, uint32_t crc
)
939 length
= std::min
<size_t>(length
, get_remaining());
942 size_t l
= get_ptr_and_advance(length
, &p
);
943 crc
= ceph_crc32c(crc
, (unsigned char*)p
, l
);
949 // explicitly instantiate only the iterator types we need, so we can hide the
950 // details in this compilation unit without introducing unnecessary link time
952 template class buffer::list::iterator_impl
<true>;
953 template class buffer::list::iterator_impl
<false>;
955 buffer::list::iterator::iterator(bl_t
*l
, unsigned o
)
956 : iterator_impl(l
, o
)
959 buffer::list::iterator::iterator(bl_t
*l
, unsigned o
, list_iter_t ip
, unsigned po
)
960 : iterator_impl(l
, o
, ip
, po
)
964 void buffer::list::iterator::copy_in(unsigned len
, const char *src
, bool crc_reset
)
971 throw end_of_buffer();
973 unsigned howmuch
= p
->length() - p_off
;
976 p
->copy_in(p_off
, howmuch
, src
, crc_reset
);
984 void buffer::list::iterator::copy_in(unsigned len
, const list
& otherl
)
989 for (const auto& node
: otherl
._buffers
) {
990 unsigned l
= node
.length();
993 copy_in(l
, node
.c_str());
1000 // -- buffer::list --
1002 buffer::list::list(list
&& other
) noexcept
1003 : _buffers(std::move(other
._buffers
)),
1004 _carriage(&always_empty_bptr
),
1006 _memcopy_count(other
._memcopy_count
),
1011 void buffer::list::swap(list
& other
) noexcept
1013 std::swap(_len
, other
._len
);
1014 std::swap(_memcopy_count
, other
._memcopy_count
);
1015 std::swap(_carriage
, other
._carriage
);
1016 _buffers
.swap(other
._buffers
);
1017 //last_p.swap(other.last_p);
1019 other
.last_p
= other
.begin();
1022 bool buffer::list::contents_equal(const ceph::buffer::list
& other
) const
1024 if (length() != other
.length())
1027 // buffer-wise comparison
1029 auto a
= std::cbegin(_buffers
);
1030 auto b
= std::cbegin(other
._buffers
);
1031 unsigned aoff
= 0, boff
= 0;
1032 while (a
!= std::cend(_buffers
)) {
1033 unsigned len
= a
->length() - aoff
;
1034 if (len
> b
->length() - boff
)
1035 len
= b
->length() - boff
;
1036 if (memcmp(a
->c_str() + aoff
, b
->c_str() + boff
, len
) != 0)
1039 if (aoff
== a
->length()) {
1044 if (boff
== b
->length()) {
1052 // byte-wise comparison
1054 bufferlist::const_iterator me
= begin();
1055 bufferlist::const_iterator him
= other
.begin();
1066 bool buffer::list::is_provided_buffer(const char* const dst
) const
1068 if (_buffers
.empty()) {
1071 return (is_contiguous() && (_buffers
.front().c_str() == dst
));
1074 bool buffer::list::is_aligned(const unsigned align
) const
1076 for (const auto& node
: _buffers
) {
1077 if (!node
.is_aligned(align
)) {
1084 bool buffer::list::is_n_align_sized(const unsigned align
) const
1086 for (const auto& node
: _buffers
) {
1087 if (!node
.is_n_align_sized(align
)) {
1094 bool buffer::list::is_aligned_size_and_memory(
1095 const unsigned align_size
,
1096 const unsigned align_memory
) const
1098 for (const auto& node
: _buffers
) {
1099 if (!node
.is_aligned(align_memory
) || !node
.is_n_align_sized(align_size
)) {
1106 bool buffer::list::is_zero() const {
1107 for (const auto& node
: _buffers
) {
1108 if (!node
.is_zero()) {
1115 void buffer::list::zero()
1117 for (auto& node
: _buffers
) {
1122 void buffer::list::zero(const unsigned o
, const unsigned l
)
1124 ceph_assert(o
+l
<= _len
);
1126 for (auto& node
: _buffers
) {
1127 if (p
+ node
.length() > o
) {
1128 if (p
>= o
&& p
+node
.length() <= o
+l
) {
1129 // 'o'------------- l -----------|
1130 // 'p'-- node.length() --|
1132 } else if (p
>= o
) {
1133 // 'o'------------- l -----------|
1134 // 'p'------- node.length() -------|
1135 node
.zero(0, o
+l
-p
);
1136 } else if (p
+ node
.length() <= o
+l
) {
1137 // 'o'------------- l -----------|
1138 // 'p'------- node.length() -------|
1139 node
.zero(o
-p
, node
.length()-(o
-p
));
1141 // 'o'----------- l -----------|
1142 // 'p'---------- node.length() ----------|
1153 bool buffer::list::is_contiguous() const
1155 return _buffers
.size() <= 1;
1158 bool buffer::list::is_n_page_sized() const
1160 return is_n_align_sized(CEPH_PAGE_SIZE
);
1163 bool buffer::list::is_page_aligned() const
1165 return is_aligned(CEPH_PAGE_SIZE
);
1168 int buffer::list::get_mempool() const
1170 if (_buffers
.empty()) {
1171 return mempool::mempool_buffer_anon
;
1173 return _buffers
.back().get_mempool();
1176 void buffer::list::reassign_to_mempool(int pool
)
1178 for (auto& p
: _buffers
) {
1179 p
.get_raw()->reassign_to_mempool(pool
);
1183 void buffer::list::try_assign_to_mempool(int pool
)
1185 for (auto& p
: _buffers
) {
1186 p
.get_raw()->try_assign_to_mempool(pool
);
1190 uint64_t buffer::list::get_wasted_space() const
1192 if (_buffers
.size() == 1)
1193 return _buffers
.back().wasted();
1195 std::vector
<const raw
*> raw_vec
;
1196 raw_vec
.reserve(_buffers
.size());
1197 for (const auto& p
: _buffers
)
1198 raw_vec
.push_back(p
.get_raw());
1199 std::sort(raw_vec
.begin(), raw_vec
.end());
1202 const raw
*last
= nullptr;
1203 for (const auto r
: raw_vec
) {
1209 // If multiple buffers are sharing the same raw buffer and they overlap
1210 // with each other, the wasted space will be underestimated.
1211 if (total
<= length())
1213 return total
- length();
1216 void buffer::list::rebuild()
1219 _carriage
= &always_empty_bptr
;
1220 _buffers
.clear_and_dispose();
1223 if ((_len
& ~CEPH_PAGE_MASK
) == 0)
1224 rebuild(ptr_node::create(buffer::create_page_aligned(_len
)));
1226 rebuild(ptr_node::create(buffer::create(_len
)));
1229 void buffer::list::rebuild(
1230 std::unique_ptr
<buffer::ptr_node
, buffer::ptr_node::disposer
> nb
)
1233 for (auto& node
: _buffers
) {
1234 nb
->copy_in(pos
, node
.length(), node
.c_str(), false);
1235 pos
+= node
.length();
1237 _memcopy_count
+= pos
;
1238 _carriage
= &always_empty_bptr
;
1239 _buffers
.clear_and_dispose();
1240 if (likely(nb
->length())) {
1241 _carriage
= nb
.get();
1242 _buffers
.push_back(*nb
.release());
1248 bool buffer::list::rebuild_aligned(unsigned align
)
1250 return rebuild_aligned_size_and_memory(align
, align
);
1253 bool buffer::list::rebuild_aligned_size_and_memory(unsigned align_size
,
1254 unsigned align_memory
,
1255 unsigned max_buffers
)
1257 unsigned old_memcopy_count
= _memcopy_count
;
1259 if (max_buffers
&& _buffers
.size() > max_buffers
1260 && _len
> (max_buffers
* align_size
)) {
1261 align_size
= round_up_to(round_up_to(_len
, max_buffers
) / max_buffers
, align_size
);
1263 auto p
= std::begin(_buffers
);
1264 auto p_prev
= _buffers
.before_begin();
1265 while (p
!= std::end(_buffers
)) {
1266 // keep anything that's already align and sized aligned
1267 if (p
->is_aligned(align_memory
) && p
->is_n_align_sized(align_size
)) {
1268 /*cout << " segment " << (void*)p->c_str()
1269 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1270 << " length " << p->length()
1271 << " " << (p->length() & (align - 1)) << " ok" << std::endl;
1277 // consolidate unaligned items, until we get something that is sized+aligned
1279 unsigned offset
= 0;
1281 /*cout << " segment " << (void*)p->c_str()
1282 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1283 << " length " << p->length() << " " << (p->length() & (align - 1))
1284 << " overall offset " << offset << " " << (offset & (align - 1))
1285 << " not ok" << std::endl;
1287 offset
+= p
->length();
1288 // no need to reallocate, relinking is enough thankfully to bi::list.
1289 auto p_after
= _buffers
.erase_after(p_prev
);
1290 unaligned
._buffers
.push_back(*p
);
1291 unaligned
._len
+= p
->length();
1293 } while (p
!= std::end(_buffers
) &&
1294 (!p
->is_aligned(align_memory
) ||
1295 !p
->is_n_align_sized(align_size
) ||
1296 (offset
% align_size
)));
1297 if (!(unaligned
.is_contiguous() && unaligned
._buffers
.front().is_aligned(align_memory
))) {
1300 buffer::create_aligned(unaligned
._len
, align_memory
)));
1301 _memcopy_count
+= unaligned
._len
;
1303 _buffers
.insert_after(p_prev
, *ptr_node::create(unaligned
._buffers
.front()).release());
1308 return (old_memcopy_count
!= _memcopy_count
);
1311 bool buffer::list::rebuild_page_aligned()
1313 return rebuild_aligned(CEPH_PAGE_SIZE
);
1316 void buffer::list::reserve(size_t prealloc
)
1318 if (get_append_buffer_unused_tail_length() < prealloc
) {
1319 auto ptr
= ptr_node::create(buffer::create_page_aligned(prealloc
));
1320 ptr
->set_length(0); // unused, so far.
1321 _carriage
= ptr
.get();
1322 _buffers
.push_back(*ptr
.release());
1326 // sort-of-like-assignment-op
1327 void buffer::list::claim(list
& bl
, unsigned int flags
)
1331 claim_append(bl
, flags
);
1334 void buffer::list::claim_append(list
& bl
, unsigned int flags
)
1336 // steal the other guy's buffers
1338 if (!(flags
& CLAIM_ALLOW_NONSHAREABLE
)) {
1339 auto curbuf
= bl
._buffers
.begin();
1340 auto curbuf_prev
= bl
._buffers
.before_begin();
1342 while (curbuf
!= bl
._buffers
.end()) {
1343 const auto* const raw
= curbuf
->get_raw();
1344 if (unlikely(raw
&& !raw
->is_shareable())) {
1345 auto* clone
= ptr_node::copy_hypercombined(*curbuf
);
1346 curbuf
= bl
._buffers
.erase_after_and_dispose(curbuf_prev
);
1347 bl
._buffers
.insert_after(curbuf_prev
++, *clone
);
1349 curbuf_prev
= curbuf
++;
1353 _buffers
.splice_back(bl
._buffers
);
1354 bl
._carriage
= &always_empty_bptr
;
1355 bl
._buffers
.clear_and_dispose();
1357 bl
.last_p
= bl
.begin();
1360 void buffer::list::claim_append_piecewise(list
& bl
)
1362 // steal the other guy's buffers
1363 for (const auto& node
: bl
.buffers()) {
1364 append(node
, 0, node
.length());
1369 void buffer::list::copy(unsigned off
, unsigned len
, char *dest
) const
1371 if (off
+ len
> length())
1372 throw end_of_buffer();
1373 if (last_p
.get_off() != off
)
1375 last_p
.copy(len
, dest
);
1378 void buffer::list::copy(unsigned off
, unsigned len
, list
&dest
) const
1380 if (off
+ len
> length())
1381 throw end_of_buffer();
1382 if (last_p
.get_off() != off
)
1384 last_p
.copy(len
, dest
);
1387 void buffer::list::copy(unsigned off
, unsigned len
, std::string
& dest
) const
1389 if (last_p
.get_off() != off
)
1391 return last_p
.copy(len
, dest
);
1394 void buffer::list::copy_in(unsigned off
, unsigned len
, const char *src
, bool crc_reset
)
1396 if (off
+ len
> length())
1397 throw end_of_buffer();
1399 if (last_p
.get_off() != off
)
1401 last_p
.copy_in(len
, src
, crc_reset
);
1404 void buffer::list::copy_in(unsigned off
, unsigned len
, const list
& src
)
1406 if (last_p
.get_off() != off
)
1408 last_p
.copy_in(len
, src
);
1411 void buffer::list::append(char c
)
1413 // put what we can into the existing append_buffer.
1414 unsigned gap
= get_append_buffer_unused_tail_length();
1416 // make a new buffer!
1417 auto buf
= ptr_node::create(
1418 raw_combined::create(CEPH_BUFFER_APPEND_SIZE
, 0, get_mempool()));
1419 buf
->set_length(0); // unused, so far.
1420 _carriage
= buf
.get();
1421 _buffers
.push_back(*buf
.release());
1422 } else if (unlikely(_carriage
!= &_buffers
.back())) {
1423 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1424 _carriage
= bptr
.get();
1425 _buffers
.push_back(*bptr
.release());
1427 _carriage
->append(c
);
1431 buffer::ptr
buffer::list::always_empty_bptr
;
1433 buffer::ptr_node
& buffer::list::refill_append_space(const unsigned len
)
1435 // make a new buffer. fill out a complete page, factoring in the
1436 // raw_combined overhead.
1437 size_t need
= round_up_to(len
, sizeof(size_t)) + sizeof(raw_combined
);
1438 size_t alen
= round_up_to(need
, CEPH_BUFFER_ALLOC_UNIT
) -
1439 sizeof(raw_combined
);
1441 ptr_node::create(raw_combined::create(alen
, 0, get_mempool()));
1442 new_back
->set_length(0); // unused, so far.
1443 _carriage
= new_back
.get();
1444 _buffers
.push_back(*new_back
.release());
1445 return _buffers
.back();
1448 void buffer::list::append(const char *data
, unsigned len
)
1452 const unsigned free_in_last
= get_append_buffer_unused_tail_length();
1453 const unsigned first_round
= std::min(len
, free_in_last
);
1455 // _buffers and carriage can desynchronize when 1) a new ptr
1456 // we don't own has been added into the _buffers 2) _buffers
1457 // has been emptied as as a result of std::move or stolen by
1459 if (unlikely(_carriage
!= &_buffers
.back())) {
1460 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1461 _carriage
= bptr
.get();
1462 _buffers
.push_back(*bptr
.release());
1464 _carriage
->append(data
, first_round
);
1467 const unsigned second_round
= len
- first_round
;
1469 auto& new_back
= refill_append_space(second_round
);
1470 new_back
.append(data
+ first_round
, second_round
);
1474 buffer::list::reserve_t
buffer::list::obtain_contiguous_space(
1477 // note: if len < the normal append_buffer size it *might*
1478 // be better to allocate a normal-sized append_buffer and
1479 // use part of it. however, that optimizes for the case of
1480 // old-style types including new-style types. and in most
1481 // such cases, this won't be the very first thing encoded to
1482 // the list, so append_buffer will already be allocated.
1483 // OTOH if everything is new-style, we *should* allocate
1484 // only what we need and conserve memory.
1485 if (unlikely(get_append_buffer_unused_tail_length() < len
)) {
1487 buffer::ptr_node::create(buffer::create(len
)).release();
1488 new_back
->set_length(0); // unused, so far.
1489 _buffers
.push_back(*new_back
);
1490 _carriage
= new_back
;
1491 return { new_back
->c_str(), &new_back
->_len
, &_len
};
1493 if (unlikely(_carriage
!= &_buffers
.back())) {
1494 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1495 _carriage
= bptr
.get();
1496 _buffers
.push_back(*bptr
.release());
1498 return { _carriage
->end_c_str(), &_carriage
->_len
, &_len
};
1502 void buffer::list::append(const ptr
& bp
)
1507 void buffer::list::append(ptr
&& bp
)
1509 push_back(std::move(bp
));
1512 void buffer::list::append(const ptr
& bp
, unsigned off
, unsigned len
)
1514 ceph_assert(len
+off
<= bp
.length());
1515 if (!_buffers
.empty()) {
1516 ptr
&l
= _buffers
.back();
1517 if (l
.get_raw() == bp
.get_raw() &&
1518 l
.end() == bp
.start() + off
) {
1519 // yay contiguous with tail bp!
1520 l
.set_length(l
.length()+len
);
1525 // add new item to list
1526 _buffers
.push_back(*ptr_node::create(bp
, off
, len
).release());
1530 void buffer::list::append(const list
& bl
)
1533 for (const auto& node
: bl
._buffers
) {
1534 _buffers
.push_back(*ptr_node::create(node
).release());
1538 void buffer::list::append(std::istream
& in
)
1543 append(s
.c_str(), s
.length());
1549 buffer::list::contiguous_filler
buffer::list::append_hole(const unsigned len
)
1553 if (unlikely(get_append_buffer_unused_tail_length() < len
)) {
1554 // make a new append_buffer. fill out a complete page, factoring in
1555 // the raw_combined overhead.
1556 auto& new_back
= refill_append_space(len
);
1557 new_back
.set_length(len
);
1558 return { new_back
.c_str() };
1559 } else if (unlikely(_carriage
!= &_buffers
.back())) {
1560 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1561 _carriage
= bptr
.get();
1562 _buffers
.push_back(*bptr
.release());
1564 _carriage
->set_length(_carriage
->length() + len
);
1565 return { _carriage
->end_c_str() - len
};
1568 void buffer::list::prepend_zero(unsigned len
)
1570 auto bp
= ptr_node::create(len
);
1573 _buffers
.push_front(*bp
.release());
1576 void buffer::list::append_zero(unsigned len
)
1580 const unsigned free_in_last
= get_append_buffer_unused_tail_length();
1581 const unsigned first_round
= std::min(len
, free_in_last
);
1583 if (unlikely(_carriage
!= &_buffers
.back())) {
1584 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1585 _carriage
= bptr
.get();
1586 _buffers
.push_back(*bptr
.release());
1588 _carriage
->append_zeros(first_round
);
1591 const unsigned second_round
= len
- first_round
;
1593 auto& new_back
= refill_append_space(second_round
);
1594 new_back
.set_length(second_round
);
1595 new_back
.zero(false);
1603 const char& buffer::list::operator[](unsigned n
) const
1606 throw end_of_buffer();
1608 for (const auto& node
: _buffers
) {
1609 if (n
>= node
.length()) {
1619 * return a contiguous ptr to whole bufferlist contents.
1621 char *buffer::list::c_str()
1623 if (_buffers
.empty())
1624 return 0; // no buffers
1626 auto iter
= std::cbegin(_buffers
);
1629 if (iter
!= std::cend(_buffers
)) {
1632 return _buffers
.front().c_str(); // good, we're already contiguous.
1635 string
buffer::list::to_str() const {
1637 s
.reserve(length());
1638 for (const auto& node
: _buffers
) {
1639 if (node
.length()) {
1640 s
.append(node
.c_str(), node
.length());
1646 void buffer::list::substr_of(const list
& other
, unsigned off
, unsigned len
)
1648 if (off
+ len
> other
.length())
1649 throw end_of_buffer();
1654 auto curbuf
= std::cbegin(other
._buffers
);
1655 while (off
> 0 && off
>= curbuf
->length()) {
1657 //cout << "skipping over " << *curbuf << std::endl;
1658 off
-= (*curbuf
).length();
1661 ceph_assert(len
== 0 || curbuf
!= std::cend(other
._buffers
));
1665 if (off
+ len
< curbuf
->length()) {
1666 //cout << "copying partial of " << *curbuf << std::endl;
1667 _buffers
.push_back(*ptr_node::create( *curbuf
, off
, len
).release());
1673 //cout << "copying end (all?) of " << *curbuf << std::endl;
1674 unsigned howmuch
= curbuf
->length() - off
;
1675 _buffers
.push_back(*ptr_node::create( *curbuf
, off
, howmuch
).release());
1684 void buffer::list::splice(unsigned off
, unsigned len
, list
*claim_by
/*, bufferlist& replace_with */)
1689 if (off
>= length())
1690 throw end_of_buffer();
1692 ceph_assert(len
> 0);
1693 //cout << "splice off " << off << " len " << len << " ... mylen = " << length() << std::endl;
1696 auto curbuf
= std::begin(_buffers
);
1697 auto curbuf_prev
= _buffers
.before_begin();
1699 ceph_assert(curbuf
!= std::end(_buffers
));
1700 if (off
>= (*curbuf
).length()) {
1702 //cout << "off = " << off << " skipping over " << *curbuf << std::endl;
1703 off
-= (*curbuf
).length();
1704 curbuf_prev
= curbuf
++;
1706 // somewhere in this buffer!
1707 //cout << "off = " << off << " somewhere in " << *curbuf << std::endl;
1713 // add a reference to the front bit
1714 // insert it before curbuf (which we'll hose)
1715 //cout << "keeping front " << off << " of " << *curbuf << std::endl;
1716 _buffers
.insert_after(curbuf_prev
,
1717 *ptr_node::create(*curbuf
, 0, off
).release());
1722 _carriage
= &always_empty_bptr
;
1726 if (off
+ len
< (*curbuf
).length()) {
1727 //cout << "keeping end of " << *curbuf << ", losing first " << off+len << std::endl;
1729 claim_by
->append( *curbuf
, off
, len
);
1730 (*curbuf
).set_offset( off
+len
+ (*curbuf
).offset() ); // ignore beginning big
1731 (*curbuf
).set_length( (*curbuf
).length() - (len
+off
) );
1733 //cout << " now " << *curbuf << std::endl;
1737 // hose though the end
1738 unsigned howmuch
= (*curbuf
).length() - off
;
1739 //cout << "discarding " << howmuch << " of " << *curbuf << std::endl;
1741 claim_by
->append( *curbuf
, off
, howmuch
);
1742 _len
-= (*curbuf
).length();
1743 curbuf
= _buffers
.erase_after_and_dispose(curbuf_prev
);
1748 // splice in *replace (implement me later?)
1750 last_p
= begin(); // just in case we were in the removed region.
1753 void buffer::list::write(int off
, int len
, std::ostream
& out
) const
1756 s
.substr_of(*this, off
, len
);
1757 for (const auto& node
: s
._buffers
) {
1758 if (node
.length()) {
1759 out
.write(node
.c_str(), node
.length());
1764 void buffer::list::encode_base64(buffer::list
& o
)
1766 bufferptr
bp(length() * 4 / 3 + 3);
1767 int l
= ceph_armor(bp
.c_str(), bp
.c_str() + bp
.length(), c_str(), c_str() + length());
1769 o
.push_back(std::move(bp
));
1772 void buffer::list::decode_base64(buffer::list
& e
)
1774 bufferptr
bp(4 + ((e
.length() * 3) / 4));
1775 int l
= ceph_unarmor(bp
.c_str(), bp
.c_str() + bp
.length(), e
.c_str(), e
.c_str() + e
.length());
1777 std::ostringstream oss
;
1778 oss
<< "decode_base64: decoding failed:\n";
1780 throw buffer::malformed_input(oss
.str().c_str());
1782 ceph_assert(l
<= (int)bp
.length());
1784 push_back(std::move(bp
));
1788 int buffer::list::read_file(const char *fn
, std::string
*error
)
1790 int fd
= TEMP_FAILURE_RETRY(::open(fn
, O_RDONLY
|O_CLOEXEC
));
1793 std::ostringstream oss
;
1794 oss
<< "can't open " << fn
<< ": " << cpp_strerror(err
);
1800 // FIPS zeroization audit 20191115: this memset is not security related.
1801 memset(&st
, 0, sizeof(st
));
1802 if (::fstat(fd
, &st
) < 0) {
1804 std::ostringstream oss
;
1805 oss
<< "bufferlist::read_file(" << fn
<< "): stat error: "
1806 << cpp_strerror(err
);
1808 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1812 ssize_t ret
= read_fd(fd
, st
.st_size
);
1814 std::ostringstream oss
;
1815 oss
<< "bufferlist::read_file(" << fn
<< "): read error:"
1816 << cpp_strerror(ret
);
1818 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1821 else if (ret
!= st
.st_size
) {
1823 // Perhaps the file changed between stat() and read()?
1824 std::ostringstream oss
;
1825 oss
<< "bufferlist::read_file(" << fn
<< "): warning: got premature EOF.";
1827 // not actually an error, but weird
1829 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1833 ssize_t
buffer::list::read_fd(int fd
, size_t len
)
1835 auto bp
= ptr_node::create(buffer::create(len
));
1836 ssize_t ret
= safe_read(fd
, (void*)bp
->c_str(), len
);
1838 bp
->set_length(ret
);
1839 push_back(std::move(bp
));
1844 int buffer::list::write_file(const char *fn
, int mode
)
1846 int fd
= TEMP_FAILURE_RETRY(::open(fn
, O_WRONLY
|O_CREAT
|O_TRUNC
|O_CLOEXEC
, mode
));
1849 cerr
<< "bufferlist::write_file(" << fn
<< "): failed to open file: "
1850 << cpp_strerror(err
) << std::endl
;
1853 int ret
= write_fd(fd
);
1855 cerr
<< "bufferlist::write_fd(" << fn
<< "): write_fd error: "
1856 << cpp_strerror(ret
) << std::endl
;
1857 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1860 if (TEMP_FAILURE_RETRY(::close(fd
))) {
1862 cerr
<< "bufferlist::write_file(" << fn
<< "): close error: "
1863 << cpp_strerror(err
) << std::endl
;
1869 static int do_writev(int fd
, struct iovec
*vec
, uint64_t offset
, unsigned veclen
, unsigned bytes
)
1874 r
= ::pwritev(fd
, vec
, veclen
, offset
);
1876 r
= ::lseek64(fd
, offset
, SEEK_SET
);
1880 r
= ::writev(fd
, vec
, veclen
);
1890 if (bytes
== 0) break;
1893 if (vec
[0].iov_len
<= (size_t)r
) {
1894 // drain this whole item
1895 r
-= vec
[0].iov_len
;
1899 vec
[0].iov_base
= (char *)vec
[0].iov_base
+ r
;
1900 vec
[0].iov_len
-= r
;
1908 int buffer::list::write_fd(int fd
) const
1915 auto p
= std::cbegin(_buffers
);
1916 while (p
!= std::cend(_buffers
)) {
1917 if (p
->length() > 0) {
1918 iov
[iovlen
].iov_base
= (void *)p
->c_str();
1919 iov
[iovlen
].iov_len
= p
->length();
1920 bytes
+= p
->length();
1925 if (iovlen
== IOV_MAX
||
1926 p
== _buffers
.end()) {
1931 wrote
= ::writev(fd
, start
, num
);
1938 if (wrote
< bytes
) {
1939 // partial write, recover!
1940 while ((size_t)wrote
>= start
[0].iov_len
) {
1941 wrote
-= start
[0].iov_len
;
1942 bytes
-= start
[0].iov_len
;
1947 start
[0].iov_len
-= wrote
;
1948 start
[0].iov_base
= (char *)start
[0].iov_base
+ wrote
;
1960 int buffer::list::write_fd(int fd
, uint64_t offset
) const
1964 auto p
= std::cbegin(_buffers
);
1965 uint64_t left_pbrs
= std::size(_buffers
);
1968 unsigned iovlen
= 0;
1969 uint64_t size
= std::min
<uint64_t>(left_pbrs
, IOV_MAX
);
1972 iov
[iovlen
].iov_base
= (void *)p
->c_str();
1973 iov
[iovlen
].iov_len
= p
->length();
1975 bytes
+= p
->length();
1980 int r
= do_writev(fd
, iov
, offset
, iovlen
, bytes
);
1988 __u32
buffer::list::crc32c(__u32 crc
) const
1990 int cache_misses
= 0;
1992 int cache_adjusts
= 0;
1994 for (const auto& node
: _buffers
) {
1995 if (node
.length()) {
1996 raw
* const r
= node
.get_raw();
1997 pair
<size_t, size_t> ofs(node
.offset(), node
.offset() + node
.length());
1998 pair
<uint32_t, uint32_t> ccrc
;
1999 if (r
->get_crc(ofs
, &ccrc
)) {
2000 if (ccrc
.first
== crc
) {
2005 /* If we have cached crc32c(buf, v) for initial value v,
2006 * we can convert this to a different initial value v' by:
2007 * crc32c(buf, v') = crc32c(buf, v) ^ adjustment
2008 * where adjustment = crc32c(0*len(buf), v ^ v')
2010 * http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
2011 * note, u for our crc32c implementation is 0
2013 crc
= ccrc
.second
^ ceph_crc32c(ccrc
.first
^ crc
, NULL
, node
.length());
2018 uint32_t base
= crc
;
2019 crc
= ceph_crc32c(crc
, (unsigned char*)node
.c_str(), node
.length());
2020 r
->set_crc(ofs
, make_pair(base
, crc
));
2025 if (buffer_track_crc
) {
2027 buffer_cached_crc_adjusted
+= cache_adjusts
;
2029 buffer_cached_crc
+= cache_hits
;
2031 buffer_missed_crc
+= cache_misses
;
2037 void buffer::list::invalidate_crc()
2039 for (const auto& node
: _buffers
) {
2040 raw
* const r
= node
.get_raw();
2042 r
->invalidate_crc();
2047 #include "common/ceph_crypto.h"
2048 using ceph::crypto::SHA1
;
2050 sha1_digest_t
buffer::list::sha1()
2052 unsigned char fingerprint
[CEPH_CRYPTO_SHA1_DIGESTSIZE
];
2054 for (auto& p
: _buffers
) {
2055 sha1_gen
.Update((const unsigned char *)p
.c_str(), p
.length());
2057 sha1_gen
.Final(fingerprint
);
2058 return sha1_digest_t(fingerprint
);
2062 * Binary write all contents to a C++ stream
2064 void buffer::list::write_stream(std::ostream
&out
) const
2066 for (const auto& node
: _buffers
) {
2067 if (node
.length() > 0) {
2068 out
.write(node
.c_str(), node
.length());
2074 void buffer::list::hexdump(std::ostream
&out
, bool trailing_newline
) const
2079 std::ios_base::fmtflags original_flags
= out
.flags();
2081 // do our best to match the output of hexdump -C, for better
2084 out
.setf(std::ios::right
);
2088 bool was_zeros
= false, did_star
= false;
2089 for (unsigned o
=0; o
<length(); o
+= per
) {
2090 if (o
+ per
< length()) {
2091 bool row_is_zeros
= true;
2092 for (unsigned i
=0; i
<per
&& o
+i
<length(); i
++) {
2094 row_is_zeros
= false;
2113 out
<< std::hex
<< std::setw(8) << o
<< " ";
2116 for (i
=0; i
<per
&& o
+i
<length(); i
++) {
2119 out
<< " " << std::setw(2) << ((unsigned)(*this)[o
+i
] & 0xff);
2121 for (; i
<per
; i
++) {
2128 for (i
=0; i
<per
&& o
+i
<length(); i
++) {
2129 char c
= (*this)[o
+i
];
2130 if (isupper(c
) || islower(c
) || isdigit(c
) || c
== ' ' || ispunct(c
))
2135 out
<< '|' << std::dec
;
2137 if (trailing_newline
) {
2138 out
<< "\n" << std::hex
<< std::setw(8) << length();
2142 out
.flags(original_flags
);
2146 buffer::list
buffer::list::static_from_mem(char* c
, size_t l
) {
2148 bl
.push_back(ptr_node::create(create_static(l
, c
)));
2152 buffer::list
buffer::list::static_from_cstring(char* c
) {
2153 return static_from_mem(c
, std::strlen(c
));
2156 buffer::list
buffer::list::static_from_string(string
& s
) {
2157 // C++14 just has string::data return a char* from a non-const
2159 return static_from_mem(const_cast<char*>(s
.data()), s
.length());
2160 // But the way buffer::list mostly doesn't work in a sane way with
2161 // const makes me generally sad.
2164 bool buffer::ptr_node::dispose_if_hypercombined(
2165 buffer::ptr_node
* const delete_this
)
2167 const bool is_hypercombined
= static_cast<void*>(delete_this
) == \
2168 static_cast<void*>(&delete_this
->get_raw()->bptr_storage
);
2169 if (is_hypercombined
) {
2170 ceph_assert_always("hypercombining is currently disabled" == nullptr);
2171 delete_this
->~ptr_node();
2173 return is_hypercombined
;
2176 std::unique_ptr
<buffer::ptr_node
, buffer::ptr_node::disposer
>
2177 buffer::ptr_node::create_hypercombined(ceph::unique_leakable_ptr
<buffer::raw
> r
)
2179 // FIXME: we don't currently hypercombine buffers due to crashes
2180 // observed in the rados suite. After fixing we'll use placement
2181 // new to create ptr_node on buffer::raw::bptr_storage.
2182 return std::unique_ptr
<buffer::ptr_node
, buffer::ptr_node::disposer
>(
2183 new ptr_node(std::move(r
)));
// Overload taking a raw*.  NOTE(review): this fragment is extraction-
// garbled; original lines 2194-2195 and 2197-2199 (the ptr_node
// constructor arguments of both return statements and the closing
// braces) are missing from this view — recover them from upstream
// buffer.cc before making any change here.
2186 std::unique_ptr
<buffer::ptr_node
, buffer::ptr_node::disposer
>
2187 buffer::ptr_node::create_hypercombined(buffer::raw
* const r
)
2189 if (likely(r
->nref
== 0)) {
2190 // FIXME: we don't currently hypercombine buffers due to crashes
2191 // observed in the rados suite. After fixing we'll use placement
2192 // new to create ptr_node on buffer::raw::bptr_storage.
2193 return std::unique_ptr
<buffer::ptr_node
, buffer::ptr_node::disposer
>(
2196 return std::unique_ptr
<buffer::ptr_node
, buffer::ptr_node::disposer
>(
2201 buffer::ptr_node
* buffer::ptr_node::copy_hypercombined(
2202 const buffer::ptr_node
& copy_this
)
2204 // FIXME: we don't currently hypercombine buffers due to crashes
2205 // observed in the rados suite. After fixing we'll use placement
2206 // new to create ptr_node on buffer::raw::bptr_storage.
2207 auto raw_new
= copy_this
.get_raw()->clone();
2208 return new ptr_node(copy_this
, std::move(raw_new
));
2211 buffer::ptr_node
* buffer::ptr_node::cloner::operator()(
2212 const buffer::ptr_node
& clone_this
)
2214 const raw
* const raw_this
= clone_this
.get_raw();
2215 if (likely(!raw_this
|| raw_this
->is_shareable())) {
2216 return new ptr_node(clone_this
);
2218 // clone non-shareable buffers (make shareable)
2219 return copy_hypercombined(clone_this
);
2223 std::ostream
& buffer::operator<<(std::ostream
& out
, const buffer::raw
&r
) {
2224 return out
<< "buffer::raw(" << (void*)r
.data
<< " len " << r
.len
<< " nref " << r
.nref
.load() << ")";
2227 std::ostream
& buffer::operator<<(std::ostream
& out
, const buffer::ptr
& bp
) {
2229 out
<< "buffer::ptr(" << bp
.offset() << "~" << bp
.length()
2230 << " " << (void*)bp
.c_str()
2231 << " in raw " << (void*)bp
.raw_c_str()
2232 << " len " << bp
.raw_length()
2233 << " nref " << bp
.raw_nref() << ")";
2235 out
<< "buffer:ptr(" << bp
.offset() << "~" << bp
.length() << " no raw)";
2239 std::ostream
& buffer::operator<<(std::ostream
& out
, const buffer::list
& bl
) {
2240 out
<< "buffer::list(len=" << bl
.length() << "," << std::endl
;
2242 for (const auto& node
: bl
.buffers()) {
2243 out
<< "\t" << node
;
2244 if (&node
!= &bl
.buffers().back()) {
2245 out
<< "," << std::endl
;
2248 out
<< std::endl
<< ")";
2252 std::ostream
& buffer::operator<<(std::ostream
& out
, const buffer::error
& e
)
2254 return out
<< e
.what();
2257 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_malloc
, buffer_raw_malloc
,
2259 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_posix_aligned
,
2260 buffer_raw_posix_aligned
, buffer_meta
);
2261 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_char
, buffer_raw_char
, buffer_meta
);
2262 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_claimed_char
, buffer_raw_claimed_char
,
2264 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_unshareable
, buffer_raw_unshareable
,
2266 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_static
, buffer_raw_static
,