1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 *
 */
22 #include "include/ceph_assert.h"
23 #include "include/types.h"
24 #include "include/buffer_raw.h"
25 #include "include/compat.h"
26 #include "include/mempool.h"
28 #include "common/environment.h"
29 #include "common/errno.h"
30 #include "common/error_code.h"
31 #include "common/safe_io.h"
32 #include "common/strtol.h"
33 #include "common/likely.h"
34 #include "common/valgrind.h"
35 #include "common/deleter.h"
36 #include "common/error_code.h"
37 #include "include/intarith.h"
38 #include "include/spinlock.h"
39 #include "include/scope_guard.h"
48 #define CEPH_BUFFER_ALLOC_UNIT 4096u
49 #define CEPH_BUFFER_APPEND_SIZE (CEPH_BUFFER_ALLOC_UNIT - sizeof(raw_combined))
51 // 256K is the maximum "small" object size in tcmalloc above which allocations come from
52 // the central heap. For now let's keep this below that threshold.
53 #define CEPH_BUFFER_ALLOC_UNIT_MAX std::size_t { 256*1024 }
#ifdef BUFFER_DEBUG
// Serializes debug output emitted from concurrent threads.
static ceph::spinlock debug_lock;
# define bdout { std::lock_guard<ceph::spinlock> lg(debug_lock); std::cout
# define bendl std::endl; }
#else
// Debugging disabled: the whole statement is compiled out via "if (0)".
# define bdout if (0) { std::cout
# define bendl std::endl; }
#endif
64 static ceph::atomic
<unsigned> buffer_cached_crc
{ 0 };
65 static ceph::atomic
<unsigned> buffer_cached_crc_adjusted
{ 0 };
66 static ceph::atomic
<unsigned> buffer_missed_crc
{ 0 };
68 static bool buffer_track_crc
= get_env_bool("CEPH_BUFFER_TRACK");
70 void buffer::track_cached_crc(bool b
) {
73 int buffer::get_cached_crc() {
74 return buffer_cached_crc
;
76 int buffer::get_cached_crc_adjusted() {
77 return buffer_cached_crc_adjusted
;
80 int buffer::get_missed_crc() {
81 return buffer_missed_crc
;
85 * raw_combined is always placed within a single allocation along
86 * with the data buffer. the data goes at the beginning, and
87 * raw_combined at the end.
89 class buffer::raw_combined
: public buffer::raw
{
92 raw_combined(char *dataptr
, unsigned l
, unsigned align
,
94 : raw(dataptr
, l
, mempool
),
97 raw
* clone_empty() override
{
98 return create(len
, alignment
).release();
101 static ceph::unique_leakable_ptr
<buffer::raw
>
104 int mempool
= mempool::mempool_buffer_anon
)
106 // posix_memalign() requires a multiple of sizeof(void *)
107 align
= std::max
<unsigned>(align
, sizeof(void *));
108 size_t rawlen
= round_up_to(sizeof(buffer::raw_combined
),
109 alignof(buffer::raw_combined
));
110 size_t datalen
= round_up_to(len
, alignof(buffer::raw_combined
));
113 char *ptr
= (char *) valloc(rawlen
+ datalen
);
116 int r
= ::posix_memalign((void**)(void*)&ptr
, align
, rawlen
+ datalen
);
123 // actual data first, since it has presumably larger alignment restriction
124 // then put the raw_combined at the end
125 return ceph::unique_leakable_ptr
<buffer::raw
>(
126 new (ptr
+ datalen
) raw_combined(ptr
, len
, align
, mempool
));
129 static void operator delete(void *ptr
) {
130 raw_combined
*raw
= (raw_combined
*)ptr
;
131 aligned_free((void *)raw
->data
);
135 class buffer::raw_malloc
: public buffer::raw
{
137 MEMPOOL_CLASS_HELPERS();
139 explicit raw_malloc(unsigned l
) : raw(l
) {
141 data
= (char *)malloc(len
);
147 bdout
<< "raw_malloc " << this << " alloc " << (void *)data
<< " " << l
<< bendl
;
149 raw_malloc(unsigned l
, char *b
) : raw(b
, l
) {
150 bdout
<< "raw_malloc " << this << " alloc " << (void *)data
<< " " << l
<< bendl
;
152 ~raw_malloc() override
{
154 bdout
<< "raw_malloc " << this << " free " << (void *)data
<< " " << bendl
;
156 raw
* clone_empty() override
{
157 return new raw_malloc(len
);
162 class buffer::raw_posix_aligned
: public buffer::raw
{
165 MEMPOOL_CLASS_HELPERS();
167 raw_posix_aligned(unsigned l
, unsigned _align
) : raw(l
) {
168 // posix_memalign() requires a multiple of sizeof(void *)
169 align
= std::max
<unsigned>(_align
, sizeof(void *));
171 data
= (char *) valloc(len
);
173 int r
= ::posix_memalign((void**)(void*)&data
, align
, len
);
179 bdout
<< "raw_posix_aligned " << this << " alloc " << (void *)data
180 << " l=" << l
<< ", align=" << align
<< bendl
;
182 ~raw_posix_aligned() override
{
184 bdout
<< "raw_posix_aligned " << this << " free " << (void *)data
<< bendl
;
186 raw
* clone_empty() override
{
187 return new raw_posix_aligned(len
, align
);
193 class buffer::raw_hack_aligned
: public buffer::raw
{
197 raw_hack_aligned(unsigned l
, unsigned _align
) : raw(l
) {
199 realdata
= new char[len
+align
-1];
200 unsigned off
= ((uintptr_t)realdata
) & (align
-1);
202 data
= realdata
+ align
- off
;
205 //cout << "hack aligned " << (unsigned)data
206 //<< " in raw " << (unsigned)realdata
207 //<< " off " << off << std::endl;
208 ceph_assert(((uintptr_t)data
& (align
-1)) == 0);
210 ~raw_hack_aligned() {
214 return new raw_hack_aligned(len
, align
);
220 * primitive buffer types
222 class buffer::raw_char
: public buffer::raw
{
224 MEMPOOL_CLASS_HELPERS();
226 explicit raw_char(unsigned l
) : raw(l
) {
228 data
= new char[len
];
231 bdout
<< "raw_char " << this << " alloc " << (void *)data
<< " " << l
<< bendl
;
233 raw_char(unsigned l
, char *b
) : raw(b
, l
) {
234 bdout
<< "raw_char " << this << " alloc " << (void *)data
<< " " << l
<< bendl
;
236 ~raw_char() override
{
238 bdout
<< "raw_char " << this << " free " << (void *)data
<< bendl
;
240 raw
* clone_empty() override
{
241 return new raw_char(len
);
245 class buffer::raw_claimed_char
: public buffer::raw
{
247 MEMPOOL_CLASS_HELPERS();
249 explicit raw_claimed_char(unsigned l
, char *b
) : raw(b
, l
) {
250 bdout
<< "raw_claimed_char " << this << " alloc " << (void *)data
251 << " " << l
<< bendl
;
253 ~raw_claimed_char() override
{
254 bdout
<< "raw_claimed_char " << this << " free " << (void *)data
257 raw
* clone_empty() override
{
258 return new raw_char(len
);
262 class buffer::raw_static
: public buffer::raw
{
264 MEMPOOL_CLASS_HELPERS();
266 raw_static(const char *d
, unsigned l
) : raw((char*)d
, l
) { }
267 ~raw_static() override
{}
268 raw
* clone_empty() override
{
269 return new buffer::raw_char(len
);
273 class buffer::raw_claim_buffer
: public buffer::raw
{
276 raw_claim_buffer(const char *b
, unsigned l
, deleter d
)
277 : raw((char*)b
, l
), del(std::move(d
)) { }
278 ~raw_claim_buffer() override
{}
279 raw
* clone_empty() override
{
280 return new buffer::raw_char(len
);
284 ceph::unique_leakable_ptr
<buffer::raw
> buffer::copy(const char *c
, unsigned len
) {
285 auto r
= buffer::create_aligned(len
, sizeof(size_t));
286 memcpy(r
->get_data(), c
, len
);
290 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create(unsigned len
) {
291 return buffer::create_aligned(len
, sizeof(size_t));
293 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create(unsigned len
, char c
) {
294 auto ret
= buffer::create_aligned(len
, sizeof(size_t));
295 memset(ret
->get_data(), c
, len
);
298 ceph::unique_leakable_ptr
<buffer::raw
>
299 buffer::create_in_mempool(unsigned len
, int mempool
) {
300 return buffer::create_aligned_in_mempool(len
, sizeof(size_t), mempool
);
302 ceph::unique_leakable_ptr
<buffer::raw
>
303 buffer::claim_char(unsigned len
, char *buf
) {
304 return ceph::unique_leakable_ptr
<buffer::raw
>(
305 new raw_claimed_char(len
, buf
));
307 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_malloc(unsigned len
) {
308 return ceph::unique_leakable_ptr
<buffer::raw
>(new raw_malloc(len
));
310 ceph::unique_leakable_ptr
<buffer::raw
>
311 buffer::claim_malloc(unsigned len
, char *buf
) {
312 return ceph::unique_leakable_ptr
<buffer::raw
>(new raw_malloc(len
, buf
));
314 ceph::unique_leakable_ptr
<buffer::raw
>
315 buffer::create_static(unsigned len
, char *buf
) {
316 return ceph::unique_leakable_ptr
<buffer::raw
>(new raw_static(buf
, len
));
318 ceph::unique_leakable_ptr
<buffer::raw
>
319 buffer::claim_buffer(unsigned len
, char *buf
, deleter del
) {
320 return ceph::unique_leakable_ptr
<buffer::raw
>(
321 new raw_claim_buffer(buf
, len
, std::move(del
)));
324 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_aligned_in_mempool(
325 unsigned len
, unsigned align
, int mempool
)
327 // If alignment is a page multiple, use a separate buffer::raw to
328 // avoid fragmenting the heap.
330 // Somewhat unexpectedly, I see consistently better performance
331 // from raw_combined than from raw even when the allocation size is
332 // a page multiple (but alignment is not).
334 // I also see better performance from a separate buffer::raw once the
336 if ((align
& ~CEPH_PAGE_MASK
) == 0 ||
337 len
>= CEPH_PAGE_SIZE
* 2) {
339 return ceph::unique_leakable_ptr
<buffer::raw
>(new raw_posix_aligned(len
, align
));
341 return ceph::unique_leakable_ptr
<buffer::raw
>(new raw_hack_aligned(len
, align
));
344 return raw_combined::create(len
, align
, mempool
);
346 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_aligned(
347 unsigned len
, unsigned align
) {
348 return create_aligned_in_mempool(len
, align
,
349 mempool::mempool_buffer_anon
);
352 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_page_aligned(unsigned len
) {
353 return create_aligned(len
, CEPH_PAGE_SIZE
);
355 ceph::unique_leakable_ptr
<buffer::raw
> buffer::create_small_page_aligned(unsigned len
) {
356 if (len
< CEPH_PAGE_SIZE
) {
357 return create_aligned(len
, CEPH_BUFFER_ALLOC_UNIT
);
359 return create_aligned(len
, CEPH_PAGE_SIZE
);
363 buffer::ptr::ptr(ceph::unique_leakable_ptr
<raw
> r
)
366 _len(_raw
->get_len())
368 _raw
->nref
.store(1, std::memory_order_release
);
369 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
371 buffer::ptr::ptr(unsigned l
) : _off(0), _len(l
)
373 _raw
= buffer::create(l
).release();
374 _raw
->nref
.store(1, std::memory_order_release
);
375 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
377 buffer::ptr::ptr(const char *d
, unsigned l
) : _off(0), _len(l
) // ditto.
379 _raw
= buffer::copy(d
, l
).release();
380 _raw
->nref
.store(1, std::memory_order_release
);
381 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
383 buffer::ptr::ptr(const ptr
& p
) : _raw(p
._raw
), _off(p
._off
), _len(p
._len
)
387 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
390 buffer::ptr::ptr(ptr
&& p
) noexcept
: _raw(p
._raw
), _off(p
._off
), _len(p
._len
)
395 buffer::ptr::ptr(const ptr
& p
, unsigned o
, unsigned l
)
396 : _raw(p
._raw
), _off(p
._off
+ o
), _len(l
)
398 ceph_assert(o
+l
<= p
._len
);
401 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
403 buffer::ptr::ptr(const ptr
& p
, ceph::unique_leakable_ptr
<raw
> r
)
408 _raw
->nref
.store(1, std::memory_order_release
);
409 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
411 buffer::ptr
& buffer::ptr::operator= (const ptr
& p
)
415 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
417 buffer::raw
*raw
= p
._raw
;
428 buffer::ptr
& buffer::ptr::operator= (ptr
&& p
) noexcept
431 buffer::raw
*raw
= p
._raw
;
444 ceph::unique_leakable_ptr
<buffer::raw
> buffer::ptr::clone()
446 return _raw
->clone();
449 void buffer::ptr::swap(ptr
& other
) noexcept
462 void buffer::ptr::release()
464 // BE CAREFUL: this is called also for hypercombined ptr_node. After
465 // freeing underlying raw, `*this` can become inaccessible as well!
467 // cache the pointer to avoid unncecessary reloads and repeated
469 if (auto* const cached_raw
= std::exchange(_raw
, nullptr);
471 bdout
<< "ptr " << this << " release " << cached_raw
<< bendl
;
472 // optimize the common case where a particular `buffer::raw` has
473 // only a single reference. Altogether with initializing `nref` of
474 // freshly fabricated one with `1` through the std::atomic's ctor
475 // (which doesn't impose a memory barrier on the strongly-ordered
476 // x86), this allows to avoid all atomical operations in such case.
477 const bool last_one
= \
478 (1 == cached_raw
->nref
.load(std::memory_order_acquire
));
479 if (likely(last_one
) || --cached_raw
->nref
== 0) {
480 bdout
<< "deleting raw " << static_cast<void*>(cached_raw
)
481 << " len " << cached_raw
->get_len() << bendl
;
482 ANNOTATE_HAPPENS_AFTER(&cached_raw
->nref
);
483 ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&cached_raw
->nref
);
484 delete cached_raw
; // dealloc old (if any)
486 ANNOTATE_HAPPENS_BEFORE(&cached_raw
->nref
);
491 int buffer::ptr::get_mempool() const {
493 return _raw
->mempool
;
495 return mempool::mempool_buffer_anon
;
498 void buffer::ptr::reassign_to_mempool(int pool
) {
500 _raw
->reassign_to_mempool(pool
);
503 void buffer::ptr::try_assign_to_mempool(int pool
) {
505 _raw
->try_assign_to_mempool(pool
);
509 const char *buffer::ptr::c_str() const {
511 return _raw
->get_data() + _off
;
513 char *buffer::ptr::c_str() {
515 return _raw
->get_data() + _off
;
517 const char *buffer::ptr::end_c_str() const {
519 return _raw
->get_data() + _off
+ _len
;
521 char *buffer::ptr::end_c_str() {
523 return _raw
->get_data() + _off
+ _len
;
526 unsigned buffer::ptr::unused_tail_length() const
528 return _raw
? _raw
->get_len() - (_off
+ _len
) : 0;
530 const char& buffer::ptr::operator[](unsigned n
) const
533 ceph_assert(n
< _len
);
534 return _raw
->get_data()[_off
+ n
];
536 char& buffer::ptr::operator[](unsigned n
)
539 ceph_assert(n
< _len
);
540 return _raw
->get_data()[_off
+ n
];
543 const char *buffer::ptr::raw_c_str() const { ceph_assert(_raw
); return _raw
->get_data(); }
544 unsigned buffer::ptr::raw_length() const { ceph_assert(_raw
); return _raw
->get_len(); }
545 int buffer::ptr::raw_nref() const { ceph_assert(_raw
); return _raw
->nref
; }
547 void buffer::ptr::copy_out(unsigned o
, unsigned l
, char *dest
) const {
550 throw end_of_buffer();
551 char* src
= _raw
->get_data() + _off
+ o
;
552 maybe_inline_memcpy(dest
, src
, l
, 8);
555 unsigned buffer::ptr::wasted() const
557 return _raw
->get_len() - _len
;
560 int buffer::ptr::cmp(const ptr
& o
) const
562 int l
= _len
< o
._len
? _len
: o
._len
;
564 int r
= memcmp(c_str(), o
.c_str(), l
);
575 bool buffer::ptr::is_zero() const
577 return mem_is_zero(c_str(), _len
);
580 unsigned buffer::ptr::append(char c
)
583 ceph_assert(1 <= unused_tail_length());
584 char* ptr
= _raw
->get_data() + _off
+ _len
;
590 unsigned buffer::ptr::append(const char *p
, unsigned l
)
593 ceph_assert(l
<= unused_tail_length());
594 char* c
= _raw
->get_data() + _off
+ _len
;
595 maybe_inline_memcpy(c
, p
, l
, 32);
600 unsigned buffer::ptr::append_zeros(unsigned l
)
603 ceph_assert(l
<= unused_tail_length());
604 char* c
= _raw
->get_data() + _off
+ _len
;
605 // FIPS zeroization audit 20191115: this memset is not security related.
611 void buffer::ptr::copy_in(unsigned o
, unsigned l
, const char *src
, bool crc_reset
)
614 ceph_assert(o
<= _len
);
615 ceph_assert(o
+l
<= _len
);
616 char* dest
= _raw
->get_data() + _off
+ o
;
618 _raw
->invalidate_crc();
619 maybe_inline_memcpy(dest
, src
, l
, 64);
622 void buffer::ptr::zero(bool crc_reset
)
625 _raw
->invalidate_crc();
626 // FIPS zeroization audit 20191115: this memset is not security related.
627 memset(c_str(), 0, _len
);
630 void buffer::ptr::zero(unsigned o
, unsigned l
, bool crc_reset
)
632 ceph_assert(o
+l
<= _len
);
634 _raw
->invalidate_crc();
635 // FIPS zeroization audit 20191115: this memset is not security related.
636 memset(c_str()+o
, 0, l
);
640 buffer::ptr::iterator_impl
<B
>& buffer::ptr::iterator_impl
<B
>::operator +=(size_t len
) {
643 throw end_of_buffer();
647 template buffer::ptr::iterator_impl
<false>&
648 buffer::ptr::iterator_impl
<false>::operator +=(size_t len
);
649 template buffer::ptr::iterator_impl
<true>&
650 buffer::ptr::iterator_impl
<true>::operator +=(size_t len
);
652 // -- buffer::list::iterator --
654 buffer::list::iterator operator=(const buffer::list::iterator& other)
656 if (this != &other) {
666 template<bool is_const
>
667 buffer::list::iterator_impl
<is_const
>::iterator_impl(bl_t
*l
, unsigned o
)
668 : bl(l
), ls(&bl
->_buffers
), p(ls
->begin()), off(0), p_off(0)
673 template<bool is_const
>
674 buffer::list::iterator_impl
<is_const
>::iterator_impl(const buffer::list::iterator
& i
)
675 : iterator_impl
<is_const
>(i
.bl
, i
.off
, i
.p
, i
.p_off
) {}
677 template<bool is_const
>
678 auto buffer::list::iterator_impl
<is_const
>::operator +=(unsigned o
)
681 //cout << this << " advance " << o << " from " << off
682 // << " (p_off " << p_off << " in " << p->length() << ")"
686 while (p
!= ls
->end()) {
687 if (p_off
>= p
->length()) {
689 p_off
-= p
->length();
692 // somewhere in this buffer!
696 if (p
== ls
->end() && p_off
) {
697 throw end_of_buffer();
703 template<bool is_const
>
704 void buffer::list::iterator_impl
<is_const
>::seek(unsigned o
)
711 template<bool is_const
>
712 char buffer::list::iterator_impl
<is_const
>::operator*() const
715 throw end_of_buffer();
719 template<bool is_const
>
720 buffer::list::iterator_impl
<is_const
>&
721 buffer::list::iterator_impl
<is_const
>::operator++()
724 throw end_of_buffer();
729 template<bool is_const
>
730 buffer::ptr
buffer::list::iterator_impl
<is_const
>::get_current_ptr() const
733 throw end_of_buffer();
734 return ptr(*p
, p_off
, p
->length() - p_off
);
737 template<bool is_const
>
738 bool buffer::list::iterator_impl
<is_const
>::is_pointing_same_raw(
739 const ptr
& other
) const
742 throw end_of_buffer();
743 return p
->_raw
== other
._raw
;
747 // note that these all _append_ to dest!
748 template<bool is_const
>
749 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, char *dest
)
751 if (p
== ls
->end()) seek(off
);
754 throw end_of_buffer();
756 unsigned howmuch
= p
->length() - p_off
;
757 if (len
< howmuch
) howmuch
= len
;
758 p
->copy_out(p_off
, howmuch
, dest
);
766 template<bool is_const
>
767 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, ptr
&dest
)
769 copy_deep(len
, dest
);
772 template<bool is_const
>
773 void buffer::list::iterator_impl
<is_const
>::copy_deep(unsigned len
, ptr
&dest
)
779 throw end_of_buffer();
781 copy(len
, dest
.c_str());
783 template<bool is_const
>
784 void buffer::list::iterator_impl
<is_const
>::copy_shallow(unsigned len
,
791 throw end_of_buffer();
792 unsigned howmuch
= p
->length() - p_off
;
795 copy(len
, dest
.c_str());
797 dest
= ptr(*p
, p_off
, len
);
802 template<bool is_const
>
803 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, list
&dest
)
809 throw end_of_buffer();
811 unsigned howmuch
= p
->length() - p_off
;
814 dest
.append(*p
, p_off
, howmuch
);
821 template<bool is_const
>
822 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, std::string
&dest
)
828 throw end_of_buffer();
830 unsigned howmuch
= p
->length() - p_off
;
831 const char *c_str
= p
->c_str();
834 dest
.append(c_str
+ p_off
, howmuch
);
841 template<bool is_const
>
842 void buffer::list::iterator_impl
<is_const
>::copy_all(list
&dest
)
850 unsigned howmuch
= p
->length() - p_off
;
851 const char *c_str
= p
->c_str();
852 dest
.append(c_str
+ p_off
, howmuch
);
858 template<bool is_const
>
859 size_t buffer::list::iterator_impl
<is_const
>::get_ptr_and_advance(
860 size_t want
, const char **data
)
862 if (p
== ls
->end()) {
864 if (p
== ls
->end()) {
868 *data
= p
->c_str() + p_off
;
869 size_t l
= std::min
<size_t>(p
->length() - p_off
, want
);
871 if (p_off
== p
->length()) {
879 template<bool is_const
>
880 uint32_t buffer::list::iterator_impl
<is_const
>::crc32c(
881 size_t length
, uint32_t crc
)
883 length
= std::min
<size_t>(length
, get_remaining());
886 size_t l
= get_ptr_and_advance(length
, &p
);
887 crc
= ceph_crc32c(crc
, (unsigned char*)p
, l
);
893 // explicitly instantiate only the iterator types we need, so we can hide the
894 // details in this compilation unit without introducing unnecessary link time
896 template class buffer::list::iterator_impl
<true>;
897 template class buffer::list::iterator_impl
<false>;
899 buffer::list::iterator::iterator(bl_t
*l
, unsigned o
)
900 : iterator_impl(l
, o
)
903 buffer::list::iterator::iterator(bl_t
*l
, unsigned o
, list_iter_t ip
, unsigned po
)
904 : iterator_impl(l
, o
, ip
, po
)
908 void buffer::list::iterator::copy_in(unsigned len
, const char *src
, bool crc_reset
)
915 throw end_of_buffer();
917 unsigned howmuch
= p
->length() - p_off
;
920 p
->copy_in(p_off
, howmuch
, src
, crc_reset
);
928 void buffer::list::iterator::copy_in(unsigned len
, const list
& otherl
)
933 for (const auto& node
: otherl
._buffers
) {
934 unsigned l
= node
.length();
937 copy_in(l
, node
.c_str());
944 // -- buffer::list --
946 void buffer::list::swap(list
& other
) noexcept
948 std::swap(_len
, other
._len
);
949 std::swap(_num
, other
._num
);
950 std::swap(_carriage
, other
._carriage
);
951 _buffers
.swap(other
._buffers
);
954 bool buffer::list::contents_equal(const ceph::buffer::list
& other
) const
956 if (length() != other
.length())
959 // buffer-wise comparison
961 auto a
= std::cbegin(_buffers
);
962 auto b
= std::cbegin(other
._buffers
);
963 unsigned aoff
= 0, boff
= 0;
964 while (a
!= std::cend(_buffers
)) {
965 unsigned len
= a
->length() - aoff
;
966 if (len
> b
->length() - boff
)
967 len
= b
->length() - boff
;
968 if (memcmp(a
->c_str() + aoff
, b
->c_str() + boff
, len
) != 0)
971 if (aoff
== a
->length()) {
976 if (boff
== b
->length()) {
984 // byte-wise comparison
986 bufferlist::const_iterator me
= begin();
987 bufferlist::const_iterator him
= other
.begin();
998 bool buffer::list::contents_equal(const void* const other
,
1001 if (this->length() != length
) {
1005 const auto* other_buf
= reinterpret_cast<const char*>(other
);
1006 for (const auto& bp
: buffers()) {
1007 assert(bp
.length() <= length
);
1008 if (std::memcmp(bp
.c_str(), other_buf
, bp
.length()) != 0) {
1011 length
-= bp
.length();
1012 other_buf
+= bp
.length();
1019 bool buffer::list::is_provided_buffer(const char* const dst
) const
1021 if (_buffers
.empty()) {
1024 return (is_contiguous() && (_buffers
.front().c_str() == dst
));
1027 bool buffer::list::is_aligned(const unsigned align
) const
1029 for (const auto& node
: _buffers
) {
1030 if (!node
.is_aligned(align
)) {
1037 bool buffer::list::is_n_align_sized(const unsigned align
) const
1039 for (const auto& node
: _buffers
) {
1040 if (!node
.is_n_align_sized(align
)) {
1047 bool buffer::list::is_aligned_size_and_memory(
1048 const unsigned align_size
,
1049 const unsigned align_memory
) const
1051 for (const auto& node
: _buffers
) {
1052 if (!node
.is_aligned(align_memory
) || !node
.is_n_align_sized(align_size
)) {
1059 bool buffer::list::is_zero() const {
1060 for (const auto& node
: _buffers
) {
1061 if (!node
.is_zero()) {
1068 void buffer::list::zero()
1070 for (auto& node
: _buffers
) {
1075 void buffer::list::zero(const unsigned o
, const unsigned l
)
1077 ceph_assert(o
+l
<= _len
);
1079 for (auto& node
: _buffers
) {
1080 if (p
+ node
.length() > o
) {
1081 if (p
>= o
&& p
+node
.length() <= o
+l
) {
1082 // 'o'------------- l -----------|
1083 // 'p'-- node.length() --|
1085 } else if (p
>= o
) {
1086 // 'o'------------- l -----------|
1087 // 'p'------- node.length() -------|
1088 node
.zero(0, o
+l
-p
);
1089 } else if (p
+ node
.length() <= o
+l
) {
1090 // 'o'------------- l -----------|
1091 // 'p'------- node.length() -------|
1092 node
.zero(o
-p
, node
.length()-(o
-p
));
1094 // 'o'----------- l -----------|
1095 // 'p'---------- node.length() ----------|
1106 bool buffer::list::is_contiguous() const
1111 bool buffer::list::is_n_page_sized() const
1113 return is_n_align_sized(CEPH_PAGE_SIZE
);
1116 bool buffer::list::is_page_aligned() const
1118 return is_aligned(CEPH_PAGE_SIZE
);
1121 int buffer::list::get_mempool() const
1123 if (_buffers
.empty()) {
1124 return mempool::mempool_buffer_anon
;
1126 return _buffers
.back().get_mempool();
1129 void buffer::list::reassign_to_mempool(int pool
)
1131 for (auto& p
: _buffers
) {
1132 p
._raw
->reassign_to_mempool(pool
);
1136 void buffer::list::try_assign_to_mempool(int pool
)
1138 for (auto& p
: _buffers
) {
1139 p
._raw
->try_assign_to_mempool(pool
);
1143 uint64_t buffer::list::get_wasted_space() const
1146 return _buffers
.back().wasted();
1148 std::vector
<const raw
*> raw_vec
;
1149 raw_vec
.reserve(_num
);
1150 for (const auto& p
: _buffers
)
1151 raw_vec
.push_back(p
._raw
);
1152 std::sort(raw_vec
.begin(), raw_vec
.end());
1155 const raw
*last
= nullptr;
1156 for (const auto r
: raw_vec
) {
1160 total
+= r
->get_len();
1162 // If multiple buffers are sharing the same raw buffer and they overlap
1163 // with each other, the wasted space will be underestimated.
1164 if (total
<= length())
1166 return total
- length();
1169 void buffer::list::rebuild()
1172 _carriage
= &always_empty_bptr
;
1173 _buffers
.clear_and_dispose();
1177 if ((_len
& ~CEPH_PAGE_MASK
) == 0)
1178 rebuild(ptr_node::create(buffer::create_page_aligned(_len
)));
1180 rebuild(ptr_node::create(buffer::create(_len
)));
1183 void buffer::list::rebuild(
1184 std::unique_ptr
<buffer::ptr_node
, buffer::ptr_node::disposer
> nb
)
1187 int mempool
= _buffers
.front().get_mempool();
1188 nb
->reassign_to_mempool(mempool
);
1189 for (auto& node
: _buffers
) {
1190 nb
->copy_in(pos
, node
.length(), node
.c_str(), false);
1191 pos
+= node
.length();
1193 _buffers
.clear_and_dispose();
1194 if (likely(nb
->length())) {
1195 _carriage
= nb
.get();
1196 _buffers
.push_back(*nb
.release());
1199 _carriage
= &always_empty_bptr
;
1205 bool buffer::list::rebuild_aligned(unsigned align
)
1207 return rebuild_aligned_size_and_memory(align
, align
);
1210 bool buffer::list::rebuild_aligned_size_and_memory(unsigned align_size
,
1211 unsigned align_memory
,
1212 unsigned max_buffers
)
1214 bool had_to_rebuild
= false;
1216 if (max_buffers
&& _num
> max_buffers
&& _len
> (max_buffers
* align_size
)) {
1217 align_size
= round_up_to(round_up_to(_len
, max_buffers
) / max_buffers
, align_size
);
1219 auto p
= std::begin(_buffers
);
1220 auto p_prev
= _buffers
.before_begin();
1221 while (p
!= std::end(_buffers
)) {
1222 // keep anything that's already align and sized aligned
1223 if (p
->is_aligned(align_memory
) && p
->is_n_align_sized(align_size
)) {
1224 /*cout << " segment " << (void*)p->c_str()
1225 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1226 << " length " << p->length()
1227 << " " << (p->length() & (align - 1)) << " ok" << std::endl;
1233 // consolidate unaligned items, until we get something that is sized+aligned
1235 unsigned offset
= 0;
1237 /*cout << " segment " << (void*)p->c_str()
1238 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1239 << " length " << p->length() << " " << (p->length() & (align - 1))
1240 << " overall offset " << offset << " " << (offset & (align - 1))
1241 << " not ok" << std::endl;
1243 offset
+= p
->length();
1244 // no need to reallocate, relinking is enough thankfully to bi::list.
1245 auto p_after
= _buffers
.erase_after(p_prev
);
1247 unaligned
._buffers
.push_back(*p
);
1248 unaligned
._len
+= p
->length();
1249 unaligned
._num
+= 1;
1251 } while (p
!= std::end(_buffers
) &&
1252 (!p
->is_aligned(align_memory
) ||
1253 !p
->is_n_align_sized(align_size
) ||
1254 (offset
% align_size
)));
1255 if (!(unaligned
.is_contiguous() && unaligned
._buffers
.front().is_aligned(align_memory
))) {
1258 buffer::create_aligned(unaligned
._len
, align_memory
)));
1259 had_to_rebuild
= true;
1261 if (unaligned
.get_num_buffers()) {
1262 _buffers
.insert_after(p_prev
, *ptr_node::create(unaligned
._buffers
.front()).release());
1265 // a bufferlist containing only 0-length bptrs is rebuilt as empty
1269 return had_to_rebuild
;
1272 bool buffer::list::rebuild_page_aligned()
1274 return rebuild_aligned(CEPH_PAGE_SIZE
);
1277 void buffer::list::reserve(size_t prealloc
)
1279 if (get_append_buffer_unused_tail_length() < prealloc
) {
1280 auto ptr
= ptr_node::create(buffer::create_small_page_aligned(prealloc
));
1281 ptr
->set_length(0); // unused, so far.
1282 _carriage
= ptr
.get();
1283 _buffers
.push_back(*ptr
.release());
1288 void buffer::list::claim_append(list
& bl
)
1290 // steal the other guy's buffers
1293 _buffers
.splice_back(bl
._buffers
);
1297 void buffer::list::append(char c
)
1299 // put what we can into the existing append_buffer.
1300 unsigned gap
= get_append_buffer_unused_tail_length();
1302 // make a new buffer!
1303 auto buf
= ptr_node::create(
1304 raw_combined::create(CEPH_BUFFER_APPEND_SIZE
, 0, get_mempool()));
1305 buf
->set_length(0); // unused, so far.
1306 _carriage
= buf
.get();
1307 _buffers
.push_back(*buf
.release());
1309 } else if (unlikely(_carriage
!= &_buffers
.back())) {
1310 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1311 _carriage
= bptr
.get();
1312 _buffers
.push_back(*bptr
.release());
1315 _carriage
->append(c
);
1319 buffer::ptr_node
buffer::list::always_empty_bptr
;
1321 buffer::ptr_node
& buffer::list::refill_append_space(const unsigned len
)
1323 // make a new buffer. fill out a complete page, factoring in the
1324 // raw_combined overhead.
1325 size_t need
= round_up_to(len
, sizeof(size_t)) + sizeof(raw_combined
);
1326 size_t alen
= round_up_to(need
, CEPH_BUFFER_ALLOC_UNIT
);
1327 if (_carriage
== &_buffers
.back()) {
1328 size_t nlen
= round_up_to(_carriage
->raw_length(), CEPH_BUFFER_ALLOC_UNIT
) * 2;
1329 nlen
= std::min(nlen
, CEPH_BUFFER_ALLOC_UNIT_MAX
);
1330 alen
= std::max(alen
, nlen
);
1332 alen
-= sizeof(raw_combined
);
1335 ptr_node::create(raw_combined::create(alen
, 0, get_mempool()));
1336 new_back
->set_length(0); // unused, so far.
1337 _carriage
= new_back
.get();
1338 _buffers
.push_back(*new_back
.release());
1340 return _buffers
.back();
1343 void buffer::list::append(const char *data
, unsigned len
)
1347 const unsigned free_in_last
= get_append_buffer_unused_tail_length();
1348 const unsigned first_round
= std::min(len
, free_in_last
);
1350 // _buffers and carriage can desynchronize when 1) a new ptr
1351 // we don't own has been added into the _buffers 2) _buffers
1352 // has been emptied as as a result of std::move or stolen by
1354 if (unlikely(_carriage
!= &_buffers
.back())) {
1355 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1356 _carriage
= bptr
.get();
1357 _buffers
.push_back(*bptr
.release());
1360 _carriage
->append(data
, first_round
);
1363 const unsigned second_round
= len
- first_round
;
1365 auto& new_back
= refill_append_space(second_round
);
1366 new_back
.append(data
+ first_round
, second_round
);
1370 buffer::list::reserve_t
buffer::list::obtain_contiguous_space(
1373 // note: if len < the normal append_buffer size it *might*
1374 // be better to allocate a normal-sized append_buffer and
1375 // use part of it. however, that optimizes for the case of
1376 // old-style types including new-style types. and in most
1377 // such cases, this won't be the very first thing encoded to
1378 // the list, so append_buffer will already be allocated.
1379 // OTOH if everything is new-style, we *should* allocate
1380 // only what we need and conserve memory.
1381 if (unlikely(get_append_buffer_unused_tail_length() < len
)) {
1383 buffer::ptr_node::create(buffer::create(len
)).release();
1384 new_back
->set_length(0); // unused, so far.
1385 _buffers
.push_back(*new_back
);
1387 _carriage
= new_back
;
1388 return { new_back
->c_str(), &new_back
->_len
, &_len
};
1390 ceph_assert(!_buffers
.empty());
1391 if (unlikely(_carriage
!= &_buffers
.back())) {
1392 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1393 _carriage
= bptr
.get();
1394 _buffers
.push_back(*bptr
.release());
1397 return { _carriage
->end_c_str(), &_carriage
->_len
, &_len
};
1401 void buffer::list::append(const ptr
& bp
)
1406 void buffer::list::append(ptr
&& bp
)
1408 push_back(std::move(bp
));
1411 void buffer::list::append(const ptr
& bp
, unsigned off
, unsigned len
)
1413 ceph_assert(len
+off
<= bp
.length());
1414 if (!_buffers
.empty()) {
1415 ptr
&l
= _buffers
.back();
1416 if (l
._raw
== bp
._raw
&& l
.end() == bp
.start() + off
) {
1417 // yay contiguous with tail bp!
1418 l
.set_length(l
.length()+len
);
1423 // add new item to list
1424 _buffers
.push_back(*ptr_node::create(bp
, off
, len
).release());
1429 void buffer::list::append(const list
& bl
)
1433 for (const auto& node
: bl
._buffers
) {
1434 _buffers
.push_back(*ptr_node::create(node
).release());
1438 void buffer::list::append(std::istream
& in
)
1443 append(s
.c_str(), s
.length());
1449 buffer::list::contiguous_filler
buffer::list::append_hole(const unsigned len
)
1453 if (unlikely(get_append_buffer_unused_tail_length() < len
)) {
1454 // make a new append_buffer. fill out a complete page, factoring in
1455 // the raw_combined overhead.
1456 auto& new_back
= refill_append_space(len
);
1457 new_back
.set_length(len
);
1458 return { new_back
.c_str() };
1459 } else if (unlikely(_carriage
!= &_buffers
.back())) {
1460 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1461 _carriage
= bptr
.get();
1462 _buffers
.push_back(*bptr
.release());
1465 _carriage
->set_length(_carriage
->length() + len
);
1466 return { _carriage
->end_c_str() - len
};
1469 void buffer::list::prepend_zero(unsigned len
)
1471 auto bp
= ptr_node::create(len
);
1475 _buffers
.push_front(*bp
.release());
1478 void buffer::list::append_zero(unsigned len
)
1482 const unsigned free_in_last
= get_append_buffer_unused_tail_length();
1483 const unsigned first_round
= std::min(len
, free_in_last
);
1485 if (unlikely(_carriage
!= &_buffers
.back())) {
1486 auto bptr
= ptr_node::create(*_carriage
, _carriage
->length(), 0);
1487 _carriage
= bptr
.get();
1488 _buffers
.push_back(*bptr
.release());
1491 _carriage
->append_zeros(first_round
);
1494 const unsigned second_round
= len
- first_round
;
1496 auto& new_back
= refill_append_space(second_round
);
1497 new_back
.set_length(second_round
);
1498 new_back
.zero(false);
1506 const char& buffer::list::operator[](unsigned n
) const
1509 throw end_of_buffer();
1511 for (const auto& node
: _buffers
) {
1512 if (n
>= node
.length()) {
1522 * return a contiguous ptr to whole bufferlist contents.
1524 char *buffer::list::c_str()
1526 if (const auto len
= length(); len
== 0) {
1527 return nullptr; // no non-empty buffers
1528 } else if (len
!= _buffers
.front().length()) {
1531 // there are two *main* scenarios that hit this branch:
1532 // 1. bufferlist with single, non-empty buffer;
1533 // 2. bufferlist with single, non-empty buffer followed by
1534 // empty buffer. splice() tries to not waste our appendable
1535 // space; to carry it an empty bptr is added at the end.
1536 // we account for these and don't rebuild unnecessarily
1538 return _buffers
.front().c_str();
1541 string
buffer::list::to_str() const {
1543 s
.reserve(length());
1544 for (const auto& node
: _buffers
) {
1545 if (node
.length()) {
1546 s
.append(node
.c_str(), node
.length());
1552 void buffer::list::substr_of(const list
& other
, unsigned off
, unsigned len
)
1554 if (off
+ len
> other
.length())
1555 throw end_of_buffer();
1560 auto curbuf
= std::cbegin(other
._buffers
);
1561 while (off
> 0 && off
>= curbuf
->length()) {
1563 //cout << "skipping over " << *curbuf << std::endl;
1564 off
-= (*curbuf
).length();
1567 ceph_assert(len
== 0 || curbuf
!= std::cend(other
._buffers
));
1571 if (off
+ len
< curbuf
->length()) {
1572 //cout << "copying partial of " << *curbuf << std::endl;
1573 _buffers
.push_back(*ptr_node::create(*curbuf
, off
, len
).release());
1580 //cout << "copying end (all?) of " << *curbuf << std::endl;
1581 unsigned howmuch
= curbuf
->length() - off
;
1582 _buffers
.push_back(*ptr_node::create(*curbuf
, off
, howmuch
).release());
1592 void buffer::list::splice(unsigned off
, unsigned len
, list
*claim_by
/*, bufferlist& replace_with */)
1597 if (off
>= length())
1598 throw end_of_buffer();
1600 ceph_assert(len
> 0);
1601 //cout << "splice off " << off << " len " << len << " ... mylen = " << length() << std::endl;
1604 auto curbuf
= std::begin(_buffers
);
1605 auto curbuf_prev
= _buffers
.before_begin();
1607 ceph_assert(curbuf
!= std::end(_buffers
));
1608 if (off
>= (*curbuf
).length()) {
1610 //cout << "off = " << off << " skipping over " << *curbuf << std::endl;
1611 off
-= (*curbuf
).length();
1612 curbuf_prev
= curbuf
++;
1614 // somewhere in this buffer!
1615 //cout << "off = " << off << " somewhere in " << *curbuf << std::endl;
1621 // add a reference to the front bit, insert it before curbuf (which
1623 //cout << "keeping front " << off << " of " << *curbuf << std::endl;
1624 _buffers
.insert_after(curbuf_prev
,
1625 *ptr_node::create(*curbuf
, 0, off
).release());
1632 // partial or the last (appendable) one?
1633 if (const auto to_drop
= off
+ len
; to_drop
< curbuf
->length()) {
1634 //cout << "keeping end of " << *curbuf << ", losing first " << off+len << std::endl;
1636 claim_by
->append(*curbuf
, off
, len
);
1637 curbuf
->set_offset(to_drop
+ curbuf
->offset()); // ignore beginning big
1638 curbuf
->set_length(curbuf
->length() - to_drop
);
1640 //cout << " now " << *curbuf << std::endl;
1644 // hose though the end
1645 unsigned howmuch
= curbuf
->length() - off
;
1646 //cout << "discarding " << howmuch << " of " << *curbuf << std::endl;
1648 claim_by
->append(*curbuf
, off
, howmuch
);
1649 _len
-= curbuf
->length();
1650 if (curbuf
== _carriage
) {
1651 // no need to reallocate, shrinking and relinking is enough.
1652 curbuf
= _buffers
.erase_after(curbuf_prev
);
1653 _carriage
->set_offset(_carriage
->offset() + _carriage
->length());
1654 _carriage
->set_length(0);
1655 _buffers
.push_back(*_carriage
);
1657 curbuf
= _buffers
.erase_after_and_dispose(curbuf_prev
);
1664 // splice in *replace (implement me later?)
1667 void buffer::list::write(int off
, int len
, std::ostream
& out
) const
1670 s
.substr_of(*this, off
, len
);
1671 for (const auto& node
: s
._buffers
) {
1672 if (node
.length()) {
1673 out
.write(node
.c_str(), node
.length());
1678 void buffer::list::encode_base64(buffer::list
& o
)
1680 bufferptr
bp(length() * 4 / 3 + 3);
1681 int l
= ceph_armor(bp
.c_str(), bp
.c_str() + bp
.length(), c_str(), c_str() + length());
1683 o
.push_back(std::move(bp
));
1686 void buffer::list::decode_base64(buffer::list
& e
)
1688 bufferptr
bp(4 + ((e
.length() * 3) / 4));
1689 int l
= ceph_unarmor(bp
.c_str(), bp
.c_str() + bp
.length(), e
.c_str(), e
.c_str() + e
.length());
1691 std::ostringstream oss
;
1692 oss
<< "decode_base64: decoding failed:\n";
1694 throw buffer::malformed_input(oss
.str().c_str());
1696 ceph_assert(l
<= (int)bp
.length());
1698 push_back(std::move(bp
));
1701 ssize_t
buffer::list::pread_file(const char *fn
, uint64_t off
, uint64_t len
, std::string
*error
)
1703 int fd
= TEMP_FAILURE_RETRY(::open(fn
, O_RDONLY
|O_CLOEXEC
|O_BINARY
));
1706 std::ostringstream oss
;
1707 oss
<< "can't open " << fn
<< ": " << cpp_strerror(err
);
1713 // FIPS zeroization audit 20191115: this memset is not security related.
1714 memset(&st
, 0, sizeof(st
));
1715 if (::fstat(fd
, &st
) < 0) {
1717 std::ostringstream oss
;
1718 oss
<< "bufferlist::read_file(" << fn
<< "): stat error: "
1719 << cpp_strerror(err
);
1721 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1725 if (off
> (uint64_t)st
.st_size
) {
1726 std::ostringstream oss
;
1727 oss
<< "bufferlist::read_file(" << fn
<< "): read error: size < offset";
1729 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1733 if (len
> st
.st_size
- off
) {
1734 len
= st
.st_size
- off
;
1736 ssize_t ret
= lseek64(fd
, off
, SEEK_SET
);
1737 if (ret
!= (ssize_t
)off
) {
1741 ret
= read_fd(fd
, len
);
1743 std::ostringstream oss
;
1744 oss
<< "bufferlist::read_file(" << fn
<< "): read error:"
1745 << cpp_strerror(ret
);
1747 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1749 } else if (ret
!= (ssize_t
)len
) {
1751 // Perhaps the file changed between stat() and read()?
1752 std::ostringstream oss
;
1753 oss
<< "bufferlist::read_file(" << fn
<< "): warning: got premature EOF.";
1755 // not actually an error, but weird
1757 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1761 int buffer::list::read_file(const char *fn
, std::string
*error
)
1763 int fd
= TEMP_FAILURE_RETRY(::open(fn
, O_RDONLY
|O_CLOEXEC
|O_BINARY
));
1766 std::ostringstream oss
;
1767 oss
<< "can't open " << fn
<< ": " << cpp_strerror(err
);
1773 // FIPS zeroization audit 20191115: this memset is not security related.
1774 memset(&st
, 0, sizeof(st
));
1775 if (::fstat(fd
, &st
) < 0) {
1777 std::ostringstream oss
;
1778 oss
<< "bufferlist::read_file(" << fn
<< "): stat error: "
1779 << cpp_strerror(err
);
1781 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1785 ssize_t ret
= read_fd(fd
, st
.st_size
);
1787 std::ostringstream oss
;
1788 oss
<< "bufferlist::read_file(" << fn
<< "): read error:"
1789 << cpp_strerror(ret
);
1791 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1794 else if (ret
!= st
.st_size
) {
1796 // Perhaps the file changed between stat() and read()?
1797 std::ostringstream oss
;
1798 oss
<< "bufferlist::read_file(" << fn
<< "): warning: got premature EOF.";
1800 // not actually an error, but weird
1802 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1806 ssize_t
buffer::list::read_fd(int fd
, size_t len
)
1808 auto bp
= ptr_node::create(buffer::create(len
));
1809 ssize_t ret
= safe_read(fd
, (void*)bp
->c_str(), len
);
1811 bp
->set_length(ret
);
1812 push_back(std::move(bp
));
1817 ssize_t
buffer::list::recv_fd(int fd
, size_t len
)
1819 auto bp
= ptr_node::create(buffer::create(len
));
1820 ssize_t ret
= safe_recv(fd
, (void*)bp
->c_str(), len
);
1822 bp
->set_length(ret
);
1823 push_back(std::move(bp
));
1828 int buffer::list::write_file(const char *fn
, int mode
)
1830 int fd
= TEMP_FAILURE_RETRY(::open(fn
, O_WRONLY
|O_CREAT
|O_TRUNC
|O_CLOEXEC
|O_BINARY
, mode
));
1833 cerr
<< "bufferlist::write_file(" << fn
<< "): failed to open file: "
1834 << cpp_strerror(err
) << std::endl
;
1837 int ret
= write_fd(fd
);
1839 cerr
<< "bufferlist::write_fd(" << fn
<< "): write_fd error: "
1840 << cpp_strerror(ret
) << std::endl
;
1841 VOID_TEMP_FAILURE_RETRY(::close(fd
));
1844 if (TEMP_FAILURE_RETRY(::close(fd
))) {
1846 cerr
<< "bufferlist::write_file(" << fn
<< "): close error: "
1847 << cpp_strerror(err
) << std::endl
;
1853 static int do_writev(int fd
, struct iovec
*vec
, uint64_t offset
, unsigned veclen
, unsigned bytes
)
1858 r
= ::pwritev(fd
, vec
, veclen
, offset
);
1860 r
= ::lseek64(fd
, offset
, SEEK_SET
);
1864 r
= ::writev(fd
, vec
, veclen
);
1874 if (bytes
== 0) break;
1877 if (vec
[0].iov_len
<= (size_t)r
) {
1878 // drain this whole item
1879 r
-= vec
[0].iov_len
;
1883 vec
[0].iov_base
= (char *)vec
[0].iov_base
+ r
;
1884 vec
[0].iov_len
-= r
;
1893 int buffer::list::write_fd(int fd
) const
1900 auto p
= std::cbegin(_buffers
);
1901 while (p
!= std::cend(_buffers
)) {
1902 if (p
->length() > 0) {
1903 iov
[iovlen
].iov_base
= (void *)p
->c_str();
1904 iov
[iovlen
].iov_len
= p
->length();
1905 bytes
+= p
->length();
1910 if (iovlen
== IOV_MAX
||
1911 p
== _buffers
.end()) {
1916 wrote
= ::writev(fd
, start
, num
);
1923 if (wrote
< bytes
) {
1924 // partial write, recover!
1925 while ((size_t)wrote
>= start
[0].iov_len
) {
1926 wrote
-= start
[0].iov_len
;
1927 bytes
-= start
[0].iov_len
;
1932 start
[0].iov_len
-= wrote
;
1933 start
[0].iov_base
= (char *)start
[0].iov_base
+ wrote
;
1945 int buffer::list::send_fd(int fd
) const {
1946 return buffer::list::write_fd(fd
);
1949 int buffer::list::write_fd(int fd
, uint64_t offset
) const
1953 auto p
= std::cbegin(_buffers
);
1954 uint64_t left_pbrs
= get_num_buffers();
1957 unsigned iovlen
= 0;
1958 uint64_t size
= std::min
<uint64_t>(left_pbrs
, IOV_MAX
);
1961 iov
[iovlen
].iov_base
= (void *)p
->c_str();
1962 iov
[iovlen
].iov_len
= p
->length();
1964 bytes
+= p
->length();
1969 int r
= do_writev(fd
, iov
, offset
, iovlen
, bytes
);
1977 int buffer::list::write_fd(int fd
) const
1979 // There's no writev on Windows. WriteFileGather may be an option,
1980 // but it has strict requirements in terms of buffer size and alignment.
1981 auto p
= std::cbegin(_buffers
);
1982 uint64_t left_pbrs
= get_num_buffers();
1985 while (written
< p
->length()) {
1986 int r
= ::write(fd
, p
->c_str(), p
->length() - written
);
2000 int buffer::list::send_fd(int fd
) const
2002 // There's no writev on Windows. WriteFileGather may be an option,
2003 // but it has strict requirements in terms of buffer size and alignment.
2004 auto p
= std::cbegin(_buffers
);
2005 uint64_t left_pbrs
= get_num_buffers();
2008 while (written
< p
->length()) {
2009 int r
= ::send(fd
, p
->c_str(), p
->length() - written
, 0);
2011 return -ceph_sock_errno();
2023 int buffer::list::write_fd(int fd
, uint64_t offset
) const
2025 int r
= ::lseek64(fd
, offset
, SEEK_SET
);
2029 return write_fd(fd
);
2033 buffer::list::iov_vec_t
buffer::list::prepare_iovs() const
2037 iov_vec_t iovs
{_num
/ IOV_MAX
+ 1};
2038 auto it
= iovs
.begin();
2039 for (auto& bp
: _buffers
) {
2043 size_t nr_iov_created
= std::distance(iovs
.begin(), it
);
2045 std::min(_num
- IOV_MAX
* nr_iov_created
, (size_t)IOV_MAX
));
2047 it
->iov
[index
].iov_base
= (void*)bp
.c_str();
2048 it
->iov
[index
].iov_len
= bp
.length();
2050 it
->length
+= bp
.length();
2051 if (++index
== IOV_MAX
) {
2052 // continue with a new vector<iov> if we have more buf
2060 __u32
buffer::list::crc32c(__u32 crc
) const
2062 int cache_misses
= 0;
2064 int cache_adjusts
= 0;
2066 for (const auto& node
: _buffers
) {
2067 if (node
.length()) {
2068 raw
* const r
= node
._raw
;
2069 pair
<size_t, size_t> ofs(node
.offset(), node
.offset() + node
.length());
2070 pair
<uint32_t, uint32_t> ccrc
;
2071 if (r
->get_crc(ofs
, &ccrc
)) {
2072 if (ccrc
.first
== crc
) {
2077 /* If we have cached crc32c(buf, v) for initial value v,
2078 * we can convert this to a different initial value v' by:
2079 * crc32c(buf, v') = crc32c(buf, v) ^ adjustment
2080 * where adjustment = crc32c(0*len(buf), v ^ v')
2082 * http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
2083 * note, u for our crc32c implementation is 0
2085 crc
= ccrc
.second
^ ceph_crc32c(ccrc
.first
^ crc
, NULL
, node
.length());
2090 uint32_t base
= crc
;
2091 crc
= ceph_crc32c(crc
, (unsigned char*)node
.c_str(), node
.length());
2092 r
->set_crc(ofs
, make_pair(base
, crc
));
2097 if (buffer_track_crc
) {
2099 buffer_cached_crc_adjusted
+= cache_adjusts
;
2101 buffer_cached_crc
+= cache_hits
;
2103 buffer_missed_crc
+= cache_misses
;
2109 void buffer::list::invalidate_crc()
2111 for (const auto& node
: _buffers
) {
2113 node
._raw
->invalidate_crc();
2119 * Binary write all contents to a C++ stream
2121 void buffer::list::write_stream(std::ostream
&out
) const
2123 for (const auto& node
: _buffers
) {
2124 if (node
.length() > 0) {
2125 out
.write(node
.c_str(), node
.length());
2131 void buffer::list::hexdump(std::ostream
&out
, bool trailing_newline
) const
2136 std::ios_base::fmtflags original_flags
= out
.flags();
2138 // do our best to match the output of hexdump -C, for better
2141 out
.setf(std::ios::right
);
2145 char last_row_char
= '\0';
2146 bool was_same
= false, did_star
= false;
2147 for (unsigned o
=0; o
<length(); o
+= per
) {
2149 last_row_char
= (*this)[o
];
2152 if (o
+ per
< length()) {
2153 bool row_is_same
= true;
2154 for (unsigned i
=0; i
<per
&& o
+i
<length(); i
++) {
2155 char current_char
= (*this)[o
+i
];
2156 if (current_char
!= last_row_char
) {
2158 last_row_char
= current_char
;
2162 row_is_same
= false;
2182 out
<< std::hex
<< std::setw(8) << o
<< " ";
2185 for (i
=0; i
<per
&& o
+i
<length(); i
++) {
2188 out
<< " " << std::setw(2) << ((unsigned)(*this)[o
+i
] & 0xff);
2190 for (; i
<per
; i
++) {
2197 for (i
=0; i
<per
&& o
+i
<length(); i
++) {
2198 char c
= (*this)[o
+i
];
2199 if (isupper(c
) || islower(c
) || isdigit(c
) || c
== ' ' || ispunct(c
))
2204 out
<< '|' << std::dec
;
2206 if (trailing_newline
) {
2207 out
<< "\n" << std::hex
<< std::setw(8) << length();
2211 out
.flags(original_flags
);
2215 buffer::list
buffer::list::static_from_mem(char* c
, size_t l
) {
2217 bl
.push_back(ptr_node::create(create_static(l
, c
)));
2221 buffer::list
buffer::list::static_from_cstring(char* c
) {
2222 return static_from_mem(c
, std::strlen(c
));
2225 buffer::list
buffer::list::static_from_string(string
& s
) {
2226 // C++14 just has string::data return a char* from a non-const
2228 return static_from_mem(const_cast<char*>(s
.data()), s
.length());
2229 // But the way buffer::list mostly doesn't work in a sane way with
2230 // const makes me generally sad.
2233 // buffer::raw is not a standard layout type.
2234 #define BUF_OFFSETOF(type, field) \
2235 (reinterpret_cast<std::uintptr_t>(&(((type*)1024)->field)) - 1024u)
2237 bool buffer::ptr_node::dispose_if_hypercombined(
2238 buffer::ptr_node
* const delete_this
)
2240 // in case _raw is nullptr
2241 const std::uintptr_t bptr
=
2242 (reinterpret_cast<std::uintptr_t>(delete_this
->_raw
) +
2243 BUF_OFFSETOF(buffer::raw
, bptr_storage
));
2244 const bool is_hypercombined
=
2245 reinterpret_cast<std::uintptr_t>(delete_this
) == bptr
;
2246 if (is_hypercombined
) {
2247 ceph_assert_always("hypercombining is currently disabled" == nullptr);
2248 delete_this
->~ptr_node();
2255 std::unique_ptr
<buffer::ptr_node
, buffer::ptr_node::disposer
>
2256 buffer::ptr_node::create_hypercombined(ceph::unique_leakable_ptr
<buffer::raw
> r
)
2258 // FIXME: we don't currently hypercombine buffers due to crashes
2259 // observed in the rados suite. After fixing we'll use placement
2260 // new to create ptr_node on buffer::raw::bptr_storage.
2261 return std::unique_ptr
<buffer::ptr_node
, buffer::ptr_node::disposer
>(
2262 new ptr_node(std::move(r
)));
2265 buffer::ptr_node
* buffer::ptr_node::cloner::operator()(
2266 const buffer::ptr_node
& clone_this
)
2268 return new ptr_node(clone_this
);
2271 std::ostream
& buffer::operator<<(std::ostream
& out
, const buffer::raw
&r
) {
2272 return out
<< "buffer::raw("
2273 << (void*)r
.get_data() << " len " << r
.get_len()
2274 << " nref " << r
.nref
.load() << ")";
2277 std::ostream
& buffer::operator<<(std::ostream
& out
, const buffer::ptr
& bp
) {
2279 out
<< "buffer::ptr(" << bp
.offset() << "~" << bp
.length()
2280 << " " << (void*)bp
.c_str()
2281 << " in raw " << (void*)bp
.raw_c_str()
2282 << " len " << bp
.raw_length()
2283 << " nref " << bp
.raw_nref() << ")";
2285 out
<< "buffer:ptr(" << bp
.offset() << "~" << bp
.length() << " no raw)";
2289 std::ostream
& buffer::operator<<(std::ostream
& out
, const buffer::list
& bl
) {
2290 out
<< "buffer::list(len=" << bl
.length() << ",\n";
2292 for (const auto& node
: bl
.buffers()) {
2293 out
<< "\t" << node
;
2294 if (&node
!= &bl
.buffers().back()) {
2302 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_malloc
, buffer_raw_malloc
,
2304 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_posix_aligned
,
2305 buffer_raw_posix_aligned
, buffer_meta
);
2306 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_char
, buffer_raw_char
, buffer_meta
);
2307 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_claimed_char
, buffer_raw_claimed_char
,
2309 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_static
, buffer_raw_static
,
2313 void ceph::buffer::list::page_aligned_appender::_refill(size_t len
) {
2314 const unsigned alloc
=
2316 shift_round_up(static_cast<unsigned>(len
),
2317 static_cast<unsigned>(CEPH_PAGE_SHIFT
)));
2319 ptr_node::create(buffer::create_page_aligned(alloc
));
2320 new_back
->set_length(0); // unused, so far.
2321 bl
.push_back(std::move(new_back
));
2324 namespace ceph::buffer
{
2325 inline namespace v15_2_0
{
2327 #pragma GCC diagnostic push
2328 #pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
2329 #pragma clang diagnostic push
2330 #pragma clang diagnostic ignored "-Wnon-virtual-dtor"
2331 class buffer_error_category
: public ceph::converting_category
{
2333 buffer_error_category(){}
2334 const char* name() const noexcept override
;
2335 const char* message(int ev
, char*, std::size_t) const noexcept override
;
2336 std::string
message(int ev
) const override
;
2337 boost::system::error_condition
default_error_condition(int ev
) const noexcept
2339 using ceph::converting_category::equivalent
;
2340 bool equivalent(int ev
, const boost::system::error_condition
& c
) const
2342 int from_code(int ev
) const noexcept override
;
2344 #pragma GCC diagnostic pop
2345 #pragma clang diagnostic pop
2347 const char* buffer_error_category::name() const noexcept
{
2352 buffer_error_category::message(int ev
, char*, std::size_t) const noexcept
{
2353 using ceph::buffer::errc
;
2357 switch (static_cast<errc
>(ev
)) {
2358 case errc::bad_alloc
:
2359 return "Bad allocation";
2361 case errc::end_of_buffer
:
2362 return "End of buffer";
2364 case errc::malformed_input
:
2365 return "Malformed input";
2368 return "Unknown error";
2371 std::string
buffer_error_category::message(int ev
) const {
2372 return message(ev
, nullptr, 0);
2375 boost::system::error_condition
2376 buffer_error_category::default_error_condition(int ev
)const noexcept
{
2377 using ceph::buffer::errc
;
2378 switch (static_cast<errc
>(ev
)) {
2379 case errc::bad_alloc
:
2380 return boost::system::errc::not_enough_memory
;
2381 case errc::end_of_buffer
:
2382 case errc::malformed_input
:
2383 return boost::system::errc::io_error
;
2385 return { ev
, *this };
2388 bool buffer_error_category::equivalent(int ev
, const boost::system::error_condition
& c
) const noexcept
{
2389 return default_error_condition(ev
) == c
;
2392 int buffer_error_category::from_code(int ev
) const noexcept
{
2393 using ceph::buffer::errc
;
2394 switch (static_cast<errc
>(ev
)) {
2395 case errc::bad_alloc
:
2398 case errc::end_of_buffer
:
2401 case errc::malformed_input
:
2407 const boost::system::error_category
& buffer_category() noexcept
{
2408 static const buffer_error_category c
;