1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
21 #include "include/compat.h"
22 #include "include/mempool.h"
24 #include "common/environment.h"
25 #include "common/errno.h"
26 #include "common/safe_io.h"
27 #include "common/simple_spin.h"
28 #include "common/strtol.h"
29 #include "common/likely.h"
30 #include "common/valgrind.h"
31 #include "common/deleter.h"
32 #include "common/RWLock.h"
33 #include "include/types.h"
34 #include "include/scope_guard.h"
37 #include "msg/xio/XioMsg.h"
42 #define CEPH_BUFFER_ALLOC_UNIT (MIN(CEPH_PAGE_SIZE, 4096))
43 #define CEPH_BUFFER_APPEND_SIZE (CEPH_BUFFER_ALLOC_UNIT - sizeof(raw_combined))
46 static std::atomic_flag buffer_debug_lock
= ATOMIC_FLAG_INIT
;
47 # define bdout { simple_spin_lock(&buffer_debug_lock); std::cout
48 # define bendl std::endl; simple_spin_unlock(&buffer_debug_lock); }
50 # define bdout if (0) { std::cout
51 # define bendl std::endl; }
54 static std::atomic
<uint64_t> buffer_total_alloc
{ 0 };
55 static std::atomic
<uint64_t> buffer_history_alloc_bytes
{ 0 };
56 static std::atomic
<uint64_t> buffer_history_alloc_num
{ 0 };
58 const bool buffer_track_alloc
= get_env_bool("CEPH_BUFFER_TRACK");
61 void inc_total_alloc(unsigned len
) {
62 if (buffer_track_alloc
)
63 buffer_total_alloc
+= len
;
66 void dec_total_alloc(unsigned len
) {
67 if (buffer_track_alloc
)
68 buffer_total_alloc
-= len
;
71 void inc_history_alloc(uint64_t len
) {
72 if (buffer_track_alloc
) {
73 buffer_history_alloc_bytes
+= len
;
74 buffer_history_alloc_num
++;
79 int buffer::get_total_alloc() {
80 return buffer_total_alloc
;
82 uint64_t buffer::get_history_alloc_bytes() {
83 return buffer_history_alloc_bytes
;
85 uint64_t buffer::get_history_alloc_num() {
86 return buffer_history_alloc_num
;
89 static std::atomic
<unsigned> buffer_cached_crc
{ 0 };
90 static std::atomic
<unsigned> buffer_cached_crc_adjusted
{ 0 };
91 static std::atomic
<unsigned> buffer_missed_crc
{ 0 };
93 static bool buffer_track_crc
= get_env_bool("CEPH_BUFFER_TRACK");
95 void buffer::track_cached_crc(bool b
) {
98 int buffer::get_cached_crc() {
99 return buffer_cached_crc
;
101 int buffer::get_cached_crc_adjusted() {
102 return buffer_cached_crc_adjusted
;
105 int buffer::get_missed_crc() {
106 return buffer_missed_crc
;
109 static std::atomic
<unsigned> buffer_c_str_accesses
{ 0 };
111 static bool buffer_track_c_str
= get_env_bool("CEPH_BUFFER_TRACK");
113 void buffer::track_c_str(bool b
) {
114 buffer_track_c_str
= b
;
116 int buffer::get_c_str_accesses() {
117 return buffer_c_str_accesses
;
120 #ifdef CEPH_HAVE_SETPIPE_SZ
121 static std::atomic
<unsigned> buffer_max_pipe_size
{ 0 };
122 int update_max_pipe_size() {
126 struct stat stat_result
;
127 if (::stat(PROCPREFIX
"/proc/sys/fs/pipe-max-size", &stat_result
) == -1)
129 r
= safe_read_file(PROCPREFIX
"/proc/sys/fs/", "pipe-max-size",
130 buf
, sizeof(buf
) - 1);
134 size_t size
= strict_strtol(buf
, 10, &err
);
137 buffer_max_pipe_size
= size
;
141 size_t get_max_pipe_size() {
142 size_t size
= buffer_max_pipe_size
;
145 if (update_max_pipe_size() == 0)
146 return buffer_max_pipe_size
;
147 // this is the max size hardcoded in linux before 2.6.35
151 size_t get_max_pipe_size() { return 65536; }
155 const char * buffer::error::what() const throw () {
156 return "buffer::exception";
158 const char * buffer::bad_alloc::what() const throw () {
159 return "buffer::bad_alloc";
161 const char * buffer::end_of_buffer::what() const throw () {
162 return "buffer::end_of_buffer";
164 const char * buffer::malformed_input::what() const throw () {
167 buffer::error_code::error_code(int error
) :
168 buffer::malformed_input(cpp_strerror(error
).c_str()), code(error
) {}
174 std::atomic
<unsigned> nref
{ 0 };
177 mutable std::atomic_flag crc_spinlock
= ATOMIC_FLAG_INIT
;
178 map
<pair
<size_t, size_t>, pair
<uint32_t, uint32_t> > crc_map
;
180 explicit raw(unsigned l
, int mempool
=mempool::mempool_buffer_anon
)
181 : data(NULL
), len(l
), nref(0), mempool(mempool
) {
182 mempool::get_pool(mempool::pool_index_t(mempool
)).adjust_count(1, len
);
184 raw(char *c
, unsigned l
, int mempool
=mempool::mempool_buffer_anon
)
185 : data(c
), len(l
), nref(0), mempool(mempool
) {
186 mempool::get_pool(mempool::pool_index_t(mempool
)).adjust_count(1, len
);
189 mempool::get_pool(mempool::pool_index_t(mempool
)).adjust_count(
193 void _set_len(unsigned l
) {
194 mempool::get_pool(mempool::pool_index_t(mempool
)).adjust_count(
197 mempool::get_pool(mempool::pool_index_t(mempool
)).adjust_count(1, len
);
200 void reassign_to_mempool(int pool
) {
201 if (pool
== mempool
) {
204 mempool::get_pool(mempool::pool_index_t(mempool
)).adjust_count(
207 mempool::get_pool(mempool::pool_index_t(pool
)).adjust_count(1, len
);
210 void try_assign_to_mempool(int pool
) {
211 if (mempool
== mempool::mempool_buffer_anon
) {
212 reassign_to_mempool(pool
);
217 // cppcheck-suppress noExplicitConstructor
218 raw(const raw
&other
);
219 const raw
& operator=(const raw
&other
);
221 virtual char *get_data() {
224 virtual raw
* clone_empty() = 0;
226 raw
*c
= clone_empty();
227 memcpy(c
->data
, data
, len
);
230 virtual bool can_zero_copy() const {
233 virtual int zero_copy_to_fd(int fd
, loff_t
*offset
) {
236 virtual bool is_page_aligned() {
237 return ((long)data
& ~CEPH_PAGE_MASK
) == 0;
239 bool is_n_page_sized() {
240 return (len
& ~CEPH_PAGE_MASK
) == 0;
242 virtual bool is_shareable() {
243 // true if safe to reference/share the existing buffer copy
244 // false if it is not safe to share the buffer, e.g., due to special
245 // and/or registered memory that is scarce
248 bool get_crc(const pair
<size_t, size_t> &fromto
,
249 pair
<uint32_t, uint32_t> *crc
) const {
250 simple_spin_lock(&crc_spinlock
);
251 map
<pair
<size_t, size_t>, pair
<uint32_t, uint32_t> >::const_iterator i
=
252 crc_map
.find(fromto
);
253 if (i
== crc_map
.end()) {
254 simple_spin_unlock(&crc_spinlock
);
258 simple_spin_unlock(&crc_spinlock
);
261 void set_crc(const pair
<size_t, size_t> &fromto
,
262 const pair
<uint32_t, uint32_t> &crc
) {
263 simple_spin_lock(&crc_spinlock
);
264 crc_map
[fromto
] = crc
;
265 simple_spin_unlock(&crc_spinlock
);
267 void invalidate_crc() {
268 simple_spin_lock(&crc_spinlock
);
269 if (crc_map
.size() != 0) {
272 simple_spin_unlock(&crc_spinlock
);
// raw_combined is always placed within a single allocation along
// with the data buffer.  The data goes at the beginning, and
// raw_combined at the end.
281 class buffer::raw_combined
: public buffer::raw
{
284 raw_combined(char *dataptr
, unsigned l
, unsigned align
,
286 : raw(dataptr
, l
, mempool
),
288 inc_total_alloc(len
);
289 inc_history_alloc(len
);
291 ~raw_combined() override
{
292 dec_total_alloc(len
);
294 raw
* clone_empty() override
{
295 return create(len
, alignment
);
298 static raw_combined
*create(unsigned len
,
300 int mempool
= mempool::mempool_buffer_anon
) {
302 align
= sizeof(size_t);
303 size_t rawlen
= ROUND_UP_TO(sizeof(buffer::raw_combined
),
304 alignof(buffer::raw_combined
));
305 size_t datalen
= ROUND_UP_TO(len
, alignof(buffer::raw_combined
));
308 char *ptr
= (char *) valloc(rawlen
+ datalen
);
311 int r
= ::posix_memalign((void**)(void*)&ptr
, align
, rawlen
+ datalen
);
318 // actual data first, since it has presumably larger alignment restriction
319 // then put the raw_combined at the end
320 return new (ptr
+ datalen
) raw_combined(ptr
, len
, align
, mempool
);
323 static void operator delete(void *ptr
) {
324 raw_combined
*raw
= (raw_combined
*)ptr
;
325 ::free((void *)raw
->data
);
329 class buffer::raw_malloc
: public buffer::raw
{
331 MEMPOOL_CLASS_HELPERS();
333 explicit raw_malloc(unsigned l
) : raw(l
) {
335 data
= (char *)malloc(len
);
341 inc_total_alloc(len
);
342 inc_history_alloc(len
);
343 bdout
<< "raw_malloc " << this << " alloc " << (void *)data
<< " " << l
<< " " << buffer::get_total_alloc() << bendl
;
345 raw_malloc(unsigned l
, char *b
) : raw(b
, l
) {
346 inc_total_alloc(len
);
347 bdout
<< "raw_malloc " << this << " alloc " << (void *)data
<< " " << l
<< " " << buffer::get_total_alloc() << bendl
;
349 ~raw_malloc() override
{
351 dec_total_alloc(len
);
352 bdout
<< "raw_malloc " << this << " free " << (void *)data
<< " " << buffer::get_total_alloc() << bendl
;
354 raw
* clone_empty() override
{
355 return new raw_malloc(len
);
360 class buffer::raw_mmap_pages
: public buffer::raw
{
362 MEMPOOL_CLASS_HELPERS();
364 explicit raw_mmap_pages(unsigned l
) : raw(l
) {
365 data
= (char*)::mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_PRIVATE
|MAP_ANON
, -1, 0);
368 inc_total_alloc(len
);
369 inc_history_alloc(len
);
370 bdout
<< "raw_mmap " << this << " alloc " << (void *)data
<< " " << l
<< " " << buffer::get_total_alloc() << bendl
;
372 ~raw_mmap_pages() override
{
374 dec_total_alloc(len
);
375 bdout
<< "raw_mmap " << this << " free " << (void *)data
<< " " << buffer::get_total_alloc() << bendl
;
377 raw
* clone_empty() override
{
378 return new raw_mmap_pages(len
);
382 class buffer::raw_posix_aligned
: public buffer::raw
{
385 MEMPOOL_CLASS_HELPERS();
387 raw_posix_aligned(unsigned l
, unsigned _align
) : raw(l
) {
389 assert((align
>= sizeof(void *)) && (align
& (align
- 1)) == 0);
391 data
= (char *) valloc(len
);
393 int r
= ::posix_memalign((void**)(void*)&data
, align
, len
);
399 inc_total_alloc(len
);
400 inc_history_alloc(len
);
401 bdout
<< "raw_posix_aligned " << this << " alloc " << (void *)data
<< " l=" << l
<< ", align=" << align
<< " total_alloc=" << buffer::get_total_alloc() << bendl
;
403 ~raw_posix_aligned() override
{
405 dec_total_alloc(len
);
406 bdout
<< "raw_posix_aligned " << this << " free " << (void *)data
<< " " << buffer::get_total_alloc() << bendl
;
408 raw
* clone_empty() override
{
409 return new raw_posix_aligned(len
, align
);
415 class buffer::raw_hack_aligned
: public buffer::raw
{
419 raw_hack_aligned(unsigned l
, unsigned _align
) : raw(l
) {
421 realdata
= new char[len
+align
-1];
422 unsigned off
= ((unsigned)realdata
) & (align
-1);
424 data
= realdata
+ align
- off
;
427 inc_total_alloc(len
+align
-1);
428 inc_history_alloc(len
+align
-1);
429 //cout << "hack aligned " << (unsigned)data
430 //<< " in raw " << (unsigned)realdata
431 //<< " off " << off << std::endl;
432 assert(((unsigned)data
& (align
-1)) == 0);
434 ~raw_hack_aligned() {
436 dec_total_alloc(len
+align
-1);
439 return new raw_hack_aligned(len
, align
);
444 #ifdef CEPH_HAVE_SPLICE
445 class buffer::raw_pipe
: public buffer::raw
{
447 MEMPOOL_CLASS_HELPERS();
449 explicit raw_pipe(unsigned len
) : raw(len
), source_consumed(false) {
450 size_t max
= get_max_pipe_size();
452 bdout
<< "raw_pipe: requested length " << len
453 << " > max length " << max
<< bendl
;
454 throw malformed_input("length larger than max pipe size");
460 if (::pipe(pipefds
) == -1) {
462 bdout
<< "raw_pipe: error creating pipe: " << cpp_strerror(r
) << bendl
;
466 r
= set_nonblocking(pipefds
);
468 bdout
<< "raw_pipe: error setting nonblocking flag on temp pipe: "
469 << cpp_strerror(r
) << bendl
;
473 r
= set_pipe_size(pipefds
, len
);
475 bdout
<< "raw_pipe: could not set pipe size" << bendl
;
476 // continue, since the pipe should become large enough as needed
479 inc_total_alloc(len
);
480 inc_history_alloc(len
);
481 bdout
<< "raw_pipe " << this << " alloc " << len
<< " "
482 << buffer::get_total_alloc() << bendl
;
485 ~raw_pipe() override
{
489 dec_total_alloc(len
);
490 bdout
<< "raw_pipe " << this << " free " << (void *)data
<< " "
491 << buffer::get_total_alloc() << bendl
;
494 bool can_zero_copy() const override
{
498 int set_source(int fd
, loff_t
*off
) {
499 int flags
= SPLICE_F_NONBLOCK
;
500 ssize_t r
= safe_splice(fd
, off
, pipefds
[1], NULL
, len
, flags
);
502 bdout
<< "raw_pipe: error splicing into pipe: " << cpp_strerror(r
)
506 // update length with actual amount read
511 int zero_copy_to_fd(int fd
, loff_t
*offset
) override
{
512 assert(!source_consumed
);
513 int flags
= SPLICE_F_NONBLOCK
;
514 ssize_t r
= safe_splice_exact(pipefds
[0], NULL
, fd
, offset
, len
, flags
);
516 bdout
<< "raw_pipe: error splicing from pipe to fd: "
517 << cpp_strerror(r
) << bendl
;
520 source_consumed
= true;
524 buffer::raw
* clone_empty() override
{
525 // cloning doesn't make sense for pipe-based buffers,
526 // and is only used by unit tests for other types of buffers
530 char *get_data() override
{
533 return copy_pipe(pipefds
);
537 int set_pipe_size(int *fds
, long length
) {
538 #ifdef CEPH_HAVE_SETPIPE_SZ
539 if (::fcntl(fds
[1], F_SETPIPE_SZ
, length
) == -1) {
542 // pipe limit must have changed - EPERM means we requested
543 // more than the maximum size as an unprivileged user
544 update_max_pipe_size();
545 throw malformed_input("length larger than new max pipe size");
553 int set_nonblocking(int *fds
) {
554 if (::fcntl(fds
[0], F_SETFL
, O_NONBLOCK
) == -1)
556 if (::fcntl(fds
[1], F_SETFL
, O_NONBLOCK
) == -1)
561 static void close_pipe(const int *fds
) {
563 VOID_TEMP_FAILURE_RETRY(::close(fds
[0]));
565 VOID_TEMP_FAILURE_RETRY(::close(fds
[1]));
567 char *copy_pipe(int *fds
) {
568 /* preserve original pipe contents by copying into a temporary
569 * pipe before reading.
574 assert(!source_consumed
);
577 if (::pipe(tmpfd
) == -1) {
579 bdout
<< "raw_pipe: error creating temp pipe: " << cpp_strerror(r
)
583 auto sg
= make_scope_guard([=] { close_pipe(tmpfd
); });
584 r
= set_nonblocking(tmpfd
);
586 bdout
<< "raw_pipe: error setting nonblocking flag on temp pipe: "
587 << cpp_strerror(r
) << bendl
;
590 r
= set_pipe_size(tmpfd
, len
);
592 bdout
<< "raw_pipe: error setting pipe size on temp pipe: "
593 << cpp_strerror(r
) << bendl
;
595 int flags
= SPLICE_F_NONBLOCK
;
596 if (::tee(fds
[0], tmpfd
[1], len
, flags
) == -1) {
598 bdout
<< "raw_pipe: error tee'ing into temp pipe: " << cpp_strerror(r
)
602 data
= (char *)malloc(len
);
606 r
= safe_read(tmpfd
[0], data
, len
);
607 if (r
< (ssize_t
)len
) {
608 bdout
<< "raw_pipe: error reading from temp pipe:" << cpp_strerror(r
)
616 bool source_consumed
;
619 #endif // CEPH_HAVE_SPLICE
622 * primitive buffer types
624 class buffer::raw_char
: public buffer::raw
{
626 MEMPOOL_CLASS_HELPERS();
628 explicit raw_char(unsigned l
) : raw(l
) {
630 data
= new char[len
];
633 inc_total_alloc(len
);
634 inc_history_alloc(len
);
635 bdout
<< "raw_char " << this << " alloc " << (void *)data
<< " " << l
<< " " << buffer::get_total_alloc() << bendl
;
637 raw_char(unsigned l
, char *b
) : raw(b
, l
) {
638 inc_total_alloc(len
);
639 bdout
<< "raw_char " << this << " alloc " << (void *)data
<< " " << l
<< " " << buffer::get_total_alloc() << bendl
;
641 ~raw_char() override
{
643 dec_total_alloc(len
);
644 bdout
<< "raw_char " << this << " free " << (void *)data
<< " " << buffer::get_total_alloc() << bendl
;
646 raw
* clone_empty() override
{
647 return new raw_char(len
);
651 class buffer::raw_claimed_char
: public buffer::raw
{
653 MEMPOOL_CLASS_HELPERS();
655 explicit raw_claimed_char(unsigned l
, char *b
) : raw(b
, l
) {
656 inc_total_alloc(len
);
657 bdout
<< "raw_claimed_char " << this << " alloc " << (void *)data
658 << " " << l
<< " " << buffer::get_total_alloc() << bendl
;
660 ~raw_claimed_char() override
{
661 dec_total_alloc(len
);
662 bdout
<< "raw_claimed_char " << this << " free " << (void *)data
663 << " " << buffer::get_total_alloc() << bendl
;
665 raw
* clone_empty() override
{
666 return new raw_char(len
);
670 class buffer::raw_unshareable
: public buffer::raw
{
672 MEMPOOL_CLASS_HELPERS();
674 explicit raw_unshareable(unsigned l
) : raw(l
) {
676 data
= new char[len
];
680 raw_unshareable(unsigned l
, char *b
) : raw(b
, l
) {
682 raw
* clone_empty() override
{
683 return new raw_char(len
);
685 bool is_shareable() override
{
686 return false; // !shareable, will force make_shareable()
688 ~raw_unshareable() override
{
693 class buffer::raw_static
: public buffer::raw
{
695 MEMPOOL_CLASS_HELPERS();
697 raw_static(const char *d
, unsigned l
) : raw((char*)d
, l
) { }
698 ~raw_static() override
{}
699 raw
* clone_empty() override
{
700 return new buffer::raw_char(len
);
704 class buffer::raw_claim_buffer
: public buffer::raw
{
707 raw_claim_buffer(const char *b
, unsigned l
, deleter d
)
708 : raw((char*)b
, l
), del(std::move(d
)) { }
709 ~raw_claim_buffer() override
{}
710 raw
* clone_empty() override
{
711 return new buffer::raw_char(len
);
715 #if defined(HAVE_XIO)
716 class buffer::xio_msg_buffer
: public buffer::raw
{
718 XioDispatchHook
* m_hook
;
720 xio_msg_buffer(XioDispatchHook
* _m_hook
, const char *d
,
722 raw((char*)d
, l
), m_hook(_m_hook
->get()) {}
724 bool is_shareable() { return false; }
725 static void operator delete(void *p
)
727 xio_msg_buffer
*buf
= static_cast<xio_msg_buffer
*>(p
);
728 // return hook ref (counts against pool); it appears illegal
729 // to do this in our dtor, because this fires after that
733 return new buffer::raw_char(len
);
737 class buffer::xio_mempool
: public buffer::raw
{
739 struct xio_reg_mem
*mp
;
740 xio_mempool(struct xio_reg_mem
*_mp
, unsigned l
) :
741 raw((char*)_mp
->addr
, l
), mp(_mp
)
745 return new buffer::raw_char(len
);
749 struct xio_reg_mem
* get_xio_mp(const buffer::ptr
& bp
)
751 buffer::xio_mempool
*mb
= dynamic_cast<buffer::xio_mempool
*>(bp
.get_raw());
758 buffer::raw
* buffer::create_msg(
759 unsigned len
, char *buf
, XioDispatchHook
* m_hook
) {
760 XioPool
& pool
= m_hook
->get_pool();
762 static_cast<buffer::raw
*>(pool
.alloc(sizeof(xio_msg_buffer
)));
763 new (bp
) xio_msg_buffer(m_hook
, buf
, len
);
766 #endif /* HAVE_XIO */
768 buffer::raw
* buffer::copy(const char *c
, unsigned len
) {
769 raw
* r
= buffer::create_aligned(len
, sizeof(size_t));
770 memcpy(r
->data
, c
, len
);
774 buffer::raw
* buffer::create(unsigned len
) {
775 return buffer::create_aligned(len
, sizeof(size_t));
777 buffer::raw
* buffer::create_in_mempool(unsigned len
, int mempool
) {
778 return buffer::create_aligned_in_mempool(len
, sizeof(size_t), mempool
);
780 buffer::raw
* buffer::claim_char(unsigned len
, char *buf
) {
781 return new raw_claimed_char(len
, buf
);
783 buffer::raw
* buffer::create_malloc(unsigned len
) {
784 return new raw_malloc(len
);
786 buffer::raw
* buffer::claim_malloc(unsigned len
, char *buf
) {
787 return new raw_malloc(len
, buf
);
789 buffer::raw
* buffer::create_static(unsigned len
, char *buf
) {
790 return new raw_static(buf
, len
);
792 buffer::raw
* buffer::claim_buffer(unsigned len
, char *buf
, deleter del
) {
793 return new raw_claim_buffer(buf
, len
, std::move(del
));
796 buffer::raw
* buffer::create_aligned_in_mempool(
797 unsigned len
, unsigned align
, int mempool
) {
798 // If alignment is a page multiple, use a separate buffer::raw to
799 // avoid fragmenting the heap.
801 // Somewhat unexpectedly, I see consistently better performance
802 // from raw_combined than from raw even when the allocation size is
803 // a page multiple (but alignment is not).
805 // I also see better performance from a separate buffer::raw once the
807 if ((align
& ~CEPH_PAGE_MASK
) == 0 ||
808 len
>= CEPH_PAGE_SIZE
* 2) {
810 return new raw_posix_aligned(len
, align
);
812 return new raw_hack_aligned(len
, align
);
815 return raw_combined::create(len
, align
, mempool
);
817 buffer::raw
* buffer::create_aligned(
818 unsigned len
, unsigned align
) {
819 return create_aligned_in_mempool(len
, align
,
820 mempool::mempool_buffer_anon
);
823 buffer::raw
* buffer::create_page_aligned(unsigned len
) {
824 return create_aligned(len
, CEPH_PAGE_SIZE
);
827 buffer::raw
* buffer::create_zero_copy(unsigned len
, int fd
, int64_t *offset
) {
828 #ifdef CEPH_HAVE_SPLICE
829 buffer::raw_pipe
* buf
= new raw_pipe(len
);
830 int r
= buf
->set_source(fd
, (loff_t
*)offset
);
837 throw error_code(-ENOTSUP
);
841 buffer::raw
* buffer::create_unshareable(unsigned len
) {
842 return new raw_unshareable(len
);
845 buffer::ptr::ptr(raw
*r
) : _raw(r
), _off(0), _len(r
->len
) // no lock needed; this is an unref raw.
848 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
850 buffer::ptr::ptr(unsigned l
) : _off(0), _len(l
)
854 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
856 buffer::ptr::ptr(const char *d
, unsigned l
) : _off(0), _len(l
) // ditto.
860 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
862 buffer::ptr::ptr(const ptr
& p
) : _raw(p
._raw
), _off(p
._off
), _len(p
._len
)
866 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
869 buffer::ptr::ptr(ptr
&& p
) noexcept
: _raw(p
._raw
), _off(p
._off
), _len(p
._len
)
874 buffer::ptr::ptr(const ptr
& p
, unsigned o
, unsigned l
)
875 : _raw(p
._raw
), _off(p
._off
+ o
), _len(l
)
877 assert(o
+l
<= p
._len
);
880 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
882 buffer::ptr
& buffer::ptr::operator= (const ptr
& p
)
886 bdout
<< "ptr " << this << " get " << _raw
<< bendl
;
888 buffer::raw
*raw
= p
._raw
;
899 buffer::ptr
& buffer::ptr::operator= (ptr
&& p
) noexcept
902 buffer::raw
*raw
= p
._raw
;
915 buffer::raw
*buffer::ptr::clone()
917 return _raw
->clone();
920 buffer::ptr
& buffer::ptr::make_shareable() {
921 if (_raw
&& !_raw
->is_shareable()) {
922 buffer::raw
*tr
= _raw
;
925 if (unlikely(--tr
->nref
== 0)) {
926 ANNOTATE_HAPPENS_AFTER(&tr
->nref
);
927 ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&tr
->nref
);
930 ANNOTATE_HAPPENS_BEFORE(&tr
->nref
);
936 void buffer::ptr::swap(ptr
& other
)
949 void buffer::ptr::release()
952 bdout
<< "ptr " << this << " release " << _raw
<< bendl
;
953 if (--_raw
->nref
== 0) {
954 //cout << "hosing raw " << (void*)_raw << " len " << _raw->len << std::endl;
955 ANNOTATE_HAPPENS_AFTER(&_raw
->nref
);
956 ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&_raw
->nref
);
957 delete _raw
; // dealloc old (if any)
959 ANNOTATE_HAPPENS_BEFORE(&_raw
->nref
);
965 bool buffer::ptr::at_buffer_tail() const { return _off
+ _len
== _raw
->len
; }
967 int buffer::ptr::get_mempool() const {
969 return _raw
->mempool
;
971 return mempool::mempool_buffer_anon
;
974 void buffer::ptr::reassign_to_mempool(int pool
) {
976 _raw
->reassign_to_mempool(pool
);
979 void buffer::ptr::try_assign_to_mempool(int pool
) {
981 _raw
->try_assign_to_mempool(pool
);
985 const char *buffer::ptr::c_str() const {
987 if (buffer_track_c_str
)
988 buffer_c_str_accesses
++;
989 return _raw
->get_data() + _off
;
991 char *buffer::ptr::c_str() {
993 if (buffer_track_c_str
)
994 buffer_c_str_accesses
++;
995 return _raw
->get_data() + _off
;
997 const char *buffer::ptr::end_c_str() const {
999 if (buffer_track_c_str
)
1000 buffer_c_str_accesses
++;
1001 return _raw
->get_data() + _off
+ _len
;
1003 char *buffer::ptr::end_c_str() {
1005 if (buffer_track_c_str
)
1006 buffer_c_str_accesses
++;
1007 return _raw
->get_data() + _off
+ _len
;
1010 unsigned buffer::ptr::unused_tail_length() const
1013 return _raw
->len
- (_off
+_len
);
1017 const char& buffer::ptr::operator[](unsigned n
) const
1021 return _raw
->get_data()[_off
+ n
];
1023 char& buffer::ptr::operator[](unsigned n
)
1027 return _raw
->get_data()[_off
+ n
];
1030 const char *buffer::ptr::raw_c_str() const { assert(_raw
); return _raw
->data
; }
1031 unsigned buffer::ptr::raw_length() const { assert(_raw
); return _raw
->len
; }
1032 int buffer::ptr::raw_nref() const { assert(_raw
); return _raw
->nref
; }
1034 void buffer::ptr::copy_out(unsigned o
, unsigned l
, char *dest
) const {
1037 throw end_of_buffer();
1038 char* src
= _raw
->data
+ _off
+ o
;
1039 maybe_inline_memcpy(dest
, src
, l
, 8);
1042 unsigned buffer::ptr::wasted() const
1044 return _raw
->len
- _len
;
1047 int buffer::ptr::cmp(const ptr
& o
) const
1049 int l
= _len
< o
._len
? _len
: o
._len
;
1051 int r
= memcmp(c_str(), o
.c_str(), l
);
1062 bool buffer::ptr::is_zero() const
1064 return mem_is_zero(c_str(), _len
);
1067 unsigned buffer::ptr::append(char c
)
1070 assert(1 <= unused_tail_length());
1071 char* ptr
= _raw
->data
+ _off
+ _len
;
1077 unsigned buffer::ptr::append(const char *p
, unsigned l
)
1080 assert(l
<= unused_tail_length());
1081 char* c
= _raw
->data
+ _off
+ _len
;
1082 maybe_inline_memcpy(c
, p
, l
, 32);
1087 void buffer::ptr::copy_in(unsigned o
, unsigned l
, const char *src
)
1089 copy_in(o
, l
, src
, true);
1092 void buffer::ptr::copy_in(unsigned o
, unsigned l
, const char *src
, bool crc_reset
)
1096 assert(o
+l
<= _len
);
1097 char* dest
= _raw
->data
+ _off
+ o
;
1099 _raw
->invalidate_crc();
1100 maybe_inline_memcpy(dest
, src
, l
, 64);
1103 void buffer::ptr::zero()
1108 void buffer::ptr::zero(bool crc_reset
)
1111 _raw
->invalidate_crc();
1112 memset(c_str(), 0, _len
);
1115 void buffer::ptr::zero(unsigned o
, unsigned l
)
1120 void buffer::ptr::zero(unsigned o
, unsigned l
, bool crc_reset
)
1122 assert(o
+l
<= _len
);
1124 _raw
->invalidate_crc();
1125 memset(c_str()+o
, 0, l
);
1127 bool buffer::ptr::can_zero_copy() const
1129 return _raw
->can_zero_copy();
1132 int buffer::ptr::zero_copy_to_fd(int fd
, int64_t *offset
) const
1134 return _raw
->zero_copy_to_fd(fd
, (loff_t
*)offset
);
1137 // -- buffer::list::iterator --
1139 buffer::list::iterator operator=(const buffer::list::iterator& other)
1141 if (this != &other) {
1146 p_off = other.p_off;
1151 template<bool is_const
>
1152 buffer::list::iterator_impl
<is_const
>::iterator_impl(bl_t
*l
, unsigned o
)
1153 : bl(l
), ls(&bl
->_buffers
), off(0), p(ls
->begin()), p_off(0)
1158 template<bool is_const
>
1159 buffer::list::iterator_impl
<is_const
>::iterator_impl(const buffer::list::iterator
& i
)
1160 : iterator_impl
<is_const
>(i
.bl
, i
.off
, i
.p
, i
.p_off
) {}
1162 template<bool is_const
>
1163 void buffer::list::iterator_impl
<is_const
>::advance(int o
)
1165 //cout << this << " advance " << o << " from " << off << " (p_off " << p_off << " in " << p->length() << ")" << std::endl;
1170 throw end_of_buffer();
1171 if (p_off
>= p
->length()) {
1173 p_off
-= p
->length();
1176 // somewhere in this buffer!
1191 } else if (off
> 0) {
1192 assert(p
!= ls
->begin());
1194 p_off
= p
->length();
1196 throw end_of_buffer();
1201 template<bool is_const
>
1202 void buffer::list::iterator_impl
<is_const
>::seek(unsigned o
)
1209 template<bool is_const
>
1210 char buffer::list::iterator_impl
<is_const
>::operator*() const
1213 throw end_of_buffer();
1217 template<bool is_const
>
1218 buffer::list::iterator_impl
<is_const
>&
1219 buffer::list::iterator_impl
<is_const
>::operator++()
1222 throw end_of_buffer();
1227 template<bool is_const
>
1228 buffer::ptr
buffer::list::iterator_impl
<is_const
>::get_current_ptr() const
1231 throw end_of_buffer();
1232 return ptr(*p
, p_off
, p
->length() - p_off
);
1236 // note that these all _append_ to dest!
1237 template<bool is_const
>
1238 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, char *dest
)
1240 if (p
== ls
->end()) seek(off
);
1243 throw end_of_buffer();
1244 assert(p
->length() > 0);
1246 unsigned howmuch
= p
->length() - p_off
;
1247 if (len
< howmuch
) howmuch
= len
;
1248 p
->copy_out(p_off
, howmuch
, dest
);
1256 template<bool is_const
>
1257 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, ptr
&dest
)
1259 copy_deep(len
, dest
);
1262 template<bool is_const
>
1263 void buffer::list::iterator_impl
<is_const
>::copy_deep(unsigned len
, ptr
&dest
)
1269 throw end_of_buffer();
1270 assert(p
->length() > 0);
1272 copy(len
, dest
.c_str());
1274 template<bool is_const
>
1275 void buffer::list::iterator_impl
<is_const
>::copy_shallow(unsigned len
,
1282 throw end_of_buffer();
1283 assert(p
->length() > 0);
1284 unsigned howmuch
= p
->length() - p_off
;
1285 if (howmuch
< len
) {
1287 copy(len
, dest
.c_str());
1289 dest
= ptr(*p
, p_off
, len
);
1294 template<bool is_const
>
1295 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, list
&dest
)
1301 throw end_of_buffer();
1303 unsigned howmuch
= p
->length() - p_off
;
1306 dest
.append(*p
, p_off
, howmuch
);
1313 template<bool is_const
>
1314 void buffer::list::iterator_impl
<is_const
>::copy(unsigned len
, std::string
&dest
)
1320 throw end_of_buffer();
1322 unsigned howmuch
= p
->length() - p_off
;
1323 const char *c_str
= p
->c_str();
1326 dest
.append(c_str
+ p_off
, howmuch
);
1333 template<bool is_const
>
1334 void buffer::list::iterator_impl
<is_const
>::copy_all(list
&dest
)
1341 assert(p
->length() > 0);
1343 unsigned howmuch
= p
->length() - p_off
;
1344 const char *c_str
= p
->c_str();
1345 dest
.append(c_str
+ p_off
, howmuch
);
1351 template<bool is_const
>
1352 size_t buffer::list::iterator_impl
<is_const
>::get_ptr_and_advance(
1353 size_t want
, const char **data
)
1355 if (p
== ls
->end()) {
1357 if (p
== ls
->end()) {
1361 *data
= p
->c_str() + p_off
;
1362 size_t l
= MIN(p
->length() - p_off
, want
);
1364 if (p_off
== p
->length()) {
1372 template<bool is_const
>
1373 uint32_t buffer::list::iterator_impl
<is_const
>::crc32c(
1374 size_t length
, uint32_t crc
)
1376 length
= MIN( length
, get_remaining());
1377 while (length
> 0) {
1379 size_t l
= get_ptr_and_advance(length
, &p
);
1380 crc
= ceph_crc32c(crc
, (unsigned char*)p
, l
);
1386 // explicitly instantiate only the iterator types we need, so we can hide the
1387 // details in this compilation unit without introducing unnecessary link time
1389 template class buffer::list::iterator_impl
<true>;
1390 template class buffer::list::iterator_impl
<false>;
1392 buffer::list::iterator::iterator(bl_t
*l
, unsigned o
)
1393 : iterator_impl(l
, o
)
1396 buffer::list::iterator::iterator(bl_t
*l
, unsigned o
, list_iter_t ip
, unsigned po
)
1397 : iterator_impl(l
, o
, ip
, po
)
1400 void buffer::list::iterator::advance(int o
)
1402 buffer::list::iterator_impl
<false>::advance(o
);
1405 void buffer::list::iterator::seek(unsigned o
)
1407 buffer::list::iterator_impl
<false>::seek(o
);
1410 char buffer::list::iterator::operator*()
1412 if (p
== ls
->end()) {
1413 throw end_of_buffer();
1418 buffer::list::iterator
& buffer::list::iterator::operator++()
1420 buffer::list::iterator_impl
<false>::operator++();
1424 buffer::ptr
buffer::list::iterator::get_current_ptr()
1426 if (p
== ls
->end()) {
1427 throw end_of_buffer();
1429 return ptr(*p
, p_off
, p
->length() - p_off
);
1432 void buffer::list::iterator::copy(unsigned len
, char *dest
)
1434 return buffer::list::iterator_impl
<false>::copy(len
, dest
);
1437 void buffer::list::iterator::copy(unsigned len
, ptr
&dest
)
1439 return buffer::list::iterator_impl
<false>::copy_deep(len
, dest
);
1442 void buffer::list::iterator::copy_deep(unsigned len
, ptr
&dest
)
1444 buffer::list::iterator_impl
<false>::copy_deep(len
, dest
);
1447 void buffer::list::iterator::copy_shallow(unsigned len
, ptr
&dest
)
1449 buffer::list::iterator_impl
<false>::copy_shallow(len
, dest
);
1452 void buffer::list::iterator::copy(unsigned len
, list
&dest
)
1454 buffer::list::iterator_impl
<false>::copy(len
, dest
);
1457 void buffer::list::iterator::copy(unsigned len
, std::string
&dest
)
1459 buffer::list::iterator_impl
<false>::copy(len
, dest
);
1462 void buffer::list::iterator::copy_all(list
&dest
)
1464 buffer::list::iterator_impl
<false>::copy_all(dest
);
1467 void buffer::list::iterator::copy_in(unsigned len
, const char *src
)
1469 copy_in(len
, src
, true);
1473 void buffer::list::iterator::copy_in(unsigned len
, const char *src
, bool crc_reset
)
1480 throw end_of_buffer();
1482 unsigned howmuch
= p
->length() - p_off
;
1485 p
->copy_in(p_off
, howmuch
, src
, crc_reset
);
1493 void buffer::list::iterator::copy_in(unsigned len
, const list
& otherl
)
1497 unsigned left
= len
;
1498 for (std::list
<ptr
>::const_iterator i
= otherl
._buffers
.begin();
1499 i
!= otherl
._buffers
.end();
1501 unsigned l
= (*i
).length();
1504 copy_in(l
, i
->c_str());
1511 // -- buffer::list --
1513 buffer::list::list(list
&& other
)
1514 : _buffers(std::move(other
._buffers
)),
1516 _memcopy_count(other
._memcopy_count
),
1518 append_buffer
.swap(other
.append_buffer
);
1522 void buffer::list::swap(list
& other
)
1524 std::swap(_len
, other
._len
);
1525 std::swap(_memcopy_count
, other
._memcopy_count
);
1526 _buffers
.swap(other
._buffers
);
1527 append_buffer
.swap(other
.append_buffer
);
1528 //last_p.swap(other.last_p);
1530 other
.last_p
= other
.begin();
1533 bool buffer::list::contents_equal(buffer::list
& other
)
1535 return static_cast<const buffer::list
*>(this)->contents_equal(other
);
1538 bool buffer::list::contents_equal(const ceph::buffer::list
& other
) const
1540 if (length() != other
.length())
1543 // buffer-wise comparison
1545 std::list
<ptr
>::const_iterator a
= _buffers
.begin();
1546 std::list
<ptr
>::const_iterator b
= other
._buffers
.begin();
1547 unsigned aoff
= 0, boff
= 0;
1548 while (a
!= _buffers
.end()) {
1549 unsigned len
= a
->length() - aoff
;
1550 if (len
> b
->length() - boff
)
1551 len
= b
->length() - boff
;
1552 if (memcmp(a
->c_str() + aoff
, b
->c_str() + boff
, len
) != 0)
1555 if (aoff
== a
->length()) {
1560 if (boff
== b
->length()) {
1565 assert(b
== other
._buffers
.end());
1569 // byte-wise comparison
1571 bufferlist::const_iterator me
= begin();
1572 bufferlist::const_iterator him
= other
.begin();
1583 bool buffer::list::can_zero_copy() const
1585 for (std::list
<ptr
>::const_iterator it
= _buffers
.begin();
1586 it
!= _buffers
.end();
1588 if (!it
->can_zero_copy())
1593 bool buffer::list::is_provided_buffer(const char *dst
) const
1595 if (_buffers
.empty())
1597 return (is_contiguous() && (_buffers
.front().c_str() == dst
));
1600 bool buffer::list::is_aligned(unsigned align
) const
1602 for (std::list
<ptr
>::const_iterator it
= _buffers
.begin();
1603 it
!= _buffers
.end();
1605 if (!it
->is_aligned(align
))
1610 bool buffer::list::is_n_align_sized(unsigned align
) const
1612 for (std::list
<ptr
>::const_iterator it
= _buffers
.begin();
1613 it
!= _buffers
.end();
1615 if (!it
->is_n_align_sized(align
))
1620 bool buffer::list::is_aligned_size_and_memory(unsigned align_size
,
1621 unsigned align_memory
) const
1623 for (std::list
<ptr
>::const_iterator it
= _buffers
.begin();
1624 it
!= _buffers
.end();
1626 if (!it
->is_aligned(align_memory
) || !it
->is_n_align_sized(align_size
))
1632 bool buffer::list::is_zero() const {
1633 for (std::list
<ptr
>::const_iterator it
= _buffers
.begin();
1634 it
!= _buffers
.end();
1636 if (!it
->is_zero()) {
1643 void buffer::list::zero()
1645 for (std::list
<ptr
>::iterator it
= _buffers
.begin();
1646 it
!= _buffers
.end();
1651 void buffer::list::zero(unsigned o
, unsigned l
)
1653 assert(o
+l
<= _len
);
1655 for (std::list
<ptr
>::iterator it
= _buffers
.begin();
1656 it
!= _buffers
.end();
1658 if (p
+ it
->length() > o
) {
1659 if (p
>= o
&& p
+it
->length() <= o
+l
) {
1660 // 'o'------------- l -----------|
1661 // 'p'-- it->length() --|
1663 } else if (p
>= o
) {
1664 // 'o'------------- l -----------|
1665 // 'p'------- it->length() -------|
1667 } else if (p
+ it
->length() <= o
+l
) {
1668 // 'o'------------- l -----------|
1669 // 'p'------- it->length() -------|
1670 it
->zero(o
-p
, it
->length()-(o
-p
));
1672 // 'o'----------- l -----------|
1673 // 'p'---------- it->length() ----------|
1683 bool buffer::list::is_contiguous() const
1685 return &(*_buffers
.begin()) == &(*_buffers
.rbegin());
1688 bool buffer::list::is_n_page_sized() const
1690 return is_n_align_sized(CEPH_PAGE_SIZE
);
1693 bool buffer::list::is_page_aligned() const
1695 return is_aligned(CEPH_PAGE_SIZE
);
1698 int buffer::list::get_mempool() const
1700 if (_buffers
.empty()) {
1701 return mempool::mempool_buffer_anon
;
1703 return _buffers
.back().get_mempool();
1706 void buffer::list::reassign_to_mempool(int pool
)
1708 if (append_buffer
.get_raw()) {
1709 append_buffer
.get_raw()->reassign_to_mempool(pool
);
1711 for (auto& p
: _buffers
) {
1712 p
.get_raw()->reassign_to_mempool(pool
);
1716 void buffer::list::try_assign_to_mempool(int pool
)
1718 if (append_buffer
.get_raw()) {
1719 append_buffer
.get_raw()->try_assign_to_mempool(pool
);
1721 for (auto& p
: _buffers
) {
1722 p
.get_raw()->try_assign_to_mempool(pool
);
1726 void buffer::list::rebuild()
1733 if ((_len
& ~CEPH_PAGE_MASK
) == 0)
1734 nb
= buffer::create_page_aligned(_len
);
1736 nb
= buffer::create(_len
);
1740 void buffer::list::rebuild(ptr
& nb
)
1743 for (std::list
<ptr
>::iterator it
= _buffers
.begin();
1744 it
!= _buffers
.end();
1746 nb
.copy_in(pos
, it
->length(), it
->c_str(), false);
1747 pos
+= it
->length();
1749 _memcopy_count
+= pos
;
1752 _buffers
.push_back(nb
);
1757 bool buffer::list::rebuild_aligned(unsigned align
)
1759 return rebuild_aligned_size_and_memory(align
, align
);
1762 bool buffer::list::rebuild_aligned_size_and_memory(unsigned align_size
,
1763 unsigned align_memory
,
1764 unsigned max_buffers
)
1766 unsigned old_memcopy_count
= _memcopy_count
;
1768 if (max_buffers
&& _buffers
.size() > max_buffers
1769 && _len
> (max_buffers
* align_size
)) {
1770 align_size
= ROUND_UP_TO(ROUND_UP_TO(_len
, max_buffers
) / max_buffers
, align_size
);
1772 std::list
<ptr
>::iterator p
= _buffers
.begin();
1773 while (p
!= _buffers
.end()) {
1774 // keep anything that's already align and sized aligned
1775 if (p
->is_aligned(align_memory
) && p
->is_n_align_sized(align_size
)) {
1776 /*cout << " segment " << (void*)p->c_str()
1777 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1778 << " length " << p->length()
1779 << " " << (p->length() & (align - 1)) << " ok" << std::endl;
1785 // consolidate unaligned items, until we get something that is sized+aligned
1787 unsigned offset
= 0;
1789 /*cout << " segment " << (void*)p->c_str()
1790 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1791 << " length " << p->length() << " " << (p->length() & (align - 1))
1792 << " overall offset " << offset << " " << (offset & (align - 1))
1793 << " not ok" << std::endl;
1795 offset
+= p
->length();
1796 unaligned
.push_back(*p
);
1797 _buffers
.erase(p
++);
1798 } while (p
!= _buffers
.end() &&
1799 (!p
->is_aligned(align_memory
) ||
1800 !p
->is_n_align_sized(align_size
) ||
1801 (offset
% align_size
)));
1802 if (!(unaligned
.is_contiguous() && unaligned
._buffers
.front().is_aligned(align_memory
))) {
1803 ptr
nb(buffer::create_aligned(unaligned
._len
, align_memory
));
1804 unaligned
.rebuild(nb
);
1805 _memcopy_count
+= unaligned
._len
;
1807 _buffers
.insert(p
, unaligned
._buffers
.front());
1811 return (old_memcopy_count
!= _memcopy_count
);
1814 bool buffer::list::rebuild_page_aligned()
1816 return rebuild_aligned(CEPH_PAGE_SIZE
);
1819 void buffer::list::reserve(size_t prealloc
)
1821 if (append_buffer
.unused_tail_length() < prealloc
) {
1822 append_buffer
= buffer::create_in_mempool(prealloc
, get_mempool());
1823 append_buffer
.set_length(0); // unused, so far.
1827 // sort-of-like-assignment-op
1828 void buffer::list::claim(list
& bl
, unsigned int flags
)
1832 claim_append(bl
, flags
);
1835 void buffer::list::claim_append(list
& bl
, unsigned int flags
)
1837 // steal the other guy's buffers
1839 if (!(flags
& CLAIM_ALLOW_NONSHAREABLE
))
1840 bl
.make_shareable();
1841 _buffers
.splice(_buffers
.end(), bl
._buffers
);
1843 bl
.last_p
= bl
.begin();
1846 void buffer::list::claim_prepend(list
& bl
, unsigned int flags
)
1848 // steal the other guy's buffers
1850 if (!(flags
& CLAIM_ALLOW_NONSHAREABLE
))
1851 bl
.make_shareable();
1852 _buffers
.splice(_buffers
.begin(), bl
._buffers
);
1854 bl
.last_p
= bl
.begin();
1857 void buffer::list::claim_append_piecewise(list
& bl
)
1859 // steal the other guy's buffers
1860 for (std::list
<buffer::ptr
>::const_iterator i
= bl
.buffers().begin();
1861 i
!= bl
.buffers().end(); i
++) {
1862 append(*i
, 0, i
->length());
1867 void buffer::list::copy(unsigned off
, unsigned len
, char *dest
) const
1869 if (off
+ len
> length())
1870 throw end_of_buffer();
1871 if (last_p
.get_off() != off
)
1873 last_p
.copy(len
, dest
);
1876 void buffer::list::copy(unsigned off
, unsigned len
, list
&dest
) const
1878 if (off
+ len
> length())
1879 throw end_of_buffer();
1880 if (last_p
.get_off() != off
)
1882 last_p
.copy(len
, dest
);
1885 void buffer::list::copy(unsigned off
, unsigned len
, std::string
& dest
) const
1887 if (last_p
.get_off() != off
)
1889 return last_p
.copy(len
, dest
);
1892 void buffer::list::copy_in(unsigned off
, unsigned len
, const char *src
)
1894 copy_in(off
, len
, src
, true);
1897 void buffer::list::copy_in(unsigned off
, unsigned len
, const char *src
, bool crc_reset
)
1899 if (off
+ len
> length())
1900 throw end_of_buffer();
1902 if (last_p
.get_off() != off
)
1904 last_p
.copy_in(len
, src
, crc_reset
);
1907 void buffer::list::copy_in(unsigned off
, unsigned len
, const list
& src
)
1909 if (last_p
.get_off() != off
)
1911 last_p
.copy_in(len
, src
);
1914 void buffer::list::append(char c
)
1916 // put what we can into the existing append_buffer.
1917 unsigned gap
= append_buffer
.unused_tail_length();
1919 // make a new append_buffer!
1920 append_buffer
= raw_combined::create(CEPH_BUFFER_APPEND_SIZE
, 0,
1922 append_buffer
.set_length(0); // unused, so far.
1924 append(append_buffer
, append_buffer
.append(c
) - 1, 1); // add segment to the list
1927 void buffer::list::append(const char *data
, unsigned len
)
1930 // put what we can into the existing append_buffer.
1931 unsigned gap
= append_buffer
.unused_tail_length();
1933 if (gap
> len
) gap
= len
;
1934 //cout << "append first char is " << data[0] << ", last char is " << data[len-1] << std::endl;
1935 append_buffer
.append(data
, gap
);
1936 append(append_buffer
, append_buffer
.length() - gap
, gap
); // add segment to the list
1943 // make a new append_buffer. fill out a complete page, factoring in the
1944 // raw_combined overhead.
1945 size_t need
= ROUND_UP_TO(len
, sizeof(size_t)) + sizeof(raw_combined
);
1946 size_t alen
= ROUND_UP_TO(need
, CEPH_BUFFER_ALLOC_UNIT
) -
1947 sizeof(raw_combined
);
1948 append_buffer
= raw_combined::create(alen
, 0, get_mempool());
1949 append_buffer
.set_length(0); // unused, so far.
1953 void buffer::list::append(const ptr
& bp
)
1959 void buffer::list::append(ptr
&& bp
)
1962 push_back(std::move(bp
));
1965 void buffer::list::append(const ptr
& bp
, unsigned off
, unsigned len
)
1967 assert(len
+off
<= bp
.length());
1968 if (!_buffers
.empty()) {
1969 ptr
&l
= _buffers
.back();
1970 if (l
.get_raw() == bp
.get_raw() &&
1971 l
.end() == bp
.start() + off
) {
1972 // yay contiguous with tail bp!
1973 l
.set_length(l
.length()+len
);
1978 // add new item to list
1979 push_back(ptr(bp
, off
, len
));
1982 void buffer::list::append(const list
& bl
)
1985 for (std::list
<ptr
>::const_iterator p
= bl
._buffers
.begin();
1986 p
!= bl
._buffers
.end();
1988 _buffers
.push_back(*p
);
1991 void buffer::list::append(std::istream
& in
)
1996 append(s
.c_str(), s
.length());
2002 void buffer::list::prepend_zero(unsigned len
)
2007 _buffers
.emplace_front(std::move(bp
));
2010 void buffer::list::append_zero(unsigned len
)
2014 append(std::move(bp
));
2021 const char& buffer::list::operator[](unsigned n
) const
2024 throw end_of_buffer();
2026 for (std::list
<ptr
>::const_iterator p
= _buffers
.begin();
2027 p
!= _buffers
.end();
2029 if (n
>= p
->length()) {
2039 * return a contiguous ptr to whole bufferlist contents.
2041 char *buffer::list::c_str()
2043 if (_buffers
.empty())
2044 return 0; // no buffers
2046 std::list
<ptr
>::const_iterator iter
= _buffers
.begin();
2049 if (iter
!= _buffers
.end())
2051 return _buffers
.front().c_str(); // good, we're already contiguous.
2054 string
buffer::list::to_str() const {
2056 s
.reserve(length());
2057 for (std::list
<ptr
>::const_iterator p
= _buffers
.begin();
2058 p
!= _buffers
.end();
2061 s
.append(p
->c_str(), p
->length());
2067 char *buffer::list::get_contiguous(unsigned orig_off
, unsigned len
)
2069 if (orig_off
+ len
> length())
2070 throw end_of_buffer();
2076 unsigned off
= orig_off
;
2077 std::list
<ptr
>::iterator curbuf
= _buffers
.begin();
2078 while (off
> 0 && off
>= curbuf
->length()) {
2079 off
-= curbuf
->length();
2083 if (off
+ len
> curbuf
->length()) {
2085 unsigned l
= off
+ len
;
2088 if (l
>= curbuf
->length())
2089 l
-= curbuf
->length();
2092 tmp
.append(*curbuf
);
2093 curbuf
= _buffers
.erase(curbuf
);
2095 } while (curbuf
!= _buffers
.end() && l
> 0);
2100 _buffers
.insert(curbuf
, tmp
._buffers
.front());
2101 return tmp
.c_str() + off
;
2104 last_p
= begin(); // we modified _buffers
2106 return curbuf
->c_str() + off
;
2109 void buffer::list::substr_of(const list
& other
, unsigned off
, unsigned len
)
2111 if (off
+ len
> other
.length())
2112 throw end_of_buffer();
2117 std::list
<ptr
>::const_iterator curbuf
= other
._buffers
.begin();
2119 off
>= curbuf
->length()) {
2121 //cout << "skipping over " << *curbuf << std::endl;
2122 off
-= (*curbuf
).length();
2125 assert(len
== 0 || curbuf
!= other
._buffers
.end());
2129 if (off
+ len
< curbuf
->length()) {
2130 //cout << "copying partial of " << *curbuf << std::endl;
2131 _buffers
.push_back( ptr( *curbuf
, off
, len
) );
2137 //cout << "copying end (all?) of " << *curbuf << std::endl;
2138 unsigned howmuch
= curbuf
->length() - off
;
2139 _buffers
.push_back( ptr( *curbuf
, off
, howmuch
) );
2148 void buffer::list::splice(unsigned off
, unsigned len
, list
*claim_by
/*, bufferlist& replace_with */)
2153 if (off
>= length())
2154 throw end_of_buffer();
2157 //cout << "splice off " << off << " len " << len << " ... mylen = " << length() << std::endl;
2160 std::list
<ptr
>::iterator curbuf
= _buffers
.begin();
2162 assert(curbuf
!= _buffers
.end());
2163 if (off
>= (*curbuf
).length()) {
2165 //cout << "off = " << off << " skipping over " << *curbuf << std::endl;
2166 off
-= (*curbuf
).length();
2169 // somewhere in this buffer!
2170 //cout << "off = " << off << " somewhere in " << *curbuf << std::endl;
2176 // add a reference to the front bit
2177 // insert it before curbuf (which we'll hose)
2178 //cout << "keeping front " << off << " of " << *curbuf << std::endl;
2179 _buffers
.insert( curbuf
, ptr( *curbuf
, 0, off
) );
2185 if (off
+ len
< (*curbuf
).length()) {
2186 //cout << "keeping end of " << *curbuf << ", losing first " << off+len << std::endl;
2188 claim_by
->append( *curbuf
, off
, len
);
2189 (*curbuf
).set_offset( off
+len
+ (*curbuf
).offset() ); // ignore beginning big
2190 (*curbuf
).set_length( (*curbuf
).length() - (len
+off
) );
2192 //cout << " now " << *curbuf << std::endl;
2196 // hose though the end
2197 unsigned howmuch
= (*curbuf
).length() - off
;
2198 //cout << "discarding " << howmuch << " of " << *curbuf << std::endl;
2200 claim_by
->append( *curbuf
, off
, howmuch
);
2201 _len
-= (*curbuf
).length();
2202 _buffers
.erase( curbuf
++ );
2207 // splice in *replace (implement me later?)
2209 last_p
= begin(); // just in case we were in the removed region.
2212 void buffer::list::write(int off
, int len
, std::ostream
& out
) const
2215 s
.substr_of(*this, off
, len
);
2216 for (std::list
<ptr
>::const_iterator it
= s
._buffers
.begin();
2217 it
!= s
._buffers
.end();
2220 out
.write(it
->c_str(), it
->length());
2221 /*iterator p(this, off);
2222 while (len > 0 && !p.end()) {
2223 int l = p.left_in_this_buf();
2226 out.write(p.c_str(), l);
2231 void buffer::list::encode_base64(buffer::list
& o
)
2233 bufferptr
bp(length() * 4 / 3 + 3);
2234 int l
= ceph_armor(bp
.c_str(), bp
.c_str() + bp
.length(), c_str(), c_str() + length());
2236 o
.push_back(std::move(bp
));
2239 void buffer::list::decode_base64(buffer::list
& e
)
2241 bufferptr
bp(4 + ((e
.length() * 3) / 4));
2242 int l
= ceph_unarmor(bp
.c_str(), bp
.c_str() + bp
.length(), e
.c_str(), e
.c_str() + e
.length());
2244 std::ostringstream oss
;
2245 oss
<< "decode_base64: decoding failed:\n";
2247 throw buffer::malformed_input(oss
.str().c_str());
2249 assert(l
<= (int)bp
.length());
2251 push_back(std::move(bp
));
2256 int buffer::list::read_file(const char *fn
, std::string
*error
)
2258 int fd
= TEMP_FAILURE_RETRY(::open(fn
, O_RDONLY
|O_CLOEXEC
));
2261 std::ostringstream oss
;
2262 oss
<< "can't open " << fn
<< ": " << cpp_strerror(err
);
2268 memset(&st
, 0, sizeof(st
));
2269 if (::fstat(fd
, &st
) < 0) {
2271 std::ostringstream oss
;
2272 oss
<< "bufferlist::read_file(" << fn
<< "): stat error: "
2273 << cpp_strerror(err
);
2275 VOID_TEMP_FAILURE_RETRY(::close(fd
));
2279 ssize_t ret
= read_fd(fd
, st
.st_size
);
2281 std::ostringstream oss
;
2282 oss
<< "bufferlist::read_file(" << fn
<< "): read error:"
2283 << cpp_strerror(ret
);
2285 VOID_TEMP_FAILURE_RETRY(::close(fd
));
2288 else if (ret
!= st
.st_size
) {
2290 // Perhaps the file changed between stat() and read()?
2291 std::ostringstream oss
;
2292 oss
<< "bufferlist::read_file(" << fn
<< "): warning: got premature EOF.";
2294 // not actually an error, but weird
2296 VOID_TEMP_FAILURE_RETRY(::close(fd
));
2300 ssize_t
buffer::list::read_fd(int fd
, size_t len
)
2302 // try zero copy first
2303 if (false && read_fd_zero_copy(fd
, len
) == 0) {
2304 // TODO fix callers to not require correct read size, which is not
2305 // available for raw_pipe until we actually inspect the data
2308 bufferptr bp
= buffer::create(len
);
2309 ssize_t ret
= safe_read(fd
, (void*)bp
.c_str(), len
);
2312 append(std::move(bp
));
2317 int buffer::list::read_fd_zero_copy(int fd
, size_t len
)
2319 #ifdef CEPH_HAVE_SPLICE
2321 append(buffer::create_zero_copy(len
, fd
, NULL
));
2322 } catch (buffer::error_code
&e
) {
2324 } catch (buffer::malformed_input
&e
) {
2333 int buffer::list::write_file(const char *fn
, int mode
)
2335 int fd
= TEMP_FAILURE_RETRY(::open(fn
, O_WRONLY
|O_CREAT
|O_TRUNC
|O_CLOEXEC
, mode
));
2338 cerr
<< "bufferlist::write_file(" << fn
<< "): failed to open file: "
2339 << cpp_strerror(err
) << std::endl
;
2342 int ret
= write_fd(fd
);
2344 cerr
<< "bufferlist::write_fd(" << fn
<< "): write_fd error: "
2345 << cpp_strerror(ret
) << std::endl
;
2346 VOID_TEMP_FAILURE_RETRY(::close(fd
));
2349 if (TEMP_FAILURE_RETRY(::close(fd
))) {
2351 cerr
<< "bufferlist::write_file(" << fn
<< "): close error: "
2352 << cpp_strerror(err
) << std::endl
;
2358 static int do_writev(int fd
, struct iovec
*vec
, uint64_t offset
, unsigned veclen
, unsigned bytes
)
2363 r
= ::pwritev(fd
, vec
, veclen
, offset
);
2365 r
= ::lseek64(fd
, offset
, SEEK_SET
);
2370 r
= ::writev(fd
, vec
, veclen
);
2380 if (bytes
== 0) break;
2383 if (vec
[0].iov_len
<= (size_t)r
) {
2384 // drain this whole item
2385 r
-= vec
[0].iov_len
;
2389 vec
[0].iov_base
= (char *)vec
[0].iov_base
+ r
;
2390 vec
[0].iov_len
-= r
;
2398 int buffer::list::write_fd(int fd
) const
2400 if (can_zero_copy())
2401 return write_fd_zero_copy(fd
);
2408 std::list
<ptr
>::const_iterator p
= _buffers
.begin();
2409 while (p
!= _buffers
.end()) {
2410 if (p
->length() > 0) {
2411 iov
[iovlen
].iov_base
= (void *)p
->c_str();
2412 iov
[iovlen
].iov_len
= p
->length();
2413 bytes
+= p
->length();
2418 if (iovlen
== IOV_MAX
||
2419 p
== _buffers
.end()) {
2424 wrote
= ::writev(fd
, start
, num
);
2431 if (wrote
< bytes
) {
2432 // partial write, recover!
2433 while ((size_t)wrote
>= start
[0].iov_len
) {
2434 wrote
-= start
[0].iov_len
;
2435 bytes
-= start
[0].iov_len
;
2440 start
[0].iov_len
-= wrote
;
2441 start
[0].iov_base
= (char *)start
[0].iov_base
+ wrote
;
2453 int buffer::list::write_fd(int fd
, uint64_t offset
) const
2457 std::list
<ptr
>::const_iterator p
= _buffers
.begin();
2458 uint64_t left_pbrs
= _buffers
.size();
2461 unsigned iovlen
= 0;
2462 uint64_t size
= MIN(left_pbrs
, IOV_MAX
);
2465 iov
[iovlen
].iov_base
= (void *)p
->c_str();
2466 iov
[iovlen
].iov_len
= p
->length();
2468 bytes
+= p
->length();
2473 int r
= do_writev(fd
, iov
, offset
, iovlen
, bytes
);
2481 int buffer::list::write_fd_zero_copy(int fd
) const
2483 if (!can_zero_copy())
2485 /* pass offset to each call to avoid races updating the fd seek
2486 * position, since the I/O may be non-blocking
2488 int64_t offset
= ::lseek(fd
, 0, SEEK_CUR
);
2489 int64_t *off_p
= &offset
;
2490 if (offset
< 0 && errno
!= ESPIPE
)
2492 if (errno
== ESPIPE
)
2494 for (std::list
<ptr
>::const_iterator it
= _buffers
.begin();
2495 it
!= _buffers
.end(); ++it
) {
2496 int r
= it
->zero_copy_to_fd(fd
, off_p
);
2500 offset
+= it
->length();
2505 __u32
buffer::list::crc32c(__u32 crc
) const
2507 for (std::list
<ptr
>::const_iterator it
= _buffers
.begin();
2508 it
!= _buffers
.end();
2511 raw
*r
= it
->get_raw();
2512 pair
<size_t, size_t> ofs(it
->offset(), it
->offset() + it
->length());
2513 pair
<uint32_t, uint32_t> ccrc
;
2514 if (r
->get_crc(ofs
, &ccrc
)) {
2515 if (ccrc
.first
== crc
) {
2518 if (buffer_track_crc
)
2519 buffer_cached_crc
++;
2521 /* If we have cached crc32c(buf, v) for initial value v,
2522 * we can convert this to a different initial value v' by:
2523 * crc32c(buf, v') = crc32c(buf, v) ^ adjustment
2524 * where adjustment = crc32c(0*len(buf), v ^ v')
2526 * http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
2527 * note, u for our crc32c implementation is 0
2529 crc
= ccrc
.second
^ ceph_crc32c(ccrc
.first
^ crc
, NULL
, it
->length());
2530 if (buffer_track_crc
)
2531 buffer_cached_crc_adjusted
++;
2534 if (buffer_track_crc
)
2535 buffer_missed_crc
++;
2536 uint32_t base
= crc
;
2537 crc
= ceph_crc32c(crc
, (unsigned char*)it
->c_str(), it
->length());
2538 r
->set_crc(ofs
, make_pair(base
, crc
));
2545 void buffer::list::invalidate_crc()
2547 for (std::list
<ptr
>::const_iterator p
= _buffers
.begin(); p
!= _buffers
.end(); ++p
) {
2548 raw
*r
= p
->get_raw();
2550 r
->invalidate_crc();
2556 * Binary write all contents to a C++ stream
2558 void buffer::list::write_stream(std::ostream
&out
) const
2560 for (std::list
<ptr
>::const_iterator p
= _buffers
.begin(); p
!= _buffers
.end(); ++p
) {
2561 if (p
->length() > 0) {
2562 out
.write(p
->c_str(), p
->length());
2568 void buffer::list::hexdump(std::ostream
&out
, bool trailing_newline
) const
2573 std::ios_base::fmtflags original_flags
= out
.flags();
2575 // do our best to match the output of hexdump -C, for better
2578 out
.setf(std::ios::right
);
2582 bool was_zeros
= false, did_star
= false;
2583 for (unsigned o
=0; o
<length(); o
+= per
) {
2584 bool row_is_zeros
= false;
2585 if (o
+ per
< length()) {
2586 row_is_zeros
= true;
2587 for (unsigned i
=0; i
<per
&& o
+i
<length(); i
++) {
2589 row_is_zeros
= false;
2608 out
<< std::hex
<< std::setw(8) << o
<< " ";
2611 for (i
=0; i
<per
&& o
+i
<length(); i
++) {
2614 out
<< " " << std::setw(2) << ((unsigned)(*this)[o
+i
] & 0xff);
2616 for (; i
<per
; i
++) {
2623 for (i
=0; i
<per
&& o
+i
<length(); i
++) {
2624 char c
= (*this)[o
+i
];
2625 if (isupper(c
) || islower(c
) || isdigit(c
) || c
== ' ' || ispunct(c
))
2630 out
<< '|' << std::dec
;
2632 if (trailing_newline
) {
2633 out
<< "\n" << std::hex
<< std::setw(8) << length();
2637 out
.flags(original_flags
);
2641 buffer::list
buffer::list::static_from_mem(char* c
, size_t l
) {
2643 bl
.push_back(ptr(create_static(l
, c
)));
2647 buffer::list
buffer::list::static_from_cstring(char* c
) {
2648 return static_from_mem(c
, std::strlen(c
));
2651 buffer::list
buffer::list::static_from_string(string
& s
) {
2652 // C++14 just has string::data return a char* from a non-const
2654 return static_from_mem(const_cast<char*>(s
.data()), s
.length());
2655 // But the way buffer::list mostly doesn't work in a sane way with
2656 // const makes me generally sad.
2659 std::ostream
& buffer::operator<<(std::ostream
& out
, const buffer::raw
&r
) {
2660 return out
<< "buffer::raw(" << (void*)r
.data
<< " len " << r
.len
<< " nref " << r
.nref
.load() << ")";
2663 std::ostream
& buffer::operator<<(std::ostream
& out
, const buffer::ptr
& bp
) {
2665 out
<< "buffer::ptr(" << bp
.offset() << "~" << bp
.length()
2666 << " " << (void*)bp
.c_str()
2667 << " in raw " << (void*)bp
.raw_c_str()
2668 << " len " << bp
.raw_length()
2669 << " nref " << bp
.raw_nref() << ")";
2671 out
<< "buffer:ptr(" << bp
.offset() << "~" << bp
.length() << " no raw)";
2675 std::ostream
& buffer::operator<<(std::ostream
& out
, const buffer::list
& bl
) {
2676 out
<< "buffer::list(len=" << bl
.length() << "," << std::endl
;
2678 std::list
<buffer::ptr
>::const_iterator it
= bl
.buffers().begin();
2679 while (it
!= bl
.buffers().end()) {
2681 if (++it
== bl
.buffers().end()) break;
2682 out
<< "," << std::endl
;
2684 out
<< std::endl
<< ")";
2688 std::ostream
& buffer::operator<<(std::ostream
& out
, const buffer::error
& e
)
2690 return out
<< e
.what();
2693 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_malloc
, buffer_raw_malloc
,
2695 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_mmap_pages
, buffer_raw_mmap_pagse
,
2697 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_posix_aligned
,
2698 buffer_raw_posix_aligned
, buffer_meta
);
2699 #ifdef CEPH_HAVE_SPLICE
2700 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_pipe
, buffer_raw_pipe
, buffer_meta
);
2702 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_char
, buffer_raw_char
, buffer_meta
);
2703 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_claimed_char
, buffer_raw_claimed_char
,
2705 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_unshareable
, buffer_raw_unshareable
,
2707 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_static
, buffer_raw_static
,