]> git.proxmox.com Git - ceph.git/blob - ceph/src/common/buffer.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / common / buffer.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include <atomic>
16 #include <cstring>
17 #include <errno.h>
18 #include <limits.h>
19
20 #include <sys/uio.h>
21
22 #include "include/ceph_assert.h"
23 #include "include/types.h"
24 #include "include/buffer_raw.h"
25 #include "include/compat.h"
26 #include "include/mempool.h"
27 #include "armor.h"
28 #include "common/environment.h"
29 #include "common/errno.h"
30 #include "common/error_code.h"
31 #include "common/safe_io.h"
32 #include "common/strtol.h"
33 #include "common/likely.h"
34 #include "common/valgrind.h"
35 #include "common/deleter.h"
36 #include "common/error_code.h"
37 #include "include/intarith.h"
38 #include "include/spinlock.h"
39 #include "include/scope_guard.h"
40
41 using std::cerr;
42 using std::make_pair;
43 using std::pair;
44 using std::string;
45
46 using namespace ceph;
47
48 #define CEPH_BUFFER_ALLOC_UNIT 4096u
49 #define CEPH_BUFFER_APPEND_SIZE (CEPH_BUFFER_ALLOC_UNIT - sizeof(raw_combined))
50
51 // 256K is the maximum "small" object size in tcmalloc above which allocations come from
52 // the central heap. For now let's keep this below that threshold.
53 #define CEPH_BUFFER_ALLOC_UNIT_MAX std::size_t { 256*1024 }
54
55 #ifdef BUFFER_DEBUG
56 static ceph::spinlock debug_lock;
57 # define bdout { std::lock_guard<ceph::spinlock> lg(debug_lock); std::cout
58 # define bendl std::endl; }
59 #else
60 # define bdout if (0) { std::cout
61 # define bendl std::endl; }
62 #endif
63
64 static ceph::atomic<unsigned> buffer_cached_crc { 0 };
65 static ceph::atomic<unsigned> buffer_cached_crc_adjusted { 0 };
66 static ceph::atomic<unsigned> buffer_missed_crc { 0 };
67
68 static bool buffer_track_crc = get_env_bool("CEPH_BUFFER_TRACK");
69
70 void buffer::track_cached_crc(bool b) {
71 buffer_track_crc = b;
72 }
73 int buffer::get_cached_crc() {
74 return buffer_cached_crc;
75 }
76 int buffer::get_cached_crc_adjusted() {
77 return buffer_cached_crc_adjusted;
78 }
79
80 int buffer::get_missed_crc() {
81 return buffer_missed_crc;
82 }
83
84 /*
85 * raw_combined is always placed within a single allocation along
86 * with the data buffer. the data goes at the beginning, and
87 * raw_combined at the end.
88 */
class buffer::raw_combined : public buffer::raw {
  size_t alignment;  // alignment requested at creation; reused by clone_empty()
public:
  raw_combined(char *dataptr, unsigned l, unsigned align,
	       int mempool)
    : raw(dataptr, l, mempool),
      alignment(align) {
  }
  // Allocate a fresh combined buffer of the same length/alignment;
  // contents are not copied here.
  raw* clone_empty() override {
    return create(len, alignment).release();
  }

  // Build one allocation holding both the data area and the
  // raw_combined header: data at the front, header at the back.
  static ceph::unique_leakable_ptr<buffer::raw>
  create(unsigned len,
	 unsigned align,
	 int mempool = mempool::mempool_buffer_anon)
  {
    // posix_memalign() requires a multiple of sizeof(void *)
    align = std::max<unsigned>(align, sizeof(void *));
    // round both regions up so the header lands on a properly aligned
    // address within the single allocation
    size_t rawlen = round_up_to(sizeof(buffer::raw_combined),
				alignof(buffer::raw_combined));
    size_t datalen = round_up_to(len, alignof(buffer::raw_combined));

#ifdef DARWIN
    char *ptr = (char *) valloc(rawlen + datalen);
#else
    char *ptr = 0;
    int r = ::posix_memalign((void**)(void*)&ptr, align, rawlen + datalen);
    if (r)
      throw bad_alloc();
#endif /* DARWIN */
    if (!ptr)
      throw bad_alloc();

    // actual data first, since it has presumably larger alignment restriction
    // then put the raw_combined at the end
    return ceph::unique_leakable_ptr<buffer::raw>(
      new (ptr + datalen) raw_combined(ptr, len, align, mempool));
  }

  // The object lives inside the combined allocation (placement new),
  // so delete must free the whole block; its start address is the
  // data pointer, not `ptr`.
  static void operator delete(void *ptr) {
    raw_combined *raw = (raw_combined *)ptr;
    aligned_free((void *)raw->data);
  }
};
134
135 class buffer::raw_malloc : public buffer::raw {
136 public:
137 MEMPOOL_CLASS_HELPERS();
138
139 explicit raw_malloc(unsigned l) : raw(l) {
140 if (len) {
141 data = (char *)malloc(len);
142 if (!data)
143 throw bad_alloc();
144 } else {
145 data = 0;
146 }
147 bdout << "raw_malloc " << this << " alloc " << (void *)data << " " << l << bendl;
148 }
149 raw_malloc(unsigned l, char *b) : raw(b, l) {
150 bdout << "raw_malloc " << this << " alloc " << (void *)data << " " << l << bendl;
151 }
152 ~raw_malloc() override {
153 free(data);
154 bdout << "raw_malloc " << this << " free " << (void *)data << " " << bendl;
155 }
156 raw* clone_empty() override {
157 return new raw_malloc(len);
158 }
159 };
160
#ifndef __CYGWIN__
// Aligned heap buffer backed by posix_memalign() (valloc() on Darwin).
class buffer::raw_posix_aligned : public buffer::raw {
  unsigned align;  // effective alignment, clamped to >= sizeof(void*)
public:
  MEMPOOL_CLASS_HELPERS();

  raw_posix_aligned(unsigned l, unsigned _align) : raw(l) {
    // posix_memalign() requires a multiple of sizeof(void *)
    align = std::max<unsigned>(_align, sizeof(void *));
#ifdef DARWIN
    // NOTE(review): valloc() only guarantees page alignment; `align`
    // values larger than a page are not honored on this path — confirm.
    data = (char *) valloc(len);
#else
    int r = ::posix_memalign((void**)(void*)&data, align, len);
    if (r)
      throw bad_alloc();
#endif /* DARWIN */
    if (!data)
      throw bad_alloc();
    bdout << "raw_posix_aligned " << this << " alloc " << (void *)data
	  << " l=" << l << ", align=" << align << bendl;
  }
  ~raw_posix_aligned() override {
    aligned_free(data);
    bdout << "raw_posix_aligned " << this << " free " << (void *)data << bendl;
  }
  raw* clone_empty() override {
    return new raw_posix_aligned(len, align);
  }
};
#endif
191
#ifdef __CYGWIN__
// Fallback for platforms without posix_memalign (Cygwin): over-allocate
// by align-1 bytes and round the data pointer up by hand.
class buffer::raw_hack_aligned : public buffer::raw {
  unsigned align;   // requested alignment (mask math assumes a power of two)
  char *realdata;   // actual new[] allocation; `data` points inside it
public:
  raw_hack_aligned(unsigned l, unsigned _align) : raw(l) {
    align = _align;
    realdata = new char[len+align-1];
    unsigned off = ((uintptr_t)realdata) & (align-1);
    if (off)
      data = realdata + align - off;
    else
      data = realdata;
    ceph_assert(((uintptr_t)data & (align-1)) == 0);
  }
  // `override` added on both virtuals for consistency with the other
  // raw subclasses (raw_malloc, raw_posix_aligned, raw_char, ...).
  ~raw_hack_aligned() override {
    delete[] realdata;
  }
  raw* clone_empty() override {
    return new raw_hack_aligned(len, align);
  }
};
#endif
218
219 /*
220 * primitive buffer types
221 */
222 class buffer::raw_char : public buffer::raw {
223 public:
224 MEMPOOL_CLASS_HELPERS();
225
226 explicit raw_char(unsigned l) : raw(l) {
227 if (len)
228 data = new char[len];
229 else
230 data = 0;
231 bdout << "raw_char " << this << " alloc " << (void *)data << " " << l << bendl;
232 }
233 raw_char(unsigned l, char *b) : raw(b, l) {
234 bdout << "raw_char " << this << " alloc " << (void *)data << " " << l << bendl;
235 }
236 ~raw_char() override {
237 delete[] data;
238 bdout << "raw_char " << this << " free " << (void *)data << bendl;
239 }
240 raw* clone_empty() override {
241 return new raw_char(len);
242 }
243 };
244
// Wraps caller-provided memory; the destructor frees nothing.
// NOTE(review): despite the "claimed" name, lifetime of `b` is managed
// by the caller — confirm against claim_char() users.  Clones fall back
// to raw_char so the clone owns its own storage.
class buffer::raw_claimed_char : public buffer::raw {
public:
  MEMPOOL_CLASS_HELPERS();

  explicit raw_claimed_char(unsigned l, char *b) : raw(b, l) {
    bdout << "raw_claimed_char " << this << " alloc " << (void *)data
	  << " " << l << bendl;
  }
  ~raw_claimed_char() override {
    // deliberately no deallocation here
    bdout << "raw_claimed_char " << this << " free " << (void *)data
	  << bendl;
  }
  raw* clone_empty() override {
    return new raw_char(len);
  }
};
261
// Non-owning view of memory that outlives the buffer (e.g. string
// literals); nothing is freed.  A clone gets heap-backed raw_char
// storage since the static region cannot be duplicated in place.
class buffer::raw_static : public buffer::raw {
public:
  MEMPOOL_CLASS_HELPERS();

  raw_static(const char *d, unsigned l) : raw((char*)d, l) { }
  ~raw_static() override {}
  raw* clone_empty() override {
    return new buffer::raw_char(len);
  }
};
272
// Takes ownership of foreign memory through a type-erased deleter;
// the deleter runs when `del` is destroyed along with this object.
class buffer::raw_claim_buffer : public buffer::raw {
  deleter del;
public:
  raw_claim_buffer(const char *b, unsigned l, deleter d)
    : raw((char*)b, l), del(std::move(d)) { }
  ~raw_claim_buffer() override {}
  raw* clone_empty() override {
    return new buffer::raw_char(len);
  }
};
283
284 ceph::unique_leakable_ptr<buffer::raw> buffer::copy(const char *c, unsigned len) {
285 auto r = buffer::create_aligned(len, sizeof(size_t));
286 memcpy(r->get_data(), c, len);
287 return r;
288 }
289
290 ceph::unique_leakable_ptr<buffer::raw> buffer::create(unsigned len) {
291 return buffer::create_aligned(len, sizeof(size_t));
292 }
293 ceph::unique_leakable_ptr<buffer::raw> buffer::create(unsigned len, char c) {
294 auto ret = buffer::create_aligned(len, sizeof(size_t));
295 memset(ret->get_data(), c, len);
296 return ret;
297 }
298 ceph::unique_leakable_ptr<buffer::raw>
299 buffer::create_in_mempool(unsigned len, int mempool) {
300 return buffer::create_aligned_in_mempool(len, sizeof(size_t), mempool);
301 }
302 ceph::unique_leakable_ptr<buffer::raw>
303 buffer::claim_char(unsigned len, char *buf) {
304 return ceph::unique_leakable_ptr<buffer::raw>(
305 new raw_claimed_char(len, buf));
306 }
307 ceph::unique_leakable_ptr<buffer::raw> buffer::create_malloc(unsigned len) {
308 return ceph::unique_leakable_ptr<buffer::raw>(new raw_malloc(len));
309 }
310 ceph::unique_leakable_ptr<buffer::raw>
311 buffer::claim_malloc(unsigned len, char *buf) {
312 return ceph::unique_leakable_ptr<buffer::raw>(new raw_malloc(len, buf));
313 }
314 ceph::unique_leakable_ptr<buffer::raw>
315 buffer::create_static(unsigned len, char *buf) {
316 return ceph::unique_leakable_ptr<buffer::raw>(new raw_static(buf, len));
317 }
318 ceph::unique_leakable_ptr<buffer::raw>
319 buffer::claim_buffer(unsigned len, char *buf, deleter del) {
320 return ceph::unique_leakable_ptr<buffer::raw>(
321 new raw_claim_buffer(buf, len, std::move(del)));
322 }
323
324 ceph::unique_leakable_ptr<buffer::raw> buffer::create_aligned_in_mempool(
325 unsigned len, unsigned align, int mempool)
326 {
327 // If alignment is a page multiple, use a separate buffer::raw to
328 // avoid fragmenting the heap.
329 //
330 // Somewhat unexpectedly, I see consistently better performance
331 // from raw_combined than from raw even when the allocation size is
332 // a page multiple (but alignment is not).
333 //
334 // I also see better performance from a separate buffer::raw once the
335 // size passes 8KB.
336 if ((align & ~CEPH_PAGE_MASK) == 0 ||
337 len >= CEPH_PAGE_SIZE * 2) {
338 #ifndef __CYGWIN__
339 return ceph::unique_leakable_ptr<buffer::raw>(new raw_posix_aligned(len, align));
340 #else
341 return ceph::unique_leakable_ptr<buffer::raw>(new raw_hack_aligned(len, align));
342 #endif
343 }
344 return raw_combined::create(len, align, mempool);
345 }
346 ceph::unique_leakable_ptr<buffer::raw> buffer::create_aligned(
347 unsigned len, unsigned align) {
348 return create_aligned_in_mempool(len, align,
349 mempool::mempool_buffer_anon);
350 }
351
352 ceph::unique_leakable_ptr<buffer::raw> buffer::create_page_aligned(unsigned len) {
353 return create_aligned(len, CEPH_PAGE_SIZE);
354 }
355 ceph::unique_leakable_ptr<buffer::raw> buffer::create_small_page_aligned(unsigned len) {
356 if (len < CEPH_PAGE_SIZE) {
357 return create_aligned(len, CEPH_BUFFER_ALLOC_UNIT);
358 } else {
359 return create_aligned(len, CEPH_PAGE_SIZE);
360 }
361 }
362
// Take sole ownership of a freshly created raw; view covers all of it.
buffer::ptr::ptr(ceph::unique_leakable_ptr<raw> r)
  : _raw(r.release()),
    _off(0),
    _len(_raw->get_len())
{
  // Fresh raw: set the refcount to 1.  The release-store pairs with the
  // acquire-load in buffer::ptr::release().
  _raw->nref.store(1, std::memory_order_release);
  bdout << "ptr " << this << " get " << _raw << bendl;
}
// Allocate a new l-byte buffer owned solely by this ptr.
buffer::ptr::ptr(unsigned l) : _off(0), _len(l)
{
  _raw = buffer::create(l).release();
  _raw->nref.store(1, std::memory_order_release);
  bdout << "ptr " << this << " get " << _raw << bendl;
}
// Deep-copy l bytes from d into a newly allocated buffer.
buffer::ptr::ptr(const char *d, unsigned l) : _off(0), _len(l) // ditto.
{
  _raw = buffer::copy(d, l).release();
  _raw->nref.store(1, std::memory_order_release);
  bdout << "ptr " << this << " get " << _raw << bendl;
}
// Copy: share p's raw, bumping its refcount.
buffer::ptr::ptr(const ptr& p) : _raw(p._raw), _off(p._off), _len(p._len)
{
  if (_raw) {
    _raw->nref++;
    bdout << "ptr " << this << " get " << _raw << bendl;
  }
}
// Move: steal p's reference; p is left empty (no refcount traffic).
buffer::ptr::ptr(ptr&& p) noexcept : _raw(p._raw), _off(p._off), _len(p._len)
{
  p._raw = nullptr;
  p._off = p._len = 0;
}
// Sub-range view [o, o+l) within p, sharing p's raw.
buffer::ptr::ptr(const ptr& p, unsigned o, unsigned l)
  : _raw(p._raw), _off(p._off + o), _len(l)
{
  ceph_assert(o+l <= p._len);
  ceph_assert(_raw);
  _raw->nref++;
  bdout << "ptr " << this << " get " << _raw << bendl;
}
// Keep p's offset/length but adopt a brand-new raw as sole owner.
buffer::ptr::ptr(const ptr& p, ceph::unique_leakable_ptr<raw> r)
  : _raw(r.release()),
    _off(p._off),
    _len(p._len)
{
  _raw->nref.store(1, std::memory_order_release);
  bdout << "ptr " << this << " get " << _raw << bendl;
}
buffer::ptr& buffer::ptr::operator= (const ptr& p)
{
  // Take the new reference BEFORE releasing our own; this order makes
  // self-assignment (p aliasing *this) safe.
  if (p._raw) {
    p._raw->nref++;
    bdout << "ptr " << this << " get " << _raw << bendl;
  }
  buffer::raw *raw = p._raw;
  release();
  if (raw) {
    _raw = raw;
    _off = p._off;
    _len = p._len;
  } else {
    _off = _len = 0;
  }
  return *this;
}
// Move-assign: drop our reference, then steal p's (p is left empty).
// Self-move is benign: release() nulls _raw first, so the subsequent
// load of p._raw just leaves *this empty.
buffer::ptr& buffer::ptr::operator= (ptr&& p) noexcept
{
  release();
  buffer::raw *raw = p._raw;
  if (raw) {
    _raw = raw;
    _off = p._off;
    _len = p._len;
    p._raw = nullptr;
    p._off = p._len = 0;
  } else {
    _off = _len = 0;
  }
  return *this;
}
443
444 ceph::unique_leakable_ptr<buffer::raw> buffer::ptr::clone()
445 {
446 return _raw->clone();
447 }
448
449 void buffer::ptr::swap(ptr& other) noexcept
450 {
451 raw *r = _raw;
452 unsigned o = _off;
453 unsigned l = _len;
454 _raw = other._raw;
455 _off = other._off;
456 _len = other._len;
457 other._raw = r;
458 other._off = o;
459 other._len = l;
460 }
461
// Drop this ptr's reference to its raw, freeing the raw when this was
// the last reference.
void buffer::ptr::release()
{
  // BE CAREFUL: this is called also for hypercombined ptr_node. After
  // freeing underlying raw, `*this` can become inaccessible as well!
  //
  // cache the pointer to avoid unncecessary reloads and repeated
  // checks.
  if (auto* const cached_raw = std::exchange(_raw, nullptr);
      cached_raw) {
    bdout << "ptr " << this << " release " << cached_raw << bendl;
    // optimize the common case where a particular `buffer::raw` has
    // only a single reference. Altogether with initializing `nref` of
    // freshly fabricated one with `1` through the std::atomic's ctor
    // (which doesn't impose a memory barrier on the strongly-ordered
    // x86), this allows to avoid all atomical operations in such case.
    const bool last_one = \
      (1 == cached_raw->nref.load(std::memory_order_acquire));
    // short-circuit: the atomic decrement only runs when the acquire
    // load saw more than one reference.
    if (likely(last_one) || --cached_raw->nref == 0) {
      bdout << "deleting raw " << static_cast<void*>(cached_raw)
	    << " len " << cached_raw->get_len() << bendl;
      ANNOTATE_HAPPENS_AFTER(&cached_raw->nref);
      ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&cached_raw->nref);
      delete cached_raw; // dealloc old (if any)
    } else {
      ANNOTATE_HAPPENS_BEFORE(&cached_raw->nref);
    }
  }
}
490
491 int buffer::ptr::get_mempool() const {
492 if (_raw) {
493 return _raw->mempool;
494 }
495 return mempool::mempool_buffer_anon;
496 }
497
498 void buffer::ptr::reassign_to_mempool(int pool) {
499 if (_raw) {
500 _raw->reassign_to_mempool(pool);
501 }
502 }
503 void buffer::ptr::try_assign_to_mempool(int pool) {
504 if (_raw) {
505 _raw->try_assign_to_mempool(pool);
506 }
507 }
508
509 const char *buffer::ptr::c_str() const {
510 ceph_assert(_raw);
511 return _raw->get_data() + _off;
512 }
513 char *buffer::ptr::c_str() {
514 ceph_assert(_raw);
515 return _raw->get_data() + _off;
516 }
517 const char *buffer::ptr::end_c_str() const {
518 ceph_assert(_raw);
519 return _raw->get_data() + _off + _len;
520 }
521 char *buffer::ptr::end_c_str() {
522 ceph_assert(_raw);
523 return _raw->get_data() + _off + _len;
524 }
525
526 unsigned buffer::ptr::unused_tail_length() const
527 {
528 return _raw ? _raw->get_len() - (_off + _len) : 0;
529 }
530 const char& buffer::ptr::operator[](unsigned n) const
531 {
532 ceph_assert(_raw);
533 ceph_assert(n < _len);
534 return _raw->get_data()[_off + n];
535 }
536 char& buffer::ptr::operator[](unsigned n)
537 {
538 ceph_assert(_raw);
539 ceph_assert(n < _len);
540 return _raw->get_data()[_off + n];
541 }
542
543 const char *buffer::ptr::raw_c_str() const { ceph_assert(_raw); return _raw->get_data(); }
544 unsigned buffer::ptr::raw_length() const { ceph_assert(_raw); return _raw->get_len(); }
545 int buffer::ptr::raw_nref() const { ceph_assert(_raw); return _raw->nref; }
546
547 void buffer::ptr::copy_out(unsigned o, unsigned l, char *dest) const {
548 ceph_assert(_raw);
549 if (o+l > _len)
550 throw end_of_buffer();
551 char* src = _raw->get_data() + _off + o;
552 maybe_inline_memcpy(dest, src, l, 8);
553 }
554
555 unsigned buffer::ptr::wasted() const
556 {
557 return _raw->get_len() - _len;
558 }
559
560 int buffer::ptr::cmp(const ptr& o) const
561 {
562 int l = _len < o._len ? _len : o._len;
563 if (l) {
564 int r = memcmp(c_str(), o.c_str(), l);
565 if (r)
566 return r;
567 }
568 if (_len < o._len)
569 return -1;
570 if (_len > o._len)
571 return 1;
572 return 0;
573 }
574
575 bool buffer::ptr::is_zero() const
576 {
577 return mem_is_zero(c_str(), _len);
578 }
579
580 unsigned buffer::ptr::append(char c)
581 {
582 ceph_assert(_raw);
583 ceph_assert(1 <= unused_tail_length());
584 char* ptr = _raw->get_data() + _off + _len;
585 *ptr = c;
586 _len++;
587 return _len + _off;
588 }
589
590 unsigned buffer::ptr::append(const char *p, unsigned l)
591 {
592 ceph_assert(_raw);
593 ceph_assert(l <= unused_tail_length());
594 char* c = _raw->get_data() + _off + _len;
595 maybe_inline_memcpy(c, p, l, 32);
596 _len += l;
597 return _len + _off;
598 }
599
600 unsigned buffer::ptr::append_zeros(unsigned l)
601 {
602 ceph_assert(_raw);
603 ceph_assert(l <= unused_tail_length());
604 char* c = _raw->get_data() + _off + _len;
605 // FIPS zeroization audit 20191115: this memset is not security related.
606 memset(c, 0, l);
607 _len += l;
608 return _len + _off;
609 }
610
611 void buffer::ptr::copy_in(unsigned o, unsigned l, const char *src, bool crc_reset)
612 {
613 ceph_assert(_raw);
614 ceph_assert(o <= _len);
615 ceph_assert(o+l <= _len);
616 char* dest = _raw->get_data() + _off + o;
617 if (crc_reset)
618 _raw->invalidate_crc();
619 maybe_inline_memcpy(dest, src, l, 64);
620 }
621
622 void buffer::ptr::zero(bool crc_reset)
623 {
624 if (crc_reset)
625 _raw->invalidate_crc();
626 // FIPS zeroization audit 20191115: this memset is not security related.
627 memset(c_str(), 0, _len);
628 }
629
630 void buffer::ptr::zero(unsigned o, unsigned l, bool crc_reset)
631 {
632 ceph_assert(o+l <= _len);
633 if (crc_reset)
634 _raw->invalidate_crc();
635 // FIPS zeroization audit 20191115: this memset is not security related.
636 memset(c_str()+o, 0, l);
637 }
638
// Advance a ptr iterator by `len` bytes; throws end_of_buffer if the
// result would run past the ptr's data.  NOTE(review): the bounds
// check happens after the increment, so `pos` transiently points past
// end_ptr before the throw.
template<bool B>
buffer::ptr::iterator_impl<B>& buffer::ptr::iterator_impl<B>::operator +=(size_t len) {
  pos += len;
  if (pos > end_ptr)
    throw end_of_buffer();
  return *this;
}

// Explicit instantiations for the mutable and const flavors.
template buffer::ptr::iterator_impl<false>&
buffer::ptr::iterator_impl<false>::operator +=(size_t len);
template buffer::ptr::iterator_impl<true>&
buffer::ptr::iterator_impl<true>::operator +=(size_t len);
651
652 // -- buffer::list::iterator --
653 /*
654 buffer::list::iterator operator=(const buffer::list::iterator& other)
655 {
656 if (this != &other) {
657 bl = other.bl;
658 ls = other.ls;
659 off = other.off;
660 p = other.p;
661 p_off = other.p_off;
662 }
663 return *this;
664 }*/
665
// Position a list iterator at absolute offset `o`, starting from the
// first buffer and advancing.
template<bool is_const>
buffer::list::iterator_impl<is_const>::iterator_impl(bl_t *l, unsigned o)
  : bl(l), ls(&bl->_buffers), p(ls->begin()), off(0), p_off(0)
{
  *this += o;
}

// Convert a mutable iterator into either flavor; forwards to the
// 4-argument constructor (declared in the header, not visible here).
template<bool is_const>
buffer::list::iterator_impl<is_const>::iterator_impl(const buffer::list::iterator& i)
  : iterator_impl<is_const>(i.bl, i.off, i.p, i.p_off) {}
676
677 template<bool is_const>
678 auto buffer::list::iterator_impl<is_const>::operator +=(unsigned o)
679 -> iterator_impl&
680 {
681 //cout << this << " advance " << o << " from " << off
682 // << " (p_off " << p_off << " in " << p->length() << ")"
683 // << std::endl;
684
685 p_off +=o;
686 while (p != ls->end()) {
687 if (p_off >= p->length()) {
688 // skip this buffer
689 p_off -= p->length();
690 p++;
691 } else {
692 // somewhere in this buffer!
693 break;
694 }
695 }
696 if (p == ls->end() && p_off) {
697 throw end_of_buffer();
698 }
699 off += o;
700 return *this;
701 }
702
703 template<bool is_const>
704 void buffer::list::iterator_impl<is_const>::seek(unsigned o)
705 {
706 p = ls->begin();
707 off = p_off = 0;
708 *this += o;
709 }
710
711 template<bool is_const>
712 char buffer::list::iterator_impl<is_const>::operator*() const
713 {
714 if (p == ls->end())
715 throw end_of_buffer();
716 return (*p)[p_off];
717 }
718
719 template<bool is_const>
720 buffer::list::iterator_impl<is_const>&
721 buffer::list::iterator_impl<is_const>::operator++()
722 {
723 if (p == ls->end())
724 throw end_of_buffer();
725 *this += 1;
726 return *this;
727 }
728
729 template<bool is_const>
730 buffer::ptr buffer::list::iterator_impl<is_const>::get_current_ptr() const
731 {
732 if (p == ls->end())
733 throw end_of_buffer();
734 return ptr(*p, p_off, p->length() - p_off);
735 }
736
737 template<bool is_const>
738 bool buffer::list::iterator_impl<is_const>::is_pointing_same_raw(
739 const ptr& other) const
740 {
741 if (p == ls->end())
742 throw end_of_buffer();
743 return p->_raw == other._raw;
744 }
745
746 // copy data out.
747 // note that these all _append_ to dest!
748 template<bool is_const>
749 void buffer::list::iterator_impl<is_const>::copy(unsigned len, char *dest)
750 {
751 if (p == ls->end()) seek(off);
752 while (len > 0) {
753 if (p == ls->end())
754 throw end_of_buffer();
755
756 unsigned howmuch = p->length() - p_off;
757 if (len < howmuch) howmuch = len;
758 p->copy_out(p_off, howmuch, dest);
759 dest += howmuch;
760
761 len -= howmuch;
762 *this += howmuch;
763 }
764 }
765
766 template<bool is_const>
767 void buffer::list::iterator_impl<is_const>::copy(unsigned len, ptr &dest)
768 {
769 copy_deep(len, dest);
770 }
771
772 template<bool is_const>
773 void buffer::list::iterator_impl<is_const>::copy_deep(unsigned len, ptr &dest)
774 {
775 if (!len) {
776 return;
777 }
778 if (p == ls->end())
779 throw end_of_buffer();
780 dest = create(len);
781 copy(len, dest.c_str());
782 }
783 template<bool is_const>
784 void buffer::list::iterator_impl<is_const>::copy_shallow(unsigned len,
785 ptr &dest)
786 {
787 if (!len) {
788 return;
789 }
790 if (p == ls->end())
791 throw end_of_buffer();
792 unsigned howmuch = p->length() - p_off;
793 if (howmuch < len) {
794 dest = create(len);
795 copy(len, dest.c_str());
796 } else {
797 dest = ptr(*p, p_off, len);
798 *this += len;
799 }
800 }
801
802 template<bool is_const>
803 void buffer::list::iterator_impl<is_const>::copy(unsigned len, list &dest)
804 {
805 if (p == ls->end())
806 seek(off);
807 while (len > 0) {
808 if (p == ls->end())
809 throw end_of_buffer();
810
811 unsigned howmuch = p->length() - p_off;
812 if (len < howmuch)
813 howmuch = len;
814 dest.append(*p, p_off, howmuch);
815
816 len -= howmuch;
817 *this += howmuch;
818 }
819 }
820
821 template<bool is_const>
822 void buffer::list::iterator_impl<is_const>::copy(unsigned len, std::string &dest)
823 {
824 if (p == ls->end())
825 seek(off);
826 while (len > 0) {
827 if (p == ls->end())
828 throw end_of_buffer();
829
830 unsigned howmuch = p->length() - p_off;
831 const char *c_str = p->c_str();
832 if (len < howmuch)
833 howmuch = len;
834 dest.append(c_str + p_off, howmuch);
835
836 len -= howmuch;
837 *this += howmuch;
838 }
839 }
840
841 template<bool is_const>
842 void buffer::list::iterator_impl<is_const>::copy_all(list &dest)
843 {
844 if (p == ls->end())
845 seek(off);
846 while (1) {
847 if (p == ls->end())
848 return;
849
850 unsigned howmuch = p->length() - p_off;
851 const char *c_str = p->c_str();
852 dest.append(c_str + p_off, howmuch);
853
854 *this += howmuch;
855 }
856 }
857
// Expose up to `want` contiguous bytes at the cursor through *data and
// advance past them.  Returns the number of bytes exposed; 0 only when
// the iterator is truly at the end of the list.
template<bool is_const>
size_t buffer::list::iterator_impl<is_const>::get_ptr_and_advance(
  size_t want, const char **data)
{
  if (p == ls->end()) {
    // re-seek to `off` before concluding we are at the end — mirrors
    // the other copy* methods (presumably `p` can be stale while `off`
    // is still valid; confirm against list mutation paths).
    seek(off);
    if (p == ls->end()) {
      return 0;
    }
  }
  *data = p->c_str() + p_off;
  size_t l = std::min<size_t>(p->length() - p_off, want);
  p_off += l;
  // step to the next buffer once the current one is exhausted
  if (p_off == p->length()) {
    ++p;
    p_off = 0;
  }
  off += l;
  return l;
}
878
879 template<bool is_const>
880 uint32_t buffer::list::iterator_impl<is_const>::crc32c(
881 size_t length, uint32_t crc)
882 {
883 length = std::min<size_t>(length, get_remaining());
884 while (length > 0) {
885 const char *p;
886 size_t l = get_ptr_and_advance(length, &p);
887 crc = ceph_crc32c(crc, (unsigned char*)p, l);
888 length -= l;
889 }
890 return crc;
891 }
892
893 // explicitly instantiate only the iterator types we need, so we can hide the
894 // details in this compilation unit without introducing unnecessary link time
895 // dependencies.
template class buffer::list::iterator_impl<true>;
template class buffer::list::iterator_impl<false>;

// buffer::list::iterator layers the mutable interface over
// iterator_impl<false>; both constructors forward to the base.
buffer::list::iterator::iterator(bl_t *l, unsigned o)
  : iterator_impl(l, o)
{}

// NOTE(review): the 4-argument iterator_impl constructor is declared in
// the header, not visible in this file.
buffer::list::iterator::iterator(bl_t *l, unsigned o, list_iter_t ip, unsigned po)
  : iterator_impl(l, o, ip, po)
{}
906
907 // copy data in
908 void buffer::list::iterator::copy_in(unsigned len, const char *src, bool crc_reset)
909 {
910 // copy
911 if (p == ls->end())
912 seek(off);
913 while (len > 0) {
914 if (p == ls->end())
915 throw end_of_buffer();
916
917 unsigned howmuch = p->length() - p_off;
918 if (len < howmuch)
919 howmuch = len;
920 p->copy_in(p_off, howmuch, src, crc_reset);
921
922 src += howmuch;
923 len -= howmuch;
924 *this += howmuch;
925 }
926 }
927
928 void buffer::list::iterator::copy_in(unsigned len, const list& otherl)
929 {
930 if (p == ls->end())
931 seek(off);
932 unsigned left = len;
933 for (const auto& node : otherl._buffers) {
934 unsigned l = node.length();
935 if (left < l)
936 l = left;
937 copy_in(l, node.c_str());
938 left -= l;
939 if (left == 0)
940 break;
941 }
942 }
943
944 // -- buffer::list --
945
946 void buffer::list::swap(list& other) noexcept
947 {
948 std::swap(_len, other._len);
949 std::swap(_num, other._num);
950 std::swap(_carriage, other._carriage);
951 _buffers.swap(other._buffers);
952 }
953
954 bool buffer::list::contents_equal(const ceph::buffer::list& other) const
955 {
956 if (length() != other.length())
957 return false;
958
959 // buffer-wise comparison
960 if (true) {
961 auto a = std::cbegin(_buffers);
962 auto b = std::cbegin(other._buffers);
963 unsigned aoff = 0, boff = 0;
964 while (a != std::cend(_buffers)) {
965 unsigned len = a->length() - aoff;
966 if (len > b->length() - boff)
967 len = b->length() - boff;
968 if (memcmp(a->c_str() + aoff, b->c_str() + boff, len) != 0)
969 return false;
970 aoff += len;
971 if (aoff == a->length()) {
972 aoff = 0;
973 ++a;
974 }
975 boff += len;
976 if (boff == b->length()) {
977 boff = 0;
978 ++b;
979 }
980 }
981 return true;
982 }
983
984 // byte-wise comparison
985 if (false) {
986 bufferlist::const_iterator me = begin();
987 bufferlist::const_iterator him = other.begin();
988 while (!me.end()) {
989 if (*me != *him)
990 return false;
991 ++me;
992 ++him;
993 }
994 return true;
995 }
996 }
997
998 bool buffer::list::contents_equal(const void* const other,
999 size_t length) const
1000 {
1001 if (this->length() != length) {
1002 return false;
1003 }
1004
1005 const auto* other_buf = reinterpret_cast<const char*>(other);
1006 for (const auto& bp : buffers()) {
1007 assert(bp.length() <= length);
1008 if (std::memcmp(bp.c_str(), other_buf, bp.length()) != 0) {
1009 return false;
1010 } else {
1011 length -= bp.length();
1012 other_buf += bp.length();
1013 }
1014 }
1015
1016 return true;
1017 }
1018
1019 bool buffer::list::is_provided_buffer(const char* const dst) const
1020 {
1021 if (_buffers.empty()) {
1022 return false;
1023 }
1024 return (is_contiguous() && (_buffers.front().c_str() == dst));
1025 }
1026
1027 bool buffer::list::is_aligned(const unsigned align) const
1028 {
1029 for (const auto& node : _buffers) {
1030 if (!node.is_aligned(align)) {
1031 return false;
1032 }
1033 }
1034 return true;
1035 }
1036
1037 bool buffer::list::is_n_align_sized(const unsigned align) const
1038 {
1039 for (const auto& node : _buffers) {
1040 if (!node.is_n_align_sized(align)) {
1041 return false;
1042 }
1043 }
1044 return true;
1045 }
1046
1047 bool buffer::list::is_aligned_size_and_memory(
1048 const unsigned align_size,
1049 const unsigned align_memory) const
1050 {
1051 for (const auto& node : _buffers) {
1052 if (!node.is_aligned(align_memory) || !node.is_n_align_sized(align_size)) {
1053 return false;
1054 }
1055 }
1056 return true;
1057 }
1058
1059 bool buffer::list::is_zero() const {
1060 for (const auto& node : _buffers) {
1061 if (!node.is_zero()) {
1062 return false;
1063 }
1064 }
1065 return true;
1066 }
1067
1068 void buffer::list::zero()
1069 {
1070 for (auto& node : _buffers) {
1071 node.zero();
1072 }
1073 }
1074
// Zero bytes [o, o+l) of the list, touching only the buffers that
// overlap that range.  `p` tracks the absolute offset of the current
// node's first byte.
void buffer::list::zero(const unsigned o, const unsigned l)
{
  ceph_assert(o+l <= _len);
  unsigned p = 0;
  for (auto& node : _buffers) {
    if (p + node.length() > o) {
      // four overlap cases between [o, o+l) and [p, p+node.length()):
      if (p >= o && p+node.length() <= o+l) {
	// node fully inside the range
	// 'o'------------- l -----------|
	//      'p'-- node.length() --|
	node.zero();
      } else if (p >= o) {
	// node starts inside the range but extends past its end
	// 'o'------------- l -----------|
	//      'p'------- node.length() -------|
	node.zero(0, o+l-p);
      } else if (p + node.length() <= o+l) {
	// node starts before the range and ends inside it
	//    'o'------------- l -----------|
	// 'p'------- node.length() -------|
	node.zero(o-p, node.length()-(o-p));
      } else {
	// node straddles the entire range
	//       'o'----------- l -----------|
	// 'p'---------- node.length() ----------|
	node.zero(o-p, l);
      }
    }
    p += node.length();
    if (o+l <= p) {
      break;  // done
    }
  }
}
1105
1106 bool buffer::list::is_contiguous() const
1107 {
1108 return _num <= 1;
1109 }
1110
1111 bool buffer::list::is_n_page_sized() const
1112 {
1113 return is_n_align_sized(CEPH_PAGE_SIZE);
1114 }
1115
1116 bool buffer::list::is_page_aligned() const
1117 {
1118 return is_aligned(CEPH_PAGE_SIZE);
1119 }
1120
1121 int buffer::list::get_mempool() const
1122 {
1123 if (_buffers.empty()) {
1124 return mempool::mempool_buffer_anon;
1125 }
1126 return _buffers.back().get_mempool();
1127 }
1128
1129 void buffer::list::reassign_to_mempool(int pool)
1130 {
1131 for (auto& p : _buffers) {
1132 p._raw->reassign_to_mempool(pool);
1133 }
1134 }
1135
1136 void buffer::list::try_assign_to_mempool(int pool)
1137 {
1138 for (auto& p : _buffers) {
1139 p._raw->try_assign_to_mempool(pool);
1140 }
1141 }
1142
1143 uint64_t buffer::list::get_wasted_space() const
1144 {
1145 if (_num == 1)
1146 return _buffers.back().wasted();
1147
1148 std::vector<const raw*> raw_vec;
1149 raw_vec.reserve(_num);
1150 for (const auto& p : _buffers)
1151 raw_vec.push_back(p._raw);
1152 std::sort(raw_vec.begin(), raw_vec.end());
1153
1154 uint64_t total = 0;
1155 const raw *last = nullptr;
1156 for (const auto r : raw_vec) {
1157 if (r == last)
1158 continue;
1159 last = r;
1160 total += r->get_len();
1161 }
1162 // If multiple buffers are sharing the same raw buffer and they overlap
1163 // with each other, the wasted space will be underestimated.
1164 if (total <= length())
1165 return 0;
1166 return total - length();
1167 }
1168
void buffer::list::rebuild()
{
  // Collapse the list into one contiguous buffer.  An empty list just
  // resets the bookkeeping and parks the carriage on the shared
  // always-empty sentinel.
  if (_len == 0) {
    _carriage = &always_empty_bptr;
    _buffers.clear_and_dispose();
    _num = 0;
    return;
  }
  // page-aligned allocation when the total length is a page multiple
  if ((_len & ~CEPH_PAGE_MASK) == 0)
    rebuild(ptr_node::create(buffer::create_page_aligned(_len)));
  else
    rebuild(ptr_node::create(buffer::create(_len)));
}
1182
void buffer::list::rebuild(
  std::unique_ptr<buffer::ptr_node, buffer::ptr_node::disposer> nb)
{
  // Copy every segment into the single replacement buffer nb, then make
  // nb the only element of the list.
  unsigned pos = 0;
  // keep the new buffer in the same mempool as the current contents
  int mempool = _buffers.front().get_mempool();
  nb->reassign_to_mempool(mempool);
  for (auto& node : _buffers) {
    nb->copy_in(pos, node.length(), node.c_str(), false);
    pos += node.length();
  }
  _buffers.clear_and_dispose();
  if (likely(nb->length())) {
    _carriage = nb.get();
    _buffers.push_back(*nb.release());
    _num = 1;
  } else {
    // a zero-length rebuild leaves the list empty
    _carriage = &always_empty_bptr;
    _num = 0;
  }
  // cached crcs referred to the old raw buffers; drop them
  invalidate_crc();
}
1204
1205 bool buffer::list::rebuild_aligned(unsigned align)
1206 {
1207 return rebuild_aligned_size_and_memory(align, align);
1208 }
1209
bool buffer::list::rebuild_aligned_size_and_memory(unsigned align_size,
                                                   unsigned align_memory,
                                                   unsigned max_buffers)
{
  // Rewrite the list so every remaining ptr is align_memory-aligned and
  // a multiple of align_size in length.  Returns true if any data had
  // to be copied into a new allocation.
  bool had_to_rebuild = false;

  // when capped, enlarge align_size so the result fits in max_buffers
  if (max_buffers && _num > max_buffers && _len > (max_buffers * align_size)) {
    align_size = round_up_to(round_up_to(_len, max_buffers) / max_buffers, align_size);
  }
  auto p = std::begin(_buffers);
  auto p_prev = _buffers.before_begin();
  while (p != std::end(_buffers)) {
    // keep anything that's already align and sized aligned
    if (p->is_aligned(align_memory) && p->is_n_align_sized(align_size)) {
      /*cout << " segment " << (void*)p->c_str()
	     << " offset " << ((unsigned long)p->c_str() & (align - 1))
	     << " length " << p->length()
	     << " " << (p->length() & (align - 1)) << " ok" << std::endl;
      */
      p_prev = p++;
      continue;
    }

    // consolidate unaligned items, until we get something that is sized+aligned
    list unaligned;
    unsigned offset = 0;
    do {
      /*cout << " segment " << (void*)p->c_str()
	     << " offset " << ((unsigned long)p->c_str() & (align - 1))
	     << " length " << p->length() << " " << (p->length() & (align - 1))
	     << " overall offset " << offset << " " << (offset & (align - 1))
	     << " not ok" << std::endl;
      */
      offset += p->length();
      // no need to reallocate, relinking is enough thankfully to bi::list.
      auto p_after = _buffers.erase_after(p_prev);
      _num -= 1;
      unaligned._buffers.push_back(*p);
      unaligned._len += p->length();
      unaligned._num += 1;
      p = p_after;
    } while (p != std::end(_buffers) &&
	     (!p->is_aligned(align_memory) ||
	      !p->is_n_align_sized(align_size) ||
	      (offset % align_size)));
    // if the gathered run still violates the constraints, copy it into
    // one freshly aligned allocation
    if (!(unaligned.is_contiguous() && unaligned._buffers.front().is_aligned(align_memory))) {
      unaligned.rebuild(
	ptr_node::create(
	  buffer::create_aligned(unaligned._len, align_memory)));
      had_to_rebuild = true;
    }
    if (unaligned.get_num_buffers()) {
      _buffers.insert_after(p_prev, *ptr_node::create(unaligned._buffers.front()).release());
      _num += 1;
    } else {
      // a bufferlist containing only 0-length bptrs is rebuilt as empty
    }
    ++p_prev;
  }
  return had_to_rebuild;
}
1271
1272 bool buffer::list::rebuild_page_aligned()
1273 {
1274 return rebuild_aligned(CEPH_PAGE_SIZE);
1275 }
1276
1277 void buffer::list::reserve(size_t prealloc)
1278 {
1279 if (get_append_buffer_unused_tail_length() < prealloc) {
1280 auto ptr = ptr_node::create(buffer::create_small_page_aligned(prealloc));
1281 ptr->set_length(0); // unused, so far.
1282 _carriage = ptr.get();
1283 _buffers.push_back(*ptr.release());
1284 _num += 1;
1285 }
1286 }
1287
1288 void buffer::list::claim_append(list& bl)
1289 {
1290 // steal the other guy's buffers
1291 _len += bl._len;
1292 _num += bl._num;
1293 _buffers.splice_back(bl._buffers);
1294 bl.clear();
1295 }
1296
void buffer::list::append(char c)
{
  // Append a single byte, reusing the carriage's spare tail space when
  // possible.
  // put what we can into the existing append_buffer.
  unsigned gap = get_append_buffer_unused_tail_length();
  if (!gap) {
    // make a new buffer!
    auto buf = ptr_node::create(
      raw_combined::create(CEPH_BUFFER_APPEND_SIZE, 0, get_mempool()));
    buf->set_length(0);   // unused, so far.
    _carriage = buf.get();
    _buffers.push_back(*buf.release());
    _num += 1;
  } else if (unlikely(_carriage != &_buffers.back())) {
    // the carriage has room but is no longer the list tail; re-link a
    // fresh zero-length view of it at the tail so the byte lands at the
    // logical end of the list
    auto bptr = ptr_node::create(*_carriage, _carriage->length(), 0);
    _carriage = bptr.get();
    _buffers.push_back(*bptr.release());
    _num += 1;
  }
  _carriage->append(c);
  _len++;
}
1318
// Shared, always-zero-length ptr_node; _carriage points here whenever a
// list has no appendable tail buffer of its own (see rebuild()).
buffer::ptr_node buffer::list::always_empty_bptr;
1320
buffer::ptr_node& buffer::list::refill_append_space(const unsigned len)
{
  // Allocate a new appendable tail buffer with room for at least len
  // bytes and make it the carriage; returns the new tail.
  // make a new buffer.  fill out a complete page, factoring in the
  // raw_combined overhead.
  size_t need = round_up_to(len, sizeof(size_t)) + sizeof(raw_combined);
  size_t alen = round_up_to(need, CEPH_BUFFER_ALLOC_UNIT);
  if (_carriage == &_buffers.back()) {
    // we still own the tail: grow geometrically (double the previous
    // raw allocation) but stay under the tcmalloc small-object cap
    size_t nlen = round_up_to(_carriage->raw_length(), CEPH_BUFFER_ALLOC_UNIT) * 2;
    nlen = std::min(nlen, CEPH_BUFFER_ALLOC_UNIT_MAX);
    alen = std::max(alen, nlen);
  }
  alen -= sizeof(raw_combined);

  auto new_back = \
    ptr_node::create(raw_combined::create(alen, 0, get_mempool()));
  new_back->set_length(0);   // unused, so far.
  _carriage = new_back.get();
  _buffers.push_back(*new_back.release());
  _num += 1;
  return _buffers.back();
}
1342
void buffer::list::append(const char *data, unsigned len)
{
  // Copy len bytes onto the tail: first into the carriage's spare
  // space, then into a freshly allocated append buffer.
  _len += len;

  const unsigned free_in_last = get_append_buffer_unused_tail_length();
  const unsigned first_round = std::min(len, free_in_last);
  if (first_round) {
    // _buffers and carriage can desynchronize when 1) a new ptr
    // we don't own has been added into the _buffers 2) _buffers
    // has been emptied as a result of std::move or stolen by
    // claim_append.
    if (unlikely(_carriage != &_buffers.back())) {
      auto bptr = ptr_node::create(*_carriage, _carriage->length(), 0);
      _carriage = bptr.get();
      _buffers.push_back(*bptr.release());
      _num += 1;
    }
    _carriage->append(data, first_round);
  }

  // whatever did not fit goes into a brand-new append buffer
  const unsigned second_round = len - first_round;
  if (second_round) {
    auto& new_back = refill_append_space(second_round);
    new_back.append(data + first_round, second_round);
  }
}
1369
buffer::list::reserve_t buffer::list::obtain_contiguous_space(
  const unsigned len)
{
  // Hand out len contiguous writable bytes plus pointers to the two
  // length fields the caller must bump as it fills them in.
  // note: if len < the normal append_buffer size it *might*
  // be better to allocate a normal-sized append_buffer and
  // use part of it.  however, that optimizes for the case of
  // old-style types including new-style types.  and in most
  // such cases, this won't be the very first thing encoded to
  // the list, so append_buffer will already be allocated.
  // OTOH if everything is new-style, we *should* allocate
  // only what we need and conserve memory.
  if (unlikely(get_append_buffer_unused_tail_length() < len)) {
    auto new_back = \
      buffer::ptr_node::create(buffer::create(len)).release();
    new_back->set_length(0);   // unused, so far.
    _buffers.push_back(*new_back);
    _num += 1;
    _carriage = new_back;
    return { new_back->c_str(), &new_back->_len, &_len };
  } else {
    ceph_assert(!_buffers.empty());
    // re-link the carriage to the tail if they desynchronized (see append)
    if (unlikely(_carriage != &_buffers.back())) {
      auto bptr = ptr_node::create(*_carriage, _carriage->length(), 0);
      _carriage = bptr.get();
      _buffers.push_back(*bptr.release());
      _num += 1;
    }
    return { _carriage->end_c_str(), &_carriage->_len, &_len };
  }
}
1400
void buffer::list::append(const ptr& bp)
{
  // convenience alias: delegates to push_back
  push_back(bp);
}
1405
void buffer::list::append(ptr&& bp)
{
  // convenience alias: delegates to the move overload of push_back
  push_back(std::move(bp));
}
1410
void buffer::list::append(const ptr& bp, unsigned off, unsigned len)
{
  // Append the [off, off+len) subrange of bp, sharing its raw buffer.
  ceph_assert(len+off <= bp.length());
  if (!_buffers.empty()) {
    ptr &l = _buffers.back();
    // if the new range continues exactly where our tail ptr ends inside
    // the same raw buffer, extend the tail instead of adding a node
    if (l._raw == bp._raw && l.end() == bp.start() + off) {
      // yay contiguous with tail bp!
      l.set_length(l.length()+len);
      _len += len;
      return;
    }
  }
  // add new item to list
  _buffers.push_back(*ptr_node::create(bp, off, len).release());
  _len += len;
  _num += 1;
}
1428
1429 void buffer::list::append(const list& bl)
1430 {
1431 _len += bl._len;
1432 _num += bl._num;
1433 for (const auto& node : bl._buffers) {
1434 _buffers.push_back(*ptr_node::create(node).release());
1435 }
1436 }
1437
1438 void buffer::list::append(std::istream& in)
1439 {
1440 while (!in.eof()) {
1441 std::string s;
1442 getline(in, s);
1443 append(s.c_str(), s.length());
1444 if (s.length())
1445 append("\n", 1);
1446 }
1447 }
1448
buffer::list::contiguous_filler buffer::list::append_hole(const unsigned len)
{
  // Reserve len contiguous uninitialized bytes and return a filler that
  // points at them; _len already accounts for the hole.
  _len += len;

  if (unlikely(get_append_buffer_unused_tail_length() < len)) {
    // make a new append_buffer.  fill out a complete page, factoring in
    // the raw_combined overhead.
    auto& new_back = refill_append_space(len);
    new_back.set_length(len);
    return { new_back.c_str() };
  } else if (unlikely(_carriage != &_buffers.back())) {
    // carriage desynchronized from the tail; re-link a fresh view of it
    auto bptr = ptr_node::create(*_carriage, _carriage->length(), 0);
    _carriage = bptr.get();
    _buffers.push_back(*bptr.release());
    _num += 1;
  }
  _carriage->set_length(_carriage->length() + len);
  return { _carriage->end_c_str() - len };
}
1468
1469 void buffer::list::prepend_zero(unsigned len)
1470 {
1471 auto bp = ptr_node::create(len);
1472 bp->zero(false);
1473 _len += len;
1474 _num += 1;
1475 _buffers.push_front(*bp.release());
1476 }
1477
void buffer::list::append_zero(unsigned len)
{
  // Append len zero bytes: first fill the carriage's spare tail space,
  // then a freshly allocated buffer for the remainder.
  _len += len;

  const unsigned free_in_last = get_append_buffer_unused_tail_length();
  const unsigned first_round = std::min(len, free_in_last);
  if (first_round) {
    // carriage may have desynchronized from the tail (see append)
    if (unlikely(_carriage != &_buffers.back())) {
      auto bptr = ptr_node::create(*_carriage, _carriage->length(), 0);
      _carriage = bptr.get();
      _buffers.push_back(*bptr.release());
      _num += 1;
    }
    _carriage->append_zeros(first_round);
  }

  const unsigned second_round = len - first_round;
  if (second_round) {
    auto& new_back = refill_append_space(second_round);
    new_back.set_length(second_round);
    new_back.zero(false);
  }
}
1501
1502
1503 /*
1504 * get a char
1505 */
1506 const char& buffer::list::operator[](unsigned n) const
1507 {
1508 if (n >= _len)
1509 throw end_of_buffer();
1510
1511 for (const auto& node : _buffers) {
1512 if (n >= node.length()) {
1513 n -= node.length();
1514 continue;
1515 }
1516 return node[n];
1517 }
1518 ceph_abort();
1519 }
1520
1521 /*
1522 * return a contiguous ptr to whole bufferlist contents.
1523 */
char *buffer::list::c_str()
{
  // Return a pointer to the whole contents as one contiguous range,
  // coalescing (rebuild) only when actually necessary.
  if (const auto len = length(); len == 0) {
    return nullptr; // no non-empty buffers
  } else if (len != _buffers.front().length()) {
    rebuild();
  } else {
    // there are two *main* scenarios that hit this branch:
    //   1. bufferlist with single, non-empty buffer;
    //   2. bufferlist with single, non-empty buffer followed by
    //      empty buffer. splice() tries to not waste our appendable
    //      space; to carry it an empty bptr is added at the end.
    // we account for these and don't rebuild unnecessarily
  }
  return _buffers.front().c_str();
}
1540
1541 string buffer::list::to_str() const {
1542 string s;
1543 s.reserve(length());
1544 for (const auto& node : _buffers) {
1545 if (node.length()) {
1546 s.append(node.c_str(), node.length());
1547 }
1548 }
1549 return s;
1550 }
1551
void buffer::list::substr_of(const list& other, unsigned off, unsigned len)
{
  // Reset *this to a shallow copy of other's [off, off+len) range; the
  // new ptrs share other's raw buffers (no data is copied).
  if (off + len > other.length())
    throw end_of_buffer();

  clear();

  // skip off
  auto curbuf = std::cbegin(other._buffers);
  while (off > 0 && off >= curbuf->length()) {
    // skip this buffer
    //cout << "skipping over " << *curbuf << std::endl;
    off -= (*curbuf).length();
    ++curbuf;
  }
  ceph_assert(len == 0 || curbuf != std::cend(other._buffers));

  while (len > 0) {
    // partial?
    if (off + len < curbuf->length()) {
      //cout << "copying partial of " << *curbuf << std::endl;
      _buffers.push_back(*ptr_node::create(*curbuf, off, len).release());
      _len += len;
      _num += 1;
      break;
    }

    // through end
    //cout << "copying end (all?) of " << *curbuf << std::endl;
    unsigned howmuch = curbuf->length() - off;
    _buffers.push_back(*ptr_node::create(*curbuf, off, howmuch).release());
    _len += howmuch;
    _num += 1;
    len -= howmuch;
    off = 0;
    ++curbuf;
  }
}
1590
// funky modifier
// Remove [off, off+len) from this list; if claim_by is non-null the
// removed bytes are appended to it (sharing raw buffers where possible).
void buffer::list::splice(unsigned off, unsigned len, list *claim_by /*, bufferlist& replace_with */)
{ // fixme?
  if (len == 0)
    return;

  if (off >= length())
    throw end_of_buffer();

  ceph_assert(len > 0);
  //cout << "splice off " << off << " len " << len << " ... mylen = " << length() << std::endl;

  // skip off
  auto curbuf = std::begin(_buffers);
  auto curbuf_prev = _buffers.before_begin();
  while (off > 0) {
    ceph_assert(curbuf != std::end(_buffers));
    if (off >= (*curbuf).length()) {
      // skip this buffer
      //cout << "off = " << off << " skipping over " << *curbuf << std::endl;
      off -= (*curbuf).length();
      curbuf_prev = curbuf++;
    } else {
      // somewhere in this buffer!
      //cout << "off = " << off << " somewhere in " << *curbuf << std::endl;
      break;
    }
  }

  if (off) {
    // add a reference to the front bit, insert it before curbuf (which
    // we'll lose).
    //cout << "keeping front " << off << " of " << *curbuf << std::endl;
    _buffers.insert_after(curbuf_prev,
			  *ptr_node::create(*curbuf, 0, off).release());
    _len += off;
    _num += 1;
    ++curbuf_prev;
  }

  while (len > 0) {
    // partial or the last (appendable) one?
    if (const auto to_drop = off + len; to_drop < curbuf->length()) {
      //cout << "keeping end of " << *curbuf << ", losing first " << off+len << std::endl;
      if (claim_by)
	claim_by->append(*curbuf, off, len);
      curbuf->set_offset(to_drop + curbuf->offset()); // ignore beginning big
      curbuf->set_length(curbuf->length() - to_drop);
      _len -= to_drop;
      //cout << " now " << *curbuf << std::endl;
      break;
    }

    // hose though the end
    unsigned howmuch = curbuf->length() - off;
    //cout << "discarding " << howmuch << " of " << *curbuf << std::endl;
    if (claim_by)
      claim_by->append(*curbuf, off, howmuch);
    _len -= curbuf->length();
    if (curbuf == _carriage) {
      // the carriage is preserved: unlink it, empty it (offset moved to
      // its end, length zeroed), and re-link it at the tail so its
      // spare capacity is not wasted.  _num is unchanged on this path.
      // no need to reallocate, shrinking and relinking is enough.
      curbuf = _buffers.erase_after(curbuf_prev);
      _carriage->set_offset(_carriage->offset() + _carriage->length());
      _carriage->set_length(0);
      _buffers.push_back(*_carriage);
    } else {
      curbuf = _buffers.erase_after_and_dispose(curbuf_prev);
      _num -= 1;
    }
    len -= howmuch;
    off = 0;
  }

  // splice in *replace (implement me later?)
}
1666
1667 void buffer::list::write(int off, int len, std::ostream& out) const
1668 {
1669 list s;
1670 s.substr_of(*this, off, len);
1671 for (const auto& node : s._buffers) {
1672 if (node.length()) {
1673 out.write(node.c_str(), node.length());
1674 }
1675 }
1676 }
1677
1678 void buffer::list::encode_base64(buffer::list& o)
1679 {
1680 bufferptr bp(length() * 4 / 3 + 3);
1681 int l = ceph_armor(bp.c_str(), bp.c_str() + bp.length(), c_str(), c_str() + length());
1682 bp.set_length(l);
1683 o.push_back(std::move(bp));
1684 }
1685
1686 void buffer::list::decode_base64(buffer::list& e)
1687 {
1688 bufferptr bp(4 + ((e.length() * 3) / 4));
1689 int l = ceph_unarmor(bp.c_str(), bp.c_str() + bp.length(), e.c_str(), e.c_str() + e.length());
1690 if (l < 0) {
1691 std::ostringstream oss;
1692 oss << "decode_base64: decoding failed:\n";
1693 hexdump(oss);
1694 throw buffer::malformed_input(oss.str().c_str());
1695 }
1696 ceph_assert(l <= (int)bp.length());
1697 bp.set_length(l);
1698 push_back(std::move(bp));
1699 }
1700
1701 ssize_t buffer::list::pread_file(const char *fn, uint64_t off, uint64_t len, std::string *error)
1702 {
1703 int fd = TEMP_FAILURE_RETRY(::open(fn, O_RDONLY|O_CLOEXEC|O_BINARY));
1704 if (fd < 0) {
1705 int err = errno;
1706 std::ostringstream oss;
1707 oss << "can't open " << fn << ": " << cpp_strerror(err);
1708 *error = oss.str();
1709 return -err;
1710 }
1711
1712 struct stat st;
1713 // FIPS zeroization audit 20191115: this memset is not security related.
1714 memset(&st, 0, sizeof(st));
1715 if (::fstat(fd, &st) < 0) {
1716 int err = errno;
1717 std::ostringstream oss;
1718 oss << "bufferlist::read_file(" << fn << "): stat error: "
1719 << cpp_strerror(err);
1720 *error = oss.str();
1721 VOID_TEMP_FAILURE_RETRY(::close(fd));
1722 return -err;
1723 }
1724
1725 if (off > (uint64_t)st.st_size) {
1726 std::ostringstream oss;
1727 oss << "bufferlist::read_file(" << fn << "): read error: size < offset";
1728 *error = oss.str();
1729 VOID_TEMP_FAILURE_RETRY(::close(fd));
1730 return 0;
1731 }
1732
1733 if (len > st.st_size - off) {
1734 len = st.st_size - off;
1735 }
1736 ssize_t ret = lseek64(fd, off, SEEK_SET);
1737 if (ret != (ssize_t)off) {
1738 return -errno;
1739 }
1740
1741 ret = read_fd(fd, len);
1742 if (ret < 0) {
1743 std::ostringstream oss;
1744 oss << "bufferlist::read_file(" << fn << "): read error:"
1745 << cpp_strerror(ret);
1746 *error = oss.str();
1747 VOID_TEMP_FAILURE_RETRY(::close(fd));
1748 return ret;
1749 } else if (ret != (ssize_t)len) {
1750 // Premature EOF.
1751 // Perhaps the file changed between stat() and read()?
1752 std::ostringstream oss;
1753 oss << "bufferlist::read_file(" << fn << "): warning: got premature EOF.";
1754 *error = oss.str();
1755 // not actually an error, but weird
1756 }
1757 VOID_TEMP_FAILURE_RETRY(::close(fd));
1758 return 0;
1759 }
1760
int buffer::list::read_file(const char *fn, std::string *error)
{
  // Read the whole of file fn (size taken from fstat) into this list.
  // Returns negative errno on failure with *error set; 0 otherwise.
  int fd = TEMP_FAILURE_RETRY(::open(fn, O_RDONLY|O_CLOEXEC|O_BINARY));
  if (fd < 0) {
    int err = errno;
    std::ostringstream oss;
    oss << "can't open " << fn << ": " << cpp_strerror(err);
    *error = oss.str();
    return -err;
  }

  struct stat st;
  // FIPS zeroization audit 20191115: this memset is not security related.
  memset(&st, 0, sizeof(st));
  if (::fstat(fd, &st) < 0) {
    int err = errno;
    std::ostringstream oss;
    oss << "bufferlist::read_file(" << fn << "): stat error: "
        << cpp_strerror(err);
    *error = oss.str();
    VOID_TEMP_FAILURE_RETRY(::close(fd));
    return -err;
  }

  ssize_t ret = read_fd(fd, st.st_size);
  if (ret < 0) {
    std::ostringstream oss;
    oss << "bufferlist::read_file(" << fn << "): read error:"
	<< cpp_strerror(ret);
    *error = oss.str();
    VOID_TEMP_FAILURE_RETRY(::close(fd));
    return ret;
  }
  else if (ret != st.st_size) {
    // Premature EOF.
    // Perhaps the file changed between stat() and read()?
    std::ostringstream oss;
    oss << "bufferlist::read_file(" << fn << "): warning: got premature EOF.";
    *error = oss.str();
    // not actually an error, but weird
  }
  VOID_TEMP_FAILURE_RETRY(::close(fd));
  return 0;
}
1805
1806 ssize_t buffer::list::read_fd(int fd, size_t len)
1807 {
1808 auto bp = ptr_node::create(buffer::create(len));
1809 ssize_t ret = safe_read(fd, (void*)bp->c_str(), len);
1810 if (ret >= 0) {
1811 bp->set_length(ret);
1812 push_back(std::move(bp));
1813 }
1814 return ret;
1815 }
1816
1817 ssize_t buffer::list::recv_fd(int fd, size_t len)
1818 {
1819 auto bp = ptr_node::create(buffer::create(len));
1820 ssize_t ret = safe_recv(fd, (void*)bp->c_str(), len);
1821 if (ret >= 0) {
1822 bp->set_length(ret);
1823 push_back(std::move(bp));
1824 }
1825 return ret;
1826 }
1827
int buffer::list::write_file(const char *fn, int mode)
{
  // Write the whole list to file fn (created/truncated with 'mode').
  // Returns negative errno on failure; 0 on success.
  int fd = TEMP_FAILURE_RETRY(::open(fn, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC|O_BINARY, mode));
  if (fd < 0) {
    int err = errno;
    cerr << "bufferlist::write_file(" << fn << "): failed to open file: "
	 << cpp_strerror(err) << std::endl;
    return -err;
  }
  int ret = write_fd(fd);
  if (ret) {
    cerr << "bufferlist::write_fd(" << fn << "): write_fd error: "
	 << cpp_strerror(ret) << std::endl;
    VOID_TEMP_FAILURE_RETRY(::close(fd));
    return ret;
  }
  // close errors (e.g. deferred write-back failures) are reported too
  if (TEMP_FAILURE_RETRY(::close(fd))) {
    int err = errno;
    cerr << "bufferlist::write_file(" << fn << "): close error: "
	 << cpp_strerror(err) << std::endl;
    return -err;
  }
  return 0;
}
1852
// Write 'bytes' bytes described by vec[0..veclen) to fd at 'offset',
// retrying on EINTR and advancing the iovec array across partial
// writes.  Returns 0 on success or a negative errno.
static int do_writev(int fd, struct iovec *vec, uint64_t offset, unsigned veclen, unsigned bytes)
{
  while (bytes > 0) {
    ssize_t r = 0;
#ifdef HAVE_PWRITEV
    r = ::pwritev(fd, vec, veclen, offset);
#else
    // no pwritev: emulate with an explicit seek followed by writev
    r = ::lseek64(fd, offset, SEEK_SET);
    if (r != offset) {
      return -errno;
    }
    r = ::writev(fd, vec, veclen);
#endif
    if (r < 0) {
      if (errno == EINTR)
	continue;
      return -errno;
    }

    bytes -= r;
    offset += r;
    if (bytes == 0) break;

    // partial write: consume fully-written iovec entries, then trim the
    // first partially-written one
    while (r > 0) {
      if (vec[0].iov_len <= (size_t)r) {
	// drain this whole item
	r -= vec[0].iov_len;
	++vec;
	--veclen;
      } else {
	vec[0].iov_base = (char *)vec[0].iov_base + r;
	vec[0].iov_len -= r;
	break;
      }
    }
  }
  return 0;
}
1891
1892 #ifndef _WIN32
int buffer::list::write_fd(int fd) const
{
  // Sequentially write the whole list to fd with writev, batching at
  // most IOV_MAX segments per syscall and recovering from partial
  // writes.  Returns 0 on success or a negative errno.
  // use writev!
  iovec iov[IOV_MAX];
  int iovlen = 0;
  ssize_t bytes = 0;

  auto p = std::cbegin(_buffers);
  while (p != std::cend(_buffers)) {
    if (p->length() > 0) {
      iov[iovlen].iov_base = (void *)p->c_str();
      iov[iovlen].iov_len = p->length();
      bytes += p->length();
      iovlen++;
    }
    ++p;

    // flush the batch when it is full or we are out of buffers
    if (iovlen == IOV_MAX ||
	p == _buffers.end()) {
      iovec *start = iov;
      int num = iovlen;
      ssize_t wrote;
    retry:
      wrote = ::writev(fd, start, num);
      if (wrote < 0) {
	int err = errno;
	if (err == EINTR)
	  goto retry;
	return -err;
      }
      if (wrote < bytes) {
	// partial write, recover!
	while ((size_t)wrote >= start[0].iov_len) {
	  wrote -= start[0].iov_len;
	  bytes -= start[0].iov_len;
	  start++;
	  num--;
	}
	if (wrote > 0) {
	  start[0].iov_len -= wrote;
	  start[0].iov_base = (char *)start[0].iov_base + wrote;
	  bytes -= wrote;
	}
	goto retry;
      }
      iovlen = 0;
      bytes = 0;
    }
  }
  return 0;
}
1944
1945 int buffer::list::send_fd(int fd) const {
1946 return buffer::list::write_fd(fd);
1947 }
1948
int buffer::list::write_fd(int fd, uint64_t offset) const
{
  // Positioned write of the whole list starting at 'offset', batching
  // at most IOV_MAX segments per do_writev call.
  iovec iov[IOV_MAX];

  auto p = std::cbegin(_buffers);
  uint64_t left_pbrs = get_num_buffers();
  while (left_pbrs) {
    ssize_t bytes = 0;
    unsigned iovlen = 0;
    uint64_t size = std::min<uint64_t>(left_pbrs, IOV_MAX);
    left_pbrs -= size;
    // fill one iovec batch; zero-length ptrs are included harmlessly
    while (size > 0) {
      iov[iovlen].iov_base = (void *)p->c_str();
      iov[iovlen].iov_len = p->length();
      iovlen++;
      bytes += p->length();
      ++p;
      size--;
    }

    int r = do_writev(fd, iov, offset, iovlen, bytes);
    if (r < 0)
      return r;
    offset += bytes;
  }
  return 0;
}
1976 #else
int buffer::list::write_fd(int fd) const
{
  // Sequential write of every buffer with plain ::write, looping until
  // each buffer is fully written.
  // There's no writev on Windows. WriteFileGather may be an option,
  // but it has strict requirements in terms of buffer size and alignment.
  auto p = std::cbegin(_buffers);
  uint64_t left_pbrs = get_num_buffers();
  while (left_pbrs) {
    // NOTE(review): 'written' is int while p->length() is unsigned —
    // fine while single buffers stay below 2 GiB; confirm upstream.
    int written = 0;
    while (written < p->length()) {
      int r = ::write(fd, p->c_str(), p->length() - written);
      if (r < 0)
	return -errno;

      written += r;
    }

    left_pbrs--;
    p++;
  }

  return 0;
}
1999
int buffer::list::send_fd(int fd) const
{
  // Socket variant of the Windows write_fd: use ::send and map errors
  // through ceph_sock_errno (WSA error codes, not errno).
  // There's no writev on Windows. WriteFileGather may be an option,
  // but it has strict requirements in terms of buffer size and alignment.
  auto p = std::cbegin(_buffers);
  uint64_t left_pbrs = get_num_buffers();
  while (left_pbrs) {
    int written = 0;
    while (written < p->length()) {
      int r = ::send(fd, p->c_str(), p->length() - written, 0);
      if (r < 0)
	return -ceph_sock_errno();

      written += r;
    }

    left_pbrs--;
    p++;
  }

  return 0;
}
2022
2023 int buffer::list::write_fd(int fd, uint64_t offset) const
2024 {
2025 int r = ::lseek64(fd, offset, SEEK_SET);
2026 if (r != offset)
2027 return -errno;
2028
2029 return write_fd(fd);
2030 }
2031 #endif
2032
buffer::list::iov_vec_t buffer::list::prepare_iovs() const
{
  // Split the buffers into groups of at most IOV_MAX iovecs each, so a
  // caller can hand each group to a writev/pwritev-style syscall.
  size_t index = 0;
  uint64_t off = 0;
  iov_vec_t iovs{_num / IOV_MAX + 1};
  auto it = iovs.begin();
  for (auto& bp : _buffers) {
    if (index == 0) {
      // starting a new group: record its byte offset and size its iov
      // array to the buffers remaining (capped at IOV_MAX)
      it->offset = off;
      it->length = 0;
      size_t nr_iov_created = std::distance(iovs.begin(), it);
      it->iov.resize(
	std::min(_num - IOV_MAX * nr_iov_created, (size_t)IOV_MAX));
    }
    it->iov[index].iov_base = (void*)bp.c_str();
    it->iov[index].iov_len = bp.length();
    off += bp.length();
    it->length += bp.length();
    if (++index == IOV_MAX) {
      // continue with a new vector<iov> if we have more buf
      ++it;
      index = 0;
    }
  }
  return iovs;
}
2059
__u32 buffer::list::crc32c(__u32 crc) const
{
  // Compute crc32c over the whole list, consulting and filling each
  // raw buffer's per-range crc cache to avoid recomputation.
  int cache_misses = 0;
  int cache_hits = 0;
  int cache_adjusts = 0;

  for (const auto& node : _buffers) {
    if (node.length()) {
      raw* const r = node._raw;
      // cache key: the [begin, end) byte range within the raw buffer
      pair<size_t, size_t> ofs(node.offset(), node.offset() + node.length());
      pair<uint32_t, uint32_t> ccrc;
      if (r->get_crc(ofs, &ccrc)) {
	if (ccrc.first == crc) {
	  // got it already
	  crc = ccrc.second;
	  cache_hits++;
	} else {
	  /* If we have cached crc32c(buf, v) for initial value v,
	   * we can convert this to a different initial value v' by:
	   * crc32c(buf, v') = crc32c(buf, v) ^ adjustment
	   * where adjustment = crc32c(0*len(buf), v ^ v')
	   *
	   * http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
	   * note, u for our crc32c implementation is 0
	   */
	  crc = ccrc.second ^ ceph_crc32c(ccrc.first ^ crc, NULL, node.length());
	  cache_adjusts++;
	}
      } else {
	// not cached: compute and remember (base crc -> result crc)
	cache_misses++;
	uint32_t base = crc;
	crc = ceph_crc32c(crc, (unsigned char*)node.c_str(), node.length());
	r->set_crc(ofs, make_pair(base, crc));
      }
    }
  }

  // optional global accounting of cache effectiveness
  if (buffer_track_crc) {
    if (cache_adjusts)
      buffer_cached_crc_adjusted += cache_adjusts;
    if (cache_hits)
      buffer_cached_crc += cache_hits;
    if (cache_misses)
      buffer_missed_crc += cache_misses;
  }

  return crc;
}
2108
2109 void buffer::list::invalidate_crc()
2110 {
2111 for (const auto& node : _buffers) {
2112 if (node._raw) {
2113 node._raw->invalidate_crc();
2114 }
2115 }
2116 }
2117
2118 /**
2119 * Binary write all contents to a C++ stream
2120 */
2121 void buffer::list::write_stream(std::ostream &out) const
2122 {
2123 for (const auto& node : _buffers) {
2124 if (node.length() > 0) {
2125 out.write(node.c_str(), node.length());
2126 }
2127 }
2128 }
2129
2130
void buffer::list::hexdump(std::ostream &out, bool trailing_newline) const
{
  // Pretty-print the contents in `hexdump -C` format, collapsing runs
  // of identical 16-byte rows into a single '*' line.
  if (!length())
    return;

  std::ios_base::fmtflags original_flags = out.flags();

  // do our best to match the output of hexdump -C, for better
  // diff'ing!

  out.setf(std::ios::right);
  out.fill('0');

  unsigned per = 16;
  char last_row_char = '\0';
  bool was_same = false, did_star = false;
  for (unsigned o=0; o<length(); o += per) {
    if (o == 0) {
      last_row_char = (*this)[o];
    }

    // detect a row whose bytes all equal the previous row's byte value,
    // so repeats can be collapsed (never collapse the final row)
    if (o + per < length()) {
      bool row_is_same = true;
      for (unsigned i=0; i<per && o+i<length(); i++) {
	char current_char = (*this)[o+i];
	if (current_char != last_row_char) {
	  if (i == 0) {
	    last_row_char = current_char;
	    was_same = false;
	    did_star = false;
	  } else {
	    row_is_same = false;
	  }
	}
      }
      if (row_is_same) {
	if (was_same) {
	  if (!did_star) {
	    out << "\n*";
	    did_star = true;
	  }
	  continue;
	}
	was_same = true;
      } else {
	was_same = false;
	did_star = false;
      }
    }
    if (o)
      out << "\n";
    out << std::hex << std::setw(8) << o << " ";

    // hex column, with an extra space after the 8th byte
    unsigned i;
    for (i=0; i<per && o+i<length(); i++) {
      if (i == 8)
	out << ' ';
      out << " " << std::setw(2) << ((unsigned)(*this)[o+i] & 0xff);
    }
    // pad a short final row so the ASCII column lines up
    for (; i<per; i++) {
      if (i == 8)
	out << ' ';
      out << "   ";
    }

    // ASCII column; non-printable bytes render as '.'
    out << "  |";
    for (i=0; i<per && o+i<length(); i++) {
      char c = (*this)[o+i];
      if (isupper(c) || islower(c) || isdigit(c) || c == ' ' || ispunct(c))
	out << c;
      else
	out << '.';
    }
    out << '|' << std::dec;
  }
  if (trailing_newline) {
    out << "\n" << std::hex << std::setw(8) << length();
    out << "\n";
  }

  out.flags(original_flags);
}
2213
2214
2215 buffer::list buffer::list::static_from_mem(char* c, size_t l) {
2216 list bl;
2217 bl.push_back(ptr_node::create(create_static(l, c)));
2218 return bl;
2219 }
2220
2221 buffer::list buffer::list::static_from_cstring(char* c) {
2222 return static_from_mem(c, std::strlen(c));
2223 }
2224
2225 buffer::list buffer::list::static_from_string(string& s) {
2226 // C++14 just has string::data return a char* from a non-const
2227 // string.
2228 return static_from_mem(const_cast<char*>(s.data()), s.length());
2229 // But the way buffer::list mostly doesn't work in a sane way with
2230 // const makes me generally sad.
2231 }
2232
// buffer::raw is not a standard layout type.
// Hand-rolled offsetof(): the standard macro is undefined for
// non-standard-layout types, so measure against a fake object placed
// at address 1024 instead.
#define BUF_OFFSETOF(type, field) \
  (reinterpret_cast<std::uintptr_t>(&(((type*)1024)->field)) - 1024u)
2236
bool buffer::ptr_node::dispose_if_hypercombined(
  buffer::ptr_node* const delete_this)
{
  // A "hypercombined" ptr_node lives inside its raw's bptr_storage and
  // must be destroyed in place rather than operator-deleted; detect
  // that case by address comparison.
  // in case _raw is nullptr
  const std::uintptr_t bptr =
    (reinterpret_cast<std::uintptr_t>(delete_this->_raw) +
     BUF_OFFSETOF(buffer::raw, bptr_storage));
  const bool is_hypercombined =
    reinterpret_cast<std::uintptr_t>(delete_this) == bptr;
  if (is_hypercombined) {
    // hypercombining is disabled in create_hypercombined(), so hitting
    // this branch indicates a bug; the assert always fires
    ceph_assert_always("hypercombining is currently disabled" == nullptr);
    delete_this->~ptr_node();
    return true;
  } else {
    return false;
  }
}
2254
std::unique_ptr<buffer::ptr_node, buffer::ptr_node::disposer>
buffer::ptr_node::create_hypercombined(ceph::unique_leakable_ptr<buffer::raw> r)
{
  // Despite the name, this currently performs a plain heap allocation.
  // FIXME: we don't currently hypercombine buffers due to crashes
  // observed in the rados suite. After fixing we'll use placement
  // new to create ptr_node on buffer::raw::bptr_storage.
  return std::unique_ptr<buffer::ptr_node, buffer::ptr_node::disposer>(
    new ptr_node(std::move(r)));
}
2264
buffer::ptr_node* buffer::ptr_node::cloner::operator()(
  const buffer::ptr_node& clone_this)
{
  // heap copy-construct; used when cloning intrusive buffer lists
  return new ptr_node(clone_this);
}
2270
// debug printer: raw's data address, length and current refcount
std::ostream& buffer::operator<<(std::ostream& out, const buffer::raw &r) {
  return out << "buffer::raw("
	     << (void*)r.get_data() << " len " << r.get_len()
	     << " nref " << r.nref.load() << ")";
}
2276
// debug printer: ptr's window (offset~length) plus details of the raw
// buffer it references, if any
std::ostream& buffer::operator<<(std::ostream& out, const buffer::ptr& bp) {
  if (bp.have_raw())
    out << "buffer::ptr(" << bp.offset() << "~" << bp.length()
	<< " " << (void*)bp.c_str()
	<< " in raw " << (void*)bp.raw_c_str()
	<< " len " << bp.raw_length()
	<< " nref " << bp.raw_nref() << ")";
  else
    out << "buffer:ptr(" << bp.offset() << "~" << bp.length() << " no raw)";
  return out;
}
2288
2289 std::ostream& buffer::operator<<(std::ostream& out, const buffer::list& bl) {
2290 out << "buffer::list(len=" << bl.length() << ",\n";
2291
2292 for (const auto& node : bl.buffers()) {
2293 out << "\t" << node;
2294 if (&node != &bl.buffers().back()) {
2295 out << ",\n";
2296 }
2297 }
2298 out << "\n)";
2299 return out;
2300 }
2301
// Register each concrete buffer::raw subtype with the buffer_meta
// mempool so per-type allocation counts show up in mempool accounting.
MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_malloc, buffer_raw_malloc,
			      buffer_meta);
MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_posix_aligned,
			      buffer_raw_posix_aligned, buffer_meta);
MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_char, buffer_raw_char, buffer_meta);
MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_claimed_char, buffer_raw_claimed_char,
			      buffer_meta);
MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_static, buffer_raw_static,
			      buffer_meta);
2311
2312
2313 void ceph::buffer::list::page_aligned_appender::_refill(size_t len) {
2314 const unsigned alloc =
2315 std::max(min_alloc,
2316 shift_round_up(static_cast<unsigned>(len),
2317 static_cast<unsigned>(CEPH_PAGE_SHIFT)));
2318 auto new_back = \
2319 ptr_node::create(buffer::create_page_aligned(alloc));
2320 new_back->set_length(0); // unused, so far.
2321 bl.push_back(std::move(new_back));
2322 }
2323
2324 namespace ceph::buffer {
2325 inline namespace v15_2_0 {
2326
// NOTE(review): -Wnon-virtual-dtor is silenced around this class —
// presumably because the error-category base lacks a virtual destructor
// and the single instance is a function-local static that is never
// deleted through a base pointer; confirm against converting_category.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wnon-virtual-dtor"
// Error category for ceph::buffer::errc: maps buffer error codes into
// the boost::system machinery and to negative-errno integers.
class buffer_error_category : public ceph::converting_category {
public:
  buffer_error_category(){}
  // Category identifier ("buffer").
  const char* name() const noexcept override;
  // C-string message for ev; the char*/size_t buffer args are unused.
  const char* message(int ev, char*, std::size_t) const noexcept override;
  // std::string variant; forwards to the C-string overload.
  std::string message(int ev) const override;
  // Map a code from this category onto a generic error_condition.
  boost::system::error_condition default_error_condition(int ev) const noexcept
    override;
  using ceph::converting_category::equivalent;
  bool equivalent(int ev, const boost::system::error_condition& c) const
    noexcept override;
  // Convert a code to the negative-errno convention used across Ceph.
  int from_code(int ev) const noexcept override;
};
#pragma GCC diagnostic pop
#pragma clang diagnostic pop
2346
// Category identifier reported through boost::system error interfaces.
const char* buffer_error_category::name() const noexcept {
  return "buffer";
}
2350
2351 const char*
2352 buffer_error_category::message(int ev, char*, std::size_t) const noexcept {
2353 using ceph::buffer::errc;
2354 if (ev == 0)
2355 return "No error";
2356
2357 switch (static_cast<errc>(ev)) {
2358 case errc::bad_alloc:
2359 return "Bad allocation";
2360
2361 case errc::end_of_buffer:
2362 return "End of buffer";
2363
2364 case errc::malformed_input:
2365 return "Malformed input";
2366 }
2367
2368 return "Unknown error";
2369 }
2370
// std::string overload required by the category interface; defers to
// the C-string variant (whose extra buffer arguments are ignored).
std::string buffer_error_category::message(int ev) const {
  return message(ev, nullptr, 0);
}
2374
2375 boost::system::error_condition
2376 buffer_error_category::default_error_condition(int ev)const noexcept {
2377 using ceph::buffer::errc;
2378 switch (static_cast<errc>(ev)) {
2379 case errc::bad_alloc:
2380 return boost::system::errc::not_enough_memory;
2381 case errc::end_of_buffer:
2382 case errc::malformed_input:
2383 return boost::system::errc::io_error;
2384 }
2385 return { ev, *this };
2386 }
2387
// A code from this category is equivalent to exactly the conditions
// produced by its default mapping above.
bool buffer_error_category::equivalent(int ev, const boost::system::error_condition& c) const noexcept {
  return default_error_condition(ev) == c;
}
2391
2392 int buffer_error_category::from_code(int ev) const noexcept {
2393 using ceph::buffer::errc;
2394 switch (static_cast<errc>(ev)) {
2395 case errc::bad_alloc:
2396 return -ENOMEM;
2397
2398 case errc::end_of_buffer:
2399 return -EIO;
2400
2401 case errc::malformed_input:
2402 return -EIO;
2403 }
2404 return -EDOM;
2405 }
2406
// Singleton accessor for the buffer error category.  The function-local
// static gives thread-safe lazy construction (C++11 magic statics).
const boost::system::error_category& buffer_category() noexcept {
  static const buffer_error_category c;
  return c;
}
2411 }
2412 }