]> git.proxmox.com Git - ceph.git/blame - ceph/src/common/buffer.cc
import ceph pacific 16.2.5
[ceph.git] / ceph / src / common / buffer.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
31f18b77 15#include <atomic>
9f95a23c 16#include <cstring>
31f18b77
FG
17#include <errno.h>
18#include <limits.h>
19
20#include <sys/uio.h>
21
11fdf7f2
TL
22#include "include/ceph_assert.h"
23#include "include/types.h"
24#include "include/buffer_raw.h"
7c673cae
FG
25#include "include/compat.h"
26#include "include/mempool.h"
27#include "armor.h"
28#include "common/environment.h"
29#include "common/errno.h"
f67539c2 30#include "common/error_code.h"
7c673cae 31#include "common/safe_io.h"
7c673cae
FG
32#include "common/strtol.h"
33#include "common/likely.h"
34#include "common/valgrind.h"
35#include "common/deleter.h"
7c673cae 36#include "common/RWLock.h"
f67539c2 37#include "common/error_code.h"
11fdf7f2 38#include "include/spinlock.h"
7c673cae 39#include "include/scope_guard.h"
31f18b77 40
f67539c2
TL
41using std::cerr;
42using std::make_pair;
43using std::pair;
44using std::string;
45
31f18b77 46using namespace ceph;
7c673cae 47
11fdf7f2 48#define CEPH_BUFFER_ALLOC_UNIT 4096u
7c673cae
FG
49#define CEPH_BUFFER_APPEND_SIZE (CEPH_BUFFER_ALLOC_UNIT - sizeof(raw_combined))
50
51#ifdef BUFFER_DEBUG
11fdf7f2
TL
52static ceph::spinlock debug_lock;
53# define bdout { std::lock_guard<ceph::spinlock> lg(debug_lock); std::cout
54# define bendl std::endl; }
7c673cae
FG
55#else
56# define bdout if (0) { std::cout
57# define bendl std::endl; }
58#endif
59
9f95a23c
TL
60 static ceph::atomic<unsigned> buffer_cached_crc { 0 };
61 static ceph::atomic<unsigned> buffer_cached_crc_adjusted { 0 };
62 static ceph::atomic<unsigned> buffer_missed_crc { 0 };
31f18b77 63
7c673cae
FG
64 static bool buffer_track_crc = get_env_bool("CEPH_BUFFER_TRACK");
65
66 void buffer::track_cached_crc(bool b) {
67 buffer_track_crc = b;
68 }
69 int buffer::get_cached_crc() {
31f18b77 70 return buffer_cached_crc;
7c673cae
FG
71 }
72 int buffer::get_cached_crc_adjusted() {
31f18b77 73 return buffer_cached_crc_adjusted;
7c673cae
FG
74 }
75
76 int buffer::get_missed_crc() {
31f18b77 77 return buffer_missed_crc;
7c673cae
FG
78 }
79
7c673cae
FG
80 /*
81 * raw_combined is always placed within a single allocation along
82 * with the data buffer. the data goes at the beginning, and
83 * raw_combined at the end.
84 */
85 class buffer::raw_combined : public buffer::raw {
86 size_t alignment;
87 public:
3efd9988
FG
88 raw_combined(char *dataptr, unsigned l, unsigned align,
89 int mempool)
90 : raw(dataptr, l, mempool),
7c673cae 91 alignment(align) {
7c673cae
FG
92 }
93 raw* clone_empty() override {
9f95a23c 94 return create(len, alignment).release();
7c673cae
FG
95 }
96
9f95a23c
TL
97 static ceph::unique_leakable_ptr<buffer::raw>
98 create(unsigned len,
99 unsigned align,
100 int mempool = mempool::mempool_buffer_anon)
101 {
b3b6e05e
TL
102 // posix_memalign() requires a multiple of sizeof(void *)
103 align = std::max<unsigned>(align, sizeof(void *));
11fdf7f2 104 size_t rawlen = round_up_to(sizeof(buffer::raw_combined),
7c673cae 105 alignof(buffer::raw_combined));
11fdf7f2 106 size_t datalen = round_up_to(len, alignof(buffer::raw_combined));
7c673cae 107
31f18b77
FG
108#ifdef DARWIN
109 char *ptr = (char *) valloc(rawlen + datalen);
110#else
111 char *ptr = 0;
112 int r = ::posix_memalign((void**)(void*)&ptr, align, rawlen + datalen);
113 if (r)
114 throw bad_alloc();
115#endif /* DARWIN */
7c673cae
FG
116 if (!ptr)
117 throw bad_alloc();
118
119 // actual data first, since it has presumably larger alignment restriction
120 // then put the raw_combined at the end
9f95a23c
TL
121 return ceph::unique_leakable_ptr<buffer::raw>(
122 new (ptr + datalen) raw_combined(ptr, len, align, mempool));
7c673cae
FG
123 }
124
125 static void operator delete(void *ptr) {
126 raw_combined *raw = (raw_combined *)ptr;
f67539c2 127 aligned_free((void *)raw->data);
7c673cae
FG
128 }
129 };
130
131 class buffer::raw_malloc : public buffer::raw {
132 public:
133 MEMPOOL_CLASS_HELPERS();
134
135 explicit raw_malloc(unsigned l) : raw(l) {
136 if (len) {
137 data = (char *)malloc(len);
138 if (!data)
139 throw bad_alloc();
140 } else {
141 data = 0;
142 }
11fdf7f2 143 bdout << "raw_malloc " << this << " alloc " << (void *)data << " " << l << bendl;
7c673cae
FG
144 }
145 raw_malloc(unsigned l, char *b) : raw(b, l) {
11fdf7f2 146 bdout << "raw_malloc " << this << " alloc " << (void *)data << " " << l << bendl;
7c673cae
FG
147 }
148 ~raw_malloc() override {
149 free(data);
11fdf7f2 150 bdout << "raw_malloc " << this << " free " << (void *)data << " " << bendl;
7c673cae
FG
151 }
152 raw* clone_empty() override {
153 return new raw_malloc(len);
154 }
155 };
156
157#ifndef __CYGWIN__
7c673cae
FG
158 class buffer::raw_posix_aligned : public buffer::raw {
159 unsigned align;
160 public:
161 MEMPOOL_CLASS_HELPERS();
162
163 raw_posix_aligned(unsigned l, unsigned _align) : raw(l) {
b3b6e05e
TL
164 // posix_memalign() requires a multiple of sizeof(void *)
165 align = std::max<unsigned>(_align, sizeof(void *));
31f18b77
FG
166#ifdef DARWIN
167 data = (char *) valloc(len);
168#else
169 int r = ::posix_memalign((void**)(void*)&data, align, len);
170 if (r)
171 throw bad_alloc();
172#endif /* DARWIN */
7c673cae
FG
173 if (!data)
174 throw bad_alloc();
11fdf7f2
TL
175 bdout << "raw_posix_aligned " << this << " alloc " << (void *)data
176 << " l=" << l << ", align=" << align << bendl;
7c673cae
FG
177 }
178 ~raw_posix_aligned() override {
f67539c2 179 aligned_free(data);
11fdf7f2 180 bdout << "raw_posix_aligned " << this << " free " << (void *)data << bendl;
7c673cae
FG
181 }
182 raw* clone_empty() override {
183 return new raw_posix_aligned(len, align);
184 }
185 };
186#endif
187
188#ifdef __CYGWIN__
189 class buffer::raw_hack_aligned : public buffer::raw {
190 unsigned align;
191 char *realdata;
192 public:
193 raw_hack_aligned(unsigned l, unsigned _align) : raw(l) {
194 align = _align;
195 realdata = new char[len+align-1];
f67539c2 196 unsigned off = ((uintptr_t)realdata) & (align-1);
7c673cae
FG
197 if (off)
198 data = realdata + align - off;
199 else
200 data = realdata;
7c673cae
FG
201 //cout << "hack aligned " << (unsigned)data
202 //<< " in raw " << (unsigned)realdata
203 //<< " off " << off << std::endl;
f67539c2 204 ceph_assert(((uintptr_t)data & (align-1)) == 0);
7c673cae
FG
205 }
206 ~raw_hack_aligned() {
207 delete[] realdata;
7c673cae
FG
208 }
209 raw* clone_empty() {
210 return new raw_hack_aligned(len, align);
211 }
212 };
213#endif
214
7c673cae
FG
215 /*
216 * primitive buffer types
217 */
218 class buffer::raw_char : public buffer::raw {
219 public:
220 MEMPOOL_CLASS_HELPERS();
221
222 explicit raw_char(unsigned l) : raw(l) {
223 if (len)
31f18b77 224 data = new char[len];
7c673cae
FG
225 else
226 data = 0;
11fdf7f2 227 bdout << "raw_char " << this << " alloc " << (void *)data << " " << l << bendl;
7c673cae
FG
228 }
229 raw_char(unsigned l, char *b) : raw(b, l) {
11fdf7f2 230 bdout << "raw_char " << this << " alloc " << (void *)data << " " << l << bendl;
7c673cae
FG
231 }
232 ~raw_char() override {
31f18b77 233 delete[] data;
11fdf7f2 234 bdout << "raw_char " << this << " free " << (void *)data << bendl;
7c673cae
FG
235 }
236 raw* clone_empty() override {
237 return new raw_char(len);
238 }
239 };
240
31f18b77
FG
241 class buffer::raw_claimed_char : public buffer::raw {
242 public:
243 MEMPOOL_CLASS_HELPERS();
244
245 explicit raw_claimed_char(unsigned l, char *b) : raw(b, l) {
31f18b77 246 bdout << "raw_claimed_char " << this << " alloc " << (void *)data
11fdf7f2 247 << " " << l << bendl;
31f18b77
FG
248 }
249 ~raw_claimed_char() override {
31f18b77 250 bdout << "raw_claimed_char " << this << " free " << (void *)data
11fdf7f2 251 << bendl;
31f18b77
FG
252 }
253 raw* clone_empty() override {
254 return new raw_char(len);
255 }
256 };
257
7c673cae
FG
258 class buffer::raw_static : public buffer::raw {
259 public:
260 MEMPOOL_CLASS_HELPERS();
261
262 raw_static(const char *d, unsigned l) : raw((char*)d, l) { }
263 ~raw_static() override {}
264 raw* clone_empty() override {
265 return new buffer::raw_char(len);
266 }
267 };
268
269 class buffer::raw_claim_buffer : public buffer::raw {
270 deleter del;
271 public:
272 raw_claim_buffer(const char *b, unsigned l, deleter d)
273 : raw((char*)b, l), del(std::move(d)) { }
274 ~raw_claim_buffer() override {}
275 raw* clone_empty() override {
276 return new buffer::raw_char(len);
277 }
278 };
279
11fdf7f2
TL
280 ceph::unique_leakable_ptr<buffer::raw> buffer::copy(const char *c, unsigned len) {
281 auto r = buffer::create_aligned(len, sizeof(size_t));
f67539c2 282 memcpy(r->get_data(), c, len);
7c673cae
FG
283 return r;
284 }
285
11fdf7f2 286 ceph::unique_leakable_ptr<buffer::raw> buffer::create(unsigned len) {
7c673cae
FG
287 return buffer::create_aligned(len, sizeof(size_t));
288 }
f67539c2
TL
289 ceph::unique_leakable_ptr<buffer::raw> buffer::create(unsigned len, char c) {
290 auto ret = buffer::create_aligned(len, sizeof(size_t));
291 memset(ret->get_data(), c, len);
292 return ret;
293 }
9f95a23c
TL
294 ceph::unique_leakable_ptr<buffer::raw>
295 buffer::create_in_mempool(unsigned len, int mempool) {
3efd9988
FG
296 return buffer::create_aligned_in_mempool(len, sizeof(size_t), mempool);
297 }
9f95a23c
TL
298 ceph::unique_leakable_ptr<buffer::raw>
299 buffer::claim_char(unsigned len, char *buf) {
300 return ceph::unique_leakable_ptr<buffer::raw>(
301 new raw_claimed_char(len, buf));
7c673cae 302 }
9f95a23c
TL
303 ceph::unique_leakable_ptr<buffer::raw> buffer::create_malloc(unsigned len) {
304 return ceph::unique_leakable_ptr<buffer::raw>(new raw_malloc(len));
7c673cae 305 }
9f95a23c
TL
306 ceph::unique_leakable_ptr<buffer::raw>
307 buffer::claim_malloc(unsigned len, char *buf) {
308 return ceph::unique_leakable_ptr<buffer::raw>(new raw_malloc(len, buf));
7c673cae 309 }
9f95a23c
TL
310 ceph::unique_leakable_ptr<buffer::raw>
311 buffer::create_static(unsigned len, char *buf) {
312 return ceph::unique_leakable_ptr<buffer::raw>(new raw_static(buf, len));
7c673cae 313 }
9f95a23c
TL
314 ceph::unique_leakable_ptr<buffer::raw>
315 buffer::claim_buffer(unsigned len, char *buf, deleter del) {
316 return ceph::unique_leakable_ptr<buffer::raw>(
317 new raw_claim_buffer(buf, len, std::move(del)));
7c673cae
FG
318 }
319
11fdf7f2 320 ceph::unique_leakable_ptr<buffer::raw> buffer::create_aligned_in_mempool(
9f95a23c
TL
321 unsigned len, unsigned align, int mempool)
322 {
7c673cae
FG
323 // If alignment is a page multiple, use a separate buffer::raw to
324 // avoid fragmenting the heap.
325 //
326 // Somewhat unexpectedly, I see consistently better performance
327 // from raw_combined than from raw even when the allocation size is
328 // a page multiple (but alignment is not).
329 //
330 // I also see better performance from a separate buffer::raw once the
331 // size passes 8KB.
332 if ((align & ~CEPH_PAGE_MASK) == 0 ||
333 len >= CEPH_PAGE_SIZE * 2) {
334#ifndef __CYGWIN__
11fdf7f2 335 return ceph::unique_leakable_ptr<buffer::raw>(new raw_posix_aligned(len, align));
7c673cae 336#else
11fdf7f2 337 return ceph::unique_leakable_ptr<buffer::raw>(new raw_hack_aligned(len, align));
7c673cae
FG
338#endif
339 }
9f95a23c 340 return raw_combined::create(len, align, mempool);
3efd9988 341 }
11fdf7f2 342 ceph::unique_leakable_ptr<buffer::raw> buffer::create_aligned(
3efd9988
FG
343 unsigned len, unsigned align) {
344 return create_aligned_in_mempool(len, align,
345 mempool::mempool_buffer_anon);
7c673cae
FG
346 }
347
11fdf7f2 348 ceph::unique_leakable_ptr<buffer::raw> buffer::create_page_aligned(unsigned len) {
7c673cae
FG
349 return create_aligned(len, CEPH_PAGE_SIZE);
350 }
11fdf7f2
TL
351 ceph::unique_leakable_ptr<buffer::raw> buffer::create_small_page_aligned(unsigned len) {
352 if (len < CEPH_PAGE_SIZE) {
353 return create_aligned(len, CEPH_BUFFER_ALLOC_UNIT);
9f95a23c 354 } else {
11fdf7f2 355 return create_aligned(len, CEPH_PAGE_SIZE);
9f95a23c 356 }
7c673cae
FG
357 }
358
11fdf7f2
TL
359 buffer::ptr::ptr(ceph::unique_leakable_ptr<raw> r)
360 : _raw(r.release()),
361 _off(0),
f67539c2 362 _len(_raw->get_len())
11fdf7f2
TL
363 {
364 _raw->nref.store(1, std::memory_order_release);
365 bdout << "ptr " << this << " get " << _raw << bendl;
366 }
7c673cae
FG
367 buffer::ptr::ptr(unsigned l) : _off(0), _len(l)
368 {
11fdf7f2
TL
369 _raw = buffer::create(l).release();
370 _raw->nref.store(1, std::memory_order_release);
7c673cae
FG
371 bdout << "ptr " << this << " get " << _raw << bendl;
372 }
373 buffer::ptr::ptr(const char *d, unsigned l) : _off(0), _len(l) // ditto.
374 {
11fdf7f2
TL
375 _raw = buffer::copy(d, l).release();
376 _raw->nref.store(1, std::memory_order_release);
7c673cae
FG
377 bdout << "ptr " << this << " get " << _raw << bendl;
378 }
379 buffer::ptr::ptr(const ptr& p) : _raw(p._raw), _off(p._off), _len(p._len)
380 {
381 if (_raw) {
31f18b77 382 _raw->nref++;
7c673cae
FG
383 bdout << "ptr " << this << " get " << _raw << bendl;
384 }
385 }
386 buffer::ptr::ptr(ptr&& p) noexcept : _raw(p._raw), _off(p._off), _len(p._len)
387 {
388 p._raw = nullptr;
389 p._off = p._len = 0;
390 }
391 buffer::ptr::ptr(const ptr& p, unsigned o, unsigned l)
392 : _raw(p._raw), _off(p._off + o), _len(l)
393 {
11fdf7f2
TL
394 ceph_assert(o+l <= p._len);
395 ceph_assert(_raw);
31f18b77 396 _raw->nref++;
7c673cae
FG
397 bdout << "ptr " << this << " get " << _raw << bendl;
398 }
11fdf7f2
TL
399 buffer::ptr::ptr(const ptr& p, ceph::unique_leakable_ptr<raw> r)
400 : _raw(r.release()),
401 _off(p._off),
402 _len(p._len)
403 {
404 _raw->nref.store(1, std::memory_order_release);
405 bdout << "ptr " << this << " get " << _raw << bendl;
406 }
7c673cae
FG
407 buffer::ptr& buffer::ptr::operator= (const ptr& p)
408 {
409 if (p._raw) {
31f18b77 410 p._raw->nref++;
7c673cae
FG
411 bdout << "ptr " << this << " get " << _raw << bendl;
412 }
413 buffer::raw *raw = p._raw;
414 release();
415 if (raw) {
416 _raw = raw;
417 _off = p._off;
418 _len = p._len;
419 } else {
420 _off = _len = 0;
421 }
422 return *this;
423 }
424 buffer::ptr& buffer::ptr::operator= (ptr&& p) noexcept
425 {
426 release();
427 buffer::raw *raw = p._raw;
428 if (raw) {
429 _raw = raw;
430 _off = p._off;
431 _len = p._len;
432 p._raw = nullptr;
433 p._off = p._len = 0;
434 } else {
435 _off = _len = 0;
436 }
437 return *this;
438 }
439
11fdf7f2 440 ceph::unique_leakable_ptr<buffer::raw> buffer::ptr::clone()
7c673cae
FG
441 {
442 return _raw->clone();
443 }
444
11fdf7f2 445 void buffer::ptr::swap(ptr& other) noexcept
7c673cae
FG
446 {
447 raw *r = _raw;
448 unsigned o = _off;
449 unsigned l = _len;
450 _raw = other._raw;
451 _off = other._off;
452 _len = other._len;
453 other._raw = r;
454 other._off = o;
455 other._len = l;
456 }
457
458 void buffer::ptr::release()
459 {
f67539c2
TL
460 // BE CAREFUL: this is called also for hypercombined ptr_node. After
461 // freeing underlying raw, `*this` can become inaccessible as well!
462 //
463 // cache the pointer to avoid unncecessary reloads and repeated
464 // checks.
465 if (auto* const cached_raw = std::exchange(_raw, nullptr);
466 cached_raw) {
467 bdout << "ptr " << this << " release " << cached_raw << bendl;
468 // optimize the common case where a particular `buffer::raw` has
469 // only a single reference. Altogether with initializing `nref` of
470 // freshly fabricated one with `1` through the std::atomic's ctor
471 // (which doesn't impose a memory barrier on the strongly-ordered
472 // x86), this allows to avoid all atomical operations in such case.
473 const bool last_one = \
474 (1 == cached_raw->nref.load(std::memory_order_acquire));
475 if (likely(last_one) || --cached_raw->nref == 0) {
476 bdout << "deleting raw " << static_cast<void*>(cached_raw)
477 << " len " << cached_raw->get_len() << bendl;
478 ANNOTATE_HAPPENS_AFTER(&cached_raw->nref);
479 ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&cached_raw->nref);
480 delete cached_raw; // dealloc old (if any)
7c673cae 481 } else {
f67539c2 482 ANNOTATE_HAPPENS_BEFORE(&cached_raw->nref);
7c673cae 483 }
7c673cae
FG
484 }
485 }
486
3efd9988
FG
487 int buffer::ptr::get_mempool() const {
488 if (_raw) {
489 return _raw->mempool;
490 }
491 return mempool::mempool_buffer_anon;
492 }
493
494 void buffer::ptr::reassign_to_mempool(int pool) {
495 if (_raw) {
496 _raw->reassign_to_mempool(pool);
497 }
498 }
499 void buffer::ptr::try_assign_to_mempool(int pool) {
500 if (_raw) {
501 _raw->try_assign_to_mempool(pool);
502 }
503 }
504
7c673cae 505 const char *buffer::ptr::c_str() const {
11fdf7f2 506 ceph_assert(_raw);
7c673cae
FG
507 return _raw->get_data() + _off;
508 }
509 char *buffer::ptr::c_str() {
11fdf7f2 510 ceph_assert(_raw);
7c673cae
FG
511 return _raw->get_data() + _off;
512 }
513 const char *buffer::ptr::end_c_str() const {
11fdf7f2 514 ceph_assert(_raw);
7c673cae
FG
515 return _raw->get_data() + _off + _len;
516 }
517 char *buffer::ptr::end_c_str() {
11fdf7f2 518 ceph_assert(_raw);
7c673cae
FG
519 return _raw->get_data() + _off + _len;
520 }
521
522 unsigned buffer::ptr::unused_tail_length() const
523 {
f67539c2 524 return _raw ? _raw->get_len() - (_off + _len) : 0;
7c673cae
FG
525 }
526 const char& buffer::ptr::operator[](unsigned n) const
527 {
11fdf7f2
TL
528 ceph_assert(_raw);
529 ceph_assert(n < _len);
7c673cae
FG
530 return _raw->get_data()[_off + n];
531 }
532 char& buffer::ptr::operator[](unsigned n)
533 {
11fdf7f2
TL
534 ceph_assert(_raw);
535 ceph_assert(n < _len);
7c673cae
FG
536 return _raw->get_data()[_off + n];
537 }
538
f67539c2
TL
539 const char *buffer::ptr::raw_c_str() const { ceph_assert(_raw); return _raw->get_data(); }
540 unsigned buffer::ptr::raw_length() const { ceph_assert(_raw); return _raw->get_len(); }
11fdf7f2 541 int buffer::ptr::raw_nref() const { ceph_assert(_raw); return _raw->nref; }
7c673cae
FG
542
543 void buffer::ptr::copy_out(unsigned o, unsigned l, char *dest) const {
11fdf7f2 544 ceph_assert(_raw);
7c673cae
FG
545 if (o+l > _len)
546 throw end_of_buffer();
f67539c2 547 char* src = _raw->get_data() + _off + o;
7c673cae
FG
548 maybe_inline_memcpy(dest, src, l, 8);
549 }
550
31f18b77 551 unsigned buffer::ptr::wasted() const
7c673cae 552 {
f67539c2 553 return _raw->get_len() - _len;
7c673cae
FG
554 }
555
556 int buffer::ptr::cmp(const ptr& o) const
557 {
558 int l = _len < o._len ? _len : o._len;
559 if (l) {
560 int r = memcmp(c_str(), o.c_str(), l);
561 if (r)
562 return r;
563 }
564 if (_len < o._len)
565 return -1;
566 if (_len > o._len)
567 return 1;
568 return 0;
569 }
570
571 bool buffer::ptr::is_zero() const
572 {
573 return mem_is_zero(c_str(), _len);
574 }
575
576 unsigned buffer::ptr::append(char c)
577 {
11fdf7f2
TL
578 ceph_assert(_raw);
579 ceph_assert(1 <= unused_tail_length());
f67539c2 580 char* ptr = _raw->get_data() + _off + _len;
7c673cae
FG
581 *ptr = c;
582 _len++;
583 return _len + _off;
584 }
585
586 unsigned buffer::ptr::append(const char *p, unsigned l)
587 {
11fdf7f2
TL
588 ceph_assert(_raw);
589 ceph_assert(l <= unused_tail_length());
f67539c2 590 char* c = _raw->get_data() + _off + _len;
7c673cae
FG
591 maybe_inline_memcpy(c, p, l, 32);
592 _len += l;
593 return _len + _off;
594 }
595
11fdf7f2 596 unsigned buffer::ptr::append_zeros(unsigned l)
7c673cae 597 {
11fdf7f2
TL
598 ceph_assert(_raw);
599 ceph_assert(l <= unused_tail_length());
f67539c2 600 char* c = _raw->get_data() + _off + _len;
92f5a8d4 601 // FIPS zeroization audit 20191115: this memset is not security related.
11fdf7f2
TL
602 memset(c, 0, l);
603 _len += l;
604 return _len + _off;
7c673cae
FG
605 }
606
607 void buffer::ptr::copy_in(unsigned o, unsigned l, const char *src, bool crc_reset)
608 {
11fdf7f2
TL
609 ceph_assert(_raw);
610 ceph_assert(o <= _len);
611 ceph_assert(o+l <= _len);
f67539c2 612 char* dest = _raw->get_data() + _off + o;
7c673cae
FG
613 if (crc_reset)
614 _raw->invalidate_crc();
615 maybe_inline_memcpy(dest, src, l, 64);
616 }
617
7c673cae
FG
618 void buffer::ptr::zero(bool crc_reset)
619 {
620 if (crc_reset)
621 _raw->invalidate_crc();
92f5a8d4 622 // FIPS zeroization audit 20191115: this memset is not security related.
7c673cae
FG
623 memset(c_str(), 0, _len);
624 }
625
7c673cae
FG
626 void buffer::ptr::zero(unsigned o, unsigned l, bool crc_reset)
627 {
11fdf7f2 628 ceph_assert(o+l <= _len);
7c673cae
FG
629 if (crc_reset)
630 _raw->invalidate_crc();
92f5a8d4 631 // FIPS zeroization audit 20191115: this memset is not security related.
7c673cae
FG
632 memset(c_str()+o, 0, l);
633 }
7c673cae 634
f67539c2
TL
635 template<bool B>
636 buffer::ptr::iterator_impl<B>& buffer::ptr::iterator_impl<B>::operator +=(size_t len) {
637 pos += len;
638 if (pos > end_ptr)
639 throw end_of_buffer();
640 return *this;
641 }
642
643 template buffer::ptr::iterator_impl<false>&
644 buffer::ptr::iterator_impl<false>::operator +=(size_t len);
645 template buffer::ptr::iterator_impl<true>&
646 buffer::ptr::iterator_impl<true>::operator +=(size_t len);
647
7c673cae
FG
648 // -- buffer::list::iterator --
649 /*
650 buffer::list::iterator operator=(const buffer::list::iterator& other)
651 {
652 if (this != &other) {
653 bl = other.bl;
654 ls = other.ls;
655 off = other.off;
656 p = other.p;
657 p_off = other.p_off;
658 }
659 return *this;
660 }*/
661
662 template<bool is_const>
663 buffer::list::iterator_impl<is_const>::iterator_impl(bl_t *l, unsigned o)
11fdf7f2 664 : bl(l), ls(&bl->_buffers), p(ls->begin()), off(0), p_off(0)
7c673cae 665 {
9f95a23c 666 *this += o;
7c673cae
FG
667 }
668
669 template<bool is_const>
670 buffer::list::iterator_impl<is_const>::iterator_impl(const buffer::list::iterator& i)
671 : iterator_impl<is_const>(i.bl, i.off, i.p, i.p_off) {}
672
673 template<bool is_const>
9f95a23c
TL
674 auto buffer::list::iterator_impl<is_const>::operator +=(unsigned o)
675 -> iterator_impl&
11fdf7f2
TL
676 {
677 //cout << this << " advance " << o << " from " << off
678 // << " (p_off " << p_off << " in " << p->length() << ")"
679 // << std::endl;
680
681 p_off +=o;
682 while (p != ls->end()) {
683 if (p_off >= p->length()) {
684 // skip this buffer
685 p_off -= p->length();
686 p++;
7c673cae 687 } else {
11fdf7f2
TL
688 // somewhere in this buffer!
689 break;
7c673cae
FG
690 }
691 }
11fdf7f2
TL
692 if (p == ls->end() && p_off) {
693 throw end_of_buffer();
694 }
695 off += o;
9f95a23c 696 return *this;
7c673cae
FG
697 }
698
699 template<bool is_const>
700 void buffer::list::iterator_impl<is_const>::seek(unsigned o)
701 {
702 p = ls->begin();
703 off = p_off = 0;
9f95a23c 704 *this += o;
7c673cae
FG
705 }
706
707 template<bool is_const>
708 char buffer::list::iterator_impl<is_const>::operator*() const
709 {
710 if (p == ls->end())
711 throw end_of_buffer();
712 return (*p)[p_off];
713 }
714
715 template<bool is_const>
716 buffer::list::iterator_impl<is_const>&
717 buffer::list::iterator_impl<is_const>::operator++()
718 {
719 if (p == ls->end())
720 throw end_of_buffer();
9f95a23c 721 *this += 1;
7c673cae
FG
722 return *this;
723 }
724
725 template<bool is_const>
726 buffer::ptr buffer::list::iterator_impl<is_const>::get_current_ptr() const
727 {
728 if (p == ls->end())
729 throw end_of_buffer();
730 return ptr(*p, p_off, p->length() - p_off);
731 }
732
11fdf7f2
TL
733 template<bool is_const>
734 bool buffer::list::iterator_impl<is_const>::is_pointing_same_raw(
735 const ptr& other) const
736 {
737 if (p == ls->end())
738 throw end_of_buffer();
9f95a23c 739 return p->_raw == other._raw;
11fdf7f2
TL
740 }
741
7c673cae
FG
742 // copy data out.
743 // note that these all _append_ to dest!
744 template<bool is_const>
745 void buffer::list::iterator_impl<is_const>::copy(unsigned len, char *dest)
746 {
747 if (p == ls->end()) seek(off);
748 while (len > 0) {
749 if (p == ls->end())
750 throw end_of_buffer();
7c673cae
FG
751
752 unsigned howmuch = p->length() - p_off;
753 if (len < howmuch) howmuch = len;
754 p->copy_out(p_off, howmuch, dest);
755 dest += howmuch;
756
757 len -= howmuch;
9f95a23c 758 *this += howmuch;
7c673cae
FG
759 }
760 }
761
762 template<bool is_const>
763 void buffer::list::iterator_impl<is_const>::copy(unsigned len, ptr &dest)
764 {
765 copy_deep(len, dest);
766 }
767
768 template<bool is_const>
769 void buffer::list::iterator_impl<is_const>::copy_deep(unsigned len, ptr &dest)
770 {
771 if (!len) {
772 return;
773 }
774 if (p == ls->end())
775 throw end_of_buffer();
7c673cae
FG
776 dest = create(len);
777 copy(len, dest.c_str());
778 }
779 template<bool is_const>
780 void buffer::list::iterator_impl<is_const>::copy_shallow(unsigned len,
781 ptr &dest)
782 {
783 if (!len) {
784 return;
785 }
786 if (p == ls->end())
787 throw end_of_buffer();
7c673cae
FG
788 unsigned howmuch = p->length() - p_off;
789 if (howmuch < len) {
790 dest = create(len);
791 copy(len, dest.c_str());
792 } else {
793 dest = ptr(*p, p_off, len);
9f95a23c 794 *this += len;
7c673cae
FG
795 }
796 }
797
798 template<bool is_const>
799 void buffer::list::iterator_impl<is_const>::copy(unsigned len, list &dest)
800 {
801 if (p == ls->end())
802 seek(off);
803 while (len > 0) {
804 if (p == ls->end())
805 throw end_of_buffer();
806
807 unsigned howmuch = p->length() - p_off;
808 if (len < howmuch)
809 howmuch = len;
810 dest.append(*p, p_off, howmuch);
811
812 len -= howmuch;
9f95a23c 813 *this += howmuch;
7c673cae
FG
814 }
815 }
816
817 template<bool is_const>
818 void buffer::list::iterator_impl<is_const>::copy(unsigned len, std::string &dest)
819 {
820 if (p == ls->end())
821 seek(off);
822 while (len > 0) {
823 if (p == ls->end())
824 throw end_of_buffer();
825
826 unsigned howmuch = p->length() - p_off;
827 const char *c_str = p->c_str();
828 if (len < howmuch)
829 howmuch = len;
830 dest.append(c_str + p_off, howmuch);
831
832 len -= howmuch;
9f95a23c 833 *this += howmuch;
7c673cae
FG
834 }
835 }
836
837 template<bool is_const>
838 void buffer::list::iterator_impl<is_const>::copy_all(list &dest)
839 {
840 if (p == ls->end())
841 seek(off);
842 while (1) {
843 if (p == ls->end())
844 return;
7c673cae
FG
845
846 unsigned howmuch = p->length() - p_off;
847 const char *c_str = p->c_str();
848 dest.append(c_str + p_off, howmuch);
849
9f95a23c 850 *this += howmuch;
7c673cae
FG
851 }
852 }
853
854 template<bool is_const>
855 size_t buffer::list::iterator_impl<is_const>::get_ptr_and_advance(
856 size_t want, const char **data)
857 {
858 if (p == ls->end()) {
859 seek(off);
860 if (p == ls->end()) {
861 return 0;
862 }
863 }
864 *data = p->c_str() + p_off;
11fdf7f2 865 size_t l = std::min<size_t>(p->length() - p_off, want);
7c673cae
FG
866 p_off += l;
867 if (p_off == p->length()) {
868 ++p;
869 p_off = 0;
870 }
871 off += l;
872 return l;
873 }
874
875 template<bool is_const>
876 uint32_t buffer::list::iterator_impl<is_const>::crc32c(
877 size_t length, uint32_t crc)
878 {
11fdf7f2 879 length = std::min<size_t>(length, get_remaining());
7c673cae
FG
880 while (length > 0) {
881 const char *p;
882 size_t l = get_ptr_and_advance(length, &p);
883 crc = ceph_crc32c(crc, (unsigned char*)p, l);
884 length -= l;
885 }
886 return crc;
887 }
888
889 // explicitly instantiate only the iterator types we need, so we can hide the
890 // details in this compilation unit without introducing unnecessary link time
891 // dependencies.
892 template class buffer::list::iterator_impl<true>;
893 template class buffer::list::iterator_impl<false>;
894
895 buffer::list::iterator::iterator(bl_t *l, unsigned o)
896 : iterator_impl(l, o)
897 {}
898
899 buffer::list::iterator::iterator(bl_t *l, unsigned o, list_iter_t ip, unsigned po)
900 : iterator_impl(l, o, ip, po)
901 {}
902
7c673cae
FG
903 // copy data in
904 void buffer::list::iterator::copy_in(unsigned len, const char *src, bool crc_reset)
905 {
906 // copy
907 if (p == ls->end())
908 seek(off);
909 while (len > 0) {
910 if (p == ls->end())
911 throw end_of_buffer();
912
913 unsigned howmuch = p->length() - p_off;
914 if (len < howmuch)
915 howmuch = len;
916 p->copy_in(p_off, howmuch, src, crc_reset);
917
918 src += howmuch;
919 len -= howmuch;
9f95a23c 920 *this += howmuch;
7c673cae
FG
921 }
922 }
923
924 void buffer::list::iterator::copy_in(unsigned len, const list& otherl)
925 {
926 if (p == ls->end())
927 seek(off);
928 unsigned left = len;
11fdf7f2
TL
929 for (const auto& node : otherl._buffers) {
930 unsigned l = node.length();
7c673cae
FG
931 if (left < l)
932 l = left;
11fdf7f2 933 copy_in(l, node.c_str());
7c673cae
FG
934 left -= l;
935 if (left == 0)
936 break;
937 }
938 }
939
940 // -- buffer::list --
941
11fdf7f2 942 void buffer::list::swap(list& other) noexcept
7c673cae
FG
943 {
944 std::swap(_len, other._len);
9f95a23c 945 std::swap(_num, other._num);
11fdf7f2 946 std::swap(_carriage, other._carriage);
7c673cae 947 _buffers.swap(other._buffers);
7c673cae
FG
948 }
949
7c673cae
FG
950 bool buffer::list::contents_equal(const ceph::buffer::list& other) const
951 {
952 if (length() != other.length())
953 return false;
954
955 // buffer-wise comparison
956 if (true) {
11fdf7f2
TL
957 auto a = std::cbegin(_buffers);
958 auto b = std::cbegin(other._buffers);
7c673cae 959 unsigned aoff = 0, boff = 0;
11fdf7f2 960 while (a != std::cend(_buffers)) {
7c673cae
FG
961 unsigned len = a->length() - aoff;
962 if (len > b->length() - boff)
963 len = b->length() - boff;
964 if (memcmp(a->c_str() + aoff, b->c_str() + boff, len) != 0)
965 return false;
966 aoff += len;
967 if (aoff == a->length()) {
968 aoff = 0;
969 ++a;
970 }
971 boff += len;
972 if (boff == b->length()) {
973 boff = 0;
974 ++b;
975 }
976 }
7c673cae
FG
977 return true;
978 }
979
980 // byte-wise comparison
981 if (false) {
982 bufferlist::const_iterator me = begin();
983 bufferlist::const_iterator him = other.begin();
984 while (!me.end()) {
985 if (*me != *him)
986 return false;
987 ++me;
988 ++him;
989 }
990 return true;
991 }
992 }
993
9f95a23c
TL
994 bool buffer::list::contents_equal(const void* const other,
995 size_t length) const
996 {
997 if (this->length() != length) {
998 return false;
999 }
1000
1001 const auto* other_buf = reinterpret_cast<const char*>(other);
1002 for (const auto& bp : buffers()) {
f67539c2
TL
1003 assert(bp.length() <= length);
1004 if (std::memcmp(bp.c_str(), other_buf, bp.length()) != 0) {
9f95a23c
TL
1005 return false;
1006 } else {
f67539c2
TL
1007 length -= bp.length();
1008 other_buf += bp.length();
9f95a23c
TL
1009 }
1010 }
1011
1012 return true;
1013 }
1014
11fdf7f2 1015 bool buffer::list::is_provided_buffer(const char* const dst) const
7c673cae 1016 {
11fdf7f2 1017 if (_buffers.empty()) {
7c673cae 1018 return false;
11fdf7f2 1019 }
7c673cae
FG
1020 return (is_contiguous() && (_buffers.front().c_str() == dst));
1021 }
1022
11fdf7f2 1023 bool buffer::list::is_aligned(const unsigned align) const
7c673cae 1024 {
11fdf7f2
TL
1025 for (const auto& node : _buffers) {
1026 if (!node.is_aligned(align)) {
7c673cae 1027 return false;
11fdf7f2
TL
1028 }
1029 }
7c673cae
FG
1030 return true;
1031 }
1032
11fdf7f2 1033 bool buffer::list::is_n_align_sized(const unsigned align) const
7c673cae 1034 {
11fdf7f2
TL
1035 for (const auto& node : _buffers) {
1036 if (!node.is_n_align_sized(align)) {
7c673cae 1037 return false;
11fdf7f2
TL
1038 }
1039 }
7c673cae
FG
1040 return true;
1041 }
1042
11fdf7f2
TL
1043 bool buffer::list::is_aligned_size_and_memory(
1044 const unsigned align_size,
1045 const unsigned align_memory) const
7c673cae 1046 {
11fdf7f2
TL
1047 for (const auto& node : _buffers) {
1048 if (!node.is_aligned(align_memory) || !node.is_n_align_sized(align_size)) {
7c673cae 1049 return false;
11fdf7f2 1050 }
7c673cae
FG
1051 }
1052 return true;
1053 }
1054
1055 bool buffer::list::is_zero() const {
11fdf7f2
TL
1056 for (const auto& node : _buffers) {
1057 if (!node.is_zero()) {
7c673cae
FG
1058 return false;
1059 }
1060 }
1061 return true;
1062 }
1063
1064 void buffer::list::zero()
1065 {
11fdf7f2
TL
1066 for (auto& node : _buffers) {
1067 node.zero();
1068 }
7c673cae
FG
1069 }
1070
11fdf7f2 1071 void buffer::list::zero(const unsigned o, const unsigned l)
7c673cae 1072 {
11fdf7f2 1073 ceph_assert(o+l <= _len);
7c673cae 1074 unsigned p = 0;
11fdf7f2
TL
1075 for (auto& node : _buffers) {
1076 if (p + node.length() > o) {
1077 if (p >= o && p+node.length() <= o+l) {
7c673cae 1078 // 'o'------------- l -----------|
11fdf7f2
TL
1079 // 'p'-- node.length() --|
1080 node.zero();
7c673cae
FG
1081 } else if (p >= o) {
1082 // 'o'------------- l -----------|
11fdf7f2
TL
1083 // 'p'------- node.length() -------|
1084 node.zero(0, o+l-p);
1085 } else if (p + node.length() <= o+l) {
7c673cae 1086 // 'o'------------- l -----------|
11fdf7f2
TL
1087 // 'p'------- node.length() -------|
1088 node.zero(o-p, node.length()-(o-p));
7c673cae
FG
1089 } else {
1090 // 'o'----------- l -----------|
11fdf7f2
TL
1091 // 'p'---------- node.length() ----------|
1092 node.zero(o-p, l);
7c673cae
FG
1093 }
1094 }
11fdf7f2
TL
1095 p += node.length();
1096 if (o+l <= p) {
7c673cae 1097 break; // done
11fdf7f2 1098 }
7c673cae
FG
1099 }
1100 }
1101
1102 bool buffer::list::is_contiguous() const
1103 {
9f95a23c 1104 return _num <= 1;
7c673cae
FG
1105 }
1106
1107 bool buffer::list::is_n_page_sized() const
1108 {
1109 return is_n_align_sized(CEPH_PAGE_SIZE);
1110 }
1111
1112 bool buffer::list::is_page_aligned() const
1113 {
1114 return is_aligned(CEPH_PAGE_SIZE);
1115 }
1116
3efd9988
FG
1117 int buffer::list::get_mempool() const
1118 {
1119 if (_buffers.empty()) {
1120 return mempool::mempool_buffer_anon;
1121 }
1122 return _buffers.back().get_mempool();
1123 }
1124
31f18b77
FG
1125 void buffer::list::reassign_to_mempool(int pool)
1126 {
31f18b77 1127 for (auto& p : _buffers) {
9f95a23c 1128 p._raw->reassign_to_mempool(pool);
31f18b77
FG
1129 }
1130 }
1131
1132 void buffer::list::try_assign_to_mempool(int pool)
1133 {
31f18b77 1134 for (auto& p : _buffers) {
9f95a23c 1135 p._raw->try_assign_to_mempool(pool);
31f18b77
FG
1136 }
1137 }
1138
f64942e4
AA
1139 uint64_t buffer::list::get_wasted_space() const
1140 {
9f95a23c 1141 if (_num == 1)
f64942e4
AA
1142 return _buffers.back().wasted();
1143
1144 std::vector<const raw*> raw_vec;
9f95a23c 1145 raw_vec.reserve(_num);
f64942e4 1146 for (const auto& p : _buffers)
9f95a23c 1147 raw_vec.push_back(p._raw);
f64942e4
AA
1148 std::sort(raw_vec.begin(), raw_vec.end());
1149
1150 uint64_t total = 0;
1151 const raw *last = nullptr;
1152 for (const auto r : raw_vec) {
1153 if (r == last)
1154 continue;
1155 last = r;
f67539c2 1156 total += r->get_len();
f64942e4
AA
1157 }
1158 // If multiple buffers are sharing the same raw buffer and they overlap
1159 // with each other, the wasted space will be underestimated.
1160 if (total <= length())
1161 return 0;
1162 return total - length();
1163 }
1164
7c673cae
FG
1165 void buffer::list::rebuild()
1166 {
1167 if (_len == 0) {
11fdf7f2
TL
1168 _carriage = &always_empty_bptr;
1169 _buffers.clear_and_dispose();
9f95a23c 1170 _num = 0;
7c673cae
FG
1171 return;
1172 }
7c673cae 1173 if ((_len & ~CEPH_PAGE_MASK) == 0)
11fdf7f2 1174 rebuild(ptr_node::create(buffer::create_page_aligned(_len)));
7c673cae 1175 else
11fdf7f2 1176 rebuild(ptr_node::create(buffer::create(_len)));
7c673cae
FG
1177 }
1178
11fdf7f2
TL
1179 void buffer::list::rebuild(
1180 std::unique_ptr<buffer::ptr_node, buffer::ptr_node::disposer> nb)
7c673cae
FG
1181 {
1182 unsigned pos = 0;
adb31ebb
TL
1183 int mempool = _buffers.front().get_mempool();
1184 nb->reassign_to_mempool(mempool);
11fdf7f2
TL
1185 for (auto& node : _buffers) {
1186 nb->copy_in(pos, node.length(), node.c_str(), false);
1187 pos += node.length();
7c673cae 1188 }
11fdf7f2
TL
1189 _buffers.clear_and_dispose();
1190 if (likely(nb->length())) {
1191 _carriage = nb.get();
1192 _buffers.push_back(*nb.release());
9f95a23c
TL
1193 _num = 1;
1194 } else {
1195 _carriage = &always_empty_bptr;
1196 _num = 0;
11fdf7f2 1197 }
7c673cae 1198 invalidate_crc();
7c673cae
FG
1199 }
1200
1201 bool buffer::list::rebuild_aligned(unsigned align)
1202 {
1203 return rebuild_aligned_size_and_memory(align, align);
1204 }
1205
1206 bool buffer::list::rebuild_aligned_size_and_memory(unsigned align_size,
b32b8144
FG
1207 unsigned align_memory,
1208 unsigned max_buffers)
7c673cae 1209 {
9f95a23c 1210 bool had_to_rebuild = false;
b32b8144 1211
9f95a23c 1212 if (max_buffers && _num > max_buffers && _len > (max_buffers * align_size)) {
11fdf7f2 1213 align_size = round_up_to(round_up_to(_len, max_buffers) / max_buffers, align_size);
b32b8144 1214 }
11fdf7f2
TL
1215 auto p = std::begin(_buffers);
1216 auto p_prev = _buffers.before_begin();
1217 while (p != std::end(_buffers)) {
7c673cae
FG
1218 // keep anything that's already align and sized aligned
1219 if (p->is_aligned(align_memory) && p->is_n_align_sized(align_size)) {
1220 /*cout << " segment " << (void*)p->c_str()
1221 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1222 << " length " << p->length()
1223 << " " << (p->length() & (align - 1)) << " ok" << std::endl;
1224 */
11fdf7f2 1225 p_prev = p++;
7c673cae
FG
1226 continue;
1227 }
1228
1229 // consolidate unaligned items, until we get something that is sized+aligned
1230 list unaligned;
1231 unsigned offset = 0;
1232 do {
1233 /*cout << " segment " << (void*)p->c_str()
1234 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1235 << " length " << p->length() << " " << (p->length() & (align - 1))
1236 << " overall offset " << offset << " " << (offset & (align - 1))
1237 << " not ok" << std::endl;
1238 */
1239 offset += p->length();
11fdf7f2
TL
1240 // no need to reallocate, relinking is enough thankfully to bi::list.
1241 auto p_after = _buffers.erase_after(p_prev);
9f95a23c 1242 _num -= 1;
11fdf7f2
TL
1243 unaligned._buffers.push_back(*p);
1244 unaligned._len += p->length();
9f95a23c 1245 unaligned._num += 1;
11fdf7f2
TL
1246 p = p_after;
1247 } while (p != std::end(_buffers) &&
7c673cae
FG
1248 (!p->is_aligned(align_memory) ||
1249 !p->is_n_align_sized(align_size) ||
1250 (offset % align_size)));
1251 if (!(unaligned.is_contiguous() && unaligned._buffers.front().is_aligned(align_memory))) {
11fdf7f2
TL
1252 unaligned.rebuild(
1253 ptr_node::create(
1254 buffer::create_aligned(unaligned._len, align_memory)));
9f95a23c 1255 had_to_rebuild = true;
7c673cae 1256 }
11fdf7f2 1257 _buffers.insert_after(p_prev, *ptr_node::create(unaligned._buffers.front()).release());
9f95a23c 1258 _num += 1;
11fdf7f2 1259 ++p_prev;
7c673cae 1260 }
9f95a23c 1261 return had_to_rebuild;
7c673cae
FG
1262 }
1263
1264 bool buffer::list::rebuild_page_aligned()
1265 {
1266 return rebuild_aligned(CEPH_PAGE_SIZE);
1267 }
1268
31f18b77
FG
1269 void buffer::list::reserve(size_t prealloc)
1270 {
11fdf7f2 1271 if (get_append_buffer_unused_tail_length() < prealloc) {
f67539c2 1272 auto ptr = ptr_node::create(buffer::create_small_page_aligned(prealloc));
11fdf7f2
TL
1273 ptr->set_length(0); // unused, so far.
1274 _carriage = ptr.get();
1275 _buffers.push_back(*ptr.release());
9f95a23c 1276 _num += 1;
31f18b77
FG
1277 }
1278 }
1279
9f95a23c 1280 void buffer::list::claim_append(list& bl)
7c673cae
FG
1281 {
1282 // steal the other guy's buffers
1283 _len += bl._len;
9f95a23c 1284 _num += bl._num;
11fdf7f2 1285 _buffers.splice_back(bl._buffers);
31f18b77
FG
1286 bl.clear();
1287 }
1288
7c673cae
FG
1289 void buffer::list::append(char c)
1290 {
1291 // put what we can into the existing append_buffer.
11fdf7f2 1292 unsigned gap = get_append_buffer_unused_tail_length();
7c673cae 1293 if (!gap) {
11fdf7f2
TL
1294 // make a new buffer!
1295 auto buf = ptr_node::create(
1296 raw_combined::create(CEPH_BUFFER_APPEND_SIZE, 0, get_mempool()));
1297 buf->set_length(0); // unused, so far.
1298 _carriage = buf.get();
1299 _buffers.push_back(*buf.release());
9f95a23c 1300 _num += 1;
11fdf7f2
TL
1301 } else if (unlikely(_carriage != &_buffers.back())) {
1302 auto bptr = ptr_node::create(*_carriage, _carriage->length(), 0);
1303 _carriage = bptr.get();
1304 _buffers.push_back(*bptr.release());
9f95a23c 1305 _num += 1;
11fdf7f2
TL
1306 }
1307 _carriage->append(c);
1308 _len++;
1309 }
1310
f67539c2 1311 buffer::ptr_node buffer::list::always_empty_bptr;
11fdf7f2
TL
1312
1313 buffer::ptr_node& buffer::list::refill_append_space(const unsigned len)
1314 {
1315 // make a new buffer. fill out a complete page, factoring in the
1316 // raw_combined overhead.
1317 size_t need = round_up_to(len, sizeof(size_t)) + sizeof(raw_combined);
1318 size_t alen = round_up_to(need, CEPH_BUFFER_ALLOC_UNIT) -
1319 sizeof(raw_combined);
1320 auto new_back = \
1321 ptr_node::create(raw_combined::create(alen, 0, get_mempool()));
1322 new_back->set_length(0); // unused, so far.
1323 _carriage = new_back.get();
1324 _buffers.push_back(*new_back.release());
9f95a23c 1325 _num += 1;
11fdf7f2 1326 return _buffers.back();
7c673cae
FG
1327 }
1328
1329 void buffer::list::append(const char *data, unsigned len)
1330 {
11fdf7f2
TL
1331 _len += len;
1332
1333 const unsigned free_in_last = get_append_buffer_unused_tail_length();
1334 const unsigned first_round = std::min(len, free_in_last);
1335 if (first_round) {
1336 // _buffers and carriage can desynchronize when 1) a new ptr
1337 // we don't own has been added into the _buffers 2) _buffers
1338 // has been emptied as as a result of std::move or stolen by
1339 // claim_append.
1340 if (unlikely(_carriage != &_buffers.back())) {
1341 auto bptr = ptr_node::create(*_carriage, _carriage->length(), 0);
1342 _carriage = bptr.get();
1343 _buffers.push_back(*bptr.release());
9f95a23c 1344 _num += 1;
7c673cae 1345 }
11fdf7f2
TL
1346 _carriage->append(data, first_round);
1347 }
1348
1349 const unsigned second_round = len - first_round;
1350 if (second_round) {
1351 auto& new_back = refill_append_space(second_round);
1352 new_back.append(data + first_round, second_round);
1353 }
1354 }
1355
1356 buffer::list::reserve_t buffer::list::obtain_contiguous_space(
1357 const unsigned len)
1358 {
1359 // note: if len < the normal append_buffer size it *might*
1360 // be better to allocate a normal-sized append_buffer and
1361 // use part of it. however, that optimizes for the case of
1362 // old-style types including new-style types. and in most
1363 // such cases, this won't be the very first thing encoded to
1364 // the list, so append_buffer will already be allocated.
1365 // OTOH if everything is new-style, we *should* allocate
1366 // only what we need and conserve memory.
1367 if (unlikely(get_append_buffer_unused_tail_length() < len)) {
1368 auto new_back = \
1369 buffer::ptr_node::create(buffer::create(len)).release();
1370 new_back->set_length(0); // unused, so far.
1371 _buffers.push_back(*new_back);
9f95a23c 1372 _num += 1;
11fdf7f2
TL
1373 _carriage = new_back;
1374 return { new_back->c_str(), &new_back->_len, &_len };
1375 } else {
f67539c2 1376 ceph_assert(!_buffers.empty());
11fdf7f2
TL
1377 if (unlikely(_carriage != &_buffers.back())) {
1378 auto bptr = ptr_node::create(*_carriage, _carriage->length(), 0);
1379 _carriage = bptr.get();
1380 _buffers.push_back(*bptr.release());
9f95a23c 1381 _num += 1;
11fdf7f2
TL
1382 }
1383 return { _carriage->end_c_str(), &_carriage->_len, &_len };
7c673cae
FG
1384 }
1385 }
1386
1387 void buffer::list::append(const ptr& bp)
1388 {
7c673cae
FG
1389 push_back(bp);
1390 }
1391
1392 void buffer::list::append(ptr&& bp)
1393 {
7c673cae
FG
1394 push_back(std::move(bp));
1395 }
1396
1397 void buffer::list::append(const ptr& bp, unsigned off, unsigned len)
1398 {
11fdf7f2 1399 ceph_assert(len+off <= bp.length());
7c673cae
FG
1400 if (!_buffers.empty()) {
1401 ptr &l = _buffers.back();
9f95a23c 1402 if (l._raw == bp._raw && l.end() == bp.start() + off) {
7c673cae
FG
1403 // yay contiguous with tail bp!
1404 l.set_length(l.length()+len);
1405 _len += len;
1406 return;
1407 }
1408 }
1409 // add new item to list
11fdf7f2
TL
1410 _buffers.push_back(*ptr_node::create(bp, off, len).release());
1411 _len += len;
9f95a23c 1412 _num += 1;
7c673cae
FG
1413 }
1414
1415 void buffer::list::append(const list& bl)
1416 {
1417 _len += bl._len;
9f95a23c 1418 _num += bl._num;
11fdf7f2
TL
1419 for (const auto& node : bl._buffers) {
1420 _buffers.push_back(*ptr_node::create(node).release());
1421 }
7c673cae
FG
1422 }
1423
1424 void buffer::list::append(std::istream& in)
1425 {
1426 while (!in.eof()) {
1427 std::string s;
1428 getline(in, s);
1429 append(s.c_str(), s.length());
1430 if (s.length())
1431 append("\n", 1);
1432 }
1433 }
1434
11fdf7f2
TL
1435 buffer::list::contiguous_filler buffer::list::append_hole(const unsigned len)
1436 {
1437 _len += len;
1438
1439 if (unlikely(get_append_buffer_unused_tail_length() < len)) {
1440 // make a new append_buffer. fill out a complete page, factoring in
1441 // the raw_combined overhead.
1442 auto& new_back = refill_append_space(len);
1443 new_back.set_length(len);
1444 return { new_back.c_str() };
1445 } else if (unlikely(_carriage != &_buffers.back())) {
1446 auto bptr = ptr_node::create(*_carriage, _carriage->length(), 0);
1447 _carriage = bptr.get();
1448 _buffers.push_back(*bptr.release());
9f95a23c 1449 _num += 1;
11fdf7f2
TL
1450 }
1451 _carriage->set_length(_carriage->length() + len);
1452 return { _carriage->end_c_str() - len };
1453 }
1454
7c673cae
FG
1455 void buffer::list::prepend_zero(unsigned len)
1456 {
11fdf7f2
TL
1457 auto bp = ptr_node::create(len);
1458 bp->zero(false);
7c673cae 1459 _len += len;
9f95a23c 1460 _num += 1;
11fdf7f2 1461 _buffers.push_front(*bp.release());
7c673cae
FG
1462 }
1463
1464 void buffer::list::append_zero(unsigned len)
1465 {
11fdf7f2
TL
1466 _len += len;
1467
1468 const unsigned free_in_last = get_append_buffer_unused_tail_length();
1469 const unsigned first_round = std::min(len, free_in_last);
1470 if (first_round) {
1471 if (unlikely(_carriage != &_buffers.back())) {
1472 auto bptr = ptr_node::create(*_carriage, _carriage->length(), 0);
1473 _carriage = bptr.get();
1474 _buffers.push_back(*bptr.release());
9f95a23c 1475 _num += 1;
11fdf7f2
TL
1476 }
1477 _carriage->append_zeros(first_round);
1478 }
1479
1480 const unsigned second_round = len - first_round;
1481 if (second_round) {
1482 auto& new_back = refill_append_space(second_round);
1483 new_back.set_length(second_round);
1484 new_back.zero(false);
1485 }
7c673cae
FG
1486 }
1487
1488
1489 /*
1490 * get a char
1491 */
1492 const char& buffer::list::operator[](unsigned n) const
1493 {
1494 if (n >= _len)
1495 throw end_of_buffer();
1496
11fdf7f2
TL
1497 for (const auto& node : _buffers) {
1498 if (n >= node.length()) {
1499 n -= node.length();
7c673cae
FG
1500 continue;
1501 }
11fdf7f2 1502 return node[n];
7c673cae
FG
1503 }
1504 ceph_abort();
1505 }
1506
1507 /*
1508 * return a contiguous ptr to whole bufferlist contents.
1509 */
1510 char *buffer::list::c_str()
1511 {
1512 if (_buffers.empty())
1513 return 0; // no buffers
1514
11fdf7f2 1515 auto iter = std::cbegin(_buffers);
7c673cae
FG
1516 ++iter;
1517
11fdf7f2 1518 if (iter != std::cend(_buffers)) {
7c673cae 1519 rebuild();
11fdf7f2 1520 }
7c673cae
FG
1521 return _buffers.front().c_str(); // good, we're already contiguous.
1522 }
1523
1524 string buffer::list::to_str() const {
1525 string s;
1526 s.reserve(length());
11fdf7f2
TL
1527 for (const auto& node : _buffers) {
1528 if (node.length()) {
1529 s.append(node.c_str(), node.length());
7c673cae
FG
1530 }
1531 }
1532 return s;
1533 }
1534
7c673cae
FG
1535 void buffer::list::substr_of(const list& other, unsigned off, unsigned len)
1536 {
1537 if (off + len > other.length())
1538 throw end_of_buffer();
1539
1540 clear();
1541
1542 // skip off
11fdf7f2
TL
1543 auto curbuf = std::cbegin(other._buffers);
1544 while (off > 0 && off >= curbuf->length()) {
7c673cae
FG
1545 // skip this buffer
1546 //cout << "skipping over " << *curbuf << std::endl;
1547 off -= (*curbuf).length();
1548 ++curbuf;
1549 }
11fdf7f2 1550 ceph_assert(len == 0 || curbuf != std::cend(other._buffers));
7c673cae
FG
1551
1552 while (len > 0) {
1553 // partial?
1554 if (off + len < curbuf->length()) {
1555 //cout << "copying partial of " << *curbuf << std::endl;
f67539c2 1556 _buffers.push_back(*ptr_node::create(*curbuf, off, len).release());
7c673cae 1557 _len += len;
9f95a23c 1558 _num += 1;
7c673cae
FG
1559 break;
1560 }
1561
1562 // through end
1563 //cout << "copying end (all?) of " << *curbuf << std::endl;
1564 unsigned howmuch = curbuf->length() - off;
f67539c2 1565 _buffers.push_back(*ptr_node::create(*curbuf, off, howmuch).release());
7c673cae 1566 _len += howmuch;
9f95a23c 1567 _num += 1;
7c673cae
FG
1568 len -= howmuch;
1569 off = 0;
1570 ++curbuf;
1571 }
1572 }
1573
1574 // funky modifer
1575 void buffer::list::splice(unsigned off, unsigned len, list *claim_by /*, bufferlist& replace_with */)
1576 { // fixme?
1577 if (len == 0)
1578 return;
1579
1580 if (off >= length())
1581 throw end_of_buffer();
1582
11fdf7f2 1583 ceph_assert(len > 0);
7c673cae
FG
1584 //cout << "splice off " << off << " len " << len << " ... mylen = " << length() << std::endl;
1585
1586 // skip off
11fdf7f2
TL
1587 auto curbuf = std::begin(_buffers);
1588 auto curbuf_prev = _buffers.before_begin();
7c673cae 1589 while (off > 0) {
11fdf7f2 1590 ceph_assert(curbuf != std::end(_buffers));
7c673cae
FG
1591 if (off >= (*curbuf).length()) {
1592 // skip this buffer
1593 //cout << "off = " << off << " skipping over " << *curbuf << std::endl;
1594 off -= (*curbuf).length();
11fdf7f2 1595 curbuf_prev = curbuf++;
7c673cae
FG
1596 } else {
1597 // somewhere in this buffer!
1598 //cout << "off = " << off << " somewhere in " << *curbuf << std::endl;
1599 break;
1600 }
1601 }
1602
1603 if (off) {
f67539c2
TL
1604 // add a reference to the front bit, insert it before curbuf (which
1605 // we'll lose).
7c673cae 1606 //cout << "keeping front " << off << " of " << *curbuf << std::endl;
11fdf7f2
TL
1607 _buffers.insert_after(curbuf_prev,
1608 *ptr_node::create(*curbuf, 0, off).release());
7c673cae 1609 _len += off;
9f95a23c 1610 _num += 1;
11fdf7f2 1611 ++curbuf_prev;
7c673cae
FG
1612 }
1613
1614 while (len > 0) {
f67539c2
TL
1615 // partial or the last (appendable) one?
1616 if (const auto to_drop = off + len; to_drop < curbuf->length()) {
7c673cae
FG
1617 //cout << "keeping end of " << *curbuf << ", losing first " << off+len << std::endl;
1618 if (claim_by)
f67539c2
TL
1619 claim_by->append(*curbuf, off, len);
1620 curbuf->set_offset(to_drop + curbuf->offset()); // ignore beginning big
1621 curbuf->set_length(curbuf->length() - to_drop);
1622 _len -= to_drop;
7c673cae
FG
1623 //cout << " now " << *curbuf << std::endl;
1624 break;
1625 }
f67539c2 1626
7c673cae 1627 // hose though the end
f67539c2 1628 unsigned howmuch = curbuf->length() - off;
7c673cae
FG
1629 //cout << "discarding " << howmuch << " of " << *curbuf << std::endl;
1630 if (claim_by)
f67539c2
TL
1631 claim_by->append(*curbuf, off, howmuch);
1632 _len -= curbuf->length();
1633 if (curbuf == _carriage) {
1634 // no need to reallocate, shrinking and relinking is enough.
1635 curbuf = _buffers.erase_after(curbuf_prev);
1636 _carriage->set_offset(_carriage->offset() + _carriage->length());
1637 _carriage->set_length(0);
1638 _buffers.push_back(*_carriage);
1639 } else {
1640 curbuf = _buffers.erase_after_and_dispose(curbuf_prev);
1641 _num -= 1;
1642 }
7c673cae
FG
1643 len -= howmuch;
1644 off = 0;
1645 }
f67539c2 1646
7c673cae 1647 // splice in *replace (implement me later?)
7c673cae
FG
1648 }
1649
1650 void buffer::list::write(int off, int len, std::ostream& out) const
1651 {
1652 list s;
1653 s.substr_of(*this, off, len);
11fdf7f2
TL
1654 for (const auto& node : s._buffers) {
1655 if (node.length()) {
1656 out.write(node.c_str(), node.length());
1657 }
1658 }
7c673cae
FG
1659 }
1660
1661void buffer::list::encode_base64(buffer::list& o)
1662{
1663 bufferptr bp(length() * 4 / 3 + 3);
1664 int l = ceph_armor(bp.c_str(), bp.c_str() + bp.length(), c_str(), c_str() + length());
1665 bp.set_length(l);
1666 o.push_back(std::move(bp));
1667}
1668
1669void buffer::list::decode_base64(buffer::list& e)
1670{
1671 bufferptr bp(4 + ((e.length() * 3) / 4));
1672 int l = ceph_unarmor(bp.c_str(), bp.c_str() + bp.length(), e.c_str(), e.c_str() + e.length());
1673 if (l < 0) {
1674 std::ostringstream oss;
1675 oss << "decode_base64: decoding failed:\n";
1676 hexdump(oss);
1677 throw buffer::malformed_input(oss.str().c_str());
1678 }
11fdf7f2 1679 ceph_assert(l <= (int)bp.length());
7c673cae
FG
1680 bp.set_length(l);
1681 push_back(std::move(bp));
1682}
1683
9f95a23c
TL
1684ssize_t buffer::list::pread_file(const char *fn, uint64_t off, uint64_t len, std::string *error)
1685{
f67539c2 1686 int fd = TEMP_FAILURE_RETRY(::open(fn, O_RDONLY|O_CLOEXEC|O_BINARY));
9f95a23c
TL
1687 if (fd < 0) {
1688 int err = errno;
1689 std::ostringstream oss;
1690 oss << "can't open " << fn << ": " << cpp_strerror(err);
1691 *error = oss.str();
1692 return -err;
1693 }
1694
1695 struct stat st;
1696 // FIPS zeroization audit 20191115: this memset is not security related.
1697 memset(&st, 0, sizeof(st));
1698 if (::fstat(fd, &st) < 0) {
1699 int err = errno;
1700 std::ostringstream oss;
1701 oss << "bufferlist::read_file(" << fn << "): stat error: "
1702 << cpp_strerror(err);
1703 *error = oss.str();
1704 VOID_TEMP_FAILURE_RETRY(::close(fd));
1705 return -err;
1706 }
1707
1708 if (off > (uint64_t)st.st_size) {
1709 std::ostringstream oss;
1710 oss << "bufferlist::read_file(" << fn << "): read error: size < offset";
1711 *error = oss.str();
1712 VOID_TEMP_FAILURE_RETRY(::close(fd));
1713 return 0;
1714 }
1715
1716 if (len > st.st_size - off) {
1717 len = st.st_size - off;
1718 }
1719 ssize_t ret = lseek64(fd, off, SEEK_SET);
1720 if (ret != (ssize_t)off) {
1721 return -errno;
1722 }
1723
1724 ret = read_fd(fd, len);
1725 if (ret < 0) {
1726 std::ostringstream oss;
1727 oss << "bufferlist::read_file(" << fn << "): read error:"
1728 << cpp_strerror(ret);
1729 *error = oss.str();
1730 VOID_TEMP_FAILURE_RETRY(::close(fd));
1731 return ret;
1732 } else if (ret != (ssize_t)len) {
1733 // Premature EOF.
1734 // Perhaps the file changed between stat() and read()?
1735 std::ostringstream oss;
1736 oss << "bufferlist::read_file(" << fn << "): warning: got premature EOF.";
1737 *error = oss.str();
1738 // not actually an error, but weird
1739 }
1740 VOID_TEMP_FAILURE_RETRY(::close(fd));
1741 return 0;
1742}
7c673cae
FG
1743
1744int buffer::list::read_file(const char *fn, std::string *error)
1745{
f67539c2 1746 int fd = TEMP_FAILURE_RETRY(::open(fn, O_RDONLY|O_CLOEXEC|O_BINARY));
7c673cae
FG
1747 if (fd < 0) {
1748 int err = errno;
1749 std::ostringstream oss;
1750 oss << "can't open " << fn << ": " << cpp_strerror(err);
1751 *error = oss.str();
1752 return -err;
1753 }
1754
1755 struct stat st;
92f5a8d4 1756 // FIPS zeroization audit 20191115: this memset is not security related.
7c673cae
FG
1757 memset(&st, 0, sizeof(st));
1758 if (::fstat(fd, &st) < 0) {
1759 int err = errno;
1760 std::ostringstream oss;
1761 oss << "bufferlist::read_file(" << fn << "): stat error: "
1762 << cpp_strerror(err);
1763 *error = oss.str();
1764 VOID_TEMP_FAILURE_RETRY(::close(fd));
1765 return -err;
1766 }
1767
1768 ssize_t ret = read_fd(fd, st.st_size);
1769 if (ret < 0) {
1770 std::ostringstream oss;
1771 oss << "bufferlist::read_file(" << fn << "): read error:"
1772 << cpp_strerror(ret);
1773 *error = oss.str();
1774 VOID_TEMP_FAILURE_RETRY(::close(fd));
1775 return ret;
1776 }
1777 else if (ret != st.st_size) {
1778 // Premature EOF.
1779 // Perhaps the file changed between stat() and read()?
1780 std::ostringstream oss;
1781 oss << "bufferlist::read_file(" << fn << "): warning: got premature EOF.";
1782 *error = oss.str();
1783 // not actually an error, but weird
1784 }
1785 VOID_TEMP_FAILURE_RETRY(::close(fd));
1786 return 0;
1787}
1788
1789ssize_t buffer::list::read_fd(int fd, size_t len)
1790{
11fdf7f2
TL
1791 auto bp = ptr_node::create(buffer::create(len));
1792 ssize_t ret = safe_read(fd, (void*)bp->c_str(), len);
7c673cae 1793 if (ret >= 0) {
11fdf7f2
TL
1794 bp->set_length(ret);
1795 push_back(std::move(bp));
7c673cae
FG
1796 }
1797 return ret;
1798}
1799
f67539c2
TL
1800ssize_t buffer::list::recv_fd(int fd, size_t len)
1801{
1802 auto bp = ptr_node::create(buffer::create(len));
1803 ssize_t ret = safe_recv(fd, (void*)bp->c_str(), len);
1804 if (ret >= 0) {
1805 bp->set_length(ret);
1806 push_back(std::move(bp));
1807 }
1808 return ret;
1809}
1810
7c673cae
FG
1811int buffer::list::write_file(const char *fn, int mode)
1812{
f67539c2 1813 int fd = TEMP_FAILURE_RETRY(::open(fn, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC|O_BINARY, mode));
7c673cae
FG
1814 if (fd < 0) {
1815 int err = errno;
1816 cerr << "bufferlist::write_file(" << fn << "): failed to open file: "
1817 << cpp_strerror(err) << std::endl;
1818 return -err;
1819 }
1820 int ret = write_fd(fd);
1821 if (ret) {
1822 cerr << "bufferlist::write_fd(" << fn << "): write_fd error: "
1823 << cpp_strerror(ret) << std::endl;
1824 VOID_TEMP_FAILURE_RETRY(::close(fd));
1825 return ret;
1826 }
1827 if (TEMP_FAILURE_RETRY(::close(fd))) {
1828 int err = errno;
1829 cerr << "bufferlist::write_file(" << fn << "): close error: "
1830 << cpp_strerror(err) << std::endl;
1831 return -err;
1832 }
1833 return 0;
1834}
1835
// Write `bytes` bytes described by vec[0..veclen) to `fd` starting at
// file offset `offset`, retrying on EINTR and continuing after short
// writes. The iovec array is modified in place while advancing.
// Returns 0 on success or a negative errno value.
static int do_writev(int fd, struct iovec *vec, uint64_t offset, unsigned veclen, unsigned bytes)
{
  while (bytes > 0) {
    ssize_t wrote = 0;
#ifdef HAVE_PWRITEV
    wrote = ::pwritev(fd, vec, veclen, offset);
#else
    wrote = ::lseek64(fd, offset, SEEK_SET);
    if (wrote != offset) {
      return -errno;
    }
    wrote = ::writev(fd, vec, veclen);
#endif
    if (wrote < 0) {
      if (errno == EINTR)
        continue;
      return -errno;
    }

    bytes -= wrote;
    offset += wrote;
    if (bytes == 0)
      break;

    // short write: step past the fully written entries and trim the
    // first partially written one
    while (wrote > 0) {
      if (vec[0].iov_len > (size_t)wrote) {
        vec[0].iov_base = (char *)vec[0].iov_base + wrote;
        vec[0].iov_len -= wrote;
        break;
      }
      // this whole entry has been written
      wrote -= vec[0].iov_len;
      ++vec;
      --veclen;
    }
  }
  return 0;
}
1874
f67539c2 1875#ifndef _WIN32
7c673cae
FG
1876int buffer::list::write_fd(int fd) const
1877{
7c673cae
FG
1878 // use writev!
1879 iovec iov[IOV_MAX];
1880 int iovlen = 0;
1881 ssize_t bytes = 0;
1882
11fdf7f2
TL
1883 auto p = std::cbegin(_buffers);
1884 while (p != std::cend(_buffers)) {
7c673cae
FG
1885 if (p->length() > 0) {
1886 iov[iovlen].iov_base = (void *)p->c_str();
1887 iov[iovlen].iov_len = p->length();
1888 bytes += p->length();
1889 iovlen++;
1890 }
1891 ++p;
1892
d2e6a577 1893 if (iovlen == IOV_MAX ||
7c673cae
FG
1894 p == _buffers.end()) {
1895 iovec *start = iov;
1896 int num = iovlen;
1897 ssize_t wrote;
1898 retry:
1899 wrote = ::writev(fd, start, num);
1900 if (wrote < 0) {
1901 int err = errno;
1902 if (err == EINTR)
1903 goto retry;
1904 return -err;
1905 }
1906 if (wrote < bytes) {
1907 // partial write, recover!
1908 while ((size_t)wrote >= start[0].iov_len) {
1909 wrote -= start[0].iov_len;
1910 bytes -= start[0].iov_len;
1911 start++;
1912 num--;
1913 }
1914 if (wrote > 0) {
1915 start[0].iov_len -= wrote;
1916 start[0].iov_base = (char *)start[0].iov_base + wrote;
1917 bytes -= wrote;
1918 }
1919 goto retry;
1920 }
1921 iovlen = 0;
1922 bytes = 0;
1923 }
1924 }
1925 return 0;
1926}
1927
f67539c2
TL
1928int buffer::list::send_fd(int fd) const {
1929 return buffer::list::write_fd(fd);
1930}
1931
7c673cae
FG
1932int buffer::list::write_fd(int fd, uint64_t offset) const
1933{
1934 iovec iov[IOV_MAX];
1935
11fdf7f2 1936 auto p = std::cbegin(_buffers);
9f95a23c 1937 uint64_t left_pbrs = get_num_buffers();
7c673cae
FG
1938 while (left_pbrs) {
1939 ssize_t bytes = 0;
1940 unsigned iovlen = 0;
11fdf7f2 1941 uint64_t size = std::min<uint64_t>(left_pbrs, IOV_MAX);
7c673cae
FG
1942 left_pbrs -= size;
1943 while (size > 0) {
1944 iov[iovlen].iov_base = (void *)p->c_str();
1945 iov[iovlen].iov_len = p->length();
1946 iovlen++;
1947 bytes += p->length();
1948 ++p;
1949 size--;
1950 }
1951
1952 int r = do_writev(fd, iov, offset, iovlen, bytes);
1953 if (r < 0)
1954 return r;
1955 offset += bytes;
1956 }
1957 return 0;
1958}
f67539c2
TL
1959#else
1960int buffer::list::write_fd(int fd) const
1961{
1962 // There's no writev on Windows. WriteFileGather may be an option,
1963 // but it has strict requirements in terms of buffer size and alignment.
1964 auto p = std::cbegin(_buffers);
1965 uint64_t left_pbrs = get_num_buffers();
1966 while (left_pbrs) {
1967 int written = 0;
1968 while (written < p->length()) {
1969 int r = ::write(fd, p->c_str(), p->length() - written);
1970 if (r < 0)
1971 return -errno;
1972
1973 written += r;
1974 }
1975
1976 left_pbrs--;
1977 p++;
1978 }
1979
1980 return 0;
1981}
1982
1983int buffer::list::send_fd(int fd) const
1984{
1985 // There's no writev on Windows. WriteFileGather may be an option,
1986 // but it has strict requirements in terms of buffer size and alignment.
1987 auto p = std::cbegin(_buffers);
1988 uint64_t left_pbrs = get_num_buffers();
1989 while (left_pbrs) {
1990 int written = 0;
1991 while (written < p->length()) {
1992 int r = ::send(fd, p->c_str(), p->length() - written, 0);
1993 if (r < 0)
1994 return -ceph_sock_errno();
1995
1996 written += r;
1997 }
1998
1999 left_pbrs--;
2000 p++;
2001 }
2002
2003 return 0;
2004}
2005
2006int buffer::list::write_fd(int fd, uint64_t offset) const
2007{
2008 int r = ::lseek64(fd, offset, SEEK_SET);
2009 if (r != offset)
2010 return -errno;
2011
2012 return write_fd(fd);
2013}
2014#endif
7c673cae 2015
7c673cae
FG
2016__u32 buffer::list::crc32c(__u32 crc) const
2017{
11fdf7f2
TL
2018 int cache_misses = 0;
2019 int cache_hits = 0;
2020 int cache_adjusts = 0;
2021
2022 for (const auto& node : _buffers) {
2023 if (node.length()) {
9f95a23c 2024 raw* const r = node._raw;
11fdf7f2 2025 pair<size_t, size_t> ofs(node.offset(), node.offset() + node.length());
7c673cae
FG
2026 pair<uint32_t, uint32_t> ccrc;
2027 if (r->get_crc(ofs, &ccrc)) {
2028 if (ccrc.first == crc) {
2029 // got it already
2030 crc = ccrc.second;
11fdf7f2 2031 cache_hits++;
7c673cae
FG
2032 } else {
2033 /* If we have cached crc32c(buf, v) for initial value v,
2034 * we can convert this to a different initial value v' by:
2035 * crc32c(buf, v') = crc32c(buf, v) ^ adjustment
2036 * where adjustment = crc32c(0*len(buf), v ^ v')
2037 *
2038 * http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
2039 * note, u for our crc32c implementation is 0
2040 */
11fdf7f2
TL
2041 crc = ccrc.second ^ ceph_crc32c(ccrc.first ^ crc, NULL, node.length());
2042 cache_adjusts++;
7c673cae
FG
2043 }
2044 } else {
11fdf7f2 2045 cache_misses++;
7c673cae 2046 uint32_t base = crc;
11fdf7f2 2047 crc = ceph_crc32c(crc, (unsigned char*)node.c_str(), node.length());
7c673cae
FG
2048 r->set_crc(ofs, make_pair(base, crc));
2049 }
2050 }
2051 }
11fdf7f2
TL
2052
2053 if (buffer_track_crc) {
2054 if (cache_adjusts)
2055 buffer_cached_crc_adjusted += cache_adjusts;
2056 if (cache_hits)
2057 buffer_cached_crc += cache_hits;
2058 if (cache_misses)
2059 buffer_missed_crc += cache_misses;
2060 }
2061
7c673cae
FG
2062 return crc;
2063}
2064
2065void buffer::list::invalidate_crc()
2066{
11fdf7f2 2067 for (const auto& node : _buffers) {
9f95a23c
TL
2068 if (node._raw) {
2069 node._raw->invalidate_crc();
7c673cae
FG
2070 }
2071 }
2072}
2073
2074/**
2075 * Binary write all contents to a C++ stream
2076 */
2077void buffer::list::write_stream(std::ostream &out) const
2078{
11fdf7f2
TL
2079 for (const auto& node : _buffers) {
2080 if (node.length() > 0) {
2081 out.write(node.c_str(), node.length());
7c673cae
FG
2082 }
2083 }
2084}
2085
2086
2087void buffer::list::hexdump(std::ostream &out, bool trailing_newline) const
2088{
2089 if (!length())
2090 return;
2091
2092 std::ios_base::fmtflags original_flags = out.flags();
2093
2094 // do our best to match the output of hexdump -C, for better
2095 // diff'ing!
2096
2097 out.setf(std::ios::right);
2098 out.fill('0');
2099
2100 unsigned per = 16;
9f95a23c
TL
2101 char last_row_char = '\0';
2102 bool was_same = false, did_star = false;
7c673cae 2103 for (unsigned o=0; o<length(); o += per) {
9f95a23c
TL
2104 if (o == 0) {
2105 last_row_char = (*this)[o];
2106 }
2107
7c673cae 2108 if (o + per < length()) {
9f95a23c 2109 bool row_is_same = true;
7c673cae 2110 for (unsigned i=0; i<per && o+i<length(); i++) {
9f95a23c
TL
2111 char current_char = (*this)[o+i];
2112 if (current_char != last_row_char) {
2113 if (i == 0) {
2114 last_row_char = current_char;
2115 was_same = false;
2116 did_star = false;
2117 } else {
2118 row_is_same = false;
2119 }
7c673cae
FG
2120 }
2121 }
9f95a23c
TL
2122 if (row_is_same) {
2123 if (was_same) {
7c673cae
FG
2124 if (!did_star) {
2125 out << "\n*";
2126 did_star = true;
2127 }
2128 continue;
2129 }
9f95a23c 2130 was_same = true;
7c673cae 2131 } else {
9f95a23c 2132 was_same = false;
7c673cae
FG
2133 did_star = false;
2134 }
2135 }
2136 if (o)
2137 out << "\n";
2138 out << std::hex << std::setw(8) << o << " ";
2139
2140 unsigned i;
2141 for (i=0; i<per && o+i<length(); i++) {
2142 if (i == 8)
2143 out << ' ';
2144 out << " " << std::setw(2) << ((unsigned)(*this)[o+i] & 0xff);
2145 }
2146 for (; i<per; i++) {
2147 if (i == 8)
2148 out << ' ';
2149 out << " ";
2150 }
2151
2152 out << " |";
2153 for (i=0; i<per && o+i<length(); i++) {
2154 char c = (*this)[o+i];
2155 if (isupper(c) || islower(c) || isdigit(c) || c == ' ' || ispunct(c))
2156 out << c;
2157 else
2158 out << '.';
2159 }
2160 out << '|' << std::dec;
2161 }
2162 if (trailing_newline) {
2163 out << "\n" << std::hex << std::setw(8) << length();
2164 out << "\n";
2165 }
2166
2167 out.flags(original_flags);
2168}
2169
31f18b77
FG
2170
2171buffer::list buffer::list::static_from_mem(char* c, size_t l) {
2172 list bl;
11fdf7f2 2173 bl.push_back(ptr_node::create(create_static(l, c)));
31f18b77
FG
2174 return bl;
2175}
2176
2177buffer::list buffer::list::static_from_cstring(char* c) {
2178 return static_from_mem(c, std::strlen(c));
2179}
2180
2181buffer::list buffer::list::static_from_string(string& s) {
2182 // C++14 just has string::data return a char* from a non-const
2183 // string.
2184 return static_from_mem(const_cast<char*>(s.data()), s.length());
2185 // But the way buffer::list mostly doesn't work in a sane way with
2186 // const makes me generally sad.
2187}
2188
f67539c2
TL
// buffer::raw is not a standard layout type, so take the member's byte
// offset by hand: form the member's address relative to a fake object at
// address 1024 and subtract that base again.
#define BUF_OFFSETOF(type, field)                                        \
  (reinterpret_cast<std::uintptr_t>(                                     \
     &(reinterpret_cast<type*>(1024)->field)) - 1024u)
2192
11fdf7f2
TL
2193bool buffer::ptr_node::dispose_if_hypercombined(
2194 buffer::ptr_node* const delete_this)
2195{
f67539c2
TL
2196 // in case _raw is nullptr
2197 const std::uintptr_t bptr =
2198 (reinterpret_cast<std::uintptr_t>(delete_this->_raw) +
2199 BUF_OFFSETOF(buffer::raw, bptr_storage));
2200 const bool is_hypercombined =
2201 reinterpret_cast<std::uintptr_t>(delete_this) == bptr;
11fdf7f2
TL
2202 if (is_hypercombined) {
2203 ceph_assert_always("hypercombining is currently disabled" == nullptr);
2204 delete_this->~ptr_node();
f67539c2
TL
2205 return true;
2206 } else {
2207 return false;
11fdf7f2 2208 }
11fdf7f2
TL
2209}
2210
2211std::unique_ptr<buffer::ptr_node, buffer::ptr_node::disposer>
2212buffer::ptr_node::create_hypercombined(ceph::unique_leakable_ptr<buffer::raw> r)
2213{
2214 // FIXME: we don't currently hypercombine buffers due to crashes
2215 // observed in the rados suite. After fixing we'll use placement
2216 // new to create ptr_node on buffer::raw::bptr_storage.
2217 return std::unique_ptr<buffer::ptr_node, buffer::ptr_node::disposer>(
2218 new ptr_node(std::move(r)));
2219}
2220
11fdf7f2
TL
2221buffer::ptr_node* buffer::ptr_node::cloner::operator()(
2222 const buffer::ptr_node& clone_this)
2223{
9f95a23c 2224 return new ptr_node(clone_this);
11fdf7f2
TL
2225}
2226
7c673cae 2227std::ostream& buffer::operator<<(std::ostream& out, const buffer::raw &r) {
f67539c2
TL
2228 return out << "buffer::raw("
2229 << (void*)r.get_data() << " len " << r.get_len()
2230 << " nref " << r.nref.load() << ")";
7c673cae
FG
2231}
2232
2233std::ostream& buffer::operator<<(std::ostream& out, const buffer::ptr& bp) {
2234 if (bp.have_raw())
2235 out << "buffer::ptr(" << bp.offset() << "~" << bp.length()
2236 << " " << (void*)bp.c_str()
2237 << " in raw " << (void*)bp.raw_c_str()
2238 << " len " << bp.raw_length()
2239 << " nref " << bp.raw_nref() << ")";
2240 else
2241 out << "buffer:ptr(" << bp.offset() << "~" << bp.length() << " no raw)";
2242 return out;
2243}
2244
2245std::ostream& buffer::operator<<(std::ostream& out, const buffer::list& bl) {
f67539c2 2246 out << "buffer::list(len=" << bl.length() << ",\n";
7c673cae 2247
11fdf7f2
TL
2248 for (const auto& node : bl.buffers()) {
2249 out << "\t" << node;
2250 if (&node != &bl.buffers().back()) {
f67539c2 2251 out << ",\n";
11fdf7f2 2252 }
7c673cae 2253 }
f67539c2 2254 out << "\n)";
7c673cae
FG
2255 return out;
2256}
2257
7c673cae
FG
2258MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_malloc, buffer_raw_malloc,
2259 buffer_meta);
7c673cae
FG
2260MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_posix_aligned,
2261 buffer_raw_posix_aligned, buffer_meta);
7c673cae 2262MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_char, buffer_raw_char, buffer_meta);
31f18b77
FG
2263MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_claimed_char, buffer_raw_claimed_char,
2264 buffer_meta);
7c673cae
FG
2265MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_static, buffer_raw_static,
2266 buffer_meta);
2267
f67539c2
TL
2268
2269void ceph::buffer::list::page_aligned_appender::_refill(size_t len) {
2270 const size_t alloc = \
2271 std::max((size_t)min_alloc, (len + CEPH_PAGE_SIZE - 1) & CEPH_PAGE_MASK);
2272 auto new_back = \
2273 ptr_node::create(buffer::create_page_aligned(alloc));
2274 new_back->set_length(0); // unused, so far.
2275 bl.push_back(std::move(new_back));
2276}
2277
2278namespace ceph::buffer {
2279inline namespace v15_2_0 {
2280
2281#pragma GCC diagnostic push
2282#pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
2283#pragma clang diagnostic push
2284#pragma clang diagnostic ignored "-Wnon-virtual-dtor"
2285class buffer_error_category : public ceph::converting_category {
2286public:
2287 buffer_error_category(){}
2288 const char* name() const noexcept override;
2289 const char* message(int ev, char*, std::size_t) const noexcept override;
2290 std::string message(int ev) const override;
2291 boost::system::error_condition default_error_condition(int ev) const noexcept
2292 override;
2293 using ceph::converting_category::equivalent;
2294 bool equivalent(int ev, const boost::system::error_condition& c) const
2295 noexcept override;
2296 int from_code(int ev) const noexcept override;
2297};
2298#pragma GCC diagnostic pop
2299#pragma clang diagnostic pop
2300
2301const char* buffer_error_category::name() const noexcept {
2302 return "buffer";
2303}
2304
2305const char*
2306buffer_error_category::message(int ev, char*, std::size_t) const noexcept {
2307 using ceph::buffer::errc;
2308 if (ev == 0)
2309 return "No error";
2310
2311 switch (static_cast<errc>(ev)) {
2312 case errc::bad_alloc:
2313 return "Bad allocation";
2314
2315 case errc::end_of_buffer:
2316 return "End of buffer";
2317
2318 case errc::malformed_input:
2319 return "Malformed input";
2320 }
2321
2322 return "Unknown error";
2323}
2324
2325std::string buffer_error_category::message(int ev) const {
2326 return message(ev, nullptr, 0);
2327}
2328
2329boost::system::error_condition
2330buffer_error_category::default_error_condition(int ev)const noexcept {
2331 using ceph::buffer::errc;
2332 switch (static_cast<errc>(ev)) {
2333 case errc::bad_alloc:
2334 return boost::system::errc::not_enough_memory;
2335 case errc::end_of_buffer:
2336 case errc::malformed_input:
2337 return boost::system::errc::io_error;
2338 }
2339 return { ev, *this };
2340}
2341
2342bool buffer_error_category::equivalent(int ev, const boost::system::error_condition& c) const noexcept {
2343 return default_error_condition(ev) == c;
2344}
2345
2346int buffer_error_category::from_code(int ev) const noexcept {
2347 using ceph::buffer::errc;
2348 switch (static_cast<errc>(ev)) {
2349 case errc::bad_alloc:
2350 return -ENOMEM;
2351
2352 case errc::end_of_buffer:
2353 return -EIO;
2354
2355 case errc::malformed_input:
2356 return -EIO;
2357 }
2358 return -EDOM;
2359}
2360
2361const boost::system::error_category& buffer_category() noexcept {
2362 static const buffer_error_category c;
2363 return c;
2364}
2365}
2366}