]> git.proxmox.com Git - ceph.git/blame - ceph/src/common/buffer.cc
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / common / buffer.cc
CommitLineData
20effc67 1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
7c673cae
FG
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
20effc67 10 * License version 2.1, as published by the Free Software
7c673cae 11 * Foundation. See file COPYING.
20effc67 12 *
7c673cae
FG
13 */
14
31f18b77 15#include <atomic>
9f95a23c 16#include <cstring>
31f18b77
FG
17#include <errno.h>
18#include <limits.h>
19
20#include <sys/uio.h>
21
11fdf7f2
TL
22#include "include/ceph_assert.h"
23#include "include/types.h"
24#include "include/buffer_raw.h"
7c673cae
FG
25#include "include/compat.h"
26#include "include/mempool.h"
27#include "armor.h"
28#include "common/environment.h"
29#include "common/errno.h"
f67539c2 30#include "common/error_code.h"
7c673cae 31#include "common/safe_io.h"
7c673cae
FG
32#include "common/strtol.h"
33#include "common/likely.h"
34#include "common/valgrind.h"
35#include "common/deleter.h"
f67539c2 36#include "common/error_code.h"
20effc67 37#include "include/intarith.h"
11fdf7f2 38#include "include/spinlock.h"
7c673cae 39#include "include/scope_guard.h"
31f18b77 40
f67539c2
TL
41using std::cerr;
42using std::make_pair;
43using std::pair;
44using std::string;
45
31f18b77 46using namespace ceph;
7c673cae 47
11fdf7f2 48#define CEPH_BUFFER_ALLOC_UNIT 4096u
7c673cae
FG
49#define CEPH_BUFFER_APPEND_SIZE (CEPH_BUFFER_ALLOC_UNIT - sizeof(raw_combined))
50
20effc67
TL
51// 256K is the maximum "small" object size in tcmalloc above which allocations come from
52// the central heap. For now let's keep this below that threshold.
53#define CEPH_BUFFER_ALLOC_UNIT_MAX std::size_t { 256*1024 }
54
7c673cae 55#ifdef BUFFER_DEBUG
11fdf7f2
TL
56static ceph::spinlock debug_lock;
57# define bdout { std::lock_guard<ceph::spinlock> lg(debug_lock); std::cout
58# define bendl std::endl; }
7c673cae
FG
59#else
60# define bdout if (0) { std::cout
61# define bendl std::endl; }
62#endif
63
9f95a23c
TL
64 static ceph::atomic<unsigned> buffer_cached_crc { 0 };
65 static ceph::atomic<unsigned> buffer_cached_crc_adjusted { 0 };
66 static ceph::atomic<unsigned> buffer_missed_crc { 0 };
31f18b77 67
7c673cae
FG
68 static bool buffer_track_crc = get_env_bool("CEPH_BUFFER_TRACK");
69
70 void buffer::track_cached_crc(bool b) {
71 buffer_track_crc = b;
72 }
73 int buffer::get_cached_crc() {
31f18b77 74 return buffer_cached_crc;
7c673cae
FG
75 }
76 int buffer::get_cached_crc_adjusted() {
31f18b77 77 return buffer_cached_crc_adjusted;
7c673cae
FG
78 }
79
80 int buffer::get_missed_crc() {
31f18b77 81 return buffer_missed_crc;
7c673cae
FG
82 }
83
7c673cae
FG
84 /*
85 * raw_combined is always placed within a single allocation along
86 * with the data buffer. the data goes at the beginning, and
87 * raw_combined at the end.
88 */
89 class buffer::raw_combined : public buffer::raw {
7c673cae 90 public:
1e59de90
TL
91 raw_combined(char *dataptr, unsigned l, int mempool)
92 : raw(dataptr, l, mempool) {
7c673cae
FG
93 }
94
9f95a23c
TL
95 static ceph::unique_leakable_ptr<buffer::raw>
96 create(unsigned len,
97 unsigned align,
98 int mempool = mempool::mempool_buffer_anon)
99 {
b3b6e05e
TL
100 // posix_memalign() requires a multiple of sizeof(void *)
101 align = std::max<unsigned>(align, sizeof(void *));
11fdf7f2 102 size_t rawlen = round_up_to(sizeof(buffer::raw_combined),
7c673cae 103 alignof(buffer::raw_combined));
11fdf7f2 104 size_t datalen = round_up_to(len, alignof(buffer::raw_combined));
7c673cae 105
31f18b77
FG
106#ifdef DARWIN
107 char *ptr = (char *) valloc(rawlen + datalen);
108#else
109 char *ptr = 0;
110 int r = ::posix_memalign((void**)(void*)&ptr, align, rawlen + datalen);
111 if (r)
112 throw bad_alloc();
113#endif /* DARWIN */
7c673cae
FG
114 if (!ptr)
115 throw bad_alloc();
116
117 // actual data first, since it has presumably larger alignment restriction
118 // then put the raw_combined at the end
9f95a23c 119 return ceph::unique_leakable_ptr<buffer::raw>(
1e59de90 120 new (ptr + datalen) raw_combined(ptr, len, mempool));
7c673cae
FG
121 }
122
123 static void operator delete(void *ptr) {
124 raw_combined *raw = (raw_combined *)ptr;
f67539c2 125 aligned_free((void *)raw->data);
7c673cae
FG
126 }
127 };
128
129 class buffer::raw_malloc : public buffer::raw {
130 public:
131 MEMPOOL_CLASS_HELPERS();
132
133 explicit raw_malloc(unsigned l) : raw(l) {
134 if (len) {
135 data = (char *)malloc(len);
136 if (!data)
137 throw bad_alloc();
138 } else {
139 data = 0;
140 }
11fdf7f2 141 bdout << "raw_malloc " << this << " alloc " << (void *)data << " " << l << bendl;
7c673cae
FG
142 }
143 raw_malloc(unsigned l, char *b) : raw(b, l) {
11fdf7f2 144 bdout << "raw_malloc " << this << " alloc " << (void *)data << " " << l << bendl;
7c673cae
FG
145 }
146 ~raw_malloc() override {
147 free(data);
11fdf7f2 148 bdout << "raw_malloc " << this << " free " << (void *)data << " " << bendl;
7c673cae 149 }
7c673cae
FG
150 };
151
152#ifndef __CYGWIN__
7c673cae 153 class buffer::raw_posix_aligned : public buffer::raw {
7c673cae
FG
154 public:
155 MEMPOOL_CLASS_HELPERS();
156
1e59de90 157 raw_posix_aligned(unsigned l, unsigned align) : raw(l) {
b3b6e05e 158 // posix_memalign() requires a multiple of sizeof(void *)
1e59de90 159 align = std::max<unsigned>(align, sizeof(void *));
31f18b77
FG
160#ifdef DARWIN
161 data = (char *) valloc(len);
162#else
163 int r = ::posix_memalign((void**)(void*)&data, align, len);
164 if (r)
165 throw bad_alloc();
166#endif /* DARWIN */
7c673cae
FG
167 if (!data)
168 throw bad_alloc();
11fdf7f2
TL
169 bdout << "raw_posix_aligned " << this << " alloc " << (void *)data
170 << " l=" << l << ", align=" << align << bendl;
7c673cae
FG
171 }
172 ~raw_posix_aligned() override {
f67539c2 173 aligned_free(data);
11fdf7f2 174 bdout << "raw_posix_aligned " << this << " free " << (void *)data << bendl;
7c673cae 175 }
7c673cae
FG
176 };
177#endif
178
179#ifdef __CYGWIN__
180 class buffer::raw_hack_aligned : public buffer::raw {
7c673cae
FG
181 char *realdata;
182 public:
1e59de90 183 raw_hack_aligned(unsigned l, unsigned align) : raw(l) {
7c673cae 184 realdata = new char[len+align-1];
f67539c2 185 unsigned off = ((uintptr_t)realdata) & (align-1);
7c673cae
FG
186 if (off)
187 data = realdata + align - off;
188 else
189 data = realdata;
7c673cae
FG
190 //cout << "hack aligned " << (unsigned)data
191 //<< " in raw " << (unsigned)realdata
192 //<< " off " << off << std::endl;
f67539c2 193 ceph_assert(((uintptr_t)data & (align-1)) == 0);
7c673cae
FG
194 }
195 ~raw_hack_aligned() {
196 delete[] realdata;
7c673cae 197 }
7c673cae
FG
198 };
199#endif
200
7c673cae
FG
201 /*
202 * primitive buffer types
203 */
31f18b77
FG
204 class buffer::raw_claimed_char : public buffer::raw {
205 public:
206 MEMPOOL_CLASS_HELPERS();
207
208 explicit raw_claimed_char(unsigned l, char *b) : raw(b, l) {
31f18b77 209 bdout << "raw_claimed_char " << this << " alloc " << (void *)data
11fdf7f2 210 << " " << l << bendl;
31f18b77
FG
211 }
212 ~raw_claimed_char() override {
31f18b77 213 bdout << "raw_claimed_char " << this << " free " << (void *)data
11fdf7f2 214 << bendl;
31f18b77 215 }
31f18b77
FG
216 };
217
7c673cae
FG
218 class buffer::raw_static : public buffer::raw {
219 public:
220 MEMPOOL_CLASS_HELPERS();
221
222 raw_static(const char *d, unsigned l) : raw((char*)d, l) { }
223 ~raw_static() override {}
7c673cae
FG
224 };
225
226 class buffer::raw_claim_buffer : public buffer::raw {
227 deleter del;
228 public:
229 raw_claim_buffer(const char *b, unsigned l, deleter d)
230 : raw((char*)b, l), del(std::move(d)) { }
231 ~raw_claim_buffer() override {}
7c673cae
FG
232 };
233
11fdf7f2
TL
234 ceph::unique_leakable_ptr<buffer::raw> buffer::copy(const char *c, unsigned len) {
235 auto r = buffer::create_aligned(len, sizeof(size_t));
f67539c2 236 memcpy(r->get_data(), c, len);
7c673cae
FG
237 return r;
238 }
239
11fdf7f2 240 ceph::unique_leakable_ptr<buffer::raw> buffer::create(unsigned len) {
7c673cae
FG
241 return buffer::create_aligned(len, sizeof(size_t));
242 }
f67539c2
TL
243 ceph::unique_leakable_ptr<buffer::raw> buffer::create(unsigned len, char c) {
244 auto ret = buffer::create_aligned(len, sizeof(size_t));
245 memset(ret->get_data(), c, len);
246 return ret;
247 }
9f95a23c
TL
248 ceph::unique_leakable_ptr<buffer::raw>
249 buffer::create_in_mempool(unsigned len, int mempool) {
3efd9988
FG
250 return buffer::create_aligned_in_mempool(len, sizeof(size_t), mempool);
251 }
9f95a23c
TL
252 ceph::unique_leakable_ptr<buffer::raw>
253 buffer::claim_char(unsigned len, char *buf) {
254 return ceph::unique_leakable_ptr<buffer::raw>(
255 new raw_claimed_char(len, buf));
7c673cae 256 }
9f95a23c
TL
257 ceph::unique_leakable_ptr<buffer::raw> buffer::create_malloc(unsigned len) {
258 return ceph::unique_leakable_ptr<buffer::raw>(new raw_malloc(len));
7c673cae 259 }
9f95a23c
TL
260 ceph::unique_leakable_ptr<buffer::raw>
261 buffer::claim_malloc(unsigned len, char *buf) {
262 return ceph::unique_leakable_ptr<buffer::raw>(new raw_malloc(len, buf));
7c673cae 263 }
9f95a23c
TL
264 ceph::unique_leakable_ptr<buffer::raw>
265 buffer::create_static(unsigned len, char *buf) {
266 return ceph::unique_leakable_ptr<buffer::raw>(new raw_static(buf, len));
7c673cae 267 }
9f95a23c
TL
268 ceph::unique_leakable_ptr<buffer::raw>
269 buffer::claim_buffer(unsigned len, char *buf, deleter del) {
270 return ceph::unique_leakable_ptr<buffer::raw>(
271 new raw_claim_buffer(buf, len, std::move(del)));
7c673cae
FG
272 }
273
11fdf7f2 274 ceph::unique_leakable_ptr<buffer::raw> buffer::create_aligned_in_mempool(
9f95a23c
TL
275 unsigned len, unsigned align, int mempool)
276 {
7c673cae
FG
277 // If alignment is a page multiple, use a separate buffer::raw to
278 // avoid fragmenting the heap.
279 //
280 // Somewhat unexpectedly, I see consistently better performance
281 // from raw_combined than from raw even when the allocation size is
282 // a page multiple (but alignment is not).
283 //
284 // I also see better performance from a separate buffer::raw once the
285 // size passes 8KB.
286 if ((align & ~CEPH_PAGE_MASK) == 0 ||
287 len >= CEPH_PAGE_SIZE * 2) {
288#ifndef __CYGWIN__
11fdf7f2 289 return ceph::unique_leakable_ptr<buffer::raw>(new raw_posix_aligned(len, align));
7c673cae 290#else
11fdf7f2 291 return ceph::unique_leakable_ptr<buffer::raw>(new raw_hack_aligned(len, align));
7c673cae
FG
292#endif
293 }
9f95a23c 294 return raw_combined::create(len, align, mempool);
3efd9988 295 }
11fdf7f2 296 ceph::unique_leakable_ptr<buffer::raw> buffer::create_aligned(
3efd9988
FG
297 unsigned len, unsigned align) {
298 return create_aligned_in_mempool(len, align,
299 mempool::mempool_buffer_anon);
7c673cae
FG
300 }
301
11fdf7f2 302 ceph::unique_leakable_ptr<buffer::raw> buffer::create_page_aligned(unsigned len) {
7c673cae
FG
303 return create_aligned(len, CEPH_PAGE_SIZE);
304 }
11fdf7f2
TL
305 ceph::unique_leakable_ptr<buffer::raw> buffer::create_small_page_aligned(unsigned len) {
306 if (len < CEPH_PAGE_SIZE) {
307 return create_aligned(len, CEPH_BUFFER_ALLOC_UNIT);
9f95a23c 308 } else {
11fdf7f2 309 return create_aligned(len, CEPH_PAGE_SIZE);
9f95a23c 310 }
7c673cae
FG
311 }
312
11fdf7f2
TL
313 buffer::ptr::ptr(ceph::unique_leakable_ptr<raw> r)
314 : _raw(r.release()),
315 _off(0),
f67539c2 316 _len(_raw->get_len())
11fdf7f2
TL
317 {
318 _raw->nref.store(1, std::memory_order_release);
319 bdout << "ptr " << this << " get " << _raw << bendl;
320 }
7c673cae
FG
321 buffer::ptr::ptr(unsigned l) : _off(0), _len(l)
322 {
11fdf7f2
TL
323 _raw = buffer::create(l).release();
324 _raw->nref.store(1, std::memory_order_release);
7c673cae
FG
325 bdout << "ptr " << this << " get " << _raw << bendl;
326 }
327 buffer::ptr::ptr(const char *d, unsigned l) : _off(0), _len(l) // ditto.
328 {
11fdf7f2
TL
329 _raw = buffer::copy(d, l).release();
330 _raw->nref.store(1, std::memory_order_release);
7c673cae
FG
331 bdout << "ptr " << this << " get " << _raw << bendl;
332 }
333 buffer::ptr::ptr(const ptr& p) : _raw(p._raw), _off(p._off), _len(p._len)
334 {
335 if (_raw) {
31f18b77 336 _raw->nref++;
7c673cae
FG
337 bdout << "ptr " << this << " get " << _raw << bendl;
338 }
339 }
340 buffer::ptr::ptr(ptr&& p) noexcept : _raw(p._raw), _off(p._off), _len(p._len)
341 {
342 p._raw = nullptr;
343 p._off = p._len = 0;
344 }
345 buffer::ptr::ptr(const ptr& p, unsigned o, unsigned l)
346 : _raw(p._raw), _off(p._off + o), _len(l)
347 {
11fdf7f2
TL
348 ceph_assert(o+l <= p._len);
349 ceph_assert(_raw);
31f18b77 350 _raw->nref++;
7c673cae
FG
351 bdout << "ptr " << this << " get " << _raw << bendl;
352 }
11fdf7f2
TL
353 buffer::ptr::ptr(const ptr& p, ceph::unique_leakable_ptr<raw> r)
354 : _raw(r.release()),
355 _off(p._off),
356 _len(p._len)
357 {
358 _raw->nref.store(1, std::memory_order_release);
359 bdout << "ptr " << this << " get " << _raw << bendl;
360 }
7c673cae
FG
361 buffer::ptr& buffer::ptr::operator= (const ptr& p)
362 {
363 if (p._raw) {
31f18b77 364 p._raw->nref++;
7c673cae
FG
365 bdout << "ptr " << this << " get " << _raw << bendl;
366 }
367 buffer::raw *raw = p._raw;
368 release();
369 if (raw) {
370 _raw = raw;
371 _off = p._off;
372 _len = p._len;
373 } else {
374 _off = _len = 0;
375 }
376 return *this;
377 }
378 buffer::ptr& buffer::ptr::operator= (ptr&& p) noexcept
379 {
380 release();
381 buffer::raw *raw = p._raw;
382 if (raw) {
383 _raw = raw;
384 _off = p._off;
385 _len = p._len;
386 p._raw = nullptr;
387 p._off = p._len = 0;
388 } else {
389 _off = _len = 0;
390 }
391 return *this;
392 }
393
11fdf7f2 394 void buffer::ptr::swap(ptr& other) noexcept
7c673cae
FG
395 {
396 raw *r = _raw;
397 unsigned o = _off;
398 unsigned l = _len;
399 _raw = other._raw;
400 _off = other._off;
401 _len = other._len;
402 other._raw = r;
403 other._off = o;
404 other._len = l;
405 }
406
407 void buffer::ptr::release()
408 {
f67539c2
TL
409 // BE CAREFUL: this is called also for hypercombined ptr_node. After
410 // freeing underlying raw, `*this` can become inaccessible as well!
411 //
412 // cache the pointer to avoid unncecessary reloads and repeated
413 // checks.
414 if (auto* const cached_raw = std::exchange(_raw, nullptr);
415 cached_raw) {
416 bdout << "ptr " << this << " release " << cached_raw << bendl;
417 // optimize the common case where a particular `buffer::raw` has
418 // only a single reference. Altogether with initializing `nref` of
419 // freshly fabricated one with `1` through the std::atomic's ctor
420 // (which doesn't impose a memory barrier on the strongly-ordered
421 // x86), this allows to avoid all atomical operations in such case.
422 const bool last_one = \
423 (1 == cached_raw->nref.load(std::memory_order_acquire));
424 if (likely(last_one) || --cached_raw->nref == 0) {
425 bdout << "deleting raw " << static_cast<void*>(cached_raw)
426 << " len " << cached_raw->get_len() << bendl;
427 ANNOTATE_HAPPENS_AFTER(&cached_raw->nref);
428 ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&cached_raw->nref);
429 delete cached_raw; // dealloc old (if any)
7c673cae 430 } else {
f67539c2 431 ANNOTATE_HAPPENS_BEFORE(&cached_raw->nref);
7c673cae 432 }
7c673cae
FG
433 }
434 }
435
3efd9988
FG
436 int buffer::ptr::get_mempool() const {
437 if (_raw) {
438 return _raw->mempool;
439 }
440 return mempool::mempool_buffer_anon;
441 }
442
443 void buffer::ptr::reassign_to_mempool(int pool) {
444 if (_raw) {
445 _raw->reassign_to_mempool(pool);
446 }
447 }
448 void buffer::ptr::try_assign_to_mempool(int pool) {
449 if (_raw) {
450 _raw->try_assign_to_mempool(pool);
451 }
452 }
453
7c673cae 454 const char *buffer::ptr::c_str() const {
11fdf7f2 455 ceph_assert(_raw);
7c673cae
FG
456 return _raw->get_data() + _off;
457 }
458 char *buffer::ptr::c_str() {
11fdf7f2 459 ceph_assert(_raw);
7c673cae
FG
460 return _raw->get_data() + _off;
461 }
462 const char *buffer::ptr::end_c_str() const {
11fdf7f2 463 ceph_assert(_raw);
7c673cae
FG
464 return _raw->get_data() + _off + _len;
465 }
466 char *buffer::ptr::end_c_str() {
11fdf7f2 467 ceph_assert(_raw);
7c673cae
FG
468 return _raw->get_data() + _off + _len;
469 }
470
471 unsigned buffer::ptr::unused_tail_length() const
472 {
f67539c2 473 return _raw ? _raw->get_len() - (_off + _len) : 0;
7c673cae
FG
474 }
475 const char& buffer::ptr::operator[](unsigned n) const
476 {
11fdf7f2
TL
477 ceph_assert(_raw);
478 ceph_assert(n < _len);
7c673cae
FG
479 return _raw->get_data()[_off + n];
480 }
481 char& buffer::ptr::operator[](unsigned n)
482 {
11fdf7f2
TL
483 ceph_assert(_raw);
484 ceph_assert(n < _len);
7c673cae
FG
485 return _raw->get_data()[_off + n];
486 }
487
f67539c2
TL
488 const char *buffer::ptr::raw_c_str() const { ceph_assert(_raw); return _raw->get_data(); }
489 unsigned buffer::ptr::raw_length() const { ceph_assert(_raw); return _raw->get_len(); }
11fdf7f2 490 int buffer::ptr::raw_nref() const { ceph_assert(_raw); return _raw->nref; }
7c673cae
FG
491
492 void buffer::ptr::copy_out(unsigned o, unsigned l, char *dest) const {
11fdf7f2 493 ceph_assert(_raw);
7c673cae
FG
494 if (o+l > _len)
495 throw end_of_buffer();
f67539c2 496 char* src = _raw->get_data() + _off + o;
7c673cae
FG
497 maybe_inline_memcpy(dest, src, l, 8);
498 }
499
31f18b77 500 unsigned buffer::ptr::wasted() const
7c673cae 501 {
f67539c2 502 return _raw->get_len() - _len;
7c673cae
FG
503 }
504
505 int buffer::ptr::cmp(const ptr& o) const
506 {
507 int l = _len < o._len ? _len : o._len;
508 if (l) {
509 int r = memcmp(c_str(), o.c_str(), l);
510 if (r)
511 return r;
512 }
513 if (_len < o._len)
514 return -1;
515 if (_len > o._len)
516 return 1;
517 return 0;
518 }
519
520 bool buffer::ptr::is_zero() const
521 {
522 return mem_is_zero(c_str(), _len);
523 }
524
525 unsigned buffer::ptr::append(char c)
526 {
11fdf7f2
TL
527 ceph_assert(_raw);
528 ceph_assert(1 <= unused_tail_length());
f67539c2 529 char* ptr = _raw->get_data() + _off + _len;
7c673cae
FG
530 *ptr = c;
531 _len++;
532 return _len + _off;
533 }
534
535 unsigned buffer::ptr::append(const char *p, unsigned l)
536 {
11fdf7f2
TL
537 ceph_assert(_raw);
538 ceph_assert(l <= unused_tail_length());
f67539c2 539 char* c = _raw->get_data() + _off + _len;
7c673cae
FG
540 maybe_inline_memcpy(c, p, l, 32);
541 _len += l;
542 return _len + _off;
543 }
544
11fdf7f2 545 unsigned buffer::ptr::append_zeros(unsigned l)
7c673cae 546 {
11fdf7f2
TL
547 ceph_assert(_raw);
548 ceph_assert(l <= unused_tail_length());
f67539c2 549 char* c = _raw->get_data() + _off + _len;
92f5a8d4 550 // FIPS zeroization audit 20191115: this memset is not security related.
11fdf7f2
TL
551 memset(c, 0, l);
552 _len += l;
553 return _len + _off;
7c673cae
FG
554 }
555
556 void buffer::ptr::copy_in(unsigned o, unsigned l, const char *src, bool crc_reset)
557 {
11fdf7f2
TL
558 ceph_assert(_raw);
559 ceph_assert(o <= _len);
560 ceph_assert(o+l <= _len);
f67539c2 561 char* dest = _raw->get_data() + _off + o;
7c673cae
FG
562 if (crc_reset)
563 _raw->invalidate_crc();
564 maybe_inline_memcpy(dest, src, l, 64);
565 }
566
7c673cae
FG
567 void buffer::ptr::zero(bool crc_reset)
568 {
569 if (crc_reset)
570 _raw->invalidate_crc();
92f5a8d4 571 // FIPS zeroization audit 20191115: this memset is not security related.
7c673cae
FG
572 memset(c_str(), 0, _len);
573 }
574
7c673cae
FG
575 void buffer::ptr::zero(unsigned o, unsigned l, bool crc_reset)
576 {
11fdf7f2 577 ceph_assert(o+l <= _len);
7c673cae
FG
578 if (crc_reset)
579 _raw->invalidate_crc();
92f5a8d4 580 // FIPS zeroization audit 20191115: this memset is not security related.
7c673cae
FG
581 memset(c_str()+o, 0, l);
582 }
7c673cae 583
f67539c2
TL
584 template<bool B>
585 buffer::ptr::iterator_impl<B>& buffer::ptr::iterator_impl<B>::operator +=(size_t len) {
586 pos += len;
587 if (pos > end_ptr)
588 throw end_of_buffer();
589 return *this;
590 }
591
592 template buffer::ptr::iterator_impl<false>&
593 buffer::ptr::iterator_impl<false>::operator +=(size_t len);
594 template buffer::ptr::iterator_impl<true>&
595 buffer::ptr::iterator_impl<true>::operator +=(size_t len);
596
7c673cae
FG
597 // -- buffer::list::iterator --
598 /*
599 buffer::list::iterator operator=(const buffer::list::iterator& other)
600 {
601 if (this != &other) {
602 bl = other.bl;
603 ls = other.ls;
604 off = other.off;
605 p = other.p;
606 p_off = other.p_off;
607 }
608 return *this;
609 }*/
610
611 template<bool is_const>
612 buffer::list::iterator_impl<is_const>::iterator_impl(bl_t *l, unsigned o)
11fdf7f2 613 : bl(l), ls(&bl->_buffers), p(ls->begin()), off(0), p_off(0)
7c673cae 614 {
9f95a23c 615 *this += o;
7c673cae
FG
616 }
617
618 template<bool is_const>
619 buffer::list::iterator_impl<is_const>::iterator_impl(const buffer::list::iterator& i)
620 : iterator_impl<is_const>(i.bl, i.off, i.p, i.p_off) {}
621
622 template<bool is_const>
9f95a23c
TL
623 auto buffer::list::iterator_impl<is_const>::operator +=(unsigned o)
624 -> iterator_impl&
11fdf7f2
TL
625 {
626 //cout << this << " advance " << o << " from " << off
627 // << " (p_off " << p_off << " in " << p->length() << ")"
628 // << std::endl;
629
630 p_off +=o;
631 while (p != ls->end()) {
632 if (p_off >= p->length()) {
633 // skip this buffer
634 p_off -= p->length();
635 p++;
7c673cae 636 } else {
11fdf7f2
TL
637 // somewhere in this buffer!
638 break;
7c673cae
FG
639 }
640 }
11fdf7f2
TL
641 if (p == ls->end() && p_off) {
642 throw end_of_buffer();
643 }
644 off += o;
9f95a23c 645 return *this;
7c673cae
FG
646 }
647
648 template<bool is_const>
649 void buffer::list::iterator_impl<is_const>::seek(unsigned o)
650 {
651 p = ls->begin();
652 off = p_off = 0;
9f95a23c 653 *this += o;
7c673cae
FG
654 }
655
656 template<bool is_const>
657 char buffer::list::iterator_impl<is_const>::operator*() const
658 {
659 if (p == ls->end())
660 throw end_of_buffer();
661 return (*p)[p_off];
662 }
663
664 template<bool is_const>
665 buffer::list::iterator_impl<is_const>&
666 buffer::list::iterator_impl<is_const>::operator++()
667 {
668 if (p == ls->end())
669 throw end_of_buffer();
9f95a23c 670 *this += 1;
7c673cae
FG
671 return *this;
672 }
673
674 template<bool is_const>
675 buffer::ptr buffer::list::iterator_impl<is_const>::get_current_ptr() const
676 {
677 if (p == ls->end())
678 throw end_of_buffer();
679 return ptr(*p, p_off, p->length() - p_off);
680 }
681
11fdf7f2
TL
682 template<bool is_const>
683 bool buffer::list::iterator_impl<is_const>::is_pointing_same_raw(
684 const ptr& other) const
685 {
686 if (p == ls->end())
687 throw end_of_buffer();
9f95a23c 688 return p->_raw == other._raw;
11fdf7f2
TL
689 }
690
7c673cae
FG
691 // copy data out.
692 // note that these all _append_ to dest!
693 template<bool is_const>
694 void buffer::list::iterator_impl<is_const>::copy(unsigned len, char *dest)
695 {
696 if (p == ls->end()) seek(off);
697 while (len > 0) {
698 if (p == ls->end())
699 throw end_of_buffer();
7c673cae
FG
700
701 unsigned howmuch = p->length() - p_off;
702 if (len < howmuch) howmuch = len;
703 p->copy_out(p_off, howmuch, dest);
704 dest += howmuch;
705
706 len -= howmuch;
9f95a23c 707 *this += howmuch;
7c673cae
FG
708 }
709 }
710
711 template<bool is_const>
712 void buffer::list::iterator_impl<is_const>::copy(unsigned len, ptr &dest)
713 {
714 copy_deep(len, dest);
715 }
716
717 template<bool is_const>
718 void buffer::list::iterator_impl<is_const>::copy_deep(unsigned len, ptr &dest)
719 {
720 if (!len) {
721 return;
722 }
723 if (p == ls->end())
724 throw end_of_buffer();
7c673cae
FG
725 dest = create(len);
726 copy(len, dest.c_str());
727 }
728 template<bool is_const>
729 void buffer::list::iterator_impl<is_const>::copy_shallow(unsigned len,
730 ptr &dest)
731 {
732 if (!len) {
733 return;
734 }
735 if (p == ls->end())
736 throw end_of_buffer();
7c673cae
FG
737 unsigned howmuch = p->length() - p_off;
738 if (howmuch < len) {
739 dest = create(len);
740 copy(len, dest.c_str());
741 } else {
742 dest = ptr(*p, p_off, len);
9f95a23c 743 *this += len;
7c673cae
FG
744 }
745 }
746
747 template<bool is_const>
748 void buffer::list::iterator_impl<is_const>::copy(unsigned len, list &dest)
749 {
750 if (p == ls->end())
751 seek(off);
752 while (len > 0) {
753 if (p == ls->end())
754 throw end_of_buffer();
755
756 unsigned howmuch = p->length() - p_off;
757 if (len < howmuch)
758 howmuch = len;
759 dest.append(*p, p_off, howmuch);
760
761 len -= howmuch;
9f95a23c 762 *this += howmuch;
7c673cae
FG
763 }
764 }
765
766 template<bool is_const>
767 void buffer::list::iterator_impl<is_const>::copy(unsigned len, std::string &dest)
768 {
769 if (p == ls->end())
770 seek(off);
771 while (len > 0) {
772 if (p == ls->end())
773 throw end_of_buffer();
774
775 unsigned howmuch = p->length() - p_off;
776 const char *c_str = p->c_str();
777 if (len < howmuch)
778 howmuch = len;
779 dest.append(c_str + p_off, howmuch);
780
781 len -= howmuch;
9f95a23c 782 *this += howmuch;
7c673cae
FG
783 }
784 }
785
786 template<bool is_const>
787 void buffer::list::iterator_impl<is_const>::copy_all(list &dest)
788 {
789 if (p == ls->end())
790 seek(off);
791 while (1) {
792 if (p == ls->end())
793 return;
7c673cae
FG
794
795 unsigned howmuch = p->length() - p_off;
796 const char *c_str = p->c_str();
797 dest.append(c_str + p_off, howmuch);
798
9f95a23c 799 *this += howmuch;
7c673cae
FG
800 }
801 }
802
803 template<bool is_const>
804 size_t buffer::list::iterator_impl<is_const>::get_ptr_and_advance(
805 size_t want, const char **data)
806 {
807 if (p == ls->end()) {
808 seek(off);
809 if (p == ls->end()) {
810 return 0;
811 }
812 }
813 *data = p->c_str() + p_off;
11fdf7f2 814 size_t l = std::min<size_t>(p->length() - p_off, want);
7c673cae
FG
815 p_off += l;
816 if (p_off == p->length()) {
817 ++p;
818 p_off = 0;
819 }
820 off += l;
821 return l;
822 }
823
824 template<bool is_const>
825 uint32_t buffer::list::iterator_impl<is_const>::crc32c(
826 size_t length, uint32_t crc)
827 {
11fdf7f2 828 length = std::min<size_t>(length, get_remaining());
7c673cae
FG
829 while (length > 0) {
830 const char *p;
831 size_t l = get_ptr_and_advance(length, &p);
832 crc = ceph_crc32c(crc, (unsigned char*)p, l);
833 length -= l;
834 }
835 return crc;
836 }
837
838 // explicitly instantiate only the iterator types we need, so we can hide the
839 // details in this compilation unit without introducing unnecessary link time
840 // dependencies.
841 template class buffer::list::iterator_impl<true>;
842 template class buffer::list::iterator_impl<false>;
843
844 buffer::list::iterator::iterator(bl_t *l, unsigned o)
845 : iterator_impl(l, o)
846 {}
847
848 buffer::list::iterator::iterator(bl_t *l, unsigned o, list_iter_t ip, unsigned po)
849 : iterator_impl(l, o, ip, po)
850 {}
851
7c673cae
FG
852 // copy data in
853 void buffer::list::iterator::copy_in(unsigned len, const char *src, bool crc_reset)
854 {
855 // copy
856 if (p == ls->end())
857 seek(off);
858 while (len > 0) {
859 if (p == ls->end())
860 throw end_of_buffer();
861
862 unsigned howmuch = p->length() - p_off;
863 if (len < howmuch)
864 howmuch = len;
865 p->copy_in(p_off, howmuch, src, crc_reset);
866
867 src += howmuch;
868 len -= howmuch;
9f95a23c 869 *this += howmuch;
7c673cae
FG
870 }
871 }
872
873 void buffer::list::iterator::copy_in(unsigned len, const list& otherl)
874 {
875 if (p == ls->end())
876 seek(off);
877 unsigned left = len;
11fdf7f2
TL
878 for (const auto& node : otherl._buffers) {
879 unsigned l = node.length();
7c673cae
FG
880 if (left < l)
881 l = left;
11fdf7f2 882 copy_in(l, node.c_str());
7c673cae
FG
883 left -= l;
884 if (left == 0)
885 break;
886 }
887 }
888
889 // -- buffer::list --
890
11fdf7f2 891 void buffer::list::swap(list& other) noexcept
7c673cae
FG
892 {
893 std::swap(_len, other._len);
9f95a23c 894 std::swap(_num, other._num);
11fdf7f2 895 std::swap(_carriage, other._carriage);
7c673cae 896 _buffers.swap(other._buffers);
7c673cae
FG
897 }
898
7c673cae
FG
899 bool buffer::list::contents_equal(const ceph::buffer::list& other) const
900 {
901 if (length() != other.length())
902 return false;
903
904 // buffer-wise comparison
905 if (true) {
11fdf7f2
TL
906 auto a = std::cbegin(_buffers);
907 auto b = std::cbegin(other._buffers);
7c673cae 908 unsigned aoff = 0, boff = 0;
11fdf7f2 909 while (a != std::cend(_buffers)) {
7c673cae
FG
910 unsigned len = a->length() - aoff;
911 if (len > b->length() - boff)
912 len = b->length() - boff;
913 if (memcmp(a->c_str() + aoff, b->c_str() + boff, len) != 0)
914 return false;
915 aoff += len;
916 if (aoff == a->length()) {
917 aoff = 0;
918 ++a;
919 }
920 boff += len;
921 if (boff == b->length()) {
922 boff = 0;
923 ++b;
924 }
925 }
7c673cae
FG
926 return true;
927 }
928
929 // byte-wise comparison
930 if (false) {
931 bufferlist::const_iterator me = begin();
932 bufferlist::const_iterator him = other.begin();
933 while (!me.end()) {
934 if (*me != *him)
935 return false;
936 ++me;
937 ++him;
938 }
939 return true;
940 }
941 }
942
9f95a23c
TL
943 bool buffer::list::contents_equal(const void* const other,
944 size_t length) const
945 {
946 if (this->length() != length) {
947 return false;
948 }
949
950 const auto* other_buf = reinterpret_cast<const char*>(other);
951 for (const auto& bp : buffers()) {
f67539c2
TL
952 assert(bp.length() <= length);
953 if (std::memcmp(bp.c_str(), other_buf, bp.length()) != 0) {
9f95a23c
TL
954 return false;
955 } else {
f67539c2
TL
956 length -= bp.length();
957 other_buf += bp.length();
9f95a23c
TL
958 }
959 }
960
961 return true;
962 }
963
11fdf7f2 964 bool buffer::list::is_provided_buffer(const char* const dst) const
7c673cae 965 {
11fdf7f2 966 if (_buffers.empty()) {
7c673cae 967 return false;
11fdf7f2 968 }
7c673cae
FG
969 return (is_contiguous() && (_buffers.front().c_str() == dst));
970 }
971
11fdf7f2 972 bool buffer::list::is_aligned(const unsigned align) const
7c673cae 973 {
11fdf7f2
TL
974 for (const auto& node : _buffers) {
975 if (!node.is_aligned(align)) {
7c673cae 976 return false;
11fdf7f2
TL
977 }
978 }
7c673cae
FG
979 return true;
980 }
981
11fdf7f2 982 bool buffer::list::is_n_align_sized(const unsigned align) const
7c673cae 983 {
11fdf7f2
TL
984 for (const auto& node : _buffers) {
985 if (!node.is_n_align_sized(align)) {
7c673cae 986 return false;
11fdf7f2
TL
987 }
988 }
7c673cae
FG
989 return true;
990 }
991
11fdf7f2
TL
992 bool buffer::list::is_aligned_size_and_memory(
993 const unsigned align_size,
994 const unsigned align_memory) const
7c673cae 995 {
11fdf7f2
TL
996 for (const auto& node : _buffers) {
997 if (!node.is_aligned(align_memory) || !node.is_n_align_sized(align_size)) {
7c673cae 998 return false;
11fdf7f2 999 }
7c673cae
FG
1000 }
1001 return true;
1002 }
1003
1004 bool buffer::list::is_zero() const {
11fdf7f2
TL
1005 for (const auto& node : _buffers) {
1006 if (!node.is_zero()) {
7c673cae
FG
1007 return false;
1008 }
1009 }
1010 return true;
1011 }
1012
1013 void buffer::list::zero()
1014 {
11fdf7f2
TL
1015 for (auto& node : _buffers) {
1016 node.zero();
1017 }
7c673cae
FG
1018 }
1019
11fdf7f2 1020 void buffer::list::zero(const unsigned o, const unsigned l)
7c673cae 1021 {
11fdf7f2 1022 ceph_assert(o+l <= _len);
7c673cae 1023 unsigned p = 0;
11fdf7f2
TL
1024 for (auto& node : _buffers) {
1025 if (p + node.length() > o) {
1026 if (p >= o && p+node.length() <= o+l) {
7c673cae 1027 // 'o'------------- l -----------|
11fdf7f2
TL
1028 // 'p'-- node.length() --|
1029 node.zero();
7c673cae
FG
1030 } else if (p >= o) {
1031 // 'o'------------- l -----------|
11fdf7f2
TL
1032 // 'p'------- node.length() -------|
1033 node.zero(0, o+l-p);
1034 } else if (p + node.length() <= o+l) {
7c673cae 1035 // 'o'------------- l -----------|
11fdf7f2
TL
1036 // 'p'------- node.length() -------|
1037 node.zero(o-p, node.length()-(o-p));
7c673cae
FG
1038 } else {
1039 // 'o'----------- l -----------|
11fdf7f2
TL
1040 // 'p'---------- node.length() ----------|
1041 node.zero(o-p, l);
7c673cae
FG
1042 }
1043 }
11fdf7f2
TL
1044 p += node.length();
1045 if (o+l <= p) {
7c673cae 1046 break; // done
11fdf7f2 1047 }
7c673cae
FG
1048 }
1049 }
1050
1051 bool buffer::list::is_contiguous() const
1052 {
9f95a23c 1053 return _num <= 1;
7c673cae
FG
1054 }
1055
1056 bool buffer::list::is_n_page_sized() const
1057 {
1058 return is_n_align_sized(CEPH_PAGE_SIZE);
1059 }
1060
1061 bool buffer::list::is_page_aligned() const
1062 {
1063 return is_aligned(CEPH_PAGE_SIZE);
1064 }
1065
3efd9988
FG
1066 int buffer::list::get_mempool() const
1067 {
1068 if (_buffers.empty()) {
1069 return mempool::mempool_buffer_anon;
1070 }
1071 return _buffers.back().get_mempool();
1072 }
1073
31f18b77
FG
1074 void buffer::list::reassign_to_mempool(int pool)
1075 {
31f18b77 1076 for (auto& p : _buffers) {
9f95a23c 1077 p._raw->reassign_to_mempool(pool);
31f18b77
FG
1078 }
1079 }
1080
1081 void buffer::list::try_assign_to_mempool(int pool)
1082 {
31f18b77 1083 for (auto& p : _buffers) {
9f95a23c 1084 p._raw->try_assign_to_mempool(pool);
31f18b77
FG
1085 }
1086 }
1087
f64942e4
AA
1088 uint64_t buffer::list::get_wasted_space() const
1089 {
9f95a23c 1090 if (_num == 1)
f64942e4
AA
1091 return _buffers.back().wasted();
1092
1093 std::vector<const raw*> raw_vec;
9f95a23c 1094 raw_vec.reserve(_num);
f64942e4 1095 for (const auto& p : _buffers)
9f95a23c 1096 raw_vec.push_back(p._raw);
f64942e4
AA
1097 std::sort(raw_vec.begin(), raw_vec.end());
1098
1099 uint64_t total = 0;
1100 const raw *last = nullptr;
1101 for (const auto r : raw_vec) {
1102 if (r == last)
1103 continue;
1104 last = r;
f67539c2 1105 total += r->get_len();
f64942e4
AA
1106 }
1107 // If multiple buffers are sharing the same raw buffer and they overlap
1108 // with each other, the wasted space will be underestimated.
1109 if (total <= length())
1110 return 0;
1111 return total - length();
1112 }
1113
7c673cae
FG
1114 void buffer::list::rebuild()
1115 {
1116 if (_len == 0) {
11fdf7f2
TL
1117 _carriage = &always_empty_bptr;
1118 _buffers.clear_and_dispose();
9f95a23c 1119 _num = 0;
7c673cae
FG
1120 return;
1121 }
7c673cae 1122 if ((_len & ~CEPH_PAGE_MASK) == 0)
11fdf7f2 1123 rebuild(ptr_node::create(buffer::create_page_aligned(_len)));
7c673cae 1124 else
11fdf7f2 1125 rebuild(ptr_node::create(buffer::create(_len)));
7c673cae
FG
1126 }
1127
11fdf7f2
TL
1128 void buffer::list::rebuild(
1129 std::unique_ptr<buffer::ptr_node, buffer::ptr_node::disposer> nb)
7c673cae
FG
1130 {
1131 unsigned pos = 0;
adb31ebb
TL
1132 int mempool = _buffers.front().get_mempool();
1133 nb->reassign_to_mempool(mempool);
11fdf7f2
TL
1134 for (auto& node : _buffers) {
1135 nb->copy_in(pos, node.length(), node.c_str(), false);
1136 pos += node.length();
7c673cae 1137 }
11fdf7f2
TL
1138 _buffers.clear_and_dispose();
1139 if (likely(nb->length())) {
1140 _carriage = nb.get();
1141 _buffers.push_back(*nb.release());
9f95a23c
TL
1142 _num = 1;
1143 } else {
1144 _carriage = &always_empty_bptr;
1145 _num = 0;
11fdf7f2 1146 }
7c673cae 1147 invalidate_crc();
7c673cae
FG
1148 }
1149
1150 bool buffer::list::rebuild_aligned(unsigned align)
1151 {
1152 return rebuild_aligned_size_and_memory(align, align);
1153 }
1154
1155 bool buffer::list::rebuild_aligned_size_and_memory(unsigned align_size,
b32b8144
FG
1156 unsigned align_memory,
1157 unsigned max_buffers)
7c673cae 1158 {
9f95a23c 1159 bool had_to_rebuild = false;
b32b8144 1160
9f95a23c 1161 if (max_buffers && _num > max_buffers && _len > (max_buffers * align_size)) {
11fdf7f2 1162 align_size = round_up_to(round_up_to(_len, max_buffers) / max_buffers, align_size);
b32b8144 1163 }
11fdf7f2
TL
1164 auto p = std::begin(_buffers);
1165 auto p_prev = _buffers.before_begin();
1166 while (p != std::end(_buffers)) {
7c673cae
FG
1167 // keep anything that's already align and sized aligned
1168 if (p->is_aligned(align_memory) && p->is_n_align_sized(align_size)) {
1169 /*cout << " segment " << (void*)p->c_str()
1170 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1171 << " length " << p->length()
1172 << " " << (p->length() & (align - 1)) << " ok" << std::endl;
1173 */
11fdf7f2 1174 p_prev = p++;
7c673cae
FG
1175 continue;
1176 }
1177
1178 // consolidate unaligned items, until we get something that is sized+aligned
1179 list unaligned;
1180 unsigned offset = 0;
1181 do {
1182 /*cout << " segment " << (void*)p->c_str()
1183 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1184 << " length " << p->length() << " " << (p->length() & (align - 1))
1185 << " overall offset " << offset << " " << (offset & (align - 1))
1186 << " not ok" << std::endl;
1187 */
1188 offset += p->length();
11fdf7f2
TL
1189 // no need to reallocate, relinking is enough thankfully to bi::list.
1190 auto p_after = _buffers.erase_after(p_prev);
9f95a23c 1191 _num -= 1;
11fdf7f2
TL
1192 unaligned._buffers.push_back(*p);
1193 unaligned._len += p->length();
9f95a23c 1194 unaligned._num += 1;
11fdf7f2
TL
1195 p = p_after;
1196 } while (p != std::end(_buffers) &&
7c673cae
FG
1197 (!p->is_aligned(align_memory) ||
1198 !p->is_n_align_sized(align_size) ||
1199 (offset % align_size)));
1200 if (!(unaligned.is_contiguous() && unaligned._buffers.front().is_aligned(align_memory))) {
11fdf7f2
TL
1201 unaligned.rebuild(
1202 ptr_node::create(
1203 buffer::create_aligned(unaligned._len, align_memory)));
9f95a23c 1204 had_to_rebuild = true;
7c673cae 1205 }
522d829b
TL
1206 if (unaligned.get_num_buffers()) {
1207 _buffers.insert_after(p_prev, *ptr_node::create(unaligned._buffers.front()).release());
1208 _num += 1;
1209 } else {
1210 // a bufferlist containing only 0-length bptrs is rebuilt as empty
1211 }
11fdf7f2 1212 ++p_prev;
7c673cae 1213 }
9f95a23c 1214 return had_to_rebuild;
7c673cae
FG
1215 }
1216
1217 bool buffer::list::rebuild_page_aligned()
1218 {
1219 return rebuild_aligned(CEPH_PAGE_SIZE);
1220 }
1221
31f18b77
FG
1222 void buffer::list::reserve(size_t prealloc)
1223 {
11fdf7f2 1224 if (get_append_buffer_unused_tail_length() < prealloc) {
f67539c2 1225 auto ptr = ptr_node::create(buffer::create_small_page_aligned(prealloc));
11fdf7f2
TL
1226 ptr->set_length(0); // unused, so far.
1227 _carriage = ptr.get();
1228 _buffers.push_back(*ptr.release());
9f95a23c 1229 _num += 1;
31f18b77
FG
1230 }
1231 }
1232
9f95a23c 1233 void buffer::list::claim_append(list& bl)
7c673cae 1234 {
1e59de90
TL
1235 // check overflow
1236 assert(_len + bl._len >= _len);
7c673cae
FG
1237 // steal the other guy's buffers
1238 _len += bl._len;
9f95a23c 1239 _num += bl._num;
11fdf7f2 1240 _buffers.splice_back(bl._buffers);
31f18b77
FG
1241 bl.clear();
1242 }
1243
7c673cae
FG
1244 void buffer::list::append(char c)
1245 {
1246 // put what we can into the existing append_buffer.
11fdf7f2 1247 unsigned gap = get_append_buffer_unused_tail_length();
7c673cae 1248 if (!gap) {
11fdf7f2
TL
1249 // make a new buffer!
1250 auto buf = ptr_node::create(
1251 raw_combined::create(CEPH_BUFFER_APPEND_SIZE, 0, get_mempool()));
1252 buf->set_length(0); // unused, so far.
1253 _carriage = buf.get();
1254 _buffers.push_back(*buf.release());
9f95a23c 1255 _num += 1;
11fdf7f2
TL
1256 } else if (unlikely(_carriage != &_buffers.back())) {
1257 auto bptr = ptr_node::create(*_carriage, _carriage->length(), 0);
1258 _carriage = bptr.get();
1259 _buffers.push_back(*bptr.release());
9f95a23c 1260 _num += 1;
11fdf7f2
TL
1261 }
1262 _carriage->append(c);
1263 _len++;
1264 }
1265
f67539c2 1266 buffer::ptr_node buffer::list::always_empty_bptr;
11fdf7f2
TL
1267
1268 buffer::ptr_node& buffer::list::refill_append_space(const unsigned len)
1269 {
1270 // make a new buffer. fill out a complete page, factoring in the
1271 // raw_combined overhead.
1272 size_t need = round_up_to(len, sizeof(size_t)) + sizeof(raw_combined);
20effc67
TL
1273 size_t alen = round_up_to(need, CEPH_BUFFER_ALLOC_UNIT);
1274 if (_carriage == &_buffers.back()) {
1275 size_t nlen = round_up_to(_carriage->raw_length(), CEPH_BUFFER_ALLOC_UNIT) * 2;
1276 nlen = std::min(nlen, CEPH_BUFFER_ALLOC_UNIT_MAX);
1277 alen = std::max(alen, nlen);
1278 }
1279 alen -= sizeof(raw_combined);
1280
11fdf7f2
TL
1281 auto new_back = \
1282 ptr_node::create(raw_combined::create(alen, 0, get_mempool()));
1283 new_back->set_length(0); // unused, so far.
1284 _carriage = new_back.get();
1285 _buffers.push_back(*new_back.release());
9f95a23c 1286 _num += 1;
11fdf7f2 1287 return _buffers.back();
7c673cae
FG
1288 }
1289
1290 void buffer::list::append(const char *data, unsigned len)
1291 {
11fdf7f2
TL
1292 _len += len;
1293
1294 const unsigned free_in_last = get_append_buffer_unused_tail_length();
1295 const unsigned first_round = std::min(len, free_in_last);
1296 if (first_round) {
1297 // _buffers and carriage can desynchronize when 1) a new ptr
1298 // we don't own has been added into the _buffers 2) _buffers
1299 // has been emptied as as a result of std::move or stolen by
1300 // claim_append.
1301 if (unlikely(_carriage != &_buffers.back())) {
1302 auto bptr = ptr_node::create(*_carriage, _carriage->length(), 0);
1303 _carriage = bptr.get();
1304 _buffers.push_back(*bptr.release());
9f95a23c 1305 _num += 1;
7c673cae 1306 }
11fdf7f2
TL
1307 _carriage->append(data, first_round);
1308 }
1309
1310 const unsigned second_round = len - first_round;
1311 if (second_round) {
1312 auto& new_back = refill_append_space(second_round);
1313 new_back.append(data + first_round, second_round);
1314 }
1315 }
1316
1317 buffer::list::reserve_t buffer::list::obtain_contiguous_space(
1318 const unsigned len)
1319 {
1320 // note: if len < the normal append_buffer size it *might*
1321 // be better to allocate a normal-sized append_buffer and
1322 // use part of it. however, that optimizes for the case of
1323 // old-style types including new-style types. and in most
1324 // such cases, this won't be the very first thing encoded to
1325 // the list, so append_buffer will already be allocated.
1326 // OTOH if everything is new-style, we *should* allocate
1327 // only what we need and conserve memory.
1328 if (unlikely(get_append_buffer_unused_tail_length() < len)) {
1329 auto new_back = \
1330 buffer::ptr_node::create(buffer::create(len)).release();
1331 new_back->set_length(0); // unused, so far.
1332 _buffers.push_back(*new_back);
9f95a23c 1333 _num += 1;
11fdf7f2
TL
1334 _carriage = new_back;
1335 return { new_back->c_str(), &new_back->_len, &_len };
1336 } else {
f67539c2 1337 ceph_assert(!_buffers.empty());
11fdf7f2
TL
1338 if (unlikely(_carriage != &_buffers.back())) {
1339 auto bptr = ptr_node::create(*_carriage, _carriage->length(), 0);
1340 _carriage = bptr.get();
1341 _buffers.push_back(*bptr.release());
9f95a23c 1342 _num += 1;
11fdf7f2
TL
1343 }
1344 return { _carriage->end_c_str(), &_carriage->_len, &_len };
7c673cae
FG
1345 }
1346 }
1347
1348 void buffer::list::append(const ptr& bp)
1349 {
7c673cae
FG
1350 push_back(bp);
1351 }
1352
1353 void buffer::list::append(ptr&& bp)
1354 {
7c673cae
FG
1355 push_back(std::move(bp));
1356 }
1357
1358 void buffer::list::append(const ptr& bp, unsigned off, unsigned len)
1359 {
11fdf7f2 1360 ceph_assert(len+off <= bp.length());
7c673cae
FG
1361 if (!_buffers.empty()) {
1362 ptr &l = _buffers.back();
9f95a23c 1363 if (l._raw == bp._raw && l.end() == bp.start() + off) {
7c673cae
FG
1364 // yay contiguous with tail bp!
1365 l.set_length(l.length()+len);
1366 _len += len;
1367 return;
1368 }
1369 }
1370 // add new item to list
11fdf7f2
TL
1371 _buffers.push_back(*ptr_node::create(bp, off, len).release());
1372 _len += len;
9f95a23c 1373 _num += 1;
7c673cae
FG
1374 }
1375
1376 void buffer::list::append(const list& bl)
1377 {
1378 _len += bl._len;
9f95a23c 1379 _num += bl._num;
11fdf7f2
TL
1380 for (const auto& node : bl._buffers) {
1381 _buffers.push_back(*ptr_node::create(node).release());
1382 }
7c673cae
FG
1383 }
1384
1385 void buffer::list::append(std::istream& in)
1386 {
1387 while (!in.eof()) {
1388 std::string s;
1389 getline(in, s);
1390 append(s.c_str(), s.length());
1391 if (s.length())
1392 append("\n", 1);
1393 }
1394 }
1395
11fdf7f2
TL
1396 buffer::list::contiguous_filler buffer::list::append_hole(const unsigned len)
1397 {
1398 _len += len;
1399
1400 if (unlikely(get_append_buffer_unused_tail_length() < len)) {
1401 // make a new append_buffer. fill out a complete page, factoring in
1402 // the raw_combined overhead.
1403 auto& new_back = refill_append_space(len);
1404 new_back.set_length(len);
1405 return { new_back.c_str() };
1406 } else if (unlikely(_carriage != &_buffers.back())) {
1407 auto bptr = ptr_node::create(*_carriage, _carriage->length(), 0);
1408 _carriage = bptr.get();
1409 _buffers.push_back(*bptr.release());
9f95a23c 1410 _num += 1;
11fdf7f2
TL
1411 }
1412 _carriage->set_length(_carriage->length() + len);
1413 return { _carriage->end_c_str() - len };
1414 }
1415
7c673cae
FG
1416 void buffer::list::prepend_zero(unsigned len)
1417 {
11fdf7f2
TL
1418 auto bp = ptr_node::create(len);
1419 bp->zero(false);
7c673cae 1420 _len += len;
9f95a23c 1421 _num += 1;
11fdf7f2 1422 _buffers.push_front(*bp.release());
7c673cae
FG
1423 }
1424
1425 void buffer::list::append_zero(unsigned len)
1426 {
11fdf7f2
TL
1427 _len += len;
1428
1429 const unsigned free_in_last = get_append_buffer_unused_tail_length();
1430 const unsigned first_round = std::min(len, free_in_last);
1431 if (first_round) {
1432 if (unlikely(_carriage != &_buffers.back())) {
1433 auto bptr = ptr_node::create(*_carriage, _carriage->length(), 0);
1434 _carriage = bptr.get();
1435 _buffers.push_back(*bptr.release());
9f95a23c 1436 _num += 1;
11fdf7f2
TL
1437 }
1438 _carriage->append_zeros(first_round);
1439 }
1440
1441 const unsigned second_round = len - first_round;
1442 if (second_round) {
1443 auto& new_back = refill_append_space(second_round);
1444 new_back.set_length(second_round);
1445 new_back.zero(false);
1446 }
7c673cae
FG
1447 }
1448
1449
1450 /*
1451 * get a char
1452 */
1453 const char& buffer::list::operator[](unsigned n) const
1454 {
1455 if (n >= _len)
1456 throw end_of_buffer();
1457
11fdf7f2
TL
1458 for (const auto& node : _buffers) {
1459 if (n >= node.length()) {
1460 n -= node.length();
7c673cae
FG
1461 continue;
1462 }
11fdf7f2 1463 return node[n];
7c673cae
FG
1464 }
1465 ceph_abort();
1466 }
1467
1468 /*
1469 * return a contiguous ptr to whole bufferlist contents.
1470 */
1471 char *buffer::list::c_str()
1472 {
20effc67
TL
1473 if (const auto len = length(); len == 0) {
1474 return nullptr; // no non-empty buffers
1475 } else if (len != _buffers.front().length()) {
7c673cae 1476 rebuild();
20effc67
TL
1477 } else {
1478 // there are two *main* scenarios that hit this branch:
1479 // 1. bufferlist with single, non-empty buffer;
1480 // 2. bufferlist with single, non-empty buffer followed by
1481 // empty buffer. splice() tries to not waste our appendable
1482 // space; to carry it an empty bptr is added at the end.
1483 // we account for these and don't rebuild unnecessarily
11fdf7f2 1484 }
20effc67 1485 return _buffers.front().c_str();
7c673cae
FG
1486 }
1487
1488 string buffer::list::to_str() const {
1489 string s;
1490 s.reserve(length());
11fdf7f2
TL
1491 for (const auto& node : _buffers) {
1492 if (node.length()) {
1493 s.append(node.c_str(), node.length());
7c673cae
FG
1494 }
1495 }
1496 return s;
1497 }
1498
7c673cae
FG
1499 void buffer::list::substr_of(const list& other, unsigned off, unsigned len)
1500 {
1501 if (off + len > other.length())
1502 throw end_of_buffer();
1503
1504 clear();
1505
1506 // skip off
11fdf7f2
TL
1507 auto curbuf = std::cbegin(other._buffers);
1508 while (off > 0 && off >= curbuf->length()) {
7c673cae
FG
1509 // skip this buffer
1510 //cout << "skipping over " << *curbuf << std::endl;
1511 off -= (*curbuf).length();
1512 ++curbuf;
1513 }
11fdf7f2 1514 ceph_assert(len == 0 || curbuf != std::cend(other._buffers));
7c673cae
FG
1515
1516 while (len > 0) {
1517 // partial?
1518 if (off + len < curbuf->length()) {
1519 //cout << "copying partial of " << *curbuf << std::endl;
f67539c2 1520 _buffers.push_back(*ptr_node::create(*curbuf, off, len).release());
7c673cae 1521 _len += len;
9f95a23c 1522 _num += 1;
7c673cae
FG
1523 break;
1524 }
1525
1526 // through end
1527 //cout << "copying end (all?) of " << *curbuf << std::endl;
1528 unsigned howmuch = curbuf->length() - off;
f67539c2 1529 _buffers.push_back(*ptr_node::create(*curbuf, off, howmuch).release());
7c673cae 1530 _len += howmuch;
9f95a23c 1531 _num += 1;
7c673cae
FG
1532 len -= howmuch;
1533 off = 0;
1534 ++curbuf;
1535 }
1536 }
1537
1538 // funky modifer
1539 void buffer::list::splice(unsigned off, unsigned len, list *claim_by /*, bufferlist& replace_with */)
1540 { // fixme?
1541 if (len == 0)
1542 return;
1543
1544 if (off >= length())
1545 throw end_of_buffer();
1546
11fdf7f2 1547 ceph_assert(len > 0);
7c673cae
FG
1548 //cout << "splice off " << off << " len " << len << " ... mylen = " << length() << std::endl;
1549
1550 // skip off
11fdf7f2
TL
1551 auto curbuf = std::begin(_buffers);
1552 auto curbuf_prev = _buffers.before_begin();
7c673cae 1553 while (off > 0) {
11fdf7f2 1554 ceph_assert(curbuf != std::end(_buffers));
7c673cae
FG
1555 if (off >= (*curbuf).length()) {
1556 // skip this buffer
1557 //cout << "off = " << off << " skipping over " << *curbuf << std::endl;
1558 off -= (*curbuf).length();
11fdf7f2 1559 curbuf_prev = curbuf++;
7c673cae
FG
1560 } else {
1561 // somewhere in this buffer!
1562 //cout << "off = " << off << " somewhere in " << *curbuf << std::endl;
1563 break;
1564 }
1565 }
1566
1567 if (off) {
f67539c2
TL
1568 // add a reference to the front bit, insert it before curbuf (which
1569 // we'll lose).
7c673cae 1570 //cout << "keeping front " << off << " of " << *curbuf << std::endl;
11fdf7f2
TL
1571 _buffers.insert_after(curbuf_prev,
1572 *ptr_node::create(*curbuf, 0, off).release());
7c673cae 1573 _len += off;
9f95a23c 1574 _num += 1;
11fdf7f2 1575 ++curbuf_prev;
7c673cae
FG
1576 }
1577
1578 while (len > 0) {
f67539c2
TL
1579 // partial or the last (appendable) one?
1580 if (const auto to_drop = off + len; to_drop < curbuf->length()) {
7c673cae
FG
1581 //cout << "keeping end of " << *curbuf << ", losing first " << off+len << std::endl;
1582 if (claim_by)
f67539c2
TL
1583 claim_by->append(*curbuf, off, len);
1584 curbuf->set_offset(to_drop + curbuf->offset()); // ignore beginning big
1585 curbuf->set_length(curbuf->length() - to_drop);
1586 _len -= to_drop;
7c673cae
FG
1587 //cout << " now " << *curbuf << std::endl;
1588 break;
1589 }
f67539c2 1590
7c673cae 1591 // hose though the end
f67539c2 1592 unsigned howmuch = curbuf->length() - off;
7c673cae
FG
1593 //cout << "discarding " << howmuch << " of " << *curbuf << std::endl;
1594 if (claim_by)
f67539c2
TL
1595 claim_by->append(*curbuf, off, howmuch);
1596 _len -= curbuf->length();
1597 if (curbuf == _carriage) {
1598 // no need to reallocate, shrinking and relinking is enough.
1599 curbuf = _buffers.erase_after(curbuf_prev);
1600 _carriage->set_offset(_carriage->offset() + _carriage->length());
1601 _carriage->set_length(0);
1602 _buffers.push_back(*_carriage);
1603 } else {
1604 curbuf = _buffers.erase_after_and_dispose(curbuf_prev);
1605 _num -= 1;
1606 }
7c673cae
FG
1607 len -= howmuch;
1608 off = 0;
1609 }
f67539c2 1610
7c673cae 1611 // splice in *replace (implement me later?)
7c673cae
FG
1612 }
1613
1614 void buffer::list::write(int off, int len, std::ostream& out) const
1615 {
1616 list s;
1617 s.substr_of(*this, off, len);
11fdf7f2
TL
1618 for (const auto& node : s._buffers) {
1619 if (node.length()) {
1620 out.write(node.c_str(), node.length());
1621 }
1622 }
7c673cae
FG
1623 }
1624
1625void buffer::list::encode_base64(buffer::list& o)
1626{
1627 bufferptr bp(length() * 4 / 3 + 3);
1628 int l = ceph_armor(bp.c_str(), bp.c_str() + bp.length(), c_str(), c_str() + length());
1629 bp.set_length(l);
1630 o.push_back(std::move(bp));
1631}
1632
1633void buffer::list::decode_base64(buffer::list& e)
1634{
1635 bufferptr bp(4 + ((e.length() * 3) / 4));
1636 int l = ceph_unarmor(bp.c_str(), bp.c_str() + bp.length(), e.c_str(), e.c_str() + e.length());
1637 if (l < 0) {
1638 std::ostringstream oss;
1639 oss << "decode_base64: decoding failed:\n";
1640 hexdump(oss);
1641 throw buffer::malformed_input(oss.str().c_str());
1642 }
11fdf7f2 1643 ceph_assert(l <= (int)bp.length());
7c673cae
FG
1644 bp.set_length(l);
1645 push_back(std::move(bp));
1646}
1647
9f95a23c
TL
1648ssize_t buffer::list::pread_file(const char *fn, uint64_t off, uint64_t len, std::string *error)
1649{
f67539c2 1650 int fd = TEMP_FAILURE_RETRY(::open(fn, O_RDONLY|O_CLOEXEC|O_BINARY));
9f95a23c
TL
1651 if (fd < 0) {
1652 int err = errno;
1653 std::ostringstream oss;
1654 oss << "can't open " << fn << ": " << cpp_strerror(err);
1655 *error = oss.str();
1656 return -err;
1657 }
1658
1659 struct stat st;
1660 // FIPS zeroization audit 20191115: this memset is not security related.
1661 memset(&st, 0, sizeof(st));
1662 if (::fstat(fd, &st) < 0) {
1663 int err = errno;
1664 std::ostringstream oss;
1665 oss << "bufferlist::read_file(" << fn << "): stat error: "
1666 << cpp_strerror(err);
1667 *error = oss.str();
1668 VOID_TEMP_FAILURE_RETRY(::close(fd));
1669 return -err;
1670 }
1671
1672 if (off > (uint64_t)st.st_size) {
1673 std::ostringstream oss;
1674 oss << "bufferlist::read_file(" << fn << "): read error: size < offset";
1675 *error = oss.str();
1676 VOID_TEMP_FAILURE_RETRY(::close(fd));
1677 return 0;
1678 }
1679
1680 if (len > st.st_size - off) {
1681 len = st.st_size - off;
1682 }
1683 ssize_t ret = lseek64(fd, off, SEEK_SET);
1684 if (ret != (ssize_t)off) {
1685 return -errno;
1686 }
1687
1688 ret = read_fd(fd, len);
1689 if (ret < 0) {
1690 std::ostringstream oss;
1691 oss << "bufferlist::read_file(" << fn << "): read error:"
1692 << cpp_strerror(ret);
1693 *error = oss.str();
1694 VOID_TEMP_FAILURE_RETRY(::close(fd));
1695 return ret;
1696 } else if (ret != (ssize_t)len) {
1697 // Premature EOF.
1698 // Perhaps the file changed between stat() and read()?
1699 std::ostringstream oss;
1700 oss << "bufferlist::read_file(" << fn << "): warning: got premature EOF.";
1701 *error = oss.str();
1702 // not actually an error, but weird
1703 }
1704 VOID_TEMP_FAILURE_RETRY(::close(fd));
1705 return 0;
1706}
7c673cae
FG
1707
1708int buffer::list::read_file(const char *fn, std::string *error)
1709{
f67539c2 1710 int fd = TEMP_FAILURE_RETRY(::open(fn, O_RDONLY|O_CLOEXEC|O_BINARY));
7c673cae
FG
1711 if (fd < 0) {
1712 int err = errno;
1713 std::ostringstream oss;
1714 oss << "can't open " << fn << ": " << cpp_strerror(err);
1715 *error = oss.str();
1716 return -err;
1717 }
1718
1719 struct stat st;
92f5a8d4 1720 // FIPS zeroization audit 20191115: this memset is not security related.
7c673cae
FG
1721 memset(&st, 0, sizeof(st));
1722 if (::fstat(fd, &st) < 0) {
1723 int err = errno;
1724 std::ostringstream oss;
1725 oss << "bufferlist::read_file(" << fn << "): stat error: "
1726 << cpp_strerror(err);
1727 *error = oss.str();
1728 VOID_TEMP_FAILURE_RETRY(::close(fd));
1729 return -err;
1730 }
1731
1732 ssize_t ret = read_fd(fd, st.st_size);
1733 if (ret < 0) {
1734 std::ostringstream oss;
1735 oss << "bufferlist::read_file(" << fn << "): read error:"
1736 << cpp_strerror(ret);
1737 *error = oss.str();
1738 VOID_TEMP_FAILURE_RETRY(::close(fd));
1739 return ret;
1740 }
1741 else if (ret != st.st_size) {
1742 // Premature EOF.
1743 // Perhaps the file changed between stat() and read()?
1744 std::ostringstream oss;
1745 oss << "bufferlist::read_file(" << fn << "): warning: got premature EOF.";
1746 *error = oss.str();
1747 // not actually an error, but weird
1748 }
1749 VOID_TEMP_FAILURE_RETRY(::close(fd));
1750 return 0;
1751}
1752
1753ssize_t buffer::list::read_fd(int fd, size_t len)
1754{
11fdf7f2
TL
1755 auto bp = ptr_node::create(buffer::create(len));
1756 ssize_t ret = safe_read(fd, (void*)bp->c_str(), len);
7c673cae 1757 if (ret >= 0) {
11fdf7f2
TL
1758 bp->set_length(ret);
1759 push_back(std::move(bp));
7c673cae
FG
1760 }
1761 return ret;
1762}
1763
f67539c2
TL
1764ssize_t buffer::list::recv_fd(int fd, size_t len)
1765{
1766 auto bp = ptr_node::create(buffer::create(len));
1767 ssize_t ret = safe_recv(fd, (void*)bp->c_str(), len);
1768 if (ret >= 0) {
1769 bp->set_length(ret);
1770 push_back(std::move(bp));
1771 }
1772 return ret;
1773}
1774
7c673cae
FG
1775int buffer::list::write_file(const char *fn, int mode)
1776{
f67539c2 1777 int fd = TEMP_FAILURE_RETRY(::open(fn, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC|O_BINARY, mode));
7c673cae
FG
1778 if (fd < 0) {
1779 int err = errno;
1780 cerr << "bufferlist::write_file(" << fn << "): failed to open file: "
1781 << cpp_strerror(err) << std::endl;
1782 return -err;
1783 }
1784 int ret = write_fd(fd);
1785 if (ret) {
1786 cerr << "bufferlist::write_fd(" << fn << "): write_fd error: "
1787 << cpp_strerror(ret) << std::endl;
1788 VOID_TEMP_FAILURE_RETRY(::close(fd));
1789 return ret;
1790 }
1791 if (TEMP_FAILURE_RETRY(::close(fd))) {
1792 int err = errno;
1793 cerr << "bufferlist::write_file(" << fn << "): close error: "
1794 << cpp_strerror(err) << std::endl;
1795 return -err;
1796 }
1797 return 0;
1798}
1799
// Write `bytes` bytes described by the `veclen` entries of `vec` to
// `fd`, starting at file position `offset`.
//
// Uses pwritev() when HAVE_PWRITEV is defined, otherwise lseek64()
// followed by writev(). Calls interrupted by EINTR are repeated, and
// after a short write the iovec array is advanced in place so the
// next attempt continues where the previous one stopped.
//
// Returns 0 on success or a negative errno value.
static int do_writev(int fd, struct iovec *vec, uint64_t offset, unsigned veclen, unsigned bytes)
{
  while (bytes != 0) {
    ssize_t wrote = 0;
#ifdef HAVE_PWRITEV
    wrote = ::pwritev(fd, vec, veclen, offset);
#else
    wrote = ::lseek64(fd, offset, SEEK_SET);
    if (static_cast<uint64_t>(wrote) != offset) {
      return -errno;
    }
    wrote = ::writev(fd, vec, veclen);
#endif
    if (wrote < 0) {
      if (errno != EINTR) {
        return -errno;
      }
      continue;  // interrupted: try the same request again
    }

    offset += wrote;
    bytes -= wrote;
    if (bytes == 0) {
      break;
    }

    // short write: drop the entries that were written completely ...
    ssize_t pending = wrote;
    while (pending > 0 && vec[0].iov_len <= (size_t)pending) {
      pending -= vec[0].iov_len;
      ++vec;
      --veclen;
    }
    // ... and trim the one that was written only in part
    if (pending > 0) {
      vec[0].iov_base = (char *)vec[0].iov_base + pending;
      vec[0].iov_len -= pending;
    }
  }
  return 0;
}
1838
f67539c2 1839#ifndef _WIN32
7c673cae
FG
1840int buffer::list::write_fd(int fd) const
1841{
7c673cae
FG
1842 // use writev!
1843 iovec iov[IOV_MAX];
1844 int iovlen = 0;
1845 ssize_t bytes = 0;
1846
11fdf7f2
TL
1847 auto p = std::cbegin(_buffers);
1848 while (p != std::cend(_buffers)) {
7c673cae
FG
1849 if (p->length() > 0) {
1850 iov[iovlen].iov_base = (void *)p->c_str();
1851 iov[iovlen].iov_len = p->length();
1852 bytes += p->length();
1853 iovlen++;
1854 }
1855 ++p;
1856
d2e6a577 1857 if (iovlen == IOV_MAX ||
7c673cae
FG
1858 p == _buffers.end()) {
1859 iovec *start = iov;
1860 int num = iovlen;
1861 ssize_t wrote;
1862 retry:
1863 wrote = ::writev(fd, start, num);
1864 if (wrote < 0) {
1865 int err = errno;
1866 if (err == EINTR)
1867 goto retry;
1868 return -err;
1869 }
1870 if (wrote < bytes) {
1871 // partial write, recover!
1872 while ((size_t)wrote >= start[0].iov_len) {
1873 wrote -= start[0].iov_len;
1874 bytes -= start[0].iov_len;
1875 start++;
1876 num--;
1877 }
1878 if (wrote > 0) {
1879 start[0].iov_len -= wrote;
1880 start[0].iov_base = (char *)start[0].iov_base + wrote;
1881 bytes -= wrote;
1882 }
1883 goto retry;
1884 }
1885 iovlen = 0;
1886 bytes = 0;
1887 }
1888 }
1889 return 0;
1890}
1891
f67539c2
TL
1892int buffer::list::send_fd(int fd) const {
1893 return buffer::list::write_fd(fd);
1894}
1895
7c673cae
FG
1896int buffer::list::write_fd(int fd, uint64_t offset) const
1897{
1898 iovec iov[IOV_MAX];
1899
11fdf7f2 1900 auto p = std::cbegin(_buffers);
9f95a23c 1901 uint64_t left_pbrs = get_num_buffers();
7c673cae
FG
1902 while (left_pbrs) {
1903 ssize_t bytes = 0;
1904 unsigned iovlen = 0;
11fdf7f2 1905 uint64_t size = std::min<uint64_t>(left_pbrs, IOV_MAX);
7c673cae
FG
1906 left_pbrs -= size;
1907 while (size > 0) {
1908 iov[iovlen].iov_base = (void *)p->c_str();
1909 iov[iovlen].iov_len = p->length();
1910 iovlen++;
1911 bytes += p->length();
1912 ++p;
1913 size--;
1914 }
1915
1916 int r = do_writev(fd, iov, offset, iovlen, bytes);
1917 if (r < 0)
1918 return r;
1919 offset += bytes;
1920 }
1921 return 0;
1922}
f67539c2
TL
1923#else
1924int buffer::list::write_fd(int fd) const
1925{
1926 // There's no writev on Windows. WriteFileGather may be an option,
1927 // but it has strict requirements in terms of buffer size and alignment.
1928 auto p = std::cbegin(_buffers);
1929 uint64_t left_pbrs = get_num_buffers();
1930 while (left_pbrs) {
1931 int written = 0;
1932 while (written < p->length()) {
1933 int r = ::write(fd, p->c_str(), p->length() - written);
1934 if (r < 0)
1935 return -errno;
1936
1937 written += r;
1938 }
1939
1940 left_pbrs--;
1941 p++;
1942 }
1943
1944 return 0;
1945}
1946
1947int buffer::list::send_fd(int fd) const
1948{
1949 // There's no writev on Windows. WriteFileGather may be an option,
1950 // but it has strict requirements in terms of buffer size and alignment.
1951 auto p = std::cbegin(_buffers);
1952 uint64_t left_pbrs = get_num_buffers();
1953 while (left_pbrs) {
1954 int written = 0;
1955 while (written < p->length()) {
1956 int r = ::send(fd, p->c_str(), p->length() - written, 0);
1957 if (r < 0)
1958 return -ceph_sock_errno();
1959
1960 written += r;
1961 }
1962
1963 left_pbrs--;
1964 p++;
1965 }
1966
1967 return 0;
1968}
1969
1970int buffer::list::write_fd(int fd, uint64_t offset) const
1971{
1972 int r = ::lseek64(fd, offset, SEEK_SET);
1973 if (r != offset)
1974 return -errno;
1975
1976 return write_fd(fd);
1977}
1978#endif
7c673cae 1979
20effc67
TL
1980buffer::list::iov_vec_t buffer::list::prepare_iovs() const
1981{
1982 size_t index = 0;
1983 uint64_t off = 0;
1984 iov_vec_t iovs{_num / IOV_MAX + 1};
1985 auto it = iovs.begin();
1986 for (auto& bp : _buffers) {
1987 if (index == 0) {
1988 it->offset = off;
1989 it->length = 0;
1990 size_t nr_iov_created = std::distance(iovs.begin(), it);
1991 it->iov.resize(
1992 std::min(_num - IOV_MAX * nr_iov_created, (size_t)IOV_MAX));
1993 }
1994 it->iov[index].iov_base = (void*)bp.c_str();
1995 it->iov[index].iov_len = bp.length();
1996 off += bp.length();
1997 it->length += bp.length();
1998 if (++index == IOV_MAX) {
1999 // continue with a new vector<iov> if we have more buf
2000 ++it;
2001 index = 0;
2002 }
2003 }
2004 return iovs;
2005}
2006
7c673cae
FG
2007__u32 buffer::list::crc32c(__u32 crc) const
2008{
11fdf7f2
TL
2009 int cache_misses = 0;
2010 int cache_hits = 0;
2011 int cache_adjusts = 0;
2012
2013 for (const auto& node : _buffers) {
2014 if (node.length()) {
9f95a23c 2015 raw* const r = node._raw;
11fdf7f2 2016 pair<size_t, size_t> ofs(node.offset(), node.offset() + node.length());
7c673cae
FG
2017 pair<uint32_t, uint32_t> ccrc;
2018 if (r->get_crc(ofs, &ccrc)) {
2019 if (ccrc.first == crc) {
2020 // got it already
2021 crc = ccrc.second;
11fdf7f2 2022 cache_hits++;
7c673cae
FG
2023 } else {
2024 /* If we have cached crc32c(buf, v) for initial value v,
2025 * we can convert this to a different initial value v' by:
2026 * crc32c(buf, v') = crc32c(buf, v) ^ adjustment
2027 * where adjustment = crc32c(0*len(buf), v ^ v')
2028 *
2029 * http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
2030 * note, u for our crc32c implementation is 0
2031 */
11fdf7f2
TL
2032 crc = ccrc.second ^ ceph_crc32c(ccrc.first ^ crc, NULL, node.length());
2033 cache_adjusts++;
7c673cae
FG
2034 }
2035 } else {
11fdf7f2 2036 cache_misses++;
7c673cae 2037 uint32_t base = crc;
11fdf7f2 2038 crc = ceph_crc32c(crc, (unsigned char*)node.c_str(), node.length());
7c673cae
FG
2039 r->set_crc(ofs, make_pair(base, crc));
2040 }
2041 }
2042 }
11fdf7f2
TL
2043
2044 if (buffer_track_crc) {
2045 if (cache_adjusts)
2046 buffer_cached_crc_adjusted += cache_adjusts;
2047 if (cache_hits)
2048 buffer_cached_crc += cache_hits;
2049 if (cache_misses)
2050 buffer_missed_crc += cache_misses;
2051 }
2052
7c673cae
FG
2053 return crc;
2054}
2055
2056void buffer::list::invalidate_crc()
2057{
11fdf7f2 2058 for (const auto& node : _buffers) {
9f95a23c
TL
2059 if (node._raw) {
2060 node._raw->invalidate_crc();
7c673cae
FG
2061 }
2062 }
2063}
2064
2065/**
2066 * Binary write all contents to a C++ stream
2067 */
2068void buffer::list::write_stream(std::ostream &out) const
2069{
11fdf7f2
TL
2070 for (const auto& node : _buffers) {
2071 if (node.length() > 0) {
2072 out.write(node.c_str(), node.length());
7c673cae
FG
2073 }
2074 }
2075}
2076
2077
2078void buffer::list::hexdump(std::ostream &out, bool trailing_newline) const
2079{
2080 if (!length())
2081 return;
2082
2083 std::ios_base::fmtflags original_flags = out.flags();
2084
2085 // do our best to match the output of hexdump -C, for better
2086 // diff'ing!
2087
2088 out.setf(std::ios::right);
2089 out.fill('0');
2090
2091 unsigned per = 16;
9f95a23c
TL
2092 char last_row_char = '\0';
2093 bool was_same = false, did_star = false;
7c673cae 2094 for (unsigned o=0; o<length(); o += per) {
9f95a23c
TL
2095 if (o == 0) {
2096 last_row_char = (*this)[o];
2097 }
2098
7c673cae 2099 if (o + per < length()) {
9f95a23c 2100 bool row_is_same = true;
7c673cae 2101 for (unsigned i=0; i<per && o+i<length(); i++) {
9f95a23c
TL
2102 char current_char = (*this)[o+i];
2103 if (current_char != last_row_char) {
2104 if (i == 0) {
2105 last_row_char = current_char;
2106 was_same = false;
2107 did_star = false;
2108 } else {
2109 row_is_same = false;
2110 }
7c673cae
FG
2111 }
2112 }
9f95a23c
TL
2113 if (row_is_same) {
2114 if (was_same) {
7c673cae
FG
2115 if (!did_star) {
2116 out << "\n*";
2117 did_star = true;
2118 }
2119 continue;
2120 }
9f95a23c 2121 was_same = true;
7c673cae 2122 } else {
9f95a23c 2123 was_same = false;
7c673cae
FG
2124 did_star = false;
2125 }
2126 }
2127 if (o)
2128 out << "\n";
2129 out << std::hex << std::setw(8) << o << " ";
2130
2131 unsigned i;
2132 for (i=0; i<per && o+i<length(); i++) {
2133 if (i == 8)
2134 out << ' ';
2135 out << " " << std::setw(2) << ((unsigned)(*this)[o+i] & 0xff);
2136 }
2137 for (; i<per; i++) {
2138 if (i == 8)
2139 out << ' ';
2140 out << " ";
2141 }
2142
2143 out << " |";
2144 for (i=0; i<per && o+i<length(); i++) {
2145 char c = (*this)[o+i];
2146 if (isupper(c) || islower(c) || isdigit(c) || c == ' ' || ispunct(c))
2147 out << c;
2148 else
2149 out << '.';
2150 }
2151 out << '|' << std::dec;
2152 }
2153 if (trailing_newline) {
2154 out << "\n" << std::hex << std::setw(8) << length();
2155 out << "\n";
2156 }
2157
2158 out.flags(original_flags);
2159}
2160
31f18b77
FG
2161
2162buffer::list buffer::list::static_from_mem(char* c, size_t l) {
2163 list bl;
11fdf7f2 2164 bl.push_back(ptr_node::create(create_static(l, c)));
31f18b77
FG
2165 return bl;
2166}
2167
2168buffer::list buffer::list::static_from_cstring(char* c) {
2169 return static_from_mem(c, std::strlen(c));
2170}
2171
2172buffer::list buffer::list::static_from_string(string& s) {
2173 // C++14 just has string::data return a char* from a non-const
2174 // string.
2175 return static_from_mem(const_cast<char*>(s.data()), s.length());
2176 // But the way buffer::list mostly doesn't work in a sane way with
2177 // const makes me generally sad.
2178}
2179
f67539c2
TL
2180// buffer::raw is not a standard layout type.
2181#define BUF_OFFSETOF(type, field) \
2182 (reinterpret_cast<std::uintptr_t>(&(((type*)1024)->field)) - 1024u)
2183
11fdf7f2
TL
2184bool buffer::ptr_node::dispose_if_hypercombined(
2185 buffer::ptr_node* const delete_this)
2186{
f67539c2
TL
2187 // in case _raw is nullptr
2188 const std::uintptr_t bptr =
2189 (reinterpret_cast<std::uintptr_t>(delete_this->_raw) +
2190 BUF_OFFSETOF(buffer::raw, bptr_storage));
2191 const bool is_hypercombined =
2192 reinterpret_cast<std::uintptr_t>(delete_this) == bptr;
11fdf7f2
TL
2193 if (is_hypercombined) {
2194 ceph_assert_always("hypercombining is currently disabled" == nullptr);
2195 delete_this->~ptr_node();
f67539c2
TL
2196 return true;
2197 } else {
2198 return false;
11fdf7f2 2199 }
11fdf7f2
TL
2200}
2201
2202std::unique_ptr<buffer::ptr_node, buffer::ptr_node::disposer>
2203buffer::ptr_node::create_hypercombined(ceph::unique_leakable_ptr<buffer::raw> r)
2204{
2205 // FIXME: we don't currently hypercombine buffers due to crashes
2206 // observed in the rados suite. After fixing we'll use placement
2207 // new to create ptr_node on buffer::raw::bptr_storage.
2208 return std::unique_ptr<buffer::ptr_node, buffer::ptr_node::disposer>(
2209 new ptr_node(std::move(r)));
2210}
2211
11fdf7f2
TL
2212buffer::ptr_node* buffer::ptr_node::cloner::operator()(
2213 const buffer::ptr_node& clone_this)
2214{
9f95a23c 2215 return new ptr_node(clone_this);
11fdf7f2
TL
2216}
2217
7c673cae 2218std::ostream& buffer::operator<<(std::ostream& out, const buffer::raw &r) {
f67539c2
TL
2219 return out << "buffer::raw("
2220 << (void*)r.get_data() << " len " << r.get_len()
2221 << " nref " << r.nref.load() << ")";
7c673cae
FG
2222}
2223
2224std::ostream& buffer::operator<<(std::ostream& out, const buffer::ptr& bp) {
2225 if (bp.have_raw())
2226 out << "buffer::ptr(" << bp.offset() << "~" << bp.length()
2227 << " " << (void*)bp.c_str()
2228 << " in raw " << (void*)bp.raw_c_str()
2229 << " len " << bp.raw_length()
2230 << " nref " << bp.raw_nref() << ")";
2231 else
2232 out << "buffer:ptr(" << bp.offset() << "~" << bp.length() << " no raw)";
2233 return out;
2234}
2235
2236std::ostream& buffer::operator<<(std::ostream& out, const buffer::list& bl) {
f67539c2 2237 out << "buffer::list(len=" << bl.length() << ",\n";
7c673cae 2238
11fdf7f2
TL
2239 for (const auto& node : bl.buffers()) {
2240 out << "\t" << node;
2241 if (&node != &bl.buffers().back()) {
f67539c2 2242 out << ",\n";
11fdf7f2 2243 }
7c673cae 2244 }
f67539c2 2245 out << "\n)";
7c673cae
FG
2246 return out;
2247}
2248
7c673cae
FG
// Mempool object factory definitions for the concrete buffer::raw
// implementations.  Each invocation names the class, a factory identifier
// and the pool; all four use the buffer_meta pool.
// NOTE(review): presumably this makes allocations of these raw objects
// accounted under buffer_meta -- confirm against include/mempool.h.
MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_malloc, buffer_raw_malloc,
			      buffer_meta);
MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_posix_aligned,
			      buffer_raw_posix_aligned, buffer_meta);
MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_claimed_char, buffer_raw_claimed_char,
			      buffer_meta);
MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_static, buffer_raw_static,
			      buffer_meta);
2257
f67539c2
TL
2258
2259void ceph::buffer::list::page_aligned_appender::_refill(size_t len) {
20effc67
TL
2260 const unsigned alloc =
2261 std::max(min_alloc,
2262 shift_round_up(static_cast<unsigned>(len),
2263 static_cast<unsigned>(CEPH_PAGE_SHIFT)));
f67539c2
TL
2264 auto new_back = \
2265 ptr_node::create(buffer::create_page_aligned(alloc));
2266 new_back->set_length(0); // unused, so far.
2267 bl.push_back(std::move(new_back));
2268}
2269
2270namespace ceph::buffer {
2271inline namespace v15_2_0 {
2272
// The class below has virtual functions but declares no virtual destructor;
// silence -Wnon-virtual-dtor for it on both GCC and clang.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wnon-virtual-dtor"
/// Error category for ceph::buffer::errc values: supplies the category name,
/// message strings, the mapping to generic boost::system error conditions
/// and the conversion to negative errno-style codes (from_code).
class buffer_error_category : public ceph::converting_category {
public:
  // User-provided (not defaulted) constructor; buffer_category() below
  // default-initializes a `static const` instance of this class.
  buffer_error_category(){}
  /// Category name.
  const char* name() const noexcept override;
  /// Static message text for `ev`; the buffer arguments are unused.
  const char* message(int ev, char*, std::size_t) const noexcept override;
  /// Message text for `ev` as a std::string.
  std::string message(int ev) const override;
  /// Generic error condition corresponding to `ev`.
  boost::system::error_condition default_error_condition(int ev) const noexcept
    override;
  // Keep the base class's other equivalent() overloads visible.
  using ceph::converting_category::equivalent;
  /// Whether `ev` maps to error condition `c`.
  bool equivalent(int ev, const boost::system::error_condition& c) const
    noexcept override;
  /// Negative errno-style code for `ev`.
  int from_code(int ev) const noexcept override;
};
#pragma GCC diagnostic pop
#pragma clang diagnostic pop
2292
/// Name of this error category: always the static string "buffer".
const char* buffer_error_category::name() const noexcept {
  return "buffer";
}
2296
2297const char*
2298buffer_error_category::message(int ev, char*, std::size_t) const noexcept {
2299 using ceph::buffer::errc;
2300 if (ev == 0)
2301 return "No error";
2302
2303 switch (static_cast<errc>(ev)) {
2304 case errc::bad_alloc:
2305 return "Bad allocation";
2306
2307 case errc::end_of_buffer:
2308 return "End of buffer";
2309
2310 case errc::malformed_input:
2311 return "Malformed input";
2312 }
2313
2314 return "Unknown error";
2315}
2316
2317std::string buffer_error_category::message(int ev) const {
2318 return message(ev, nullptr, 0);
2319}
2320
2321boost::system::error_condition
2322buffer_error_category::default_error_condition(int ev)const noexcept {
2323 using ceph::buffer::errc;
2324 switch (static_cast<errc>(ev)) {
2325 case errc::bad_alloc:
2326 return boost::system::errc::not_enough_memory;
2327 case errc::end_of_buffer:
2328 case errc::malformed_input:
2329 return boost::system::errc::io_error;
2330 }
2331 return { ev, *this };
2332}
2333
2334bool buffer_error_category::equivalent(int ev, const boost::system::error_condition& c) const noexcept {
2335 return default_error_condition(ev) == c;
2336}
2337
2338int buffer_error_category::from_code(int ev) const noexcept {
2339 using ceph::buffer::errc;
2340 switch (static_cast<errc>(ev)) {
2341 case errc::bad_alloc:
2342 return -ENOMEM;
2343
2344 case errc::end_of_buffer:
2345 return -EIO;
2346
2347 case errc::malformed_input:
2348 return -EIO;
2349 }
2350 return -EDOM;
2351}
2352
2353const boost::system::error_category& buffer_category() noexcept {
2354 static const buffer_error_category c;
2355 return c;
2356}
2357}
2358}