]> git.proxmox.com Git - ceph.git/blame - ceph/src/common/buffer.cc
update sources to v12.1.0
[ceph.git] / ceph / src / common / buffer.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
31f18b77
FG
15#include <atomic>
16#include <errno.h>
17#include <limits.h>
18
19#include <sys/uio.h>
20
7c673cae
FG
21#include "include/compat.h"
22#include "include/mempool.h"
23#include "armor.h"
24#include "common/environment.h"
25#include "common/errno.h"
26#include "common/safe_io.h"
27#include "common/simple_spin.h"
28#include "common/strtol.h"
29#include "common/likely.h"
30#include "common/valgrind.h"
31#include "common/deleter.h"
7c673cae
FG
32#include "common/RWLock.h"
33#include "include/types.h"
7c673cae 34#include "include/scope_guard.h"
31f18b77 35
7c673cae
FG
36#if defined(HAVE_XIO)
37#include "msg/xio/XioMsg.h"
38#endif
39
31f18b77 40using namespace ceph;
7c673cae
FG
41
// Allocation unit for small buffers: the page size, capped at 4096 bytes.
#define CEPH_BUFFER_ALLOC_UNIT  (MIN(CEPH_PAGE_SIZE, 4096))
// One allocation unit minus the size of a raw_combined object.
#define CEPH_BUFFER_APPEND_SIZE  (CEPH_BUFFER_ALLOC_UNIT - sizeof(raw_combined))

// bdout ... bendl brackets one debug print statement.  With BUFFER_DEBUG
// the statement is serialized through a spinlock; otherwise it is wrapped
// in "if (0)" and never executed.
#ifdef BUFFER_DEBUG
static std::atomic_flag buffer_debug_lock = ATOMIC_FLAG_INIT;
# define bdout { simple_spin_lock(&buffer_debug_lock); std::cout
# define bendl std::endl; simple_spin_unlock(&buffer_debug_lock); }
#else
# define bdout if (0) { std::cout
# define bendl std::endl; }
#endif
53
31f18b77
FG
54 static std::atomic<uint64_t> buffer_total_alloc { 0 };
55 static std::atomic<uint64_t> buffer_history_alloc_bytes { 0 };
56 static std::atomic<uint64_t> buffer_history_alloc_num { 0 };
57
7c673cae
FG
58 const bool buffer_track_alloc = get_env_bool("CEPH_BUFFER_TRACK");
59
60 namespace {
61 void inc_total_alloc(unsigned len) {
62 if (buffer_track_alloc)
31f18b77 63 buffer_total_alloc += len;
7c673cae
FG
64 }
65
66 void dec_total_alloc(unsigned len) {
67 if (buffer_track_alloc)
31f18b77 68 buffer_total_alloc -= len;
7c673cae
FG
69 }
70
71 void inc_history_alloc(uint64_t len) {
72 if (buffer_track_alloc) {
31f18b77
FG
73 buffer_history_alloc_bytes += len;
74 buffer_history_alloc_num++;
7c673cae
FG
75 }
76 }
31f18b77 77 } // namespace
7c673cae
FG
78
79 int buffer::get_total_alloc() {
31f18b77 80 return buffer_total_alloc;
7c673cae
FG
81 }
82 uint64_t buffer::get_history_alloc_bytes() {
31f18b77 83 return buffer_history_alloc_bytes;
7c673cae
FG
84 }
85 uint64_t buffer::get_history_alloc_num() {
31f18b77 86 return buffer_history_alloc_num;
7c673cae
FG
87 }
88
31f18b77
FG
89 static std::atomic<unsigned> buffer_cached_crc { 0 };
90 static std::atomic<unsigned> buffer_cached_crc_adjusted { 0 };
91 static std::atomic<unsigned> buffer_missed_crc { 0 };
92
7c673cae
FG
93 static bool buffer_track_crc = get_env_bool("CEPH_BUFFER_TRACK");
94
95 void buffer::track_cached_crc(bool b) {
96 buffer_track_crc = b;
97 }
98 int buffer::get_cached_crc() {
31f18b77 99 return buffer_cached_crc;
7c673cae
FG
100 }
101 int buffer::get_cached_crc_adjusted() {
31f18b77 102 return buffer_cached_crc_adjusted;
7c673cae
FG
103 }
104
105 int buffer::get_missed_crc() {
31f18b77 106 return buffer_missed_crc;
7c673cae
FG
107 }
108
31f18b77
FG
109 static std::atomic<unsigned> buffer_c_str_accesses { 0 };
110
7c673cae
FG
111 static bool buffer_track_c_str = get_env_bool("CEPH_BUFFER_TRACK");
112
113 void buffer::track_c_str(bool b) {
114 buffer_track_c_str = b;
115 }
116 int buffer::get_c_str_accesses() {
31f18b77 117 return buffer_c_str_accesses;
7c673cae
FG
118 }
119
// Cached pipe size limit; 0 means "not read yet".
static std::atomic<unsigned> buffer_max_pipe_size{0};

/*
 * Re-read /proc/sys/fs/pipe-max-size into buffer_max_pipe_size.
 *
 * Returns 0 on success (and always when CEPH_HAVE_SETPIPE_SZ is not
 * defined), -errno if the file cannot be stat'ed, the negative result of
 * safe_read_file() if reading fails, or -EIO if the content does not parse
 * as a decimal number.
 */
int update_max_pipe_size() {
#ifdef CEPH_HAVE_SETPIPE_SZ
  struct stat st;
  if (::stat(PROCPREFIX "/proc/sys/fs/pipe-max-size", &st) == -1)
    return -errno;

  char text[32];
  const int nread = safe_read_file(PROCPREFIX "/proc/sys/fs/", "pipe-max-size",
				   text, sizeof(text) - 1);
  if (nread < 0)
    return nread;
  text[nread] = '\0';

  std::string parse_err;
  const size_t parsed = strict_strtol(text, 10, &parse_err);
  if (!parse_err.empty())
    return -EIO;
  buffer_max_pipe_size = parsed;
#endif
  return 0;
}
141
/*
 * Return the maximum pipe size to assume.  Uses the cached value when it is
 * non-zero, otherwise tries to refresh it; falls back to 65536 when the
 * limit cannot be determined.
 */
size_t get_max_pipe_size() {
#ifdef CEPH_HAVE_SETPIPE_SZ
  const size_t cached = buffer_max_pipe_size;
  if (cached != 0)
    return cached;
  if (update_max_pipe_size() == 0)
    return buffer_max_pipe_size;
#endif
  // this is the max size hardcoded in linux before 2.6.35
  return 65536;
}
153
154 const char * buffer::error::what() const throw () {
155 return "buffer::exception";
156 }
157 const char * buffer::bad_alloc::what() const throw () {
158 return "buffer::bad_alloc";
159 }
160 const char * buffer::end_of_buffer::what() const throw () {
161 return "buffer::end_of_buffer";
162 }
163 const char * buffer::malformed_input::what() const throw () {
164 return buf;
165 }
166 buffer::error_code::error_code(int error) :
167 buffer::malformed_input(cpp_strerror(error).c_str()), code(error) {}
168
169 class buffer::raw {
170 public:
171 char *data;
172 unsigned len;
31f18b77
FG
173 std::atomic<unsigned> nref { 0 };
174 int mempool = mempool::mempool_buffer_anon;
7c673cae
FG
175
176 mutable std::atomic_flag crc_spinlock = ATOMIC_FLAG_INIT;
177 map<pair<size_t, size_t>, pair<uint32_t, uint32_t> > crc_map;
178
179 explicit raw(unsigned l)
31f18b77
FG
180 : data(NULL), len(l), nref(0) {
181 mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count(1, len);
182 }
7c673cae 183 raw(char *c, unsigned l)
31f18b77
FG
184 : data(c), len(l), nref(0) {
185 mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count(1, len);
186 }
187 virtual ~raw() {
188 mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count(
189 -1, -(int)len);
190 }
191
192 void _set_len(unsigned l) {
193 mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count(
194 -1, -(int)len);
195 len = l;
196 mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count(1, len);
197 }
198
199 void reassign_to_mempool(int pool) {
200 if (pool == mempool) {
201 return;
202 }
203 mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count(
204 -1, -(int)len);
205 mempool = pool;
206 mempool::get_pool(mempool::pool_index_t(pool)).adjust_count(1, len);
207 }
208
209 void try_assign_to_mempool(int pool) {
210 if (mempool == mempool::mempool_buffer_anon) {
211 reassign_to_mempool(pool);
212 }
213 }
7c673cae
FG
214
215 // no copying.
216 // cppcheck-suppress noExplicitConstructor
217 raw(const raw &other);
218 const raw& operator=(const raw &other);
219
220 virtual char *get_data() {
221 return data;
222 }
223 virtual raw* clone_empty() = 0;
224 raw *clone() {
225 raw *c = clone_empty();
226 memcpy(c->data, data, len);
227 return c;
228 }
229 virtual bool can_zero_copy() const {
230 return false;
231 }
232 virtual int zero_copy_to_fd(int fd, loff_t *offset) {
233 return -ENOTSUP;
234 }
235 virtual bool is_page_aligned() {
236 return ((long)data & ~CEPH_PAGE_MASK) == 0;
237 }
238 bool is_n_page_sized() {
239 return (len & ~CEPH_PAGE_MASK) == 0;
240 }
241 virtual bool is_shareable() {
242 // true if safe to reference/share the existing buffer copy
243 // false if it is not safe to share the buffer, e.g., due to special
244 // and/or registered memory that is scarce
245 return true;
246 }
247 bool get_crc(const pair<size_t, size_t> &fromto,
248 pair<uint32_t, uint32_t> *crc) const {
249 simple_spin_lock(&crc_spinlock);
250 map<pair<size_t, size_t>, pair<uint32_t, uint32_t> >::const_iterator i =
251 crc_map.find(fromto);
252 if (i == crc_map.end()) {
253 simple_spin_unlock(&crc_spinlock);
254 return false;
255 }
256 *crc = i->second;
257 simple_spin_unlock(&crc_spinlock);
258 return true;
259 }
260 void set_crc(const pair<size_t, size_t> &fromto,
261 const pair<uint32_t, uint32_t> &crc) {
262 simple_spin_lock(&crc_spinlock);
263 crc_map[fromto] = crc;
264 simple_spin_unlock(&crc_spinlock);
265 }
266 void invalidate_crc() {
267 simple_spin_lock(&crc_spinlock);
268 if (crc_map.size() != 0) {
269 crc_map.clear();
270 }
271 simple_spin_unlock(&crc_spinlock);
272 }
273 };
274
7c673cae
FG
275 /*
276 * raw_combined is always placed within a single allocation along
277 * with the data buffer. the data goes at the beginning, and
278 * raw_combined at the end.
279 */
280 class buffer::raw_combined : public buffer::raw {
281 size_t alignment;
282 public:
283 raw_combined(char *dataptr, unsigned l, unsigned align=0)
284 : raw(dataptr, l),
285 alignment(align) {
286 inc_total_alloc(len);
287 inc_history_alloc(len);
288 }
289 ~raw_combined() override {
290 dec_total_alloc(len);
291 }
292 raw* clone_empty() override {
293 return create(len, alignment);
294 }
295
296 static raw_combined *create(unsigned len, unsigned align=0) {
297 if (!align)
298 align = sizeof(size_t);
299 size_t rawlen = ROUND_UP_TO(sizeof(buffer::raw_combined),
300 alignof(buffer::raw_combined));
301 size_t datalen = ROUND_UP_TO(len, alignof(buffer::raw_combined));
302
31f18b77
FG
303#ifdef DARWIN
304 char *ptr = (char *) valloc(rawlen + datalen);
305#else
306 char *ptr = 0;
307 int r = ::posix_memalign((void**)(void*)&ptr, align, rawlen + datalen);
308 if (r)
309 throw bad_alloc();
310#endif /* DARWIN */
7c673cae
FG
311 if (!ptr)
312 throw bad_alloc();
313
314 // actual data first, since it has presumably larger alignment restriction
315 // then put the raw_combined at the end
316 return new (ptr + datalen) raw_combined(ptr, len, align);
317 }
318
319 static void operator delete(void *ptr) {
320 raw_combined *raw = (raw_combined *)ptr;
31f18b77 321 ::free((void *)raw->data);
7c673cae
FG
322 }
323 };
324
325 class buffer::raw_malloc : public buffer::raw {
326 public:
327 MEMPOOL_CLASS_HELPERS();
328
329 explicit raw_malloc(unsigned l) : raw(l) {
330 if (len) {
331 data = (char *)malloc(len);
332 if (!data)
333 throw bad_alloc();
334 } else {
335 data = 0;
336 }
337 inc_total_alloc(len);
338 inc_history_alloc(len);
339 bdout << "raw_malloc " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl;
340 }
341 raw_malloc(unsigned l, char *b) : raw(b, l) {
342 inc_total_alloc(len);
343 bdout << "raw_malloc " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl;
344 }
345 ~raw_malloc() override {
346 free(data);
347 dec_total_alloc(len);
348 bdout << "raw_malloc " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl;
349 }
350 raw* clone_empty() override {
351 return new raw_malloc(len);
352 }
353 };
354
355#ifndef __CYGWIN__
356 class buffer::raw_mmap_pages : public buffer::raw {
357 public:
358 MEMPOOL_CLASS_HELPERS();
359
360 explicit raw_mmap_pages(unsigned l) : raw(l) {
361 data = (char*)::mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
362 if (!data)
363 throw bad_alloc();
364 inc_total_alloc(len);
365 inc_history_alloc(len);
366 bdout << "raw_mmap " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl;
367 }
368 ~raw_mmap_pages() override {
369 ::munmap(data, len);
370 dec_total_alloc(len);
371 bdout << "raw_mmap " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl;
372 }
373 raw* clone_empty() override {
374 return new raw_mmap_pages(len);
375 }
376 };
377
378 class buffer::raw_posix_aligned : public buffer::raw {
379 unsigned align;
380 public:
381 MEMPOOL_CLASS_HELPERS();
382
383 raw_posix_aligned(unsigned l, unsigned _align) : raw(l) {
384 align = _align;
385 assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
31f18b77
FG
386#ifdef DARWIN
387 data = (char *) valloc(len);
388#else
389 int r = ::posix_memalign((void**)(void*)&data, align, len);
390 if (r)
391 throw bad_alloc();
392#endif /* DARWIN */
7c673cae
FG
393 if (!data)
394 throw bad_alloc();
395 inc_total_alloc(len);
396 inc_history_alloc(len);
397 bdout << "raw_posix_aligned " << this << " alloc " << (void *)data << " l=" << l << ", align=" << align << " total_alloc=" << buffer::get_total_alloc() << bendl;
398 }
399 ~raw_posix_aligned() override {
31f18b77 400 ::free(data);
7c673cae
FG
401 dec_total_alloc(len);
402 bdout << "raw_posix_aligned " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl;
403 }
404 raw* clone_empty() override {
405 return new raw_posix_aligned(len, align);
406 }
407 };
408#endif
409
410#ifdef __CYGWIN__
411 class buffer::raw_hack_aligned : public buffer::raw {
412 unsigned align;
413 char *realdata;
414 public:
415 raw_hack_aligned(unsigned l, unsigned _align) : raw(l) {
416 align = _align;
417 realdata = new char[len+align-1];
418 unsigned off = ((unsigned)realdata) & (align-1);
419 if (off)
420 data = realdata + align - off;
421 else
422 data = realdata;
423 inc_total_alloc(len+align-1);
424 inc_history_alloc(len+align-1);
425 //cout << "hack aligned " << (unsigned)data
426 //<< " in raw " << (unsigned)realdata
427 //<< " off " << off << std::endl;
428 assert(((unsigned)data & (align-1)) == 0);
429 }
430 ~raw_hack_aligned() {
431 delete[] realdata;
432 dec_total_alloc(len+align-1);
433 }
434 raw* clone_empty() {
435 return new raw_hack_aligned(len, align);
436 }
437 };
438#endif
439
440#ifdef CEPH_HAVE_SPLICE
441 class buffer::raw_pipe : public buffer::raw {
442 public:
443 MEMPOOL_CLASS_HELPERS();
444
445 explicit raw_pipe(unsigned len) : raw(len), source_consumed(false) {
446 size_t max = get_max_pipe_size();
447 if (len > max) {
448 bdout << "raw_pipe: requested length " << len
449 << " > max length " << max << bendl;
450 throw malformed_input("length larger than max pipe size");
451 }
452 pipefds[0] = -1;
453 pipefds[1] = -1;
454
455 int r;
456 if (::pipe(pipefds) == -1) {
457 r = -errno;
458 bdout << "raw_pipe: error creating pipe: " << cpp_strerror(r) << bendl;
459 throw error_code(r);
460 }
461
462 r = set_nonblocking(pipefds);
463 if (r < 0) {
464 bdout << "raw_pipe: error setting nonblocking flag on temp pipe: "
465 << cpp_strerror(r) << bendl;
466 throw error_code(r);
467 }
468
469 r = set_pipe_size(pipefds, len);
470 if (r < 0) {
471 bdout << "raw_pipe: could not set pipe size" << bendl;
472 // continue, since the pipe should become large enough as needed
473 }
474
475 inc_total_alloc(len);
476 inc_history_alloc(len);
477 bdout << "raw_pipe " << this << " alloc " << len << " "
478 << buffer::get_total_alloc() << bendl;
479 }
480
481 ~raw_pipe() override {
482 if (data)
483 free(data);
484 close_pipe(pipefds);
485 dec_total_alloc(len);
486 bdout << "raw_pipe " << this << " free " << (void *)data << " "
487 << buffer::get_total_alloc() << bendl;
488 }
489
490 bool can_zero_copy() const override {
491 return true;
492 }
493
494 int set_source(int fd, loff_t *off) {
495 int flags = SPLICE_F_NONBLOCK;
496 ssize_t r = safe_splice(fd, off, pipefds[1], NULL, len, flags);
497 if (r < 0) {
498 bdout << "raw_pipe: error splicing into pipe: " << cpp_strerror(r)
499 << bendl;
500 return r;
501 }
502 // update length with actual amount read
31f18b77 503 _set_len(r);
7c673cae
FG
504 return 0;
505 }
506
507 int zero_copy_to_fd(int fd, loff_t *offset) override {
508 assert(!source_consumed);
509 int flags = SPLICE_F_NONBLOCK;
510 ssize_t r = safe_splice_exact(pipefds[0], NULL, fd, offset, len, flags);
511 if (r < 0) {
512 bdout << "raw_pipe: error splicing from pipe to fd: "
513 << cpp_strerror(r) << bendl;
514 return r;
515 }
516 source_consumed = true;
517 return 0;
518 }
519
520 buffer::raw* clone_empty() override {
521 // cloning doesn't make sense for pipe-based buffers,
522 // and is only used by unit tests for other types of buffers
523 return NULL;
524 }
525
526 char *get_data() override {
527 if (data)
528 return data;
529 return copy_pipe(pipefds);
530 }
531
532 private:
533 int set_pipe_size(int *fds, long length) {
534#ifdef CEPH_HAVE_SETPIPE_SZ
535 if (::fcntl(fds[1], F_SETPIPE_SZ, length) == -1) {
536 int r = -errno;
537 if (r == -EPERM) {
538 // pipe limit must have changed - EPERM means we requested
539 // more than the maximum size as an unprivileged user
540 update_max_pipe_size();
541 throw malformed_input("length larger than new max pipe size");
542 }
543 return r;
544 }
545#endif
546 return 0;
547 }
548
549 int set_nonblocking(int *fds) {
550 if (::fcntl(fds[0], F_SETFL, O_NONBLOCK) == -1)
551 return -errno;
552 if (::fcntl(fds[1], F_SETFL, O_NONBLOCK) == -1)
553 return -errno;
554 return 0;
555 }
556
557 static void close_pipe(const int *fds) {
558 if (fds[0] >= 0)
559 VOID_TEMP_FAILURE_RETRY(::close(fds[0]));
560 if (fds[1] >= 0)
561 VOID_TEMP_FAILURE_RETRY(::close(fds[1]));
562 }
563 char *copy_pipe(int *fds) {
564 /* preserve original pipe contents by copying into a temporary
565 * pipe before reading.
566 */
567 int tmpfd[2];
568 int r;
569
570 assert(!source_consumed);
571 assert(fds[0] >= 0);
572
573 if (::pipe(tmpfd) == -1) {
574 r = -errno;
575 bdout << "raw_pipe: error creating temp pipe: " << cpp_strerror(r)
576 << bendl;
577 throw error_code(r);
578 }
579 auto sg = make_scope_guard([=] { close_pipe(tmpfd); });
580 r = set_nonblocking(tmpfd);
581 if (r < 0) {
582 bdout << "raw_pipe: error setting nonblocking flag on temp pipe: "
583 << cpp_strerror(r) << bendl;
584 throw error_code(r);
585 }
586 r = set_pipe_size(tmpfd, len);
587 if (r < 0) {
588 bdout << "raw_pipe: error setting pipe size on temp pipe: "
589 << cpp_strerror(r) << bendl;
590 }
591 int flags = SPLICE_F_NONBLOCK;
592 if (::tee(fds[0], tmpfd[1], len, flags) == -1) {
593 r = errno;
594 bdout << "raw_pipe: error tee'ing into temp pipe: " << cpp_strerror(r)
595 << bendl;
596 throw error_code(r);
597 }
598 data = (char *)malloc(len);
599 if (!data) {
600 throw bad_alloc();
601 }
602 r = safe_read(tmpfd[0], data, len);
603 if (r < (ssize_t)len) {
604 bdout << "raw_pipe: error reading from temp pipe:" << cpp_strerror(r)
605 << bendl;
606 free(data);
607 data = NULL;
608 throw error_code(r);
609 }
610 return data;
611 }
612 bool source_consumed;
613 int pipefds[2];
614 };
615#endif // CEPH_HAVE_SPLICE
616
617 /*
618 * primitive buffer types
619 */
620 class buffer::raw_char : public buffer::raw {
621 public:
622 MEMPOOL_CLASS_HELPERS();
623
624 explicit raw_char(unsigned l) : raw(l) {
625 if (len)
31f18b77 626 data = new char[len];
7c673cae
FG
627 else
628 data = 0;
629 inc_total_alloc(len);
630 inc_history_alloc(len);
631 bdout << "raw_char " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl;
632 }
633 raw_char(unsigned l, char *b) : raw(b, l) {
634 inc_total_alloc(len);
635 bdout << "raw_char " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl;
636 }
637 ~raw_char() override {
31f18b77 638 delete[] data;
7c673cae
FG
639 dec_total_alloc(len);
640 bdout << "raw_char " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl;
641 }
642 raw* clone_empty() override {
643 return new raw_char(len);
644 }
645 };
646
31f18b77
FG
647 class buffer::raw_claimed_char : public buffer::raw {
648 public:
649 MEMPOOL_CLASS_HELPERS();
650
651 explicit raw_claimed_char(unsigned l, char *b) : raw(b, l) {
652 inc_total_alloc(len);
653 bdout << "raw_claimed_char " << this << " alloc " << (void *)data
654 << " " << l << " " << buffer::get_total_alloc() << bendl;
655 }
656 ~raw_claimed_char() override {
657 dec_total_alloc(len);
658 bdout << "raw_claimed_char " << this << " free " << (void *)data
659 << " " << buffer::get_total_alloc() << bendl;
660 }
661 raw* clone_empty() override {
662 return new raw_char(len);
663 }
664 };
665
7c673cae
FG
666 class buffer::raw_unshareable : public buffer::raw {
667 public:
668 MEMPOOL_CLASS_HELPERS();
669
670 explicit raw_unshareable(unsigned l) : raw(l) {
671 if (len)
672 data = new char[len];
673 else
674 data = 0;
675 }
676 raw_unshareable(unsigned l, char *b) : raw(b, l) {
677 }
678 raw* clone_empty() override {
679 return new raw_char(len);
680 }
681 bool is_shareable() override {
682 return false; // !shareable, will force make_shareable()
683 }
684 ~raw_unshareable() override {
685 delete[] data;
686 }
687 };
688
689 class buffer::raw_static : public buffer::raw {
690 public:
691 MEMPOOL_CLASS_HELPERS();
692
693 raw_static(const char *d, unsigned l) : raw((char*)d, l) { }
694 ~raw_static() override {}
695 raw* clone_empty() override {
696 return new buffer::raw_char(len);
697 }
698 };
699
700 class buffer::raw_claim_buffer : public buffer::raw {
701 deleter del;
702 public:
703 raw_claim_buffer(const char *b, unsigned l, deleter d)
704 : raw((char*)b, l), del(std::move(d)) { }
705 ~raw_claim_buffer() override {}
706 raw* clone_empty() override {
707 return new buffer::raw_char(len);
708 }
709 };
710
711#if defined(HAVE_XIO)
712 class buffer::xio_msg_buffer : public buffer::raw {
713 private:
714 XioDispatchHook* m_hook;
715 public:
716 xio_msg_buffer(XioDispatchHook* _m_hook, const char *d,
717 unsigned l) :
718 raw((char*)d, l), m_hook(_m_hook->get()) {}
719
720 bool is_shareable() { return false; }
721 static void operator delete(void *p)
722 {
723 xio_msg_buffer *buf = static_cast<xio_msg_buffer*>(p);
724 // return hook ref (counts against pool); it appears illegal
725 // to do this in our dtor, because this fires after that
726 buf->m_hook->put();
727 }
728 raw* clone_empty() {
729 return new buffer::raw_char(len);
730 }
731 };
732
733 class buffer::xio_mempool : public buffer::raw {
734 public:
735 struct xio_reg_mem *mp;
736 xio_mempool(struct xio_reg_mem *_mp, unsigned l) :
737 raw((char*)_mp->addr, l), mp(_mp)
738 { }
739 ~xio_mempool() {}
740 raw* clone_empty() {
741 return new buffer::raw_char(len);
742 }
743 };
744
745 struct xio_reg_mem* get_xio_mp(const buffer::ptr& bp)
746 {
747 buffer::xio_mempool *mb = dynamic_cast<buffer::xio_mempool*>(bp.get_raw());
748 if (mb) {
749 return mb->mp;
750 }
751 return NULL;
752 }
753
754 buffer::raw* buffer::create_msg(
755 unsigned len, char *buf, XioDispatchHook* m_hook) {
756 XioPool& pool = m_hook->get_pool();
757 buffer::raw* bp =
758 static_cast<buffer::raw*>(pool.alloc(sizeof(xio_msg_buffer)));
759 new (bp) xio_msg_buffer(m_hook, buf, len);
760 return bp;
761 }
762#endif /* HAVE_XIO */
763
764 buffer::raw* buffer::copy(const char *c, unsigned len) {
765 raw* r = buffer::create_aligned(len, sizeof(size_t));
766 memcpy(r->data, c, len);
767 return r;
768 }
769
770 buffer::raw* buffer::create(unsigned len) {
771 return buffer::create_aligned(len, sizeof(size_t));
772 }
773 buffer::raw* buffer::claim_char(unsigned len, char *buf) {
31f18b77 774 return new raw_claimed_char(len, buf);
7c673cae
FG
775 }
776 buffer::raw* buffer::create_malloc(unsigned len) {
777 return new raw_malloc(len);
778 }
779 buffer::raw* buffer::claim_malloc(unsigned len, char *buf) {
780 return new raw_malloc(len, buf);
781 }
782 buffer::raw* buffer::create_static(unsigned len, char *buf) {
783 return new raw_static(buf, len);
784 }
785 buffer::raw* buffer::claim_buffer(unsigned len, char *buf, deleter del) {
786 return new raw_claim_buffer(buf, len, std::move(del));
787 }
788
789 buffer::raw* buffer::create_aligned(unsigned len, unsigned align) {
790 // If alignment is a page multiple, use a separate buffer::raw to
791 // avoid fragmenting the heap.
792 //
793 // Somewhat unexpectedly, I see consistently better performance
794 // from raw_combined than from raw even when the allocation size is
795 // a page multiple (but alignment is not).
796 //
797 // I also see better performance from a separate buffer::raw once the
798 // size passes 8KB.
799 if ((align & ~CEPH_PAGE_MASK) == 0 ||
800 len >= CEPH_PAGE_SIZE * 2) {
801#ifndef __CYGWIN__
802 return new raw_posix_aligned(len, align);
803#else
804 return new raw_hack_aligned(len, align);
805#endif
806 }
807 return raw_combined::create(len, align);
808 }
809
810 buffer::raw* buffer::create_page_aligned(unsigned len) {
811 return create_aligned(len, CEPH_PAGE_SIZE);
812 }
813
814 buffer::raw* buffer::create_zero_copy(unsigned len, int fd, int64_t *offset) {
815#ifdef CEPH_HAVE_SPLICE
816 buffer::raw_pipe* buf = new raw_pipe(len);
817 int r = buf->set_source(fd, (loff_t*)offset);
818 if (r < 0) {
819 delete buf;
820 throw error_code(r);
821 }
822 return buf;
823#else
824 throw error_code(-ENOTSUP);
825#endif
826 }
827
828 buffer::raw* buffer::create_unshareable(unsigned len) {
829 return new raw_unshareable(len);
830 }
831
832 buffer::ptr::ptr(raw *r) : _raw(r), _off(0), _len(r->len) // no lock needed; this is an unref raw.
833 {
31f18b77 834 r->nref++;
7c673cae
FG
835 bdout << "ptr " << this << " get " << _raw << bendl;
836 }
837 buffer::ptr::ptr(unsigned l) : _off(0), _len(l)
838 {
839 _raw = create(l);
31f18b77 840 _raw->nref++;
7c673cae
FG
841 bdout << "ptr " << this << " get " << _raw << bendl;
842 }
843 buffer::ptr::ptr(const char *d, unsigned l) : _off(0), _len(l) // ditto.
844 {
845 _raw = copy(d, l);
31f18b77 846 _raw->nref++;
7c673cae
FG
847 bdout << "ptr " << this << " get " << _raw << bendl;
848 }
849 buffer::ptr::ptr(const ptr& p) : _raw(p._raw), _off(p._off), _len(p._len)
850 {
851 if (_raw) {
31f18b77 852 _raw->nref++;
7c673cae
FG
853 bdout << "ptr " << this << " get " << _raw << bendl;
854 }
855 }
856 buffer::ptr::ptr(ptr&& p) noexcept : _raw(p._raw), _off(p._off), _len(p._len)
857 {
858 p._raw = nullptr;
859 p._off = p._len = 0;
860 }
861 buffer::ptr::ptr(const ptr& p, unsigned o, unsigned l)
862 : _raw(p._raw), _off(p._off + o), _len(l)
863 {
864 assert(o+l <= p._len);
865 assert(_raw);
31f18b77 866 _raw->nref++;
7c673cae
FG
867 bdout << "ptr " << this << " get " << _raw << bendl;
868 }
869 buffer::ptr& buffer::ptr::operator= (const ptr& p)
870 {
871 if (p._raw) {
31f18b77 872 p._raw->nref++;
7c673cae
FG
873 bdout << "ptr " << this << " get " << _raw << bendl;
874 }
875 buffer::raw *raw = p._raw;
876 release();
877 if (raw) {
878 _raw = raw;
879 _off = p._off;
880 _len = p._len;
881 } else {
882 _off = _len = 0;
883 }
884 return *this;
885 }
886 buffer::ptr& buffer::ptr::operator= (ptr&& p) noexcept
887 {
888 release();
889 buffer::raw *raw = p._raw;
890 if (raw) {
891 _raw = raw;
892 _off = p._off;
893 _len = p._len;
894 p._raw = nullptr;
895 p._off = p._len = 0;
896 } else {
897 _off = _len = 0;
898 }
899 return *this;
900 }
901
902 buffer::raw *buffer::ptr::clone()
903 {
904 return _raw->clone();
905 }
906
907 buffer::ptr& buffer::ptr::make_shareable() {
908 if (_raw && !_raw->is_shareable()) {
909 buffer::raw *tr = _raw;
910 _raw = tr->clone();
31f18b77
FG
911 _raw->nref = 1;
912 if (unlikely(--tr->nref == 0)) {
7c673cae
FG
913 ANNOTATE_HAPPENS_AFTER(&tr->nref);
914 ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&tr->nref);
915 delete tr;
916 } else {
917 ANNOTATE_HAPPENS_BEFORE(&tr->nref);
918 }
919 }
920 return *this;
921 }
922
923 void buffer::ptr::swap(ptr& other)
924 {
925 raw *r = _raw;
926 unsigned o = _off;
927 unsigned l = _len;
928 _raw = other._raw;
929 _off = other._off;
930 _len = other._len;
931 other._raw = r;
932 other._off = o;
933 other._len = l;
934 }
935
936 void buffer::ptr::release()
937 {
938 if (_raw) {
939 bdout << "ptr " << this << " release " << _raw << bendl;
31f18b77 940 if (--_raw->nref == 0) {
7c673cae
FG
941 //cout << "hosing raw " << (void*)_raw << " len " << _raw->len << std::endl;
942 ANNOTATE_HAPPENS_AFTER(&_raw->nref);
943 ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&_raw->nref);
944 delete _raw; // dealloc old (if any)
945 } else {
946 ANNOTATE_HAPPENS_BEFORE(&_raw->nref);
947 }
948 _raw = 0;
949 }
950 }
951
952 bool buffer::ptr::at_buffer_tail() const { return _off + _len == _raw->len; }
953
954 const char *buffer::ptr::c_str() const {
955 assert(_raw);
956 if (buffer_track_c_str)
31f18b77 957 buffer_c_str_accesses++;
7c673cae
FG
958 return _raw->get_data() + _off;
959 }
960 char *buffer::ptr::c_str() {
961 assert(_raw);
962 if (buffer_track_c_str)
31f18b77 963 buffer_c_str_accesses++;
7c673cae
FG
964 return _raw->get_data() + _off;
965 }
966 const char *buffer::ptr::end_c_str() const {
967 assert(_raw);
968 if (buffer_track_c_str)
31f18b77 969 buffer_c_str_accesses++;
7c673cae
FG
970 return _raw->get_data() + _off + _len;
971 }
972 char *buffer::ptr::end_c_str() {
973 assert(_raw);
974 if (buffer_track_c_str)
31f18b77 975 buffer_c_str_accesses++;
7c673cae
FG
976 return _raw->get_data() + _off + _len;
977 }
978
979 unsigned buffer::ptr::unused_tail_length() const
980 {
981 if (_raw)
982 return _raw->len - (_off+_len);
983 else
984 return 0;
985 }
986 const char& buffer::ptr::operator[](unsigned n) const
987 {
988 assert(_raw);
989 assert(n < _len);
990 return _raw->get_data()[_off + n];
991 }
992 char& buffer::ptr::operator[](unsigned n)
993 {
994 assert(_raw);
995 assert(n < _len);
996 return _raw->get_data()[_off + n];
997 }
998
999 const char *buffer::ptr::raw_c_str() const { assert(_raw); return _raw->data; }
1000 unsigned buffer::ptr::raw_length() const { assert(_raw); return _raw->len; }
31f18b77 1001 int buffer::ptr::raw_nref() const { assert(_raw); return _raw->nref; }
7c673cae
FG
1002
1003 void buffer::ptr::copy_out(unsigned o, unsigned l, char *dest) const {
1004 assert(_raw);
1005 if (o+l > _len)
1006 throw end_of_buffer();
1007 char* src = _raw->data + _off + o;
1008 maybe_inline_memcpy(dest, src, l, 8);
1009 }
1010
31f18b77 1011 unsigned buffer::ptr::wasted() const
7c673cae 1012 {
7c673cae
FG
1013 return _raw->len - _len;
1014 }
1015
1016 int buffer::ptr::cmp(const ptr& o) const
1017 {
1018 int l = _len < o._len ? _len : o._len;
1019 if (l) {
1020 int r = memcmp(c_str(), o.c_str(), l);
1021 if (r)
1022 return r;
1023 }
1024 if (_len < o._len)
1025 return -1;
1026 if (_len > o._len)
1027 return 1;
1028 return 0;
1029 }
1030
1031 bool buffer::ptr::is_zero() const
1032 {
1033 return mem_is_zero(c_str(), _len);
1034 }
1035
1036 unsigned buffer::ptr::append(char c)
1037 {
1038 assert(_raw);
1039 assert(1 <= unused_tail_length());
1040 char* ptr = _raw->data + _off + _len;
1041 *ptr = c;
1042 _len++;
1043 return _len + _off;
1044 }
1045
1046 unsigned buffer::ptr::append(const char *p, unsigned l)
1047 {
1048 assert(_raw);
1049 assert(l <= unused_tail_length());
1050 char* c = _raw->data + _off + _len;
1051 maybe_inline_memcpy(c, p, l, 32);
1052 _len += l;
1053 return _len + _off;
1054 }
1055
1056 void buffer::ptr::copy_in(unsigned o, unsigned l, const char *src)
1057 {
1058 copy_in(o, l, src, true);
1059 }
1060
1061 void buffer::ptr::copy_in(unsigned o, unsigned l, const char *src, bool crc_reset)
1062 {
1063 assert(_raw);
1064 assert(o <= _len);
1065 assert(o+l <= _len);
1066 char* dest = _raw->data + _off + o;
1067 if (crc_reset)
1068 _raw->invalidate_crc();
1069 maybe_inline_memcpy(dest, src, l, 64);
1070 }
1071
1072 void buffer::ptr::zero()
1073 {
1074 zero(true);
1075 }
1076
1077 void buffer::ptr::zero(bool crc_reset)
1078 {
1079 if (crc_reset)
1080 _raw->invalidate_crc();
1081 memset(c_str(), 0, _len);
1082 }
1083
1084 void buffer::ptr::zero(unsigned o, unsigned l)
1085 {
1086 zero(o, l, true);
1087 }
1088
1089 void buffer::ptr::zero(unsigned o, unsigned l, bool crc_reset)
1090 {
1091 assert(o+l <= _len);
1092 if (crc_reset)
1093 _raw->invalidate_crc();
1094 memset(c_str()+o, 0, l);
1095 }
1096 bool buffer::ptr::can_zero_copy() const
1097 {
1098 return _raw->can_zero_copy();
1099 }
1100
1101 int buffer::ptr::zero_copy_to_fd(int fd, int64_t *offset) const
1102 {
1103 return _raw->zero_copy_to_fd(fd, (loff_t*)offset);
1104 }
1105
1106 // -- buffer::list::iterator --
1107 /*
1108 buffer::list::iterator operator=(const buffer::list::iterator& other)
1109 {
1110 if (this != &other) {
1111 bl = other.bl;
1112 ls = other.ls;
1113 off = other.off;
1114 p = other.p;
1115 p_off = other.p_off;
1116 }
1117 return *this;
1118 }*/
1119
1120 template<bool is_const>
1121 buffer::list::iterator_impl<is_const>::iterator_impl(bl_t *l, unsigned o)
1122 : bl(l), ls(&bl->_buffers), off(0), p(ls->begin()), p_off(0)
1123 {
1124 advance(o);
1125 }
1126
1127 template<bool is_const>
1128 buffer::list::iterator_impl<is_const>::iterator_impl(const buffer::list::iterator& i)
1129 : iterator_impl<is_const>(i.bl, i.off, i.p, i.p_off) {}
1130
1131 template<bool is_const>
1132 void buffer::list::iterator_impl<is_const>::advance(int o)
1133 {
1134 //cout << this << " advance " << o << " from " << off << " (p_off " << p_off << " in " << p->length() << ")" << std::endl;
1135 if (o > 0) {
1136 p_off += o;
1137 while (p_off > 0) {
1138 if (p == ls->end())
1139 throw end_of_buffer();
1140 if (p_off >= p->length()) {
1141 // skip this buffer
1142 p_off -= p->length();
1143 p++;
1144 } else {
1145 // somewhere in this buffer!
1146 break;
1147 }
1148 }
1149 off += o;
1150 return;
1151 }
1152 while (o < 0) {
1153 if (p_off) {
1154 unsigned d = -o;
1155 if (d > p_off)
1156 d = p_off;
1157 p_off -= d;
1158 off -= d;
1159 o += d;
1160 } else if (off > 0) {
1161 assert(p != ls->begin());
1162 p--;
1163 p_off = p->length();
1164 } else {
1165 throw end_of_buffer();
1166 }
1167 }
1168 }
1169
1170 template<bool is_const>
1171 void buffer::list::iterator_impl<is_const>::seek(unsigned o)
1172 {
1173 p = ls->begin();
1174 off = p_off = 0;
1175 advance(o);
1176 }
1177
1178 template<bool is_const>
1179 char buffer::list::iterator_impl<is_const>::operator*() const
1180 {
1181 if (p == ls->end())
1182 throw end_of_buffer();
1183 return (*p)[p_off];
1184 }
1185
1186 template<bool is_const>
1187 buffer::list::iterator_impl<is_const>&
1188 buffer::list::iterator_impl<is_const>::operator++()
1189 {
1190 if (p == ls->end())
1191 throw end_of_buffer();
1192 advance(1);
1193 return *this;
1194 }
1195
1196 template<bool is_const>
1197 buffer::ptr buffer::list::iterator_impl<is_const>::get_current_ptr() const
1198 {
1199 if (p == ls->end())
1200 throw end_of_buffer();
1201 return ptr(*p, p_off, p->length() - p_off);
1202 }
1203
1204 // copy data out.
1205 // note that these all _append_ to dest!
1206 template<bool is_const>
1207 void buffer::list::iterator_impl<is_const>::copy(unsigned len, char *dest)
1208 {
1209 if (p == ls->end()) seek(off);
1210 while (len > 0) {
1211 if (p == ls->end())
1212 throw end_of_buffer();
1213 assert(p->length() > 0);
1214
1215 unsigned howmuch = p->length() - p_off;
1216 if (len < howmuch) howmuch = len;
1217 p->copy_out(p_off, howmuch, dest);
1218 dest += howmuch;
1219
1220 len -= howmuch;
1221 advance(howmuch);
1222 }
1223 }
1224
1225 template<bool is_const>
1226 void buffer::list::iterator_impl<is_const>::copy(unsigned len, ptr &dest)
1227 {
1228 copy_deep(len, dest);
1229 }
1230
1231 template<bool is_const>
1232 void buffer::list::iterator_impl<is_const>::copy_deep(unsigned len, ptr &dest)
1233 {
1234 if (!len) {
1235 return;
1236 }
1237 if (p == ls->end())
1238 throw end_of_buffer();
1239 assert(p->length() > 0);
1240 dest = create(len);
1241 copy(len, dest.c_str());
1242 }
1243 template<bool is_const>
1244 void buffer::list::iterator_impl<is_const>::copy_shallow(unsigned len,
1245 ptr &dest)
1246 {
1247 if (!len) {
1248 return;
1249 }
1250 if (p == ls->end())
1251 throw end_of_buffer();
1252 assert(p->length() > 0);
1253 unsigned howmuch = p->length() - p_off;
1254 if (howmuch < len) {
1255 dest = create(len);
1256 copy(len, dest.c_str());
1257 } else {
1258 dest = ptr(*p, p_off, len);
1259 advance(len);
1260 }
1261 }
1262
1263 template<bool is_const>
1264 void buffer::list::iterator_impl<is_const>::copy(unsigned len, list &dest)
1265 {
1266 if (p == ls->end())
1267 seek(off);
1268 while (len > 0) {
1269 if (p == ls->end())
1270 throw end_of_buffer();
1271
1272 unsigned howmuch = p->length() - p_off;
1273 if (len < howmuch)
1274 howmuch = len;
1275 dest.append(*p, p_off, howmuch);
1276
1277 len -= howmuch;
1278 advance(howmuch);
1279 }
1280 }
1281
1282 template<bool is_const>
1283 void buffer::list::iterator_impl<is_const>::copy(unsigned len, std::string &dest)
1284 {
1285 if (p == ls->end())
1286 seek(off);
1287 while (len > 0) {
1288 if (p == ls->end())
1289 throw end_of_buffer();
1290
1291 unsigned howmuch = p->length() - p_off;
1292 const char *c_str = p->c_str();
1293 if (len < howmuch)
1294 howmuch = len;
1295 dest.append(c_str + p_off, howmuch);
1296
1297 len -= howmuch;
1298 advance(howmuch);
1299 }
1300 }
1301
1302 template<bool is_const>
1303 void buffer::list::iterator_impl<is_const>::copy_all(list &dest)
1304 {
1305 if (p == ls->end())
1306 seek(off);
1307 while (1) {
1308 if (p == ls->end())
1309 return;
1310 assert(p->length() > 0);
1311
1312 unsigned howmuch = p->length() - p_off;
1313 const char *c_str = p->c_str();
1314 dest.append(c_str + p_off, howmuch);
1315
1316 advance(howmuch);
1317 }
1318 }
1319
1320 template<bool is_const>
1321 size_t buffer::list::iterator_impl<is_const>::get_ptr_and_advance(
1322 size_t want, const char **data)
1323 {
1324 if (p == ls->end()) {
1325 seek(off);
1326 if (p == ls->end()) {
1327 return 0;
1328 }
1329 }
1330 *data = p->c_str() + p_off;
1331 size_t l = MIN(p->length() - p_off, want);
1332 p_off += l;
1333 if (p_off == p->length()) {
1334 ++p;
1335 p_off = 0;
1336 }
1337 off += l;
1338 return l;
1339 }
1340
1341 template<bool is_const>
1342 uint32_t buffer::list::iterator_impl<is_const>::crc32c(
1343 size_t length, uint32_t crc)
1344 {
1345 length = MIN( length, get_remaining());
1346 while (length > 0) {
1347 const char *p;
1348 size_t l = get_ptr_and_advance(length, &p);
1349 crc = ceph_crc32c(crc, (unsigned char*)p, l);
1350 length -= l;
1351 }
1352 return crc;
1353 }
1354
1355 // explicitly instantiate only the iterator types we need, so we can hide the
1356 // details in this compilation unit without introducing unnecessary link time
1357 // dependencies.
1358 template class buffer::list::iterator_impl<true>;
1359 template class buffer::list::iterator_impl<false>;
1360
1361 buffer::list::iterator::iterator(bl_t *l, unsigned o)
1362 : iterator_impl(l, o)
1363 {}
1364
1365 buffer::list::iterator::iterator(bl_t *l, unsigned o, list_iter_t ip, unsigned po)
1366 : iterator_impl(l, o, ip, po)
1367 {}
1368
1369 void buffer::list::iterator::advance(int o)
1370 {
1371 buffer::list::iterator_impl<false>::advance(o);
1372 }
1373
1374 void buffer::list::iterator::seek(unsigned o)
1375 {
1376 buffer::list::iterator_impl<false>::seek(o);
1377 }
1378
1379 char buffer::list::iterator::operator*()
1380 {
1381 if (p == ls->end()) {
1382 throw end_of_buffer();
1383 }
1384 return (*p)[p_off];
1385 }
1386
1387 buffer::list::iterator& buffer::list::iterator::operator++()
1388 {
1389 buffer::list::iterator_impl<false>::operator++();
1390 return *this;
1391 }
1392
1393 buffer::ptr buffer::list::iterator::get_current_ptr()
1394 {
1395 if (p == ls->end()) {
1396 throw end_of_buffer();
1397 }
1398 return ptr(*p, p_off, p->length() - p_off);
1399 }
1400
1401 void buffer::list::iterator::copy(unsigned len, char *dest)
1402 {
1403 return buffer::list::iterator_impl<false>::copy(len, dest);
1404 }
1405
1406 void buffer::list::iterator::copy(unsigned len, ptr &dest)
1407 {
1408 return buffer::list::iterator_impl<false>::copy_deep(len, dest);
1409 }
1410
1411 void buffer::list::iterator::copy_deep(unsigned len, ptr &dest)
1412 {
1413 buffer::list::iterator_impl<false>::copy_deep(len, dest);
1414 }
1415
1416 void buffer::list::iterator::copy_shallow(unsigned len, ptr &dest)
1417 {
1418 buffer::list::iterator_impl<false>::copy_shallow(len, dest);
1419 }
1420
1421 void buffer::list::iterator::copy(unsigned len, list &dest)
1422 {
1423 buffer::list::iterator_impl<false>::copy(len, dest);
1424 }
1425
1426 void buffer::list::iterator::copy(unsigned len, std::string &dest)
1427 {
1428 buffer::list::iterator_impl<false>::copy(len, dest);
1429 }
1430
1431 void buffer::list::iterator::copy_all(list &dest)
1432 {
1433 buffer::list::iterator_impl<false>::copy_all(dest);
1434 }
1435
1436 void buffer::list::iterator::copy_in(unsigned len, const char *src)
1437 {
1438 copy_in(len, src, true);
1439 }
1440
1441 // copy data in
1442 void buffer::list::iterator::copy_in(unsigned len, const char *src, bool crc_reset)
1443 {
1444 // copy
1445 if (p == ls->end())
1446 seek(off);
1447 while (len > 0) {
1448 if (p == ls->end())
1449 throw end_of_buffer();
1450
1451 unsigned howmuch = p->length() - p_off;
1452 if (len < howmuch)
1453 howmuch = len;
1454 p->copy_in(p_off, howmuch, src, crc_reset);
1455
1456 src += howmuch;
1457 len -= howmuch;
1458 advance(howmuch);
1459 }
1460 }
1461
1462 void buffer::list::iterator::copy_in(unsigned len, const list& otherl)
1463 {
1464 if (p == ls->end())
1465 seek(off);
1466 unsigned left = len;
1467 for (std::list<ptr>::const_iterator i = otherl._buffers.begin();
1468 i != otherl._buffers.end();
1469 ++i) {
1470 unsigned l = (*i).length();
1471 if (left < l)
1472 l = left;
1473 copy_in(l, i->c_str());
1474 left -= l;
1475 if (left == 0)
1476 break;
1477 }
1478 }
1479
1480 // -- buffer::list --
1481
1482 buffer::list::list(list&& other)
1483 : _buffers(std::move(other._buffers)),
1484 _len(other._len),
1485 _memcopy_count(other._memcopy_count),
1486 last_p(this) {
1487 append_buffer.swap(other.append_buffer);
1488 other.clear();
1489 }
1490
1491 void buffer::list::swap(list& other)
1492 {
1493 std::swap(_len, other._len);
1494 std::swap(_memcopy_count, other._memcopy_count);
31f18b77 1495 std::swap(_mempool, other._mempool);
7c673cae
FG
1496 _buffers.swap(other._buffers);
1497 append_buffer.swap(other.append_buffer);
1498 //last_p.swap(other.last_p);
1499 last_p = begin();
1500 other.last_p = other.begin();
1501 }
1502
1503 bool buffer::list::contents_equal(buffer::list& other)
1504 {
1505 return static_cast<const buffer::list*>(this)->contents_equal(other);
1506 }
1507
1508 bool buffer::list::contents_equal(const ceph::buffer::list& other) const
1509 {
1510 if (length() != other.length())
1511 return false;
1512
1513 // buffer-wise comparison
1514 if (true) {
1515 std::list<ptr>::const_iterator a = _buffers.begin();
1516 std::list<ptr>::const_iterator b = other._buffers.begin();
1517 unsigned aoff = 0, boff = 0;
1518 while (a != _buffers.end()) {
1519 unsigned len = a->length() - aoff;
1520 if (len > b->length() - boff)
1521 len = b->length() - boff;
1522 if (memcmp(a->c_str() + aoff, b->c_str() + boff, len) != 0)
1523 return false;
1524 aoff += len;
1525 if (aoff == a->length()) {
1526 aoff = 0;
1527 ++a;
1528 }
1529 boff += len;
1530 if (boff == b->length()) {
1531 boff = 0;
1532 ++b;
1533 }
1534 }
1535 assert(b == other._buffers.end());
1536 return true;
1537 }
1538
1539 // byte-wise comparison
1540 if (false) {
1541 bufferlist::const_iterator me = begin();
1542 bufferlist::const_iterator him = other.begin();
1543 while (!me.end()) {
1544 if (*me != *him)
1545 return false;
1546 ++me;
1547 ++him;
1548 }
1549 return true;
1550 }
1551 }
1552
1553 bool buffer::list::can_zero_copy() const
1554 {
1555 for (std::list<ptr>::const_iterator it = _buffers.begin();
1556 it != _buffers.end();
1557 ++it)
1558 if (!it->can_zero_copy())
1559 return false;
1560 return true;
1561 }
1562
1563 bool buffer::list::is_provided_buffer(const char *dst) const
1564 {
1565 if (_buffers.empty())
1566 return false;
1567 return (is_contiguous() && (_buffers.front().c_str() == dst));
1568 }
1569
1570 bool buffer::list::is_aligned(unsigned align) const
1571 {
1572 for (std::list<ptr>::const_iterator it = _buffers.begin();
1573 it != _buffers.end();
1574 ++it)
1575 if (!it->is_aligned(align))
1576 return false;
1577 return true;
1578 }
1579
1580 bool buffer::list::is_n_align_sized(unsigned align) const
1581 {
1582 for (std::list<ptr>::const_iterator it = _buffers.begin();
1583 it != _buffers.end();
1584 ++it)
1585 if (!it->is_n_align_sized(align))
1586 return false;
1587 return true;
1588 }
1589
1590 bool buffer::list::is_aligned_size_and_memory(unsigned align_size,
1591 unsigned align_memory) const
1592 {
1593 for (std::list<ptr>::const_iterator it = _buffers.begin();
1594 it != _buffers.end();
1595 ++it) {
1596 if (!it->is_aligned(align_memory) || !it->is_n_align_sized(align_size))
1597 return false;
1598 }
1599 return true;
1600 }
1601
1602 bool buffer::list::is_zero() const {
1603 for (std::list<ptr>::const_iterator it = _buffers.begin();
1604 it != _buffers.end();
1605 ++it) {
1606 if (!it->is_zero()) {
1607 return false;
1608 }
1609 }
1610 return true;
1611 }
1612
1613 void buffer::list::zero()
1614 {
1615 for (std::list<ptr>::iterator it = _buffers.begin();
1616 it != _buffers.end();
1617 ++it)
1618 it->zero();
1619 }
1620
1621 void buffer::list::zero(unsigned o, unsigned l)
1622 {
1623 assert(o+l <= _len);
1624 unsigned p = 0;
1625 for (std::list<ptr>::iterator it = _buffers.begin();
1626 it != _buffers.end();
1627 ++it) {
1628 if (p + it->length() > o) {
1629 if (p >= o && p+it->length() <= o+l) {
1630 // 'o'------------- l -----------|
1631 // 'p'-- it->length() --|
1632 it->zero();
1633 } else if (p >= o) {
1634 // 'o'------------- l -----------|
1635 // 'p'------- it->length() -------|
1636 it->zero(0, o+l-p);
1637 } else if (p + it->length() <= o+l) {
1638 // 'o'------------- l -----------|
1639 // 'p'------- it->length() -------|
1640 it->zero(o-p, it->length()-(o-p));
1641 } else {
1642 // 'o'----------- l -----------|
1643 // 'p'---------- it->length() ----------|
1644 it->zero(o-p, l);
1645 }
1646 }
1647 p += it->length();
1648 if (o+l <= p)
1649 break; // done
1650 }
1651 }
1652
1653 bool buffer::list::is_contiguous() const
1654 {
1655 return &(*_buffers.begin()) == &(*_buffers.rbegin());
1656 }
1657
1658 bool buffer::list::is_n_page_sized() const
1659 {
1660 return is_n_align_sized(CEPH_PAGE_SIZE);
1661 }
1662
1663 bool buffer::list::is_page_aligned() const
1664 {
1665 return is_aligned(CEPH_PAGE_SIZE);
1666 }
1667
31f18b77
FG
1668 void buffer::list::reassign_to_mempool(int pool)
1669 {
1670 _mempool = pool;
1671 if (append_buffer.get_raw()) {
1672 append_buffer.get_raw()->reassign_to_mempool(pool);
1673 }
1674 for (auto& p : _buffers) {
1675 p.get_raw()->reassign_to_mempool(pool);
1676 }
1677 }
1678
1679 void buffer::list::try_assign_to_mempool(int pool)
1680 {
1681 _mempool = pool;
1682 if (append_buffer.get_raw()) {
1683 append_buffer.get_raw()->try_assign_to_mempool(pool);
1684 }
1685 for (auto& p : _buffers) {
1686 p.get_raw()->try_assign_to_mempool(pool);
1687 }
1688 }
1689
7c673cae
FG
1690 void buffer::list::rebuild()
1691 {
1692 if (_len == 0) {
1693 _buffers.clear();
1694 return;
1695 }
1696 ptr nb;
1697 if ((_len & ~CEPH_PAGE_MASK) == 0)
1698 nb = buffer::create_page_aligned(_len);
1699 else
1700 nb = buffer::create(_len);
1701 rebuild(nb);
1702 }
1703
1704 void buffer::list::rebuild(ptr& nb)
1705 {
1706 unsigned pos = 0;
1707 for (std::list<ptr>::iterator it = _buffers.begin();
1708 it != _buffers.end();
1709 ++it) {
1710 nb.copy_in(pos, it->length(), it->c_str(), false);
1711 pos += it->length();
1712 }
1713 _memcopy_count += pos;
1714 _buffers.clear();
1715 if (nb.length())
1716 _buffers.push_back(nb);
1717 invalidate_crc();
1718 last_p = begin();
1719 }
1720
1721 bool buffer::list::rebuild_aligned(unsigned align)
1722 {
1723 return rebuild_aligned_size_and_memory(align, align);
1724 }
1725
1726 bool buffer::list::rebuild_aligned_size_and_memory(unsigned align_size,
1727 unsigned align_memory)
1728 {
1729 unsigned old_memcopy_count = _memcopy_count;
1730 std::list<ptr>::iterator p = _buffers.begin();
1731 while (p != _buffers.end()) {
1732 // keep anything that's already align and sized aligned
1733 if (p->is_aligned(align_memory) && p->is_n_align_sized(align_size)) {
1734 /*cout << " segment " << (void*)p->c_str()
1735 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1736 << " length " << p->length()
1737 << " " << (p->length() & (align - 1)) << " ok" << std::endl;
1738 */
1739 ++p;
1740 continue;
1741 }
1742
1743 // consolidate unaligned items, until we get something that is sized+aligned
1744 list unaligned;
1745 unsigned offset = 0;
1746 do {
1747 /*cout << " segment " << (void*)p->c_str()
1748 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1749 << " length " << p->length() << " " << (p->length() & (align - 1))
1750 << " overall offset " << offset << " " << (offset & (align - 1))
1751 << " not ok" << std::endl;
1752 */
1753 offset += p->length();
1754 unaligned.push_back(*p);
1755 _buffers.erase(p++);
1756 } while (p != _buffers.end() &&
1757 (!p->is_aligned(align_memory) ||
1758 !p->is_n_align_sized(align_size) ||
1759 (offset % align_size)));
1760 if (!(unaligned.is_contiguous() && unaligned._buffers.front().is_aligned(align_memory))) {
1761 ptr nb(buffer::create_aligned(unaligned._len, align_memory));
1762 unaligned.rebuild(nb);
1763 _memcopy_count += unaligned._len;
1764 }
1765 _buffers.insert(p, unaligned._buffers.front());
1766 }
1767 last_p = begin();
1768
1769 return (old_memcopy_count != _memcopy_count);
1770 }
1771
1772 bool buffer::list::rebuild_page_aligned()
1773 {
1774 return rebuild_aligned(CEPH_PAGE_SIZE);
1775 }
1776
31f18b77
FG
1777 void buffer::list::reserve(size_t prealloc)
1778 {
1779 if (append_buffer.unused_tail_length() < prealloc) {
1780 append_buffer = buffer::create(prealloc);
1781 if (_mempool >= 0) {
1782 append_buffer.get_raw()->reassign_to_mempool(_mempool);
1783 }
1784 append_buffer.set_length(0); // unused, so far.
1785 }
1786 }
1787
7c673cae
FG
1788 // sort-of-like-assignment-op
1789 void buffer::list::claim(list& bl, unsigned int flags)
1790 {
1791 // free my buffers
1792 clear();
1793 claim_append(bl, flags);
1794 }
1795
1796 void buffer::list::claim_append(list& bl, unsigned int flags)
1797 {
1798 // steal the other guy's buffers
1799 _len += bl._len;
1800 if (!(flags & CLAIM_ALLOW_NONSHAREABLE))
1801 bl.make_shareable();
1802 _buffers.splice(_buffers.end(), bl._buffers );
1803 bl._len = 0;
1804 bl.last_p = bl.begin();
1805 }
1806
1807 void buffer::list::claim_prepend(list& bl, unsigned int flags)
1808 {
1809 // steal the other guy's buffers
1810 _len += bl._len;
1811 if (!(flags & CLAIM_ALLOW_NONSHAREABLE))
1812 bl.make_shareable();
1813 _buffers.splice(_buffers.begin(), bl._buffers );
1814 bl._len = 0;
1815 bl.last_p = bl.begin();
1816 }
1817
31f18b77
FG
1818 void buffer::list::claim_append_piecewise(list& bl)
1819 {
1820 // steal the other guy's buffers
1821 for (std::list<buffer::ptr>::const_iterator i = bl.buffers().begin();
1822 i != bl.buffers().end(); i++) {
1823 append(*i, 0, i->length());
1824 }
1825 bl.clear();
1826 }
1827
7c673cae
FG
1828 void buffer::list::copy(unsigned off, unsigned len, char *dest) const
1829 {
1830 if (off + len > length())
1831 throw end_of_buffer();
1832 if (last_p.get_off() != off)
1833 last_p.seek(off);
1834 last_p.copy(len, dest);
1835 }
1836
1837 void buffer::list::copy(unsigned off, unsigned len, list &dest) const
1838 {
1839 if (off + len > length())
1840 throw end_of_buffer();
1841 if (last_p.get_off() != off)
1842 last_p.seek(off);
1843 last_p.copy(len, dest);
1844 }
1845
1846 void buffer::list::copy(unsigned off, unsigned len, std::string& dest) const
1847 {
1848 if (last_p.get_off() != off)
1849 last_p.seek(off);
1850 return last_p.copy(len, dest);
1851 }
1852
1853 void buffer::list::copy_in(unsigned off, unsigned len, const char *src)
1854 {
1855 copy_in(off, len, src, true);
1856 }
1857
1858 void buffer::list::copy_in(unsigned off, unsigned len, const char *src, bool crc_reset)
1859 {
1860 if (off + len > length())
1861 throw end_of_buffer();
1862
1863 if (last_p.get_off() != off)
1864 last_p.seek(off);
1865 last_p.copy_in(len, src, crc_reset);
1866 }
1867
1868 void buffer::list::copy_in(unsigned off, unsigned len, const list& src)
1869 {
1870 if (last_p.get_off() != off)
1871 last_p.seek(off);
1872 last_p.copy_in(len, src);
1873 }
1874
1875 void buffer::list::append(char c)
1876 {
1877 // put what we can into the existing append_buffer.
1878 unsigned gap = append_buffer.unused_tail_length();
1879 if (!gap) {
1880 // make a new append_buffer!
1881 append_buffer = raw_combined::create(CEPH_BUFFER_APPEND_SIZE);
1882 append_buffer.set_length(0); // unused, so far.
31f18b77
FG
1883 if (_mempool >= 0) {
1884 append_buffer.get_raw()->reassign_to_mempool(_mempool);
1885 }
7c673cae
FG
1886 }
1887 append(append_buffer, append_buffer.append(c) - 1, 1); // add segment to the list
1888 }
1889
1890 void buffer::list::append(const char *data, unsigned len)
1891 {
1892 while (len > 0) {
1893 // put what we can into the existing append_buffer.
1894 unsigned gap = append_buffer.unused_tail_length();
1895 if (gap > 0) {
1896 if (gap > len) gap = len;
1897 //cout << "append first char is " << data[0] << ", last char is " << data[len-1] << std::endl;
1898 append_buffer.append(data, gap);
1899 append(append_buffer, append_buffer.length() - gap, gap); // add segment to the list
1900 len -= gap;
1901 data += gap;
1902 }
1903 if (len == 0)
1904 break; // done!
1905
1906 // make a new append_buffer. fill out a complete page, factoring in the
1907 // raw_combined overhead.
1908 size_t need = ROUND_UP_TO(len, sizeof(size_t)) + sizeof(raw_combined);
1909 size_t alen = ROUND_UP_TO(need, CEPH_BUFFER_ALLOC_UNIT) -
1910 sizeof(raw_combined);
1911 append_buffer = raw_combined::create(alen);
1912 append_buffer.set_length(0); // unused, so far.
31f18b77
FG
1913 if (_mempool >= 0) {
1914 append_buffer.get_raw()->reassign_to_mempool(_mempool);
1915 }
7c673cae
FG
1916 }
1917 }
1918
1919 void buffer::list::append(const ptr& bp)
1920 {
1921 if (bp.length())
1922 push_back(bp);
1923 }
1924
1925 void buffer::list::append(ptr&& bp)
1926 {
1927 if (bp.length())
1928 push_back(std::move(bp));
1929 }
1930
1931 void buffer::list::append(const ptr& bp, unsigned off, unsigned len)
1932 {
1933 assert(len+off <= bp.length());
1934 if (!_buffers.empty()) {
1935 ptr &l = _buffers.back();
1936 if (l.get_raw() == bp.get_raw() &&
1937 l.end() == bp.start() + off) {
1938 // yay contiguous with tail bp!
1939 l.set_length(l.length()+len);
1940 _len += len;
1941 return;
1942 }
1943 }
1944 // add new item to list
1945 push_back(ptr(bp, off, len));
1946 }
1947
1948 void buffer::list::append(const list& bl)
1949 {
1950 _len += bl._len;
1951 for (std::list<ptr>::const_iterator p = bl._buffers.begin();
1952 p != bl._buffers.end();
1953 ++p)
1954 _buffers.push_back(*p);
1955 }
1956
1957 void buffer::list::append(std::istream& in)
1958 {
1959 while (!in.eof()) {
1960 std::string s;
1961 getline(in, s);
1962 append(s.c_str(), s.length());
1963 if (s.length())
1964 append("\n", 1);
1965 }
1966 }
1967
1968 void buffer::list::prepend_zero(unsigned len)
1969 {
1970 ptr bp(len);
1971 bp.zero(false);
1972 _len += len;
1973 _buffers.emplace_front(std::move(bp));
1974 }
1975
1976 void buffer::list::append_zero(unsigned len)
1977 {
1978 ptr bp(len);
1979 bp.zero(false);
1980 append(std::move(bp));
1981 }
1982
1983
1984 /*
1985 * get a char
1986 */
1987 const char& buffer::list::operator[](unsigned n) const
1988 {
1989 if (n >= _len)
1990 throw end_of_buffer();
1991
1992 for (std::list<ptr>::const_iterator p = _buffers.begin();
1993 p != _buffers.end();
1994 ++p) {
1995 if (n >= p->length()) {
1996 n -= p->length();
1997 continue;
1998 }
1999 return (*p)[n];
2000 }
2001 ceph_abort();
2002 }
2003
2004 /*
2005 * return a contiguous ptr to whole bufferlist contents.
2006 */
2007 char *buffer::list::c_str()
2008 {
2009 if (_buffers.empty())
2010 return 0; // no buffers
2011
2012 std::list<ptr>::const_iterator iter = _buffers.begin();
2013 ++iter;
2014
2015 if (iter != _buffers.end())
2016 rebuild();
2017 return _buffers.front().c_str(); // good, we're already contiguous.
2018 }
2019
2020 string buffer::list::to_str() const {
2021 string s;
2022 s.reserve(length());
2023 for (std::list<ptr>::const_iterator p = _buffers.begin();
2024 p != _buffers.end();
2025 ++p) {
2026 if (p->length()) {
2027 s.append(p->c_str(), p->length());
2028 }
2029 }
2030 return s;
2031 }
2032
2033 char *buffer::list::get_contiguous(unsigned orig_off, unsigned len)
2034 {
2035 if (orig_off + len > length())
2036 throw end_of_buffer();
2037
2038 if (len == 0) {
2039 return 0;
2040 }
2041
2042 unsigned off = orig_off;
2043 std::list<ptr>::iterator curbuf = _buffers.begin();
2044 while (off > 0 && off >= curbuf->length()) {
2045 off -= curbuf->length();
2046 ++curbuf;
2047 }
2048
2049 if (off + len > curbuf->length()) {
2050 bufferlist tmp;
2051 unsigned l = off + len;
2052
2053 do {
2054 if (l >= curbuf->length())
2055 l -= curbuf->length();
2056 else
2057 l = 0;
2058 tmp.append(*curbuf);
2059 curbuf = _buffers.erase(curbuf);
2060
2061 } while (curbuf != _buffers.end() && l > 0);
2062
2063 assert(l == 0);
2064
2065 tmp.rebuild();
2066 _buffers.insert(curbuf, tmp._buffers.front());
2067 return tmp.c_str() + off;
2068 }
2069
2070 last_p = begin(); // we modified _buffers
2071
2072 return curbuf->c_str() + off;
2073 }
2074
2075 void buffer::list::substr_of(const list& other, unsigned off, unsigned len)
2076 {
2077 if (off + len > other.length())
2078 throw end_of_buffer();
2079
2080 clear();
2081
2082 // skip off
2083 std::list<ptr>::const_iterator curbuf = other._buffers.begin();
2084 while (off > 0 &&
2085 off >= curbuf->length()) {
2086 // skip this buffer
2087 //cout << "skipping over " << *curbuf << std::endl;
2088 off -= (*curbuf).length();
2089 ++curbuf;
2090 }
2091 assert(len == 0 || curbuf != other._buffers.end());
2092
2093 while (len > 0) {
2094 // partial?
2095 if (off + len < curbuf->length()) {
2096 //cout << "copying partial of " << *curbuf << std::endl;
2097 _buffers.push_back( ptr( *curbuf, off, len ) );
2098 _len += len;
2099 break;
2100 }
2101
2102 // through end
2103 //cout << "copying end (all?) of " << *curbuf << std::endl;
2104 unsigned howmuch = curbuf->length() - off;
2105 _buffers.push_back( ptr( *curbuf, off, howmuch ) );
2106 _len += howmuch;
2107 len -= howmuch;
2108 off = 0;
2109 ++curbuf;
2110 }
2111 }
2112
2113 // funky modifier
2114 void buffer::list::splice(unsigned off, unsigned len, list *claim_by /*, bufferlist& replace_with */)
2115 { // fixme?
2116 if (len == 0)
2117 return;
2118
2119 if (off >= length())
2120 throw end_of_buffer();
2121
2122 assert(len > 0);
2123 //cout << "splice off " << off << " len " << len << " ... mylen = " << length() << std::endl;
2124
2125 // skip off
2126 std::list<ptr>::iterator curbuf = _buffers.begin();
2127 while (off > 0) {
2128 assert(curbuf != _buffers.end());
2129 if (off >= (*curbuf).length()) {
2130 // skip this buffer
2131 //cout << "off = " << off << " skipping over " << *curbuf << std::endl;
2132 off -= (*curbuf).length();
2133 ++curbuf;
2134 } else {
2135 // somewhere in this buffer!
2136 //cout << "off = " << off << " somewhere in " << *curbuf << std::endl;
2137 break;
2138 }
2139 }
2140
2141 if (off) {
2142 // add a reference to the front bit
2143 // insert it before curbuf (which we'll hose)
2144 //cout << "keeping front " << off << " of " << *curbuf << std::endl;
2145 _buffers.insert( curbuf, ptr( *curbuf, 0, off ) );
2146 _len += off;
2147 }
2148
2149 while (len > 0) {
2150 // partial?
2151 if (off + len < (*curbuf).length()) {
2152 //cout << "keeping end of " << *curbuf << ", losing first " << off+len << std::endl;
2153 if (claim_by)
2154 claim_by->append( *curbuf, off, len );
2155 (*curbuf).set_offset( off+len + (*curbuf).offset() ); // ignore beginning big
2156 (*curbuf).set_length( (*curbuf).length() - (len+off) );
2157 _len -= off+len;
2158 //cout << " now " << *curbuf << std::endl;
2159 break;
2160 }
2161
2162 // hose though the end
2163 unsigned howmuch = (*curbuf).length() - off;
2164 //cout << "discarding " << howmuch << " of " << *curbuf << std::endl;
2165 if (claim_by)
2166 claim_by->append( *curbuf, off, howmuch );
2167 _len -= (*curbuf).length();
2168 _buffers.erase( curbuf++ );
2169 len -= howmuch;
2170 off = 0;
2171 }
2172
2173 // splice in *replace (implement me later?)
2174
2175 last_p = begin(); // just in case we were in the removed region.
2176 }
2177
2178 void buffer::list::write(int off, int len, std::ostream& out) const
2179 {
2180 list s;
2181 s.substr_of(*this, off, len);
2182 for (std::list<ptr>::const_iterator it = s._buffers.begin();
2183 it != s._buffers.end();
2184 ++it)
2185 if (it->length())
2186 out.write(it->c_str(), it->length());
2187 /*iterator p(this, off);
2188 while (len > 0 && !p.end()) {
2189 int l = p.left_in_this_buf();
2190 if (l > len)
2191 l = len;
2192 out.write(p.c_str(), l);
2193 len -= l;
2194 }*/
2195 }
2196
2197void buffer::list::encode_base64(buffer::list& o)
2198{
2199 bufferptr bp(length() * 4 / 3 + 3);
2200 int l = ceph_armor(bp.c_str(), bp.c_str() + bp.length(), c_str(), c_str() + length());
2201 bp.set_length(l);
2202 o.push_back(std::move(bp));
2203}
2204
2205void buffer::list::decode_base64(buffer::list& e)
2206{
2207 bufferptr bp(4 + ((e.length() * 3) / 4));
2208 int l = ceph_unarmor(bp.c_str(), bp.c_str() + bp.length(), e.c_str(), e.c_str() + e.length());
2209 if (l < 0) {
2210 std::ostringstream oss;
2211 oss << "decode_base64: decoding failed:\n";
2212 hexdump(oss);
2213 throw buffer::malformed_input(oss.str().c_str());
2214 }
2215 assert(l <= (int)bp.length());
2216 bp.set_length(l);
2217 push_back(std::move(bp));
2218}
2219
2220
2221
2222int buffer::list::read_file(const char *fn, std::string *error)
2223{
2224 int fd = TEMP_FAILURE_RETRY(::open(fn, O_RDONLY));
2225 if (fd < 0) {
2226 int err = errno;
2227 std::ostringstream oss;
2228 oss << "can't open " << fn << ": " << cpp_strerror(err);
2229 *error = oss.str();
2230 return -err;
2231 }
2232
2233 struct stat st;
2234 memset(&st, 0, sizeof(st));
2235 if (::fstat(fd, &st) < 0) {
2236 int err = errno;
2237 std::ostringstream oss;
2238 oss << "bufferlist::read_file(" << fn << "): stat error: "
2239 << cpp_strerror(err);
2240 *error = oss.str();
2241 VOID_TEMP_FAILURE_RETRY(::close(fd));
2242 return -err;
2243 }
2244
2245 ssize_t ret = read_fd(fd, st.st_size);
2246 if (ret < 0) {
2247 std::ostringstream oss;
2248 oss << "bufferlist::read_file(" << fn << "): read error:"
2249 << cpp_strerror(ret);
2250 *error = oss.str();
2251 VOID_TEMP_FAILURE_RETRY(::close(fd));
2252 return ret;
2253 }
2254 else if (ret != st.st_size) {
2255 // Premature EOF.
2256 // Perhaps the file changed between stat() and read()?
2257 std::ostringstream oss;
2258 oss << "bufferlist::read_file(" << fn << "): warning: got premature EOF.";
2259 *error = oss.str();
2260 // not actually an error, but weird
2261 }
2262 VOID_TEMP_FAILURE_RETRY(::close(fd));
2263 return 0;
2264}
2265
2266ssize_t buffer::list::read_fd(int fd, size_t len)
2267{
2268 // try zero copy first
2269 if (false && read_fd_zero_copy(fd, len) == 0) {
2270 // TODO fix callers to not require correct read size, which is not
2271 // available for raw_pipe until we actually inspect the data
2272 return 0;
2273 }
2274 bufferptr bp = buffer::create(len);
2275 ssize_t ret = safe_read(fd, (void*)bp.c_str(), len);
2276 if (ret >= 0) {
2277 bp.set_length(ret);
2278 append(std::move(bp));
2279 }
2280 return ret;
2281}
2282
2283int buffer::list::read_fd_zero_copy(int fd, size_t len)
2284{
2285#ifdef CEPH_HAVE_SPLICE
2286 try {
2287 append(buffer::create_zero_copy(len, fd, NULL));
2288 } catch (buffer::error_code &e) {
2289 return e.code;
2290 } catch (buffer::malformed_input &e) {
2291 return -EIO;
2292 }
2293 return 0;
2294#else
2295 return -ENOTSUP;
2296#endif
2297}
2298
2299int buffer::list::write_file(const char *fn, int mode)
2300{
2301 int fd = TEMP_FAILURE_RETRY(::open(fn, O_WRONLY|O_CREAT|O_TRUNC, mode));
2302 if (fd < 0) {
2303 int err = errno;
2304 cerr << "bufferlist::write_file(" << fn << "): failed to open file: "
2305 << cpp_strerror(err) << std::endl;
2306 return -err;
2307 }
2308 int ret = write_fd(fd);
2309 if (ret) {
2310 cerr << "bufferlist::write_fd(" << fn << "): write_fd error: "
2311 << cpp_strerror(ret) << std::endl;
2312 VOID_TEMP_FAILURE_RETRY(::close(fd));
2313 return ret;
2314 }
2315 if (TEMP_FAILURE_RETRY(::close(fd))) {
2316 int err = errno;
2317 cerr << "bufferlist::write_file(" << fn << "): close error: "
2318 << cpp_strerror(err) << std::endl;
2319 return -err;
2320 }
2321 return 0;
2322}
2323
/**
 * Write `bytes` bytes described by vec[0..veclen) to fd starting at the
 * absolute file position `offset`, resuming after short writes.
 *
 * With HAVE_PWRITEV the data is written with pwritev(); otherwise the file
 * position is set with lseek64() and writev() is used.  A call interrupted
 * by EINTR is simply reissued.
 *
 * Note that after a short write the iovec array is edited in place (fully
 * written entries are skipped, a partially written entry is advanced), so
 * the caller's array may be modified on return.
 *
 * @return 0 once everything has been written, or -errno on failure
 */
static int do_writev(int fd, struct iovec *vec, uint64_t offset, unsigned veclen, unsigned bytes)
{
  while (bytes > 0) {
    ssize_t written;
#ifdef HAVE_PWRITEV
    written = ::pwritev(fd, vec, veclen, offset);
#else
    written = ::lseek64(fd, offset, SEEK_SET);
    if (written != offset) {
      return -errno;
    }
    written = ::writev(fd, vec, veclen);
#endif
    if (written < 0) {
      if (errno == EINTR)
	continue;
      return -errno;
    }

    bytes -= written;
    offset += written;
    if (bytes == 0)
      break;

    // Short write: step past what has been consumed so the next attempt
    // starts at the first unwritten byte.
    for (; written > 0; ++vec, --veclen) {
      if (vec->iov_len > (size_t)written) {
	// this item was only partly written; keep its remainder
	vec->iov_base = (char *)vec->iov_base + written;
	vec->iov_len -= written;
	break;
      }
      // this item is completely written
      written -= vec->iov_len;
    }
  }
  return 0;
}
2363
2364int buffer::list::write_fd(int fd) const
2365{
2366 if (can_zero_copy())
2367 return write_fd_zero_copy(fd);
2368
2369 // use writev!
2370 iovec iov[IOV_MAX];
2371 int iovlen = 0;
2372 ssize_t bytes = 0;
2373
2374 std::list<ptr>::const_iterator p = _buffers.begin();
2375 while (p != _buffers.end()) {
2376 if (p->length() > 0) {
2377 iov[iovlen].iov_base = (void *)p->c_str();
2378 iov[iovlen].iov_len = p->length();
2379 bytes += p->length();
2380 iovlen++;
2381 }
2382 ++p;
2383
2384 if (iovlen == IOV_MAX-1 ||
2385 p == _buffers.end()) {
2386 iovec *start = iov;
2387 int num = iovlen;
2388 ssize_t wrote;
2389 retry:
2390 wrote = ::writev(fd, start, num);
2391 if (wrote < 0) {
2392 int err = errno;
2393 if (err == EINTR)
2394 goto retry;
2395 return -err;
2396 }
2397 if (wrote < bytes) {
2398 // partial write, recover!
2399 while ((size_t)wrote >= start[0].iov_len) {
2400 wrote -= start[0].iov_len;
2401 bytes -= start[0].iov_len;
2402 start++;
2403 num--;
2404 }
2405 if (wrote > 0) {
2406 start[0].iov_len -= wrote;
2407 start[0].iov_base = (char *)start[0].iov_base + wrote;
2408 bytes -= wrote;
2409 }
2410 goto retry;
2411 }
2412 iovlen = 0;
2413 bytes = 0;
2414 }
2415 }
2416 return 0;
2417}
2418
2419int buffer::list::write_fd(int fd, uint64_t offset) const
2420{
2421 iovec iov[IOV_MAX];
2422
2423 std::list<ptr>::const_iterator p = _buffers.begin();
2424 uint64_t left_pbrs = _buffers.size();
2425 while (left_pbrs) {
2426 ssize_t bytes = 0;
2427 unsigned iovlen = 0;
2428 uint64_t size = MIN(left_pbrs, IOV_MAX);
2429 left_pbrs -= size;
2430 while (size > 0) {
2431 iov[iovlen].iov_base = (void *)p->c_str();
2432 iov[iovlen].iov_len = p->length();
2433 iovlen++;
2434 bytes += p->length();
2435 ++p;
2436 size--;
2437 }
2438
2439 int r = do_writev(fd, iov, offset, iovlen, bytes);
2440 if (r < 0)
2441 return r;
2442 offset += bytes;
2443 }
2444 return 0;
2445}
2446
2447int buffer::list::write_fd_zero_copy(int fd) const
2448{
2449 if (!can_zero_copy())
2450 return -ENOTSUP;
2451 /* pass offset to each call to avoid races updating the fd seek
2452 * position, since the I/O may be non-blocking
2453 */
2454 int64_t offset = ::lseek(fd, 0, SEEK_CUR);
2455 int64_t *off_p = &offset;
2456 if (offset < 0 && errno != ESPIPE)
2457 return -errno;
2458 if (errno == ESPIPE)
2459 off_p = NULL;
2460 for (std::list<ptr>::const_iterator it = _buffers.begin();
2461 it != _buffers.end(); ++it) {
2462 int r = it->zero_copy_to_fd(fd, off_p);
2463 if (r < 0)
2464 return r;
2465 if (off_p)
2466 offset += it->length();
2467 }
2468 return 0;
2469}
2470
2471__u32 buffer::list::crc32c(__u32 crc) const
2472{
2473 for (std::list<ptr>::const_iterator it = _buffers.begin();
2474 it != _buffers.end();
2475 ++it) {
2476 if (it->length()) {
2477 raw *r = it->get_raw();
2478 pair<size_t, size_t> ofs(it->offset(), it->offset() + it->length());
2479 pair<uint32_t, uint32_t> ccrc;
2480 if (r->get_crc(ofs, &ccrc)) {
2481 if (ccrc.first == crc) {
2482 // got it already
2483 crc = ccrc.second;
2484 if (buffer_track_crc)
31f18b77 2485 buffer_cached_crc++;
7c673cae
FG
2486 } else {
2487 /* If we have cached crc32c(buf, v) for initial value v,
2488 * we can convert this to a different initial value v' by:
2489 * crc32c(buf, v') = crc32c(buf, v) ^ adjustment
2490 * where adjustment = crc32c(0*len(buf), v ^ v')
2491 *
2492 * http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
2493 * note, u for our crc32c implementation is 0
2494 */
2495 crc = ccrc.second ^ ceph_crc32c(ccrc.first ^ crc, NULL, it->length());
2496 if (buffer_track_crc)
31f18b77 2497 buffer_cached_crc_adjusted++;
7c673cae
FG
2498 }
2499 } else {
2500 if (buffer_track_crc)
31f18b77 2501 buffer_missed_crc++;
7c673cae
FG
2502 uint32_t base = crc;
2503 crc = ceph_crc32c(crc, (unsigned char*)it->c_str(), it->length());
2504 r->set_crc(ofs, make_pair(base, crc));
2505 }
2506 }
2507 }
2508 return crc;
2509}
2510
2511void buffer::list::invalidate_crc()
2512{
2513 for (std::list<ptr>::const_iterator p = _buffers.begin(); p != _buffers.end(); ++p) {
2514 raw *r = p->get_raw();
2515 if (r) {
2516 r->invalidate_crc();
2517 }
2518 }
2519}
2520
2521/**
2522 * Binary write all contents to a C++ stream
2523 */
2524void buffer::list::write_stream(std::ostream &out) const
2525{
2526 for (std::list<ptr>::const_iterator p = _buffers.begin(); p != _buffers.end(); ++p) {
2527 if (p->length() > 0) {
2528 out.write(p->c_str(), p->length());
2529 }
2530 }
2531}
2532
2533
2534void buffer::list::hexdump(std::ostream &out, bool trailing_newline) const
2535{
2536 if (!length())
2537 return;
2538
2539 std::ios_base::fmtflags original_flags = out.flags();
2540
2541 // do our best to match the output of hexdump -C, for better
2542 // diff'ing!
2543
2544 out.setf(std::ios::right);
2545 out.fill('0');
2546
2547 unsigned per = 16;
2548 bool was_zeros = false, did_star = false;
2549 for (unsigned o=0; o<length(); o += per) {
2550 bool row_is_zeros = false;
2551 if (o + per < length()) {
2552 row_is_zeros = true;
2553 for (unsigned i=0; i<per && o+i<length(); i++) {
2554 if ((*this)[o+i]) {
2555 row_is_zeros = false;
2556 }
2557 }
2558 if (row_is_zeros) {
2559 if (was_zeros) {
2560 if (!did_star) {
2561 out << "\n*";
2562 did_star = true;
2563 }
2564 continue;
2565 }
2566 was_zeros = true;
2567 } else {
2568 was_zeros = false;
2569 did_star = false;
2570 }
2571 }
2572 if (o)
2573 out << "\n";
2574 out << std::hex << std::setw(8) << o << " ";
2575
2576 unsigned i;
2577 for (i=0; i<per && o+i<length(); i++) {
2578 if (i == 8)
2579 out << ' ';
2580 out << " " << std::setw(2) << ((unsigned)(*this)[o+i] & 0xff);
2581 }
2582 for (; i<per; i++) {
2583 if (i == 8)
2584 out << ' ';
2585 out << " ";
2586 }
2587
2588 out << " |";
2589 for (i=0; i<per && o+i<length(); i++) {
2590 char c = (*this)[o+i];
2591 if (isupper(c) || islower(c) || isdigit(c) || c == ' ' || ispunct(c))
2592 out << c;
2593 else
2594 out << '.';
2595 }
2596 out << '|' << std::dec;
2597 }
2598 if (trailing_newline) {
2599 out << "\n" << std::hex << std::setw(8) << length();
2600 out << "\n";
2601 }
2602
2603 out.flags(original_flags);
2604}
2605
31f18b77
FG
2606
2607buffer::list buffer::list::static_from_mem(char* c, size_t l) {
2608 list bl;
2609 bl.push_back(ptr(create_static(l, c)));
2610 return bl;
2611}
2612
2613buffer::list buffer::list::static_from_cstring(char* c) {
2614 return static_from_mem(c, std::strlen(c));
2615}
2616
2617buffer::list buffer::list::static_from_string(string& s) {
2618 // C++14 just has string::data return a char* from a non-const
2619 // string.
2620 return static_from_mem(const_cast<char*>(s.data()), s.length());
2621 // But the way buffer::list mostly doesn't work in a sane way with
2622 // const makes me generally sad.
2623}
2624
7c673cae 2625std::ostream& buffer::operator<<(std::ostream& out, const buffer::raw &r) {
31f18b77 2626 return out << "buffer::raw(" << (void*)r.data << " len " << r.len << " nref " << r.nref.load() << ")";
7c673cae
FG
2627}
2628
2629std::ostream& buffer::operator<<(std::ostream& out, const buffer::ptr& bp) {
2630 if (bp.have_raw())
2631 out << "buffer::ptr(" << bp.offset() << "~" << bp.length()
2632 << " " << (void*)bp.c_str()
2633 << " in raw " << (void*)bp.raw_c_str()
2634 << " len " << bp.raw_length()
2635 << " nref " << bp.raw_nref() << ")";
2636 else
2637 out << "buffer:ptr(" << bp.offset() << "~" << bp.length() << " no raw)";
2638 return out;
2639}
2640
2641std::ostream& buffer::operator<<(std::ostream& out, const buffer::list& bl) {
2642 out << "buffer::list(len=" << bl.length() << "," << std::endl;
2643
2644 std::list<buffer::ptr>::const_iterator it = bl.buffers().begin();
2645 while (it != bl.buffers().end()) {
2646 out << "\t" << *it;
2647 if (++it == bl.buffers().end()) break;
2648 out << "," << std::endl;
2649 }
2650 out << std::endl << ")";
2651 return out;
2652}
2653
2654std::ostream& buffer::operator<<(std::ostream& out, const buffer::error& e)
2655{
2656 return out << e.what();
2657}
2658
2659MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_malloc, buffer_raw_malloc,
2660 buffer_meta);
2661MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_mmap_pages, buffer_raw_mmap_pagse,
2662 buffer_meta);
2663MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_posix_aligned,
2664 buffer_raw_posix_aligned, buffer_meta);
2665#ifdef CEPH_HAVE_SPLICE
2666MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_pipe, buffer_raw_pipe, buffer_meta);
2667#endif
2668MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_char, buffer_raw_char, buffer_meta);
31f18b77
FG
2669MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_claimed_char, buffer_raw_claimed_char,
2670 buffer_meta);
7c673cae
FG
2671MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_unshareable, buffer_raw_unshareable,
2672 buffer_meta);
2673MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_static, buffer_raw_static,
2674 buffer_meta);
2675