]> git.proxmox.com Git - ceph.git/blob - ceph/src/common/buffer.cc
update sources to v12.1.3
[ceph.git] / ceph / src / common / buffer.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include <atomic>
16 #include <errno.h>
17 #include <limits.h>
18
19 #include <sys/uio.h>
20
21 #include "include/compat.h"
22 #include "include/mempool.h"
23 #include "armor.h"
24 #include "common/environment.h"
25 #include "common/errno.h"
26 #include "common/safe_io.h"
27 #include "common/simple_spin.h"
28 #include "common/strtol.h"
29 #include "common/likely.h"
30 #include "common/valgrind.h"
31 #include "common/deleter.h"
32 #include "common/RWLock.h"
33 #include "include/types.h"
34 #include "include/scope_guard.h"
35
36 #if defined(HAVE_XIO)
37 #include "msg/xio/XioMsg.h"
38 #endif
39
40 using namespace ceph;
41
// Allocation unit for buffers: the smaller of CEPH_PAGE_SIZE and 4096.
#define CEPH_BUFFER_ALLOC_UNIT (MIN(CEPH_PAGE_SIZE, 4096))
// Payload size chosen so that payload plus one raw_combined header adds up
// to a single allocation unit.
#define CEPH_BUFFER_APPEND_SIZE (CEPH_BUFFER_ALLOC_UNIT - sizeof(raw_combined))

// bdout ... bendl bracket one debug trace statement.  With BUFFER_DEBUG
// defined the statement streams to std::cout while holding
// buffer_debug_lock; otherwise it is wrapped in `if (0)` and never runs.
#ifdef BUFFER_DEBUG
static std::atomic_flag buffer_debug_lock = ATOMIC_FLAG_INIT;
# define bdout { simple_spin_lock(&buffer_debug_lock); std::cout
# define bendl std::endl; simple_spin_unlock(&buffer_debug_lock); }
#else
# define bdout if (0) { std::cout
# define bendl std::endl; }
#endif
53
54 static std::atomic<uint64_t> buffer_total_alloc { 0 };
55 static std::atomic<uint64_t> buffer_history_alloc_bytes { 0 };
56 static std::atomic<uint64_t> buffer_history_alloc_num { 0 };
57
58 const bool buffer_track_alloc = get_env_bool("CEPH_BUFFER_TRACK");
59
60 namespace {
61 void inc_total_alloc(unsigned len) {
62 if (buffer_track_alloc)
63 buffer_total_alloc += len;
64 }
65
66 void dec_total_alloc(unsigned len) {
67 if (buffer_track_alloc)
68 buffer_total_alloc -= len;
69 }
70
71 void inc_history_alloc(uint64_t len) {
72 if (buffer_track_alloc) {
73 buffer_history_alloc_bytes += len;
74 buffer_history_alloc_num++;
75 }
76 }
77 } // namespace
78
79 int buffer::get_total_alloc() {
80 return buffer_total_alloc;
81 }
82 uint64_t buffer::get_history_alloc_bytes() {
83 return buffer_history_alloc_bytes;
84 }
85 uint64_t buffer::get_history_alloc_num() {
86 return buffer_history_alloc_num;
87 }
88
89 static std::atomic<unsigned> buffer_cached_crc { 0 };
90 static std::atomic<unsigned> buffer_cached_crc_adjusted { 0 };
91 static std::atomic<unsigned> buffer_missed_crc { 0 };
92
93 static bool buffer_track_crc = get_env_bool("CEPH_BUFFER_TRACK");
94
95 void buffer::track_cached_crc(bool b) {
96 buffer_track_crc = b;
97 }
98 int buffer::get_cached_crc() {
99 return buffer_cached_crc;
100 }
101 int buffer::get_cached_crc_adjusted() {
102 return buffer_cached_crc_adjusted;
103 }
104
105 int buffer::get_missed_crc() {
106 return buffer_missed_crc;
107 }
108
109 static std::atomic<unsigned> buffer_c_str_accesses { 0 };
110
111 static bool buffer_track_c_str = get_env_bool("CEPH_BUFFER_TRACK");
112
113 void buffer::track_c_str(bool b) {
114 buffer_track_c_str = b;
115 }
116 int buffer::get_c_str_accesses() {
117 return buffer_c_str_accesses;
118 }
119
#ifdef CEPH_HAVE_SETPIPE_SZ
// Cached system pipe size limit; 0 means "not read yet".
static std::atomic<unsigned> buffer_max_pipe_size { 0 };

/*
 * Re-read /proc/sys/fs/pipe-max-size and cache the value.
 *
 * Returns 0 on success, a negative errno if the file cannot be stat'ed or
 * read, and -EIO if its content is not a positive decimal integer.
 */
int update_max_pipe_size() {
  char buf[32];
  std::string err;
  struct stat stat_result;
  if (::stat(PROCPREFIX "/proc/sys/fs/pipe-max-size", &stat_result) == -1)
    return -errno;
  int r = safe_read_file(PROCPREFIX "/proc/sys/fs/", "pipe-max-size",
                         buf, sizeof(buf) - 1);
  if (r < 0)
    return r;
  // The value read from /proc normally ends in a newline.  Strip trailing
  // whitespace so the parser sees only digits.
  // NOTE(review): strict_strtol presumably rejects trailing characters --
  // confirm; trimming is harmless either way.
  while (r > 0 && (buf[r - 1] == '\n' || buf[r - 1] == '\r' ||
                   buf[r - 1] == ' ' || buf[r - 1] == '\t'))
    --r;
  buf[r] = '\0';
  const long long size = strict_strtol(buf, 10, &err);
  if (!err.empty())
    return -EIO;
  // A zero or negative limit is meaningless; 0 would also be mistaken for
  // "not read yet" by get_max_pipe_size().
  if (size <= 0)
    return -EIO;
  buffer_max_pipe_size = static_cast<unsigned>(size);
  return 0;
}

/*
 * Return the maximum pipe size, reading it from /proc on first use and
 * falling back to 65536 when it cannot be determined.
 */
size_t get_max_pipe_size() {
  size_t size = buffer_max_pipe_size;
  if (size)
    return size;
  if (update_max_pipe_size() == 0)
    return buffer_max_pipe_size;
  // this is the max size hardcoded in linux before 2.6.35
  return 65536;
}
#else
// Without F_SETPIPE_SZ support the pipe size is fixed at 65536.
size_t get_max_pipe_size() { return 65536; }
#endif
153
154
155 const char * buffer::error::what() const throw () {
156 return "buffer::exception";
157 }
158 const char * buffer::bad_alloc::what() const throw () {
159 return "buffer::bad_alloc";
160 }
161 const char * buffer::end_of_buffer::what() const throw () {
162 return "buffer::end_of_buffer";
163 }
164 const char * buffer::malformed_input::what() const throw () {
165 return buf;
166 }
167 buffer::error_code::error_code(int error) :
168 buffer::malformed_input(cpp_strerror(error).c_str()), code(error) {}
169
170 class buffer::raw {
171 public:
172 char *data;
173 unsigned len;
174 std::atomic<unsigned> nref { 0 };
175 int mempool = mempool::mempool_buffer_anon;
176
177 mutable std::atomic_flag crc_spinlock = ATOMIC_FLAG_INIT;
178 map<pair<size_t, size_t>, pair<uint32_t, uint32_t> > crc_map;
179
180 explicit raw(unsigned l)
181 : data(NULL), len(l), nref(0) {
182 mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count(1, len);
183 }
184 raw(char *c, unsigned l)
185 : data(c), len(l), nref(0) {
186 mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count(1, len);
187 }
188 virtual ~raw() {
189 mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count(
190 -1, -(int)len);
191 }
192
193 void _set_len(unsigned l) {
194 mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count(
195 -1, -(int)len);
196 len = l;
197 mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count(1, len);
198 }
199
200 void reassign_to_mempool(int pool) {
201 if (pool == mempool) {
202 return;
203 }
204 mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count(
205 -1, -(int)len);
206 mempool = pool;
207 mempool::get_pool(mempool::pool_index_t(pool)).adjust_count(1, len);
208 }
209
210 void try_assign_to_mempool(int pool) {
211 if (mempool == mempool::mempool_buffer_anon) {
212 reassign_to_mempool(pool);
213 }
214 }
215
216 // no copying.
217 // cppcheck-suppress noExplicitConstructor
218 raw(const raw &other);
219 const raw& operator=(const raw &other);
220
221 virtual char *get_data() {
222 return data;
223 }
224 virtual raw* clone_empty() = 0;
225 raw *clone() {
226 raw *c = clone_empty();
227 memcpy(c->data, data, len);
228 return c;
229 }
230 virtual bool can_zero_copy() const {
231 return false;
232 }
233 virtual int zero_copy_to_fd(int fd, loff_t *offset) {
234 return -ENOTSUP;
235 }
236 virtual bool is_page_aligned() {
237 return ((long)data & ~CEPH_PAGE_MASK) == 0;
238 }
239 bool is_n_page_sized() {
240 return (len & ~CEPH_PAGE_MASK) == 0;
241 }
242 virtual bool is_shareable() {
243 // true if safe to reference/share the existing buffer copy
244 // false if it is not safe to share the buffer, e.g., due to special
245 // and/or registered memory that is scarce
246 return true;
247 }
248 bool get_crc(const pair<size_t, size_t> &fromto,
249 pair<uint32_t, uint32_t> *crc) const {
250 simple_spin_lock(&crc_spinlock);
251 map<pair<size_t, size_t>, pair<uint32_t, uint32_t> >::const_iterator i =
252 crc_map.find(fromto);
253 if (i == crc_map.end()) {
254 simple_spin_unlock(&crc_spinlock);
255 return false;
256 }
257 *crc = i->second;
258 simple_spin_unlock(&crc_spinlock);
259 return true;
260 }
261 void set_crc(const pair<size_t, size_t> &fromto,
262 const pair<uint32_t, uint32_t> &crc) {
263 simple_spin_lock(&crc_spinlock);
264 crc_map[fromto] = crc;
265 simple_spin_unlock(&crc_spinlock);
266 }
267 void invalidate_crc() {
268 simple_spin_lock(&crc_spinlock);
269 if (crc_map.size() != 0) {
270 crc_map.clear();
271 }
272 simple_spin_unlock(&crc_spinlock);
273 }
274 };
275
276 /*
277 * raw_combined is always placed within a single allocation along
278 * with the data buffer. the data goes at the beginning, and
279 * raw_combined at the end.
280 */
281 class buffer::raw_combined : public buffer::raw {
282 size_t alignment;
283 public:
284 raw_combined(char *dataptr, unsigned l, unsigned align=0)
285 : raw(dataptr, l),
286 alignment(align) {
287 inc_total_alloc(len);
288 inc_history_alloc(len);
289 }
290 ~raw_combined() override {
291 dec_total_alloc(len);
292 }
293 raw* clone_empty() override {
294 return create(len, alignment);
295 }
296
297 static raw_combined *create(unsigned len, unsigned align=0) {
298 if (!align)
299 align = sizeof(size_t);
300 size_t rawlen = ROUND_UP_TO(sizeof(buffer::raw_combined),
301 alignof(buffer::raw_combined));
302 size_t datalen = ROUND_UP_TO(len, alignof(buffer::raw_combined));
303
304 #ifdef DARWIN
305 char *ptr = (char *) valloc(rawlen + datalen);
306 #else
307 char *ptr = 0;
308 int r = ::posix_memalign((void**)(void*)&ptr, align, rawlen + datalen);
309 if (r)
310 throw bad_alloc();
311 #endif /* DARWIN */
312 if (!ptr)
313 throw bad_alloc();
314
315 // actual data first, since it has presumably larger alignment restriction
316 // then put the raw_combined at the end
317 return new (ptr + datalen) raw_combined(ptr, len, align);
318 }
319
320 static void operator delete(void *ptr) {
321 raw_combined *raw = (raw_combined *)ptr;
322 ::free((void *)raw->data);
323 }
324 };
325
326 class buffer::raw_malloc : public buffer::raw {
327 public:
328 MEMPOOL_CLASS_HELPERS();
329
330 explicit raw_malloc(unsigned l) : raw(l) {
331 if (len) {
332 data = (char *)malloc(len);
333 if (!data)
334 throw bad_alloc();
335 } else {
336 data = 0;
337 }
338 inc_total_alloc(len);
339 inc_history_alloc(len);
340 bdout << "raw_malloc " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl;
341 }
342 raw_malloc(unsigned l, char *b) : raw(b, l) {
343 inc_total_alloc(len);
344 bdout << "raw_malloc " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl;
345 }
346 ~raw_malloc() override {
347 free(data);
348 dec_total_alloc(len);
349 bdout << "raw_malloc " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl;
350 }
351 raw* clone_empty() override {
352 return new raw_malloc(len);
353 }
354 };
355
356 #ifndef __CYGWIN__
357 class buffer::raw_mmap_pages : public buffer::raw {
358 public:
359 MEMPOOL_CLASS_HELPERS();
360
361 explicit raw_mmap_pages(unsigned l) : raw(l) {
362 data = (char*)::mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
363 if (!data)
364 throw bad_alloc();
365 inc_total_alloc(len);
366 inc_history_alloc(len);
367 bdout << "raw_mmap " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl;
368 }
369 ~raw_mmap_pages() override {
370 ::munmap(data, len);
371 dec_total_alloc(len);
372 bdout << "raw_mmap " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl;
373 }
374 raw* clone_empty() override {
375 return new raw_mmap_pages(len);
376 }
377 };
378
379 class buffer::raw_posix_aligned : public buffer::raw {
380 unsigned align;
381 public:
382 MEMPOOL_CLASS_HELPERS();
383
384 raw_posix_aligned(unsigned l, unsigned _align) : raw(l) {
385 align = _align;
386 assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
387 #ifdef DARWIN
388 data = (char *) valloc(len);
389 #else
390 int r = ::posix_memalign((void**)(void*)&data, align, len);
391 if (r)
392 throw bad_alloc();
393 #endif /* DARWIN */
394 if (!data)
395 throw bad_alloc();
396 inc_total_alloc(len);
397 inc_history_alloc(len);
398 bdout << "raw_posix_aligned " << this << " alloc " << (void *)data << " l=" << l << ", align=" << align << " total_alloc=" << buffer::get_total_alloc() << bendl;
399 }
400 ~raw_posix_aligned() override {
401 ::free(data);
402 dec_total_alloc(len);
403 bdout << "raw_posix_aligned " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl;
404 }
405 raw* clone_empty() override {
406 return new raw_posix_aligned(len, align);
407 }
408 };
409 #endif
410
411 #ifdef __CYGWIN__
412 class buffer::raw_hack_aligned : public buffer::raw {
413 unsigned align;
414 char *realdata;
415 public:
416 raw_hack_aligned(unsigned l, unsigned _align) : raw(l) {
417 align = _align;
418 realdata = new char[len+align-1];
419 unsigned off = ((unsigned)realdata) & (align-1);
420 if (off)
421 data = realdata + align - off;
422 else
423 data = realdata;
424 inc_total_alloc(len+align-1);
425 inc_history_alloc(len+align-1);
426 //cout << "hack aligned " << (unsigned)data
427 //<< " in raw " << (unsigned)realdata
428 //<< " off " << off << std::endl;
429 assert(((unsigned)data & (align-1)) == 0);
430 }
431 ~raw_hack_aligned() {
432 delete[] realdata;
433 dec_total_alloc(len+align-1);
434 }
435 raw* clone_empty() {
436 return new raw_hack_aligned(len, align);
437 }
438 };
439 #endif
440
441 #ifdef CEPH_HAVE_SPLICE
442 class buffer::raw_pipe : public buffer::raw {
443 public:
444 MEMPOOL_CLASS_HELPERS();
445
446 explicit raw_pipe(unsigned len) : raw(len), source_consumed(false) {
447 size_t max = get_max_pipe_size();
448 if (len > max) {
449 bdout << "raw_pipe: requested length " << len
450 << " > max length " << max << bendl;
451 throw malformed_input("length larger than max pipe size");
452 }
453 pipefds[0] = -1;
454 pipefds[1] = -1;
455
456 int r;
457 if (::pipe(pipefds) == -1) {
458 r = -errno;
459 bdout << "raw_pipe: error creating pipe: " << cpp_strerror(r) << bendl;
460 throw error_code(r);
461 }
462
463 r = set_nonblocking(pipefds);
464 if (r < 0) {
465 bdout << "raw_pipe: error setting nonblocking flag on temp pipe: "
466 << cpp_strerror(r) << bendl;
467 throw error_code(r);
468 }
469
470 r = set_pipe_size(pipefds, len);
471 if (r < 0) {
472 bdout << "raw_pipe: could not set pipe size" << bendl;
473 // continue, since the pipe should become large enough as needed
474 }
475
476 inc_total_alloc(len);
477 inc_history_alloc(len);
478 bdout << "raw_pipe " << this << " alloc " << len << " "
479 << buffer::get_total_alloc() << bendl;
480 }
481
482 ~raw_pipe() override {
483 if (data)
484 free(data);
485 close_pipe(pipefds);
486 dec_total_alloc(len);
487 bdout << "raw_pipe " << this << " free " << (void *)data << " "
488 << buffer::get_total_alloc() << bendl;
489 }
490
491 bool can_zero_copy() const override {
492 return true;
493 }
494
495 int set_source(int fd, loff_t *off) {
496 int flags = SPLICE_F_NONBLOCK;
497 ssize_t r = safe_splice(fd, off, pipefds[1], NULL, len, flags);
498 if (r < 0) {
499 bdout << "raw_pipe: error splicing into pipe: " << cpp_strerror(r)
500 << bendl;
501 return r;
502 }
503 // update length with actual amount read
504 _set_len(r);
505 return 0;
506 }
507
508 int zero_copy_to_fd(int fd, loff_t *offset) override {
509 assert(!source_consumed);
510 int flags = SPLICE_F_NONBLOCK;
511 ssize_t r = safe_splice_exact(pipefds[0], NULL, fd, offset, len, flags);
512 if (r < 0) {
513 bdout << "raw_pipe: error splicing from pipe to fd: "
514 << cpp_strerror(r) << bendl;
515 return r;
516 }
517 source_consumed = true;
518 return 0;
519 }
520
521 buffer::raw* clone_empty() override {
522 // cloning doesn't make sense for pipe-based buffers,
523 // and is only used by unit tests for other types of buffers
524 return NULL;
525 }
526
527 char *get_data() override {
528 if (data)
529 return data;
530 return copy_pipe(pipefds);
531 }
532
533 private:
534 int set_pipe_size(int *fds, long length) {
535 #ifdef CEPH_HAVE_SETPIPE_SZ
536 if (::fcntl(fds[1], F_SETPIPE_SZ, length) == -1) {
537 int r = -errno;
538 if (r == -EPERM) {
539 // pipe limit must have changed - EPERM means we requested
540 // more than the maximum size as an unprivileged user
541 update_max_pipe_size();
542 throw malformed_input("length larger than new max pipe size");
543 }
544 return r;
545 }
546 #endif
547 return 0;
548 }
549
550 int set_nonblocking(int *fds) {
551 if (::fcntl(fds[0], F_SETFL, O_NONBLOCK) == -1)
552 return -errno;
553 if (::fcntl(fds[1], F_SETFL, O_NONBLOCK) == -1)
554 return -errno;
555 return 0;
556 }
557
558 static void close_pipe(const int *fds) {
559 if (fds[0] >= 0)
560 VOID_TEMP_FAILURE_RETRY(::close(fds[0]));
561 if (fds[1] >= 0)
562 VOID_TEMP_FAILURE_RETRY(::close(fds[1]));
563 }
564 char *copy_pipe(int *fds) {
565 /* preserve original pipe contents by copying into a temporary
566 * pipe before reading.
567 */
568 int tmpfd[2];
569 int r;
570
571 assert(!source_consumed);
572 assert(fds[0] >= 0);
573
574 if (::pipe(tmpfd) == -1) {
575 r = -errno;
576 bdout << "raw_pipe: error creating temp pipe: " << cpp_strerror(r)
577 << bendl;
578 throw error_code(r);
579 }
580 auto sg = make_scope_guard([=] { close_pipe(tmpfd); });
581 r = set_nonblocking(tmpfd);
582 if (r < 0) {
583 bdout << "raw_pipe: error setting nonblocking flag on temp pipe: "
584 << cpp_strerror(r) << bendl;
585 throw error_code(r);
586 }
587 r = set_pipe_size(tmpfd, len);
588 if (r < 0) {
589 bdout << "raw_pipe: error setting pipe size on temp pipe: "
590 << cpp_strerror(r) << bendl;
591 }
592 int flags = SPLICE_F_NONBLOCK;
593 if (::tee(fds[0], tmpfd[1], len, flags) == -1) {
594 r = errno;
595 bdout << "raw_pipe: error tee'ing into temp pipe: " << cpp_strerror(r)
596 << bendl;
597 throw error_code(r);
598 }
599 data = (char *)malloc(len);
600 if (!data) {
601 throw bad_alloc();
602 }
603 r = safe_read(tmpfd[0], data, len);
604 if (r < (ssize_t)len) {
605 bdout << "raw_pipe: error reading from temp pipe:" << cpp_strerror(r)
606 << bendl;
607 free(data);
608 data = NULL;
609 throw error_code(r);
610 }
611 return data;
612 }
613 bool source_consumed;
614 int pipefds[2];
615 };
616 #endif // CEPH_HAVE_SPLICE
617
618 /*
619 * primitive buffer types
620 */
621 class buffer::raw_char : public buffer::raw {
622 public:
623 MEMPOOL_CLASS_HELPERS();
624
625 explicit raw_char(unsigned l) : raw(l) {
626 if (len)
627 data = new char[len];
628 else
629 data = 0;
630 inc_total_alloc(len);
631 inc_history_alloc(len);
632 bdout << "raw_char " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl;
633 }
634 raw_char(unsigned l, char *b) : raw(b, l) {
635 inc_total_alloc(len);
636 bdout << "raw_char " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl;
637 }
638 ~raw_char() override {
639 delete[] data;
640 dec_total_alloc(len);
641 bdout << "raw_char " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl;
642 }
643 raw* clone_empty() override {
644 return new raw_char(len);
645 }
646 };
647
648 class buffer::raw_claimed_char : public buffer::raw {
649 public:
650 MEMPOOL_CLASS_HELPERS();
651
652 explicit raw_claimed_char(unsigned l, char *b) : raw(b, l) {
653 inc_total_alloc(len);
654 bdout << "raw_claimed_char " << this << " alloc " << (void *)data
655 << " " << l << " " << buffer::get_total_alloc() << bendl;
656 }
657 ~raw_claimed_char() override {
658 dec_total_alloc(len);
659 bdout << "raw_claimed_char " << this << " free " << (void *)data
660 << " " << buffer::get_total_alloc() << bendl;
661 }
662 raw* clone_empty() override {
663 return new raw_char(len);
664 }
665 };
666
667 class buffer::raw_unshareable : public buffer::raw {
668 public:
669 MEMPOOL_CLASS_HELPERS();
670
671 explicit raw_unshareable(unsigned l) : raw(l) {
672 if (len)
673 data = new char[len];
674 else
675 data = 0;
676 }
677 raw_unshareable(unsigned l, char *b) : raw(b, l) {
678 }
679 raw* clone_empty() override {
680 return new raw_char(len);
681 }
682 bool is_shareable() override {
683 return false; // !shareable, will force make_shareable()
684 }
685 ~raw_unshareable() override {
686 delete[] data;
687 }
688 };
689
690 class buffer::raw_static : public buffer::raw {
691 public:
692 MEMPOOL_CLASS_HELPERS();
693
694 raw_static(const char *d, unsigned l) : raw((char*)d, l) { }
695 ~raw_static() override {}
696 raw* clone_empty() override {
697 return new buffer::raw_char(len);
698 }
699 };
700
701 class buffer::raw_claim_buffer : public buffer::raw {
702 deleter del;
703 public:
704 raw_claim_buffer(const char *b, unsigned l, deleter d)
705 : raw((char*)b, l), del(std::move(d)) { }
706 ~raw_claim_buffer() override {}
707 raw* clone_empty() override {
708 return new buffer::raw_char(len);
709 }
710 };
711
712 #if defined(HAVE_XIO)
713 class buffer::xio_msg_buffer : public buffer::raw {
714 private:
715 XioDispatchHook* m_hook;
716 public:
717 xio_msg_buffer(XioDispatchHook* _m_hook, const char *d,
718 unsigned l) :
719 raw((char*)d, l), m_hook(_m_hook->get()) {}
720
721 bool is_shareable() { return false; }
722 static void operator delete(void *p)
723 {
724 xio_msg_buffer *buf = static_cast<xio_msg_buffer*>(p);
725 // return hook ref (counts against pool); it appears illegal
726 // to do this in our dtor, because this fires after that
727 buf->m_hook->put();
728 }
729 raw* clone_empty() {
730 return new buffer::raw_char(len);
731 }
732 };
733
734 class buffer::xio_mempool : public buffer::raw {
735 public:
736 struct xio_reg_mem *mp;
737 xio_mempool(struct xio_reg_mem *_mp, unsigned l) :
738 raw((char*)_mp->addr, l), mp(_mp)
739 { }
740 ~xio_mempool() {}
741 raw* clone_empty() {
742 return new buffer::raw_char(len);
743 }
744 };
745
746 struct xio_reg_mem* get_xio_mp(const buffer::ptr& bp)
747 {
748 buffer::xio_mempool *mb = dynamic_cast<buffer::xio_mempool*>(bp.get_raw());
749 if (mb) {
750 return mb->mp;
751 }
752 return NULL;
753 }
754
755 buffer::raw* buffer::create_msg(
756 unsigned len, char *buf, XioDispatchHook* m_hook) {
757 XioPool& pool = m_hook->get_pool();
758 buffer::raw* bp =
759 static_cast<buffer::raw*>(pool.alloc(sizeof(xio_msg_buffer)));
760 new (bp) xio_msg_buffer(m_hook, buf, len);
761 return bp;
762 }
763 #endif /* HAVE_XIO */
764
765 buffer::raw* buffer::copy(const char *c, unsigned len) {
766 raw* r = buffer::create_aligned(len, sizeof(size_t));
767 memcpy(r->data, c, len);
768 return r;
769 }
770
771 buffer::raw* buffer::create(unsigned len) {
772 return buffer::create_aligned(len, sizeof(size_t));
773 }
774 buffer::raw* buffer::claim_char(unsigned len, char *buf) {
775 return new raw_claimed_char(len, buf);
776 }
777 buffer::raw* buffer::create_malloc(unsigned len) {
778 return new raw_malloc(len);
779 }
780 buffer::raw* buffer::claim_malloc(unsigned len, char *buf) {
781 return new raw_malloc(len, buf);
782 }
783 buffer::raw* buffer::create_static(unsigned len, char *buf) {
784 return new raw_static(buf, len);
785 }
786 buffer::raw* buffer::claim_buffer(unsigned len, char *buf, deleter del) {
787 return new raw_claim_buffer(buf, len, std::move(del));
788 }
789
790 buffer::raw* buffer::create_aligned(unsigned len, unsigned align) {
791 // If alignment is a page multiple, use a separate buffer::raw to
792 // avoid fragmenting the heap.
793 //
794 // Somewhat unexpectedly, I see consistently better performance
795 // from raw_combined than from raw even when the allocation size is
796 // a page multiple (but alignment is not).
797 //
798 // I also see better performance from a separate buffer::raw once the
799 // size passes 8KB.
800 if ((align & ~CEPH_PAGE_MASK) == 0 ||
801 len >= CEPH_PAGE_SIZE * 2) {
802 #ifndef __CYGWIN__
803 return new raw_posix_aligned(len, align);
804 #else
805 return new raw_hack_aligned(len, align);
806 #endif
807 }
808 return raw_combined::create(len, align);
809 }
810
811 buffer::raw* buffer::create_page_aligned(unsigned len) {
812 return create_aligned(len, CEPH_PAGE_SIZE);
813 }
814
815 buffer::raw* buffer::create_zero_copy(unsigned len, int fd, int64_t *offset) {
816 #ifdef CEPH_HAVE_SPLICE
817 buffer::raw_pipe* buf = new raw_pipe(len);
818 int r = buf->set_source(fd, (loff_t*)offset);
819 if (r < 0) {
820 delete buf;
821 throw error_code(r);
822 }
823 return buf;
824 #else
825 throw error_code(-ENOTSUP);
826 #endif
827 }
828
829 buffer::raw* buffer::create_unshareable(unsigned len) {
830 return new raw_unshareable(len);
831 }
832
833 buffer::ptr::ptr(raw *r) : _raw(r), _off(0), _len(r->len) // no lock needed; this is an unref raw.
834 {
835 r->nref++;
836 bdout << "ptr " << this << " get " << _raw << bendl;
837 }
838 buffer::ptr::ptr(unsigned l) : _off(0), _len(l)
839 {
840 _raw = create(l);
841 _raw->nref++;
842 bdout << "ptr " << this << " get " << _raw << bendl;
843 }
844 buffer::ptr::ptr(const char *d, unsigned l) : _off(0), _len(l) // ditto.
845 {
846 _raw = copy(d, l);
847 _raw->nref++;
848 bdout << "ptr " << this << " get " << _raw << bendl;
849 }
850 buffer::ptr::ptr(const ptr& p) : _raw(p._raw), _off(p._off), _len(p._len)
851 {
852 if (_raw) {
853 _raw->nref++;
854 bdout << "ptr " << this << " get " << _raw << bendl;
855 }
856 }
857 buffer::ptr::ptr(ptr&& p) noexcept : _raw(p._raw), _off(p._off), _len(p._len)
858 {
859 p._raw = nullptr;
860 p._off = p._len = 0;
861 }
862 buffer::ptr::ptr(const ptr& p, unsigned o, unsigned l)
863 : _raw(p._raw), _off(p._off + o), _len(l)
864 {
865 assert(o+l <= p._len);
866 assert(_raw);
867 _raw->nref++;
868 bdout << "ptr " << this << " get " << _raw << bendl;
869 }
870 buffer::ptr& buffer::ptr::operator= (const ptr& p)
871 {
872 if (p._raw) {
873 p._raw->nref++;
874 bdout << "ptr " << this << " get " << _raw << bendl;
875 }
876 buffer::raw *raw = p._raw;
877 release();
878 if (raw) {
879 _raw = raw;
880 _off = p._off;
881 _len = p._len;
882 } else {
883 _off = _len = 0;
884 }
885 return *this;
886 }
887 buffer::ptr& buffer::ptr::operator= (ptr&& p) noexcept
888 {
889 release();
890 buffer::raw *raw = p._raw;
891 if (raw) {
892 _raw = raw;
893 _off = p._off;
894 _len = p._len;
895 p._raw = nullptr;
896 p._off = p._len = 0;
897 } else {
898 _off = _len = 0;
899 }
900 return *this;
901 }
902
903 buffer::raw *buffer::ptr::clone()
904 {
905 return _raw->clone();
906 }
907
908 buffer::ptr& buffer::ptr::make_shareable() {
909 if (_raw && !_raw->is_shareable()) {
910 buffer::raw *tr = _raw;
911 _raw = tr->clone();
912 _raw->nref = 1;
913 if (unlikely(--tr->nref == 0)) {
914 ANNOTATE_HAPPENS_AFTER(&tr->nref);
915 ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&tr->nref);
916 delete tr;
917 } else {
918 ANNOTATE_HAPPENS_BEFORE(&tr->nref);
919 }
920 }
921 return *this;
922 }
923
924 void buffer::ptr::swap(ptr& other)
925 {
926 raw *r = _raw;
927 unsigned o = _off;
928 unsigned l = _len;
929 _raw = other._raw;
930 _off = other._off;
931 _len = other._len;
932 other._raw = r;
933 other._off = o;
934 other._len = l;
935 }
936
937 void buffer::ptr::release()
938 {
939 if (_raw) {
940 bdout << "ptr " << this << " release " << _raw << bendl;
941 if (--_raw->nref == 0) {
942 //cout << "hosing raw " << (void*)_raw << " len " << _raw->len << std::endl;
943 ANNOTATE_HAPPENS_AFTER(&_raw->nref);
944 ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&_raw->nref);
945 delete _raw; // dealloc old (if any)
946 } else {
947 ANNOTATE_HAPPENS_BEFORE(&_raw->nref);
948 }
949 _raw = 0;
950 }
951 }
952
953 bool buffer::ptr::at_buffer_tail() const { return _off + _len == _raw->len; }
954
955 const char *buffer::ptr::c_str() const {
956 assert(_raw);
957 if (buffer_track_c_str)
958 buffer_c_str_accesses++;
959 return _raw->get_data() + _off;
960 }
961 char *buffer::ptr::c_str() {
962 assert(_raw);
963 if (buffer_track_c_str)
964 buffer_c_str_accesses++;
965 return _raw->get_data() + _off;
966 }
967 const char *buffer::ptr::end_c_str() const {
968 assert(_raw);
969 if (buffer_track_c_str)
970 buffer_c_str_accesses++;
971 return _raw->get_data() + _off + _len;
972 }
973 char *buffer::ptr::end_c_str() {
974 assert(_raw);
975 if (buffer_track_c_str)
976 buffer_c_str_accesses++;
977 return _raw->get_data() + _off + _len;
978 }
979
980 unsigned buffer::ptr::unused_tail_length() const
981 {
982 if (_raw)
983 return _raw->len - (_off+_len);
984 else
985 return 0;
986 }
987 const char& buffer::ptr::operator[](unsigned n) const
988 {
989 assert(_raw);
990 assert(n < _len);
991 return _raw->get_data()[_off + n];
992 }
993 char& buffer::ptr::operator[](unsigned n)
994 {
995 assert(_raw);
996 assert(n < _len);
997 return _raw->get_data()[_off + n];
998 }
999
1000 const char *buffer::ptr::raw_c_str() const { assert(_raw); return _raw->data; }
1001 unsigned buffer::ptr::raw_length() const { assert(_raw); return _raw->len; }
1002 int buffer::ptr::raw_nref() const { assert(_raw); return _raw->nref; }
1003
1004 void buffer::ptr::copy_out(unsigned o, unsigned l, char *dest) const {
1005 assert(_raw);
1006 if (o+l > _len)
1007 throw end_of_buffer();
1008 char* src = _raw->data + _off + o;
1009 maybe_inline_memcpy(dest, src, l, 8);
1010 }
1011
1012 unsigned buffer::ptr::wasted() const
1013 {
1014 return _raw->len - _len;
1015 }
1016
1017 int buffer::ptr::cmp(const ptr& o) const
1018 {
1019 int l = _len < o._len ? _len : o._len;
1020 if (l) {
1021 int r = memcmp(c_str(), o.c_str(), l);
1022 if (r)
1023 return r;
1024 }
1025 if (_len < o._len)
1026 return -1;
1027 if (_len > o._len)
1028 return 1;
1029 return 0;
1030 }
1031
1032 bool buffer::ptr::is_zero() const
1033 {
1034 return mem_is_zero(c_str(), _len);
1035 }
1036
1037 unsigned buffer::ptr::append(char c)
1038 {
1039 assert(_raw);
1040 assert(1 <= unused_tail_length());
1041 char* ptr = _raw->data + _off + _len;
1042 *ptr = c;
1043 _len++;
1044 return _len + _off;
1045 }
1046
1047 unsigned buffer::ptr::append(const char *p, unsigned l)
1048 {
1049 assert(_raw);
1050 assert(l <= unused_tail_length());
1051 char* c = _raw->data + _off + _len;
1052 maybe_inline_memcpy(c, p, l, 32);
1053 _len += l;
1054 return _len + _off;
1055 }
1056
1057 void buffer::ptr::copy_in(unsigned o, unsigned l, const char *src)
1058 {
1059 copy_in(o, l, src, true);
1060 }
1061
1062 void buffer::ptr::copy_in(unsigned o, unsigned l, const char *src, bool crc_reset)
1063 {
1064 assert(_raw);
1065 assert(o <= _len);
1066 assert(o+l <= _len);
1067 char* dest = _raw->data + _off + o;
1068 if (crc_reset)
1069 _raw->invalidate_crc();
1070 maybe_inline_memcpy(dest, src, l, 64);
1071 }
1072
1073 void buffer::ptr::zero()
1074 {
1075 zero(true);
1076 }
1077
1078 void buffer::ptr::zero(bool crc_reset)
1079 {
1080 if (crc_reset)
1081 _raw->invalidate_crc();
1082 memset(c_str(), 0, _len);
1083 }
1084
1085 void buffer::ptr::zero(unsigned o, unsigned l)
1086 {
1087 zero(o, l, true);
1088 }
1089
1090 void buffer::ptr::zero(unsigned o, unsigned l, bool crc_reset)
1091 {
1092 assert(o+l <= _len);
1093 if (crc_reset)
1094 _raw->invalidate_crc();
1095 memset(c_str()+o, 0, l);
1096 }
1097 bool buffer::ptr::can_zero_copy() const
1098 {
1099 return _raw->can_zero_copy();
1100 }
1101
1102 int buffer::ptr::zero_copy_to_fd(int fd, int64_t *offset) const
1103 {
1104 return _raw->zero_copy_to_fd(fd, (loff_t*)offset);
1105 }
1106
1107 // -- buffer::list::iterator --
1108 /*
1109 buffer::list::iterator operator=(const buffer::list::iterator& other)
1110 {
1111 if (this != &other) {
1112 bl = other.bl;
1113 ls = other.ls;
1114 off = other.off;
1115 p = other.p;
1116 p_off = other.p_off;
1117 }
1118 return *this;
1119 }*/
1120
1121 template<bool is_const>
1122 buffer::list::iterator_impl<is_const>::iterator_impl(bl_t *l, unsigned o)
1123 : bl(l), ls(&bl->_buffers), off(0), p(ls->begin()), p_off(0)
1124 {
1125 advance(o);
1126 }
1127
1128 template<bool is_const>
1129 buffer::list::iterator_impl<is_const>::iterator_impl(const buffer::list::iterator& i)
1130 : iterator_impl<is_const>(i.bl, i.off, i.p, i.p_off) {}
1131
1132 template<bool is_const>
1133 void buffer::list::iterator_impl<is_const>::advance(int o)
1134 {
1135 //cout << this << " advance " << o << " from " << off << " (p_off " << p_off << " in " << p->length() << ")" << std::endl;
1136 if (o > 0) {
1137 p_off += o;
1138 while (p_off > 0) {
1139 if (p == ls->end())
1140 throw end_of_buffer();
1141 if (p_off >= p->length()) {
1142 // skip this buffer
1143 p_off -= p->length();
1144 p++;
1145 } else {
1146 // somewhere in this buffer!
1147 break;
1148 }
1149 }
1150 off += o;
1151 return;
1152 }
1153 while (o < 0) {
1154 if (p_off) {
1155 unsigned d = -o;
1156 if (d > p_off)
1157 d = p_off;
1158 p_off -= d;
1159 off -= d;
1160 o += d;
1161 } else if (off > 0) {
1162 assert(p != ls->begin());
1163 p--;
1164 p_off = p->length();
1165 } else {
1166 throw end_of_buffer();
1167 }
1168 }
1169 }
1170
1171 template<bool is_const>
1172 void buffer::list::iterator_impl<is_const>::seek(unsigned o)
1173 {
1174 p = ls->begin();
1175 off = p_off = 0;
1176 advance(o);
1177 }
1178
1179 template<bool is_const>
1180 char buffer::list::iterator_impl<is_const>::operator*() const
1181 {
1182 if (p == ls->end())
1183 throw end_of_buffer();
1184 return (*p)[p_off];
1185 }
1186
1187 template<bool is_const>
1188 buffer::list::iterator_impl<is_const>&
1189 buffer::list::iterator_impl<is_const>::operator++()
1190 {
1191 if (p == ls->end())
1192 throw end_of_buffer();
1193 advance(1);
1194 return *this;
1195 }
1196
1197 template<bool is_const>
1198 buffer::ptr buffer::list::iterator_impl<is_const>::get_current_ptr() const
1199 {
1200 if (p == ls->end())
1201 throw end_of_buffer();
1202 return ptr(*p, p_off, p->length() - p_off);
1203 }
1204
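// copy data out.
// note that the list and std::string overloads _append_ to dest, while the
// char* overload writes at dest and the ptr overloads assign a new ptr to dest.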
1207 template<bool is_const>
1208 void buffer::list::iterator_impl<is_const>::copy(unsigned len, char *dest)
1209 {
1210 if (p == ls->end()) seek(off);
1211 while (len > 0) {
1212 if (p == ls->end())
1213 throw end_of_buffer();
1214 assert(p->length() > 0);
1215
1216 unsigned howmuch = p->length() - p_off;
1217 if (len < howmuch) howmuch = len;
1218 p->copy_out(p_off, howmuch, dest);
1219 dest += howmuch;
1220
1221 len -= howmuch;
1222 advance(howmuch);
1223 }
1224 }
1225
1226 template<bool is_const>
1227 void buffer::list::iterator_impl<is_const>::copy(unsigned len, ptr &dest)
1228 {
1229 copy_deep(len, dest);
1230 }
1231
1232 template<bool is_const>
1233 void buffer::list::iterator_impl<is_const>::copy_deep(unsigned len, ptr &dest)
1234 {
1235 if (!len) {
1236 return;
1237 }
1238 if (p == ls->end())
1239 throw end_of_buffer();
1240 assert(p->length() > 0);
1241 dest = create(len);
1242 copy(len, dest.c_str());
1243 }
1244 template<bool is_const>
1245 void buffer::list::iterator_impl<is_const>::copy_shallow(unsigned len,
1246 ptr &dest)
1247 {
1248 if (!len) {
1249 return;
1250 }
1251 if (p == ls->end())
1252 throw end_of_buffer();
1253 assert(p->length() > 0);
1254 unsigned howmuch = p->length() - p_off;
1255 if (howmuch < len) {
1256 dest = create(len);
1257 copy(len, dest.c_str());
1258 } else {
1259 dest = ptr(*p, p_off, len);
1260 advance(len);
1261 }
1262 }
1263
1264 template<bool is_const>
1265 void buffer::list::iterator_impl<is_const>::copy(unsigned len, list &dest)
1266 {
1267 if (p == ls->end())
1268 seek(off);
1269 while (len > 0) {
1270 if (p == ls->end())
1271 throw end_of_buffer();
1272
1273 unsigned howmuch = p->length() - p_off;
1274 if (len < howmuch)
1275 howmuch = len;
1276 dest.append(*p, p_off, howmuch);
1277
1278 len -= howmuch;
1279 advance(howmuch);
1280 }
1281 }
1282
1283 template<bool is_const>
1284 void buffer::list::iterator_impl<is_const>::copy(unsigned len, std::string &dest)
1285 {
1286 if (p == ls->end())
1287 seek(off);
1288 while (len > 0) {
1289 if (p == ls->end())
1290 throw end_of_buffer();
1291
1292 unsigned howmuch = p->length() - p_off;
1293 const char *c_str = p->c_str();
1294 if (len < howmuch)
1295 howmuch = len;
1296 dest.append(c_str + p_off, howmuch);
1297
1298 len -= howmuch;
1299 advance(howmuch);
1300 }
1301 }
1302
1303 template<bool is_const>
1304 void buffer::list::iterator_impl<is_const>::copy_all(list &dest)
1305 {
1306 if (p == ls->end())
1307 seek(off);
1308 while (1) {
1309 if (p == ls->end())
1310 return;
1311 assert(p->length() > 0);
1312
1313 unsigned howmuch = p->length() - p_off;
1314 const char *c_str = p->c_str();
1315 dest.append(c_str + p_off, howmuch);
1316
1317 advance(howmuch);
1318 }
1319 }
1320
1321 template<bool is_const>
1322 size_t buffer::list::iterator_impl<is_const>::get_ptr_and_advance(
1323 size_t want, const char **data)
1324 {
1325 if (p == ls->end()) {
1326 seek(off);
1327 if (p == ls->end()) {
1328 return 0;
1329 }
1330 }
1331 *data = p->c_str() + p_off;
1332 size_t l = MIN(p->length() - p_off, want);
1333 p_off += l;
1334 if (p_off == p->length()) {
1335 ++p;
1336 p_off = 0;
1337 }
1338 off += l;
1339 return l;
1340 }
1341
1342 template<bool is_const>
1343 uint32_t buffer::list::iterator_impl<is_const>::crc32c(
1344 size_t length, uint32_t crc)
1345 {
1346 length = MIN( length, get_remaining());
1347 while (length > 0) {
1348 const char *p;
1349 size_t l = get_ptr_and_advance(length, &p);
1350 crc = ceph_crc32c(crc, (unsigned char*)p, l);
1351 length -= l;
1352 }
1353 return crc;
1354 }
1355
1356 // explicitly instantiate only the iterator types we need, so we can hide the
1357 // details in this compilation unit without introducing unnecessary link time
1358 // dependencies.
1359 template class buffer::list::iterator_impl<true>;
1360 template class buffer::list::iterator_impl<false>;
1361
1362 buffer::list::iterator::iterator(bl_t *l, unsigned o)
1363 : iterator_impl(l, o)
1364 {}
1365
1366 buffer::list::iterator::iterator(bl_t *l, unsigned o, list_iter_t ip, unsigned po)
1367 : iterator_impl(l, o, ip, po)
1368 {}
1369
1370 void buffer::list::iterator::advance(int o)
1371 {
1372 buffer::list::iterator_impl<false>::advance(o);
1373 }
1374
1375 void buffer::list::iterator::seek(unsigned o)
1376 {
1377 buffer::list::iterator_impl<false>::seek(o);
1378 }
1379
1380 char buffer::list::iterator::operator*()
1381 {
1382 if (p == ls->end()) {
1383 throw end_of_buffer();
1384 }
1385 return (*p)[p_off];
1386 }
1387
1388 buffer::list::iterator& buffer::list::iterator::operator++()
1389 {
1390 buffer::list::iterator_impl<false>::operator++();
1391 return *this;
1392 }
1393
1394 buffer::ptr buffer::list::iterator::get_current_ptr()
1395 {
1396 if (p == ls->end()) {
1397 throw end_of_buffer();
1398 }
1399 return ptr(*p, p_off, p->length() - p_off);
1400 }
1401
1402 void buffer::list::iterator::copy(unsigned len, char *dest)
1403 {
1404 return buffer::list::iterator_impl<false>::copy(len, dest);
1405 }
1406
1407 void buffer::list::iterator::copy(unsigned len, ptr &dest)
1408 {
1409 return buffer::list::iterator_impl<false>::copy_deep(len, dest);
1410 }
1411
1412 void buffer::list::iterator::copy_deep(unsigned len, ptr &dest)
1413 {
1414 buffer::list::iterator_impl<false>::copy_deep(len, dest);
1415 }
1416
1417 void buffer::list::iterator::copy_shallow(unsigned len, ptr &dest)
1418 {
1419 buffer::list::iterator_impl<false>::copy_shallow(len, dest);
1420 }
1421
1422 void buffer::list::iterator::copy(unsigned len, list &dest)
1423 {
1424 buffer::list::iterator_impl<false>::copy(len, dest);
1425 }
1426
1427 void buffer::list::iterator::copy(unsigned len, std::string &dest)
1428 {
1429 buffer::list::iterator_impl<false>::copy(len, dest);
1430 }
1431
1432 void buffer::list::iterator::copy_all(list &dest)
1433 {
1434 buffer::list::iterator_impl<false>::copy_all(dest);
1435 }
1436
1437 void buffer::list::iterator::copy_in(unsigned len, const char *src)
1438 {
1439 copy_in(len, src, true);
1440 }
1441
1442 // copy data in
1443 void buffer::list::iterator::copy_in(unsigned len, const char *src, bool crc_reset)
1444 {
1445 // copy
1446 if (p == ls->end())
1447 seek(off);
1448 while (len > 0) {
1449 if (p == ls->end())
1450 throw end_of_buffer();
1451
1452 unsigned howmuch = p->length() - p_off;
1453 if (len < howmuch)
1454 howmuch = len;
1455 p->copy_in(p_off, howmuch, src, crc_reset);
1456
1457 src += howmuch;
1458 len -= howmuch;
1459 advance(howmuch);
1460 }
1461 }
1462
1463 void buffer::list::iterator::copy_in(unsigned len, const list& otherl)
1464 {
1465 if (p == ls->end())
1466 seek(off);
1467 unsigned left = len;
1468 for (std::list<ptr>::const_iterator i = otherl._buffers.begin();
1469 i != otherl._buffers.end();
1470 ++i) {
1471 unsigned l = (*i).length();
1472 if (left < l)
1473 l = left;
1474 copy_in(l, i->c_str());
1475 left -= l;
1476 if (left == 0)
1477 break;
1478 }
1479 }
1480
1481 // -- buffer::list --
1482
1483 buffer::list::list(list&& other)
1484 : _buffers(std::move(other._buffers)),
1485 _len(other._len),
1486 _memcopy_count(other._memcopy_count),
1487 last_p(this) {
1488 append_buffer.swap(other.append_buffer);
1489 other.clear();
1490 }
1491
1492 void buffer::list::swap(list& other)
1493 {
1494 std::swap(_len, other._len);
1495 std::swap(_memcopy_count, other._memcopy_count);
1496 std::swap(_mempool, other._mempool);
1497 _buffers.swap(other._buffers);
1498 append_buffer.swap(other.append_buffer);
1499 //last_p.swap(other.last_p);
1500 last_p = begin();
1501 other.last_p = other.begin();
1502 }
1503
1504 bool buffer::list::contents_equal(buffer::list& other)
1505 {
1506 return static_cast<const buffer::list*>(this)->contents_equal(other);
1507 }
1508
1509 bool buffer::list::contents_equal(const ceph::buffer::list& other) const
1510 {
1511 if (length() != other.length())
1512 return false;
1513
1514 // buffer-wise comparison
1515 if (true) {
1516 std::list<ptr>::const_iterator a = _buffers.begin();
1517 std::list<ptr>::const_iterator b = other._buffers.begin();
1518 unsigned aoff = 0, boff = 0;
1519 while (a != _buffers.end()) {
1520 unsigned len = a->length() - aoff;
1521 if (len > b->length() - boff)
1522 len = b->length() - boff;
1523 if (memcmp(a->c_str() + aoff, b->c_str() + boff, len) != 0)
1524 return false;
1525 aoff += len;
1526 if (aoff == a->length()) {
1527 aoff = 0;
1528 ++a;
1529 }
1530 boff += len;
1531 if (boff == b->length()) {
1532 boff = 0;
1533 ++b;
1534 }
1535 }
1536 assert(b == other._buffers.end());
1537 return true;
1538 }
1539
1540 // byte-wise comparison
1541 if (false) {
1542 bufferlist::const_iterator me = begin();
1543 bufferlist::const_iterator him = other.begin();
1544 while (!me.end()) {
1545 if (*me != *him)
1546 return false;
1547 ++me;
1548 ++him;
1549 }
1550 return true;
1551 }
1552 }
1553
1554 bool buffer::list::can_zero_copy() const
1555 {
1556 for (std::list<ptr>::const_iterator it = _buffers.begin();
1557 it != _buffers.end();
1558 ++it)
1559 if (!it->can_zero_copy())
1560 return false;
1561 return true;
1562 }
1563
1564 bool buffer::list::is_provided_buffer(const char *dst) const
1565 {
1566 if (_buffers.empty())
1567 return false;
1568 return (is_contiguous() && (_buffers.front().c_str() == dst));
1569 }
1570
1571 bool buffer::list::is_aligned(unsigned align) const
1572 {
1573 for (std::list<ptr>::const_iterator it = _buffers.begin();
1574 it != _buffers.end();
1575 ++it)
1576 if (!it->is_aligned(align))
1577 return false;
1578 return true;
1579 }
1580
1581 bool buffer::list::is_n_align_sized(unsigned align) const
1582 {
1583 for (std::list<ptr>::const_iterator it = _buffers.begin();
1584 it != _buffers.end();
1585 ++it)
1586 if (!it->is_n_align_sized(align))
1587 return false;
1588 return true;
1589 }
1590
1591 bool buffer::list::is_aligned_size_and_memory(unsigned align_size,
1592 unsigned align_memory) const
1593 {
1594 for (std::list<ptr>::const_iterator it = _buffers.begin();
1595 it != _buffers.end();
1596 ++it) {
1597 if (!it->is_aligned(align_memory) || !it->is_n_align_sized(align_size))
1598 return false;
1599 }
1600 return true;
1601 }
1602
1603 bool buffer::list::is_zero() const {
1604 for (std::list<ptr>::const_iterator it = _buffers.begin();
1605 it != _buffers.end();
1606 ++it) {
1607 if (!it->is_zero()) {
1608 return false;
1609 }
1610 }
1611 return true;
1612 }
1613
1614 void buffer::list::zero()
1615 {
1616 for (std::list<ptr>::iterator it = _buffers.begin();
1617 it != _buffers.end();
1618 ++it)
1619 it->zero();
1620 }
1621
1622 void buffer::list::zero(unsigned o, unsigned l)
1623 {
1624 assert(o+l <= _len);
1625 unsigned p = 0;
1626 for (std::list<ptr>::iterator it = _buffers.begin();
1627 it != _buffers.end();
1628 ++it) {
1629 if (p + it->length() > o) {
1630 if (p >= o && p+it->length() <= o+l) {
1631 // 'o'------------- l -----------|
1632 // 'p'-- it->length() --|
1633 it->zero();
1634 } else if (p >= o) {
1635 // 'o'------------- l -----------|
1636 // 'p'------- it->length() -------|
1637 it->zero(0, o+l-p);
1638 } else if (p + it->length() <= o+l) {
1639 // 'o'------------- l -----------|
1640 // 'p'------- it->length() -------|
1641 it->zero(o-p, it->length()-(o-p));
1642 } else {
1643 // 'o'----------- l -----------|
1644 // 'p'---------- it->length() ----------|
1645 it->zero(o-p, l);
1646 }
1647 }
1648 p += it->length();
1649 if (o+l <= p)
1650 break; // done
1651 }
1652 }
1653
1654 bool buffer::list::is_contiguous() const
1655 {
1656 return &(*_buffers.begin()) == &(*_buffers.rbegin());
1657 }
1658
1659 bool buffer::list::is_n_page_sized() const
1660 {
1661 return is_n_align_sized(CEPH_PAGE_SIZE);
1662 }
1663
1664 bool buffer::list::is_page_aligned() const
1665 {
1666 return is_aligned(CEPH_PAGE_SIZE);
1667 }
1668
1669 void buffer::list::reassign_to_mempool(int pool)
1670 {
1671 _mempool = pool;
1672 if (append_buffer.get_raw()) {
1673 append_buffer.get_raw()->reassign_to_mempool(pool);
1674 }
1675 for (auto& p : _buffers) {
1676 p.get_raw()->reassign_to_mempool(pool);
1677 }
1678 }
1679
1680 void buffer::list::try_assign_to_mempool(int pool)
1681 {
1682 _mempool = pool;
1683 if (append_buffer.get_raw()) {
1684 append_buffer.get_raw()->try_assign_to_mempool(pool);
1685 }
1686 for (auto& p : _buffers) {
1687 p.get_raw()->try_assign_to_mempool(pool);
1688 }
1689 }
1690
1691 void buffer::list::rebuild()
1692 {
1693 if (_len == 0) {
1694 _buffers.clear();
1695 return;
1696 }
1697 ptr nb;
1698 if ((_len & ~CEPH_PAGE_MASK) == 0)
1699 nb = buffer::create_page_aligned(_len);
1700 else
1701 nb = buffer::create(_len);
1702 rebuild(nb);
1703 }
1704
1705 void buffer::list::rebuild(ptr& nb)
1706 {
1707 unsigned pos = 0;
1708 for (std::list<ptr>::iterator it = _buffers.begin();
1709 it != _buffers.end();
1710 ++it) {
1711 nb.copy_in(pos, it->length(), it->c_str(), false);
1712 pos += it->length();
1713 }
1714 _memcopy_count += pos;
1715 _buffers.clear();
1716 if (nb.length())
1717 _buffers.push_back(nb);
1718 invalidate_crc();
1719 last_p = begin();
1720 }
1721
1722 bool buffer::list::rebuild_aligned(unsigned align)
1723 {
1724 return rebuild_aligned_size_and_memory(align, align);
1725 }
1726
1727 bool buffer::list::rebuild_aligned_size_and_memory(unsigned align_size,
1728 unsigned align_memory)
1729 {
1730 unsigned old_memcopy_count = _memcopy_count;
1731 std::list<ptr>::iterator p = _buffers.begin();
1732 while (p != _buffers.end()) {
1733 // keep anything that's already align and sized aligned
1734 if (p->is_aligned(align_memory) && p->is_n_align_sized(align_size)) {
1735 /*cout << " segment " << (void*)p->c_str()
1736 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1737 << " length " << p->length()
1738 << " " << (p->length() & (align - 1)) << " ok" << std::endl;
1739 */
1740 ++p;
1741 continue;
1742 }
1743
1744 // consolidate unaligned items, until we get something that is sized+aligned
1745 list unaligned;
1746 unsigned offset = 0;
1747 do {
1748 /*cout << " segment " << (void*)p->c_str()
1749 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1750 << " length " << p->length() << " " << (p->length() & (align - 1))
1751 << " overall offset " << offset << " " << (offset & (align - 1))
1752 << " not ok" << std::endl;
1753 */
1754 offset += p->length();
1755 unaligned.push_back(*p);
1756 _buffers.erase(p++);
1757 } while (p != _buffers.end() &&
1758 (!p->is_aligned(align_memory) ||
1759 !p->is_n_align_sized(align_size) ||
1760 (offset % align_size)));
1761 if (!(unaligned.is_contiguous() && unaligned._buffers.front().is_aligned(align_memory))) {
1762 ptr nb(buffer::create_aligned(unaligned._len, align_memory));
1763 unaligned.rebuild(nb);
1764 _memcopy_count += unaligned._len;
1765 }
1766 _buffers.insert(p, unaligned._buffers.front());
1767 }
1768 last_p = begin();
1769
1770 return (old_memcopy_count != _memcopy_count);
1771 }
1772
1773 bool buffer::list::rebuild_page_aligned()
1774 {
1775 return rebuild_aligned(CEPH_PAGE_SIZE);
1776 }
1777
1778 void buffer::list::reserve(size_t prealloc)
1779 {
1780 if (append_buffer.unused_tail_length() < prealloc) {
1781 append_buffer = buffer::create(prealloc);
1782 if (_mempool >= 0) {
1783 append_buffer.get_raw()->reassign_to_mempool(_mempool);
1784 }
1785 append_buffer.set_length(0); // unused, so far.
1786 }
1787 }
1788
1789 // sort-of-like-assignment-op
1790 void buffer::list::claim(list& bl, unsigned int flags)
1791 {
1792 // free my buffers
1793 clear();
1794 claim_append(bl, flags);
1795 }
1796
1797 void buffer::list::claim_append(list& bl, unsigned int flags)
1798 {
1799 // steal the other guy's buffers
1800 _len += bl._len;
1801 if (!(flags & CLAIM_ALLOW_NONSHAREABLE))
1802 bl.make_shareable();
1803 _buffers.splice(_buffers.end(), bl._buffers );
1804 bl._len = 0;
1805 bl.last_p = bl.begin();
1806 }
1807
1808 void buffer::list::claim_prepend(list& bl, unsigned int flags)
1809 {
1810 // steal the other guy's buffers
1811 _len += bl._len;
1812 if (!(flags & CLAIM_ALLOW_NONSHAREABLE))
1813 bl.make_shareable();
1814 _buffers.splice(_buffers.begin(), bl._buffers );
1815 bl._len = 0;
1816 bl.last_p = bl.begin();
1817 }
1818
1819 void buffer::list::claim_append_piecewise(list& bl)
1820 {
1821 // steal the other guy's buffers
1822 for (std::list<buffer::ptr>::const_iterator i = bl.buffers().begin();
1823 i != bl.buffers().end(); i++) {
1824 append(*i, 0, i->length());
1825 }
1826 bl.clear();
1827 }
1828
1829 void buffer::list::copy(unsigned off, unsigned len, char *dest) const
1830 {
1831 if (off + len > length())
1832 throw end_of_buffer();
1833 if (last_p.get_off() != off)
1834 last_p.seek(off);
1835 last_p.copy(len, dest);
1836 }
1837
1838 void buffer::list::copy(unsigned off, unsigned len, list &dest) const
1839 {
1840 if (off + len > length())
1841 throw end_of_buffer();
1842 if (last_p.get_off() != off)
1843 last_p.seek(off);
1844 last_p.copy(len, dest);
1845 }
1846
1847 void buffer::list::copy(unsigned off, unsigned len, std::string& dest) const
1848 {
1849 if (last_p.get_off() != off)
1850 last_p.seek(off);
1851 return last_p.copy(len, dest);
1852 }
1853
1854 void buffer::list::copy_in(unsigned off, unsigned len, const char *src)
1855 {
1856 copy_in(off, len, src, true);
1857 }
1858
1859 void buffer::list::copy_in(unsigned off, unsigned len, const char *src, bool crc_reset)
1860 {
1861 if (off + len > length())
1862 throw end_of_buffer();
1863
1864 if (last_p.get_off() != off)
1865 last_p.seek(off);
1866 last_p.copy_in(len, src, crc_reset);
1867 }
1868
1869 void buffer::list::copy_in(unsigned off, unsigned len, const list& src)
1870 {
1871 if (last_p.get_off() != off)
1872 last_p.seek(off);
1873 last_p.copy_in(len, src);
1874 }
1875
1876 void buffer::list::append(char c)
1877 {
1878 // put what we can into the existing append_buffer.
1879 unsigned gap = append_buffer.unused_tail_length();
1880 if (!gap) {
1881 // make a new append_buffer!
1882 append_buffer = raw_combined::create(CEPH_BUFFER_APPEND_SIZE);
1883 append_buffer.set_length(0); // unused, so far.
1884 if (_mempool >= 0) {
1885 append_buffer.get_raw()->reassign_to_mempool(_mempool);
1886 }
1887 }
1888 append(append_buffer, append_buffer.append(c) - 1, 1); // add segment to the list
1889 }
1890
1891 void buffer::list::append(const char *data, unsigned len)
1892 {
1893 while (len > 0) {
1894 // put what we can into the existing append_buffer.
1895 unsigned gap = append_buffer.unused_tail_length();
1896 if (gap > 0) {
1897 if (gap > len) gap = len;
1898 //cout << "append first char is " << data[0] << ", last char is " << data[len-1] << std::endl;
1899 append_buffer.append(data, gap);
1900 append(append_buffer, append_buffer.length() - gap, gap); // add segment to the list
1901 len -= gap;
1902 data += gap;
1903 }
1904 if (len == 0)
1905 break; // done!
1906
1907 // make a new append_buffer. fill out a complete page, factoring in the
1908 // raw_combined overhead.
1909 size_t need = ROUND_UP_TO(len, sizeof(size_t)) + sizeof(raw_combined);
1910 size_t alen = ROUND_UP_TO(need, CEPH_BUFFER_ALLOC_UNIT) -
1911 sizeof(raw_combined);
1912 append_buffer = raw_combined::create(alen);
1913 append_buffer.set_length(0); // unused, so far.
1914 if (_mempool >= 0) {
1915 append_buffer.get_raw()->reassign_to_mempool(_mempool);
1916 }
1917 }
1918 }
1919
1920 void buffer::list::append(const ptr& bp)
1921 {
1922 if (bp.length())
1923 push_back(bp);
1924 }
1925
1926 void buffer::list::append(ptr&& bp)
1927 {
1928 if (bp.length())
1929 push_back(std::move(bp));
1930 }
1931
1932 void buffer::list::append(const ptr& bp, unsigned off, unsigned len)
1933 {
1934 assert(len+off <= bp.length());
1935 if (!_buffers.empty()) {
1936 ptr &l = _buffers.back();
1937 if (l.get_raw() == bp.get_raw() &&
1938 l.end() == bp.start() + off) {
1939 // yay contiguous with tail bp!
1940 l.set_length(l.length()+len);
1941 _len += len;
1942 return;
1943 }
1944 }
1945 // add new item to list
1946 push_back(ptr(bp, off, len));
1947 }
1948
1949 void buffer::list::append(const list& bl)
1950 {
1951 _len += bl._len;
1952 for (std::list<ptr>::const_iterator p = bl._buffers.begin();
1953 p != bl._buffers.end();
1954 ++p)
1955 _buffers.push_back(*p);
1956 }
1957
1958 void buffer::list::append(std::istream& in)
1959 {
1960 while (!in.eof()) {
1961 std::string s;
1962 getline(in, s);
1963 append(s.c_str(), s.length());
1964 if (s.length())
1965 append("\n", 1);
1966 }
1967 }
1968
1969 void buffer::list::prepend_zero(unsigned len)
1970 {
1971 ptr bp(len);
1972 bp.zero(false);
1973 _len += len;
1974 _buffers.emplace_front(std::move(bp));
1975 }
1976
1977 void buffer::list::append_zero(unsigned len)
1978 {
1979 ptr bp(len);
1980 bp.zero(false);
1981 append(std::move(bp));
1982 }
1983
1984
1985 /*
1986 * get a char
1987 */
1988 const char& buffer::list::operator[](unsigned n) const
1989 {
1990 if (n >= _len)
1991 throw end_of_buffer();
1992
1993 for (std::list<ptr>::const_iterator p = _buffers.begin();
1994 p != _buffers.end();
1995 ++p) {
1996 if (n >= p->length()) {
1997 n -= p->length();
1998 continue;
1999 }
2000 return (*p)[n];
2001 }
2002 ceph_abort();
2003 }
2004
2005 /*
2006 * return a contiguous ptr to whole bufferlist contents.
2007 */
2008 char *buffer::list::c_str()
2009 {
2010 if (_buffers.empty())
2011 return 0; // no buffers
2012
2013 std::list<ptr>::const_iterator iter = _buffers.begin();
2014 ++iter;
2015
2016 if (iter != _buffers.end())
2017 rebuild();
2018 return _buffers.front().c_str(); // good, we're already contiguous.
2019 }
2020
2021 string buffer::list::to_str() const {
2022 string s;
2023 s.reserve(length());
2024 for (std::list<ptr>::const_iterator p = _buffers.begin();
2025 p != _buffers.end();
2026 ++p) {
2027 if (p->length()) {
2028 s.append(p->c_str(), p->length());
2029 }
2030 }
2031 return s;
2032 }
2033
2034 char *buffer::list::get_contiguous(unsigned orig_off, unsigned len)
2035 {
2036 if (orig_off + len > length())
2037 throw end_of_buffer();
2038
2039 if (len == 0) {
2040 return 0;
2041 }
2042
2043 unsigned off = orig_off;
2044 std::list<ptr>::iterator curbuf = _buffers.begin();
2045 while (off > 0 && off >= curbuf->length()) {
2046 off -= curbuf->length();
2047 ++curbuf;
2048 }
2049
2050 if (off + len > curbuf->length()) {
2051 bufferlist tmp;
2052 unsigned l = off + len;
2053
2054 do {
2055 if (l >= curbuf->length())
2056 l -= curbuf->length();
2057 else
2058 l = 0;
2059 tmp.append(*curbuf);
2060 curbuf = _buffers.erase(curbuf);
2061
2062 } while (curbuf != _buffers.end() && l > 0);
2063
2064 assert(l == 0);
2065
2066 tmp.rebuild();
2067 _buffers.insert(curbuf, tmp._buffers.front());
2068 return tmp.c_str() + off;
2069 }
2070
2071 last_p = begin(); // we modified _buffers
2072
2073 return curbuf->c_str() + off;
2074 }
2075
2076 void buffer::list::substr_of(const list& other, unsigned off, unsigned len)
2077 {
2078 if (off + len > other.length())
2079 throw end_of_buffer();
2080
2081 clear();
2082
2083 // skip off
2084 std::list<ptr>::const_iterator curbuf = other._buffers.begin();
2085 while (off > 0 &&
2086 off >= curbuf->length()) {
2087 // skip this buffer
2088 //cout << "skipping over " << *curbuf << std::endl;
2089 off -= (*curbuf).length();
2090 ++curbuf;
2091 }
2092 assert(len == 0 || curbuf != other._buffers.end());
2093
2094 while (len > 0) {
2095 // partial?
2096 if (off + len < curbuf->length()) {
2097 //cout << "copying partial of " << *curbuf << std::endl;
2098 _buffers.push_back( ptr( *curbuf, off, len ) );
2099 _len += len;
2100 break;
2101 }
2102
2103 // through end
2104 //cout << "copying end (all?) of " << *curbuf << std::endl;
2105 unsigned howmuch = curbuf->length() - off;
2106 _buffers.push_back( ptr( *curbuf, off, howmuch ) );
2107 _len += howmuch;
2108 len -= howmuch;
2109 off = 0;
2110 ++curbuf;
2111 }
2112 }
2113
2114 // funky modifer
2115 void buffer::list::splice(unsigned off, unsigned len, list *claim_by /*, bufferlist& replace_with */)
2116 { // fixme?
2117 if (len == 0)
2118 return;
2119
2120 if (off >= length())
2121 throw end_of_buffer();
2122
2123 assert(len > 0);
2124 //cout << "splice off " << off << " len " << len << " ... mylen = " << length() << std::endl;
2125
2126 // skip off
2127 std::list<ptr>::iterator curbuf = _buffers.begin();
2128 while (off > 0) {
2129 assert(curbuf != _buffers.end());
2130 if (off >= (*curbuf).length()) {
2131 // skip this buffer
2132 //cout << "off = " << off << " skipping over " << *curbuf << std::endl;
2133 off -= (*curbuf).length();
2134 ++curbuf;
2135 } else {
2136 // somewhere in this buffer!
2137 //cout << "off = " << off << " somewhere in " << *curbuf << std::endl;
2138 break;
2139 }
2140 }
2141
2142 if (off) {
2143 // add a reference to the front bit
2144 // insert it before curbuf (which we'll hose)
2145 //cout << "keeping front " << off << " of " << *curbuf << std::endl;
2146 _buffers.insert( curbuf, ptr( *curbuf, 0, off ) );
2147 _len += off;
2148 }
2149
2150 while (len > 0) {
2151 // partial?
2152 if (off + len < (*curbuf).length()) {
2153 //cout << "keeping end of " << *curbuf << ", losing first " << off+len << std::endl;
2154 if (claim_by)
2155 claim_by->append( *curbuf, off, len );
2156 (*curbuf).set_offset( off+len + (*curbuf).offset() ); // ignore beginning big
2157 (*curbuf).set_length( (*curbuf).length() - (len+off) );
2158 _len -= off+len;
2159 //cout << " now " << *curbuf << std::endl;
2160 break;
2161 }
2162
2163 // hose though the end
2164 unsigned howmuch = (*curbuf).length() - off;
2165 //cout << "discarding " << howmuch << " of " << *curbuf << std::endl;
2166 if (claim_by)
2167 claim_by->append( *curbuf, off, howmuch );
2168 _len -= (*curbuf).length();
2169 _buffers.erase( curbuf++ );
2170 len -= howmuch;
2171 off = 0;
2172 }
2173
2174 // splice in *replace (implement me later?)
2175
2176 last_p = begin(); // just in case we were in the removed region.
2177 }
2178
2179 void buffer::list::write(int off, int len, std::ostream& out) const
2180 {
2181 list s;
2182 s.substr_of(*this, off, len);
2183 for (std::list<ptr>::const_iterator it = s._buffers.begin();
2184 it != s._buffers.end();
2185 ++it)
2186 if (it->length())
2187 out.write(it->c_str(), it->length());
2188 /*iterator p(this, off);
2189 while (len > 0 && !p.end()) {
2190 int l = p.left_in_this_buf();
2191 if (l > len)
2192 l = len;
2193 out.write(p.c_str(), l);
2194 len -= l;
2195 }*/
2196 }
2197
2198 void buffer::list::encode_base64(buffer::list& o)
2199 {
2200 bufferptr bp(length() * 4 / 3 + 3);
2201 int l = ceph_armor(bp.c_str(), bp.c_str() + bp.length(), c_str(), c_str() + length());
2202 bp.set_length(l);
2203 o.push_back(std::move(bp));
2204 }
2205
2206 void buffer::list::decode_base64(buffer::list& e)
2207 {
2208 bufferptr bp(4 + ((e.length() * 3) / 4));
2209 int l = ceph_unarmor(bp.c_str(), bp.c_str() + bp.length(), e.c_str(), e.c_str() + e.length());
2210 if (l < 0) {
2211 std::ostringstream oss;
2212 oss << "decode_base64: decoding failed:\n";
2213 hexdump(oss);
2214 throw buffer::malformed_input(oss.str().c_str());
2215 }
2216 assert(l <= (int)bp.length());
2217 bp.set_length(l);
2218 push_back(std::move(bp));
2219 }
2220
2221
2222
2223 int buffer::list::read_file(const char *fn, std::string *error)
2224 {
2225 int fd = TEMP_FAILURE_RETRY(::open(fn, O_RDONLY));
2226 if (fd < 0) {
2227 int err = errno;
2228 std::ostringstream oss;
2229 oss << "can't open " << fn << ": " << cpp_strerror(err);
2230 *error = oss.str();
2231 return -err;
2232 }
2233
2234 struct stat st;
2235 memset(&st, 0, sizeof(st));
2236 if (::fstat(fd, &st) < 0) {
2237 int err = errno;
2238 std::ostringstream oss;
2239 oss << "bufferlist::read_file(" << fn << "): stat error: "
2240 << cpp_strerror(err);
2241 *error = oss.str();
2242 VOID_TEMP_FAILURE_RETRY(::close(fd));
2243 return -err;
2244 }
2245
2246 ssize_t ret = read_fd(fd, st.st_size);
2247 if (ret < 0) {
2248 std::ostringstream oss;
2249 oss << "bufferlist::read_file(" << fn << "): read error:"
2250 << cpp_strerror(ret);
2251 *error = oss.str();
2252 VOID_TEMP_FAILURE_RETRY(::close(fd));
2253 return ret;
2254 }
2255 else if (ret != st.st_size) {
2256 // Premature EOF.
2257 // Perhaps the file changed between stat() and read()?
2258 std::ostringstream oss;
2259 oss << "bufferlist::read_file(" << fn << "): warning: got premature EOF.";
2260 *error = oss.str();
2261 // not actually an error, but weird
2262 }
2263 VOID_TEMP_FAILURE_RETRY(::close(fd));
2264 return 0;
2265 }
2266
2267 ssize_t buffer::list::read_fd(int fd, size_t len)
2268 {
2269 // try zero copy first
2270 if (false && read_fd_zero_copy(fd, len) == 0) {
2271 // TODO fix callers to not require correct read size, which is not
2272 // available for raw_pipe until we actually inspect the data
2273 return 0;
2274 }
2275 bufferptr bp = buffer::create(len);
2276 ssize_t ret = safe_read(fd, (void*)bp.c_str(), len);
2277 if (ret >= 0) {
2278 bp.set_length(ret);
2279 append(std::move(bp));
2280 }
2281 return ret;
2282 }
2283
2284 int buffer::list::read_fd_zero_copy(int fd, size_t len)
2285 {
2286 #ifdef CEPH_HAVE_SPLICE
2287 try {
2288 append(buffer::create_zero_copy(len, fd, NULL));
2289 } catch (buffer::error_code &e) {
2290 return e.code;
2291 } catch (buffer::malformed_input &e) {
2292 return -EIO;
2293 }
2294 return 0;
2295 #else
2296 return -ENOTSUP;
2297 #endif
2298 }
2299
2300 int buffer::list::write_file(const char *fn, int mode)
2301 {
2302 int fd = TEMP_FAILURE_RETRY(::open(fn, O_WRONLY|O_CREAT|O_TRUNC, mode));
2303 if (fd < 0) {
2304 int err = errno;
2305 cerr << "bufferlist::write_file(" << fn << "): failed to open file: "
2306 << cpp_strerror(err) << std::endl;
2307 return -err;
2308 }
2309 int ret = write_fd(fd);
2310 if (ret) {
2311 cerr << "bufferlist::write_fd(" << fn << "): write_fd error: "
2312 << cpp_strerror(ret) << std::endl;
2313 VOID_TEMP_FAILURE_RETRY(::close(fd));
2314 return ret;
2315 }
2316 if (TEMP_FAILURE_RETRY(::close(fd))) {
2317 int err = errno;
2318 cerr << "bufferlist::write_file(" << fn << "): close error: "
2319 << cpp_strerror(err) << std::endl;
2320 return -err;
2321 }
2322 return 0;
2323 }
2324
// Write `bytes` bytes described by the first veclen entries of vec to fd
// at file offset `offset`, retrying on EINTR and after short writes.
// The vec entries are modified in place while recovering from a short
// write.  Returns 0 on success or a negative errno value.
static int do_writev(int fd, struct iovec *vec, uint64_t offset, unsigned veclen, unsigned bytes)
{
  ssize_t r = 0;
  while (bytes > 0) {
#ifdef HAVE_PWRITEV
    r = ::pwritev(fd, vec, veclen, offset);
#else
    // No pwritev: position the fd first, then use plain writev.
    r = ::lseek64(fd, offset, SEEK_SET);
    if (r != offset) {
      r = -errno;
      return r;
    }
    r = ::writev(fd, vec, veclen);
#endif
    if (r < 0) {
      if (errno == EINTR) {
        continue;
      }
      return -errno;
    }

    bytes -= r;
    offset += r;
    if (bytes == 0) {
      break;
    }

    // Short write: drop the entries that were written in full and trim
    // the front of the first one that was only partly written.
    for (; r > 0; ++vec, --veclen) {
      if ((size_t)r < vec[0].iov_len) {
        vec[0].iov_base = (char *)vec[0].iov_base + r;
        vec[0].iov_len -= r;
        break;
      }
      // drain this whole item
      r -= vec[0].iov_len;
    }
  }
  return 0;
}
2364
2365 int buffer::list::write_fd(int fd) const
2366 {
2367 if (can_zero_copy())
2368 return write_fd_zero_copy(fd);
2369
2370 // use writev!
2371 iovec iov[IOV_MAX];
2372 int iovlen = 0;
2373 ssize_t bytes = 0;
2374
2375 std::list<ptr>::const_iterator p = _buffers.begin();
2376 while (p != _buffers.end()) {
2377 if (p->length() > 0) {
2378 iov[iovlen].iov_base = (void *)p->c_str();
2379 iov[iovlen].iov_len = p->length();
2380 bytes += p->length();
2381 iovlen++;
2382 }
2383 ++p;
2384
2385 if (iovlen == IOV_MAX ||
2386 p == _buffers.end()) {
2387 iovec *start = iov;
2388 int num = iovlen;
2389 ssize_t wrote;
2390 retry:
2391 wrote = ::writev(fd, start, num);
2392 if (wrote < 0) {
2393 int err = errno;
2394 if (err == EINTR)
2395 goto retry;
2396 return -err;
2397 }
2398 if (wrote < bytes) {
2399 // partial write, recover!
2400 while ((size_t)wrote >= start[0].iov_len) {
2401 wrote -= start[0].iov_len;
2402 bytes -= start[0].iov_len;
2403 start++;
2404 num--;
2405 }
2406 if (wrote > 0) {
2407 start[0].iov_len -= wrote;
2408 start[0].iov_base = (char *)start[0].iov_base + wrote;
2409 bytes -= wrote;
2410 }
2411 goto retry;
2412 }
2413 iovlen = 0;
2414 bytes = 0;
2415 }
2416 }
2417 return 0;
2418 }
2419
2420 int buffer::list::write_fd(int fd, uint64_t offset) const
2421 {
2422 iovec iov[IOV_MAX];
2423
2424 std::list<ptr>::const_iterator p = _buffers.begin();
2425 uint64_t left_pbrs = _buffers.size();
2426 while (left_pbrs) {
2427 ssize_t bytes = 0;
2428 unsigned iovlen = 0;
2429 uint64_t size = MIN(left_pbrs, IOV_MAX);
2430 left_pbrs -= size;
2431 while (size > 0) {
2432 iov[iovlen].iov_base = (void *)p->c_str();
2433 iov[iovlen].iov_len = p->length();
2434 iovlen++;
2435 bytes += p->length();
2436 ++p;
2437 size--;
2438 }
2439
2440 int r = do_writev(fd, iov, offset, iovlen, bytes);
2441 if (r < 0)
2442 return r;
2443 offset += bytes;
2444 }
2445 return 0;
2446 }
2447
2448 int buffer::list::write_fd_zero_copy(int fd) const
2449 {
2450 if (!can_zero_copy())
2451 return -ENOTSUP;
2452 /* pass offset to each call to avoid races updating the fd seek
2453 * position, since the I/O may be non-blocking
2454 */
2455 int64_t offset = ::lseek(fd, 0, SEEK_CUR);
2456 int64_t *off_p = &offset;
2457 if (offset < 0 && errno != ESPIPE)
2458 return -errno;
2459 if (errno == ESPIPE)
2460 off_p = NULL;
2461 for (std::list<ptr>::const_iterator it = _buffers.begin();
2462 it != _buffers.end(); ++it) {
2463 int r = it->zero_copy_to_fd(fd, off_p);
2464 if (r < 0)
2465 return r;
2466 if (off_p)
2467 offset += it->length();
2468 }
2469 return 0;
2470 }
2471
2472 __u32 buffer::list::crc32c(__u32 crc) const
2473 {
2474 for (std::list<ptr>::const_iterator it = _buffers.begin();
2475 it != _buffers.end();
2476 ++it) {
2477 if (it->length()) {
2478 raw *r = it->get_raw();
2479 pair<size_t, size_t> ofs(it->offset(), it->offset() + it->length());
2480 pair<uint32_t, uint32_t> ccrc;
2481 if (r->get_crc(ofs, &ccrc)) {
2482 if (ccrc.first == crc) {
2483 // got it already
2484 crc = ccrc.second;
2485 if (buffer_track_crc)
2486 buffer_cached_crc++;
2487 } else {
2488 /* If we have cached crc32c(buf, v) for initial value v,
2489 * we can convert this to a different initial value v' by:
2490 * crc32c(buf, v') = crc32c(buf, v) ^ adjustment
2491 * where adjustment = crc32c(0*len(buf), v ^ v')
2492 *
2493 * http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
2494 * note, u for our crc32c implementation is 0
2495 */
2496 crc = ccrc.second ^ ceph_crc32c(ccrc.first ^ crc, NULL, it->length());
2497 if (buffer_track_crc)
2498 buffer_cached_crc_adjusted++;
2499 }
2500 } else {
2501 if (buffer_track_crc)
2502 buffer_missed_crc++;
2503 uint32_t base = crc;
2504 crc = ceph_crc32c(crc, (unsigned char*)it->c_str(), it->length());
2505 r->set_crc(ofs, make_pair(base, crc));
2506 }
2507 }
2508 }
2509 return crc;
2510 }
2511
2512 void buffer::list::invalidate_crc()
2513 {
2514 for (std::list<ptr>::const_iterator p = _buffers.begin(); p != _buffers.end(); ++p) {
2515 raw *r = p->get_raw();
2516 if (r) {
2517 r->invalidate_crc();
2518 }
2519 }
2520 }
2521
2522 /**
2523 * Binary write all contents to a C++ stream
2524 */
2525 void buffer::list::write_stream(std::ostream &out) const
2526 {
2527 for (std::list<ptr>::const_iterator p = _buffers.begin(); p != _buffers.end(); ++p) {
2528 if (p->length() > 0) {
2529 out.write(p->c_str(), p->length());
2530 }
2531 }
2532 }
2533
2534
2535 void buffer::list::hexdump(std::ostream &out, bool trailing_newline) const
2536 {
2537 if (!length())
2538 return;
2539
2540 std::ios_base::fmtflags original_flags = out.flags();
2541
2542 // do our best to match the output of hexdump -C, for better
2543 // diff'ing!
2544
2545 out.setf(std::ios::right);
2546 out.fill('0');
2547
2548 unsigned per = 16;
2549 bool was_zeros = false, did_star = false;
2550 for (unsigned o=0; o<length(); o += per) {
2551 bool row_is_zeros = false;
2552 if (o + per < length()) {
2553 row_is_zeros = true;
2554 for (unsigned i=0; i<per && o+i<length(); i++) {
2555 if ((*this)[o+i]) {
2556 row_is_zeros = false;
2557 }
2558 }
2559 if (row_is_zeros) {
2560 if (was_zeros) {
2561 if (!did_star) {
2562 out << "\n*";
2563 did_star = true;
2564 }
2565 continue;
2566 }
2567 was_zeros = true;
2568 } else {
2569 was_zeros = false;
2570 did_star = false;
2571 }
2572 }
2573 if (o)
2574 out << "\n";
2575 out << std::hex << std::setw(8) << o << " ";
2576
2577 unsigned i;
2578 for (i=0; i<per && o+i<length(); i++) {
2579 if (i == 8)
2580 out << ' ';
2581 out << " " << std::setw(2) << ((unsigned)(*this)[o+i] & 0xff);
2582 }
2583 for (; i<per; i++) {
2584 if (i == 8)
2585 out << ' ';
2586 out << " ";
2587 }
2588
2589 out << " |";
2590 for (i=0; i<per && o+i<length(); i++) {
2591 char c = (*this)[o+i];
2592 if (isupper(c) || islower(c) || isdigit(c) || c == ' ' || ispunct(c))
2593 out << c;
2594 else
2595 out << '.';
2596 }
2597 out << '|' << std::dec;
2598 }
2599 if (trailing_newline) {
2600 out << "\n" << std::hex << std::setw(8) << length();
2601 out << "\n";
2602 }
2603
2604 out.flags(original_flags);
2605 }
2606
2607
2608 buffer::list buffer::list::static_from_mem(char* c, size_t l) {
2609 list bl;
2610 bl.push_back(ptr(create_static(l, c)));
2611 return bl;
2612 }
2613
2614 buffer::list buffer::list::static_from_cstring(char* c) {
2615 return static_from_mem(c, std::strlen(c));
2616 }
2617
2618 buffer::list buffer::list::static_from_string(string& s) {
2619 // C++14 just has string::data return a char* from a non-const
2620 // string.
2621 return static_from_mem(const_cast<char*>(s.data()), s.length());
2622 // But the way buffer::list mostly doesn't work in a sane way with
2623 // const makes me generally sad.
2624 }
2625
2626 std::ostream& buffer::operator<<(std::ostream& out, const buffer::raw &r) {
2627 return out << "buffer::raw(" << (void*)r.data << " len " << r.len << " nref " << r.nref.load() << ")";
2628 }
2629
2630 std::ostream& buffer::operator<<(std::ostream& out, const buffer::ptr& bp) {
2631 if (bp.have_raw())
2632 out << "buffer::ptr(" << bp.offset() << "~" << bp.length()
2633 << " " << (void*)bp.c_str()
2634 << " in raw " << (void*)bp.raw_c_str()
2635 << " len " << bp.raw_length()
2636 << " nref " << bp.raw_nref() << ")";
2637 else
2638 out << "buffer:ptr(" << bp.offset() << "~" << bp.length() << " no raw)";
2639 return out;
2640 }
2641
2642 std::ostream& buffer::operator<<(std::ostream& out, const buffer::list& bl) {
2643 out << "buffer::list(len=" << bl.length() << "," << std::endl;
2644
2645 std::list<buffer::ptr>::const_iterator it = bl.buffers().begin();
2646 while (it != bl.buffers().end()) {
2647 out << "\t" << *it;
2648 if (++it == bl.buffers().end()) break;
2649 out << "," << std::endl;
2650 }
2651 out << std::endl << ")";
2652 return out;
2653 }
2654
2655 std::ostream& buffer::operator<<(std::ostream& out, const buffer::error& e)
2656 {
2657 return out << e.what();
2658 }
2659
// One MEMPOOL_DEFINE_OBJECT_FACTORY invocation per buffer::raw_* type; each
// names the type, a factory identifier, and the buffer_meta pool.
// NOTE(review): the macro is defined outside this file (presumably in
// include/mempool.h); what it expands to is not visible here -- confirm there.
MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_malloc, buffer_raw_malloc,
                              buffer_meta);
// NOTE(review): "buffer_raw_mmap_pagse" looks like a misspelling of
// "..._pages". It is an identifier, so check for a matching declaration
// elsewhere before renaming it.
MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_mmap_pages, buffer_raw_mmap_pagse,
                              buffer_meta);
MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_posix_aligned,
                              buffer_raw_posix_aligned, buffer_meta);
// raw_pipe only gets a factory when CEPH_HAVE_SPLICE is defined
#ifdef CEPH_HAVE_SPLICE
MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_pipe, buffer_raw_pipe, buffer_meta);
#endif
MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_char, buffer_raw_char, buffer_meta);
MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_claimed_char, buffer_raw_claimed_char,
                              buffer_meta);
MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_unshareable, buffer_raw_unshareable,
                              buffer_meta);
MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_static, buffer_raw_static,
                              buffer_meta);
2676