]> git.proxmox.com Git - ceph.git/blob - ceph/src/common/buffer.cc
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / common / buffer.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include "include/compat.h"
16 #include "include/mempool.h"
17 #include "armor.h"
18 #include "common/environment.h"
19 #include "common/errno.h"
20 #include "common/safe_io.h"
21 #include "common/simple_spin.h"
22 #include "common/strtol.h"
23 #include "common/likely.h"
24 #include "common/valgrind.h"
25 #include "common/deleter.h"
26 #include "include/atomic.h"
27 #include "common/RWLock.h"
28 #include "include/types.h"
29 #include "include/compat.h"
30 #include "include/inline_memory.h"
31 #include "include/scope_guard.h"
32 #if defined(HAVE_XIO)
33 #include "msg/xio/XioMsg.h"
34 #endif
35
36 #include <errno.h>
37 #include <fstream>
38 #include <sstream>
39 #include <sys/uio.h>
40 #include <limits.h>
41
42 #include <atomic>
43 #include <ostream>
44
// Allocation granularity: the smaller of the page size and 4096 bytes.
#define CEPH_BUFFER_ALLOC_UNIT  (MIN(CEPH_PAGE_SIZE, 4096))
// One allocation unit minus the space taken by a raw_combined header object.
#define CEPH_BUFFER_APPEND_SIZE (CEPH_BUFFER_ALLOC_UNIT - sizeof(raw_combined))

// bdout/bendl bracket a debug trace statement: `bdout << ... << bendl;`.
// Without BUFFER_DEBUG the statement is wrapped in `if (0) { ... }`; with it,
// the output is serialized under buffer_debug_lock.
#if !defined(BUFFER_DEBUG)
# define bdout if (0) { std::cout
# define bendl std::endl; }
#else
static std::atomic_flag buffer_debug_lock = ATOMIC_FLAG_INIT;
# define bdout { simple_spin_lock(&buffer_debug_lock); std::cout
# define bendl std::endl; simple_spin_unlock(&buffer_debug_lock); }
#endif
56
57 static atomic_t buffer_total_alloc;
58 static atomic64_t buffer_history_alloc_bytes;
59 static atomic64_t buffer_history_alloc_num;
60 const bool buffer_track_alloc = get_env_bool("CEPH_BUFFER_TRACK");
61
62 namespace {
63 void inc_total_alloc(unsigned len) {
64 if (buffer_track_alloc)
65 buffer_total_alloc.add(len);
66 }
67
68 void dec_total_alloc(unsigned len) {
69 if (buffer_track_alloc)
70 buffer_total_alloc.sub(len);
71 }
72
73 void inc_history_alloc(uint64_t len) {
74 if (buffer_track_alloc) {
75 buffer_history_alloc_bytes.add(len);
76 buffer_history_alloc_num.inc();
77 }
78 }
79 }
80
81
82 int buffer::get_total_alloc() {
83 return buffer_total_alloc.read();
84 }
85 uint64_t buffer::get_history_alloc_bytes() {
86 return buffer_history_alloc_bytes.read();
87 }
88 uint64_t buffer::get_history_alloc_num() {
89 return buffer_history_alloc_num.read();
90 }
91
92 static atomic_t buffer_cached_crc;
93 static atomic_t buffer_cached_crc_adjusted;
94 static atomic_t buffer_missed_crc;
95 static bool buffer_track_crc = get_env_bool("CEPH_BUFFER_TRACK");
96
97 void buffer::track_cached_crc(bool b) {
98 buffer_track_crc = b;
99 }
100 int buffer::get_cached_crc() {
101 return buffer_cached_crc.read();
102 }
103 int buffer::get_cached_crc_adjusted() {
104 return buffer_cached_crc_adjusted.read();
105 }
106
107 int buffer::get_missed_crc() {
108 return buffer_missed_crc.read();
109 }
110
111 static atomic_t buffer_c_str_accesses;
112 static bool buffer_track_c_str = get_env_bool("CEPH_BUFFER_TRACK");
113
114 void buffer::track_c_str(bool b) {
115 buffer_track_c_str = b;
116 }
117 int buffer::get_c_str_accesses() {
118 return buffer_c_str_accesses.read();
119 }
120
121 static atomic_t buffer_max_pipe_size;
122 int update_max_pipe_size() {
123 #ifdef CEPH_HAVE_SETPIPE_SZ
124 char buf[32];
125 int r;
126 std::string err;
127 struct stat stat_result;
128 if (::stat(PROCPREFIX "/proc/sys/fs/pipe-max-size", &stat_result) == -1)
129 return -errno;
130 r = safe_read_file(PROCPREFIX "/proc/sys/fs/", "pipe-max-size",
131 buf, sizeof(buf) - 1);
132 if (r < 0)
133 return r;
134 buf[r] = '\0';
135 size_t size = strict_strtol(buf, 10, &err);
136 if (!err.empty())
137 return -EIO;
138 buffer_max_pipe_size.set(size);
139 #endif
140 return 0;
141 }
142
// Return the maximum pipe size.  With CEPH_HAVE_SETPIPE_SZ, a non-zero
// cached value is returned directly; otherwise the cache is refreshed and,
// if that succeeds, the refreshed value is returned.  In all other cases the
// hard-coded default below is used.
size_t get_max_pipe_size()
{
#ifdef CEPH_HAVE_SETPIPE_SZ
  size_t cached = buffer_max_pipe_size.read();
  if (cached != 0)
    return cached;
  const bool refreshed = (update_max_pipe_size() == 0);
  if (refreshed)
    return buffer_max_pipe_size.read();
#endif
  // this is the max size hardcoded in linux before 2.6.35
  return 65536;
}
154
155 const char * buffer::error::what() const throw () {
156 return "buffer::exception";
157 }
158 const char * buffer::bad_alloc::what() const throw () {
159 return "buffer::bad_alloc";
160 }
161 const char * buffer::end_of_buffer::what() const throw () {
162 return "buffer::end_of_buffer";
163 }
164 const char * buffer::malformed_input::what() const throw () {
165 return buf;
166 }
167 buffer::error_code::error_code(int error) :
168 buffer::malformed_input(cpp_strerror(error).c_str()), code(error) {}
169
170 class buffer::raw {
171 public:
172 char *data;
173 unsigned len;
174 atomic_t nref;
175
176 mutable std::atomic_flag crc_spinlock = ATOMIC_FLAG_INIT;
177 map<pair<size_t, size_t>, pair<uint32_t, uint32_t> > crc_map;
178
179 explicit raw(unsigned l)
180 : data(NULL), len(l), nref(0)
181 { }
182 raw(char *c, unsigned l)
183 : data(c), len(l), nref(0)
184 { }
185 virtual ~raw() {}
186
187 // no copying.
188 // cppcheck-suppress noExplicitConstructor
189 raw(const raw &other);
190 const raw& operator=(const raw &other);
191
192 virtual char *get_data() {
193 return data;
194 }
195 virtual raw* clone_empty() = 0;
196 raw *clone() {
197 raw *c = clone_empty();
198 memcpy(c->data, data, len);
199 return c;
200 }
201 virtual bool can_zero_copy() const {
202 return false;
203 }
204 virtual int zero_copy_to_fd(int fd, loff_t *offset) {
205 return -ENOTSUP;
206 }
207 virtual bool is_page_aligned() {
208 return ((long)data & ~CEPH_PAGE_MASK) == 0;
209 }
210 bool is_n_page_sized() {
211 return (len & ~CEPH_PAGE_MASK) == 0;
212 }
213 virtual bool is_shareable() {
214 // true if safe to reference/share the existing buffer copy
215 // false if it is not safe to share the buffer, e.g., due to special
216 // and/or registered memory that is scarce
217 return true;
218 }
219 bool get_crc(const pair<size_t, size_t> &fromto,
220 pair<uint32_t, uint32_t> *crc) const {
221 simple_spin_lock(&crc_spinlock);
222 map<pair<size_t, size_t>, pair<uint32_t, uint32_t> >::const_iterator i =
223 crc_map.find(fromto);
224 if (i == crc_map.end()) {
225 simple_spin_unlock(&crc_spinlock);
226 return false;
227 }
228 *crc = i->second;
229 simple_spin_unlock(&crc_spinlock);
230 return true;
231 }
232 void set_crc(const pair<size_t, size_t> &fromto,
233 const pair<uint32_t, uint32_t> &crc) {
234 simple_spin_lock(&crc_spinlock);
235 crc_map[fromto] = crc;
236 simple_spin_unlock(&crc_spinlock);
237 }
238 void invalidate_crc() {
239 simple_spin_lock(&crc_spinlock);
240 if (crc_map.size() != 0) {
241 crc_map.clear();
242 }
243 simple_spin_unlock(&crc_spinlock);
244 }
245 };
246
247 MEMPOOL_DEFINE_FACTORY(char, char, buffer_data);
248
249 /*
250 * raw_combined is always placed within a single allocation along
251 * with the data buffer. the data goes at the beginning, and
252 * raw_combined at the end.
253 */
254 class buffer::raw_combined : public buffer::raw {
255 size_t alignment;
256 public:
257 raw_combined(char *dataptr, unsigned l, unsigned align=0)
258 : raw(dataptr, l),
259 alignment(align) {
260 inc_total_alloc(len);
261 inc_history_alloc(len);
262 }
263 ~raw_combined() override {
264 dec_total_alloc(len);
265 }
266 raw* clone_empty() override {
267 return create(len, alignment);
268 }
269
270 static raw_combined *create(unsigned len, unsigned align=0) {
271 if (!align)
272 align = sizeof(size_t);
273 size_t rawlen = ROUND_UP_TO(sizeof(buffer::raw_combined),
274 alignof(buffer::raw_combined));
275 size_t datalen = ROUND_UP_TO(len, alignof(buffer::raw_combined));
276
277 char *ptr = mempool::buffer_data::alloc_char.allocate_aligned(
278 rawlen + datalen, align);
279 if (!ptr)
280 throw bad_alloc();
281
282 // actual data first, since it has presumably larger alignment restriction
283 // then put the raw_combined at the end
284 return new (ptr + datalen) raw_combined(ptr, len, align);
285 }
286
287 static void operator delete(void *ptr) {
288 raw_combined *raw = (raw_combined *)ptr;
289 size_t rawlen = ROUND_UP_TO(sizeof(buffer::raw_combined),
290 alignof(buffer::raw_combined));
291 size_t datalen = ROUND_UP_TO(raw->len, alignof(buffer::raw_combined));
292 mempool::buffer_data::alloc_char.deallocate_aligned(
293 raw->data, rawlen + datalen);
294 }
295 };
296
297 class buffer::raw_malloc : public buffer::raw {
298 public:
299 MEMPOOL_CLASS_HELPERS();
300
301 explicit raw_malloc(unsigned l) : raw(l) {
302 if (len) {
303 data = (char *)malloc(len);
304 if (!data)
305 throw bad_alloc();
306 } else {
307 data = 0;
308 }
309 inc_total_alloc(len);
310 inc_history_alloc(len);
311 bdout << "raw_malloc " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl;
312 }
313 raw_malloc(unsigned l, char *b) : raw(b, l) {
314 inc_total_alloc(len);
315 bdout << "raw_malloc " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl;
316 }
317 ~raw_malloc() override {
318 free(data);
319 dec_total_alloc(len);
320 bdout << "raw_malloc " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl;
321 }
322 raw* clone_empty() override {
323 return new raw_malloc(len);
324 }
325 };
326
327 #ifndef __CYGWIN__
328 class buffer::raw_mmap_pages : public buffer::raw {
329 public:
330 MEMPOOL_CLASS_HELPERS();
331
332 explicit raw_mmap_pages(unsigned l) : raw(l) {
333 data = (char*)::mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
334 if (!data)
335 throw bad_alloc();
336 inc_total_alloc(len);
337 inc_history_alloc(len);
338 bdout << "raw_mmap " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl;
339 }
340 ~raw_mmap_pages() override {
341 ::munmap(data, len);
342 dec_total_alloc(len);
343 bdout << "raw_mmap " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl;
344 }
345 raw* clone_empty() override {
346 return new raw_mmap_pages(len);
347 }
348 };
349
350 class buffer::raw_posix_aligned : public buffer::raw {
351 unsigned align;
352 public:
353 MEMPOOL_CLASS_HELPERS();
354
355 raw_posix_aligned(unsigned l, unsigned _align) : raw(l) {
356 align = _align;
357 assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
358 data = mempool::buffer_data::alloc_char.allocate_aligned(len, align);
359 if (!data)
360 throw bad_alloc();
361 inc_total_alloc(len);
362 inc_history_alloc(len);
363 bdout << "raw_posix_aligned " << this << " alloc " << (void *)data << " l=" << l << ", align=" << align << " total_alloc=" << buffer::get_total_alloc() << bendl;
364 }
365 ~raw_posix_aligned() override {
366 mempool::buffer_data::alloc_char.deallocate_aligned(data, len);
367 dec_total_alloc(len);
368 bdout << "raw_posix_aligned " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl;
369 }
370 raw* clone_empty() override {
371 return new raw_posix_aligned(len, align);
372 }
373 };
374 #endif
375
376 #ifdef __CYGWIN__
377 class buffer::raw_hack_aligned : public buffer::raw {
378 unsigned align;
379 char *realdata;
380 public:
381 raw_hack_aligned(unsigned l, unsigned _align) : raw(l) {
382 align = _align;
383 realdata = new char[len+align-1];
384 unsigned off = ((unsigned)realdata) & (align-1);
385 if (off)
386 data = realdata + align - off;
387 else
388 data = realdata;
389 inc_total_alloc(len+align-1);
390 inc_history_alloc(len+align-1);
391 //cout << "hack aligned " << (unsigned)data
392 //<< " in raw " << (unsigned)realdata
393 //<< " off " << off << std::endl;
394 assert(((unsigned)data & (align-1)) == 0);
395 }
396 ~raw_hack_aligned() {
397 delete[] realdata;
398 dec_total_alloc(len+align-1);
399 }
400 raw* clone_empty() {
401 return new raw_hack_aligned(len, align);
402 }
403 };
404 #endif
405
406 #ifdef CEPH_HAVE_SPLICE
407 class buffer::raw_pipe : public buffer::raw {
408 public:
409 MEMPOOL_CLASS_HELPERS();
410
411 explicit raw_pipe(unsigned len) : raw(len), source_consumed(false) {
412 size_t max = get_max_pipe_size();
413 if (len > max) {
414 bdout << "raw_pipe: requested length " << len
415 << " > max length " << max << bendl;
416 throw malformed_input("length larger than max pipe size");
417 }
418 pipefds[0] = -1;
419 pipefds[1] = -1;
420
421 int r;
422 if (::pipe(pipefds) == -1) {
423 r = -errno;
424 bdout << "raw_pipe: error creating pipe: " << cpp_strerror(r) << bendl;
425 throw error_code(r);
426 }
427
428 r = set_nonblocking(pipefds);
429 if (r < 0) {
430 bdout << "raw_pipe: error setting nonblocking flag on temp pipe: "
431 << cpp_strerror(r) << bendl;
432 throw error_code(r);
433 }
434
435 r = set_pipe_size(pipefds, len);
436 if (r < 0) {
437 bdout << "raw_pipe: could not set pipe size" << bendl;
438 // continue, since the pipe should become large enough as needed
439 }
440
441 inc_total_alloc(len);
442 inc_history_alloc(len);
443 bdout << "raw_pipe " << this << " alloc " << len << " "
444 << buffer::get_total_alloc() << bendl;
445 }
446
447 ~raw_pipe() override {
448 if (data)
449 free(data);
450 close_pipe(pipefds);
451 dec_total_alloc(len);
452 bdout << "raw_pipe " << this << " free " << (void *)data << " "
453 << buffer::get_total_alloc() << bendl;
454 }
455
456 bool can_zero_copy() const override {
457 return true;
458 }
459
460 int set_source(int fd, loff_t *off) {
461 int flags = SPLICE_F_NONBLOCK;
462 ssize_t r = safe_splice(fd, off, pipefds[1], NULL, len, flags);
463 if (r < 0) {
464 bdout << "raw_pipe: error splicing into pipe: " << cpp_strerror(r)
465 << bendl;
466 return r;
467 }
468 // update length with actual amount read
469 len = r;
470 return 0;
471 }
472
473 int zero_copy_to_fd(int fd, loff_t *offset) override {
474 assert(!source_consumed);
475 int flags = SPLICE_F_NONBLOCK;
476 ssize_t r = safe_splice_exact(pipefds[0], NULL, fd, offset, len, flags);
477 if (r < 0) {
478 bdout << "raw_pipe: error splicing from pipe to fd: "
479 << cpp_strerror(r) << bendl;
480 return r;
481 }
482 source_consumed = true;
483 return 0;
484 }
485
486 buffer::raw* clone_empty() override {
487 // cloning doesn't make sense for pipe-based buffers,
488 // and is only used by unit tests for other types of buffers
489 return NULL;
490 }
491
492 char *get_data() override {
493 if (data)
494 return data;
495 return copy_pipe(pipefds);
496 }
497
498 private:
499 int set_pipe_size(int *fds, long length) {
500 #ifdef CEPH_HAVE_SETPIPE_SZ
501 if (::fcntl(fds[1], F_SETPIPE_SZ, length) == -1) {
502 int r = -errno;
503 if (r == -EPERM) {
504 // pipe limit must have changed - EPERM means we requested
505 // more than the maximum size as an unprivileged user
506 update_max_pipe_size();
507 throw malformed_input("length larger than new max pipe size");
508 }
509 return r;
510 }
511 #endif
512 return 0;
513 }
514
515 int set_nonblocking(int *fds) {
516 if (::fcntl(fds[0], F_SETFL, O_NONBLOCK) == -1)
517 return -errno;
518 if (::fcntl(fds[1], F_SETFL, O_NONBLOCK) == -1)
519 return -errno;
520 return 0;
521 }
522
523 static void close_pipe(const int *fds) {
524 if (fds[0] >= 0)
525 VOID_TEMP_FAILURE_RETRY(::close(fds[0]));
526 if (fds[1] >= 0)
527 VOID_TEMP_FAILURE_RETRY(::close(fds[1]));
528 }
529 char *copy_pipe(int *fds) {
530 /* preserve original pipe contents by copying into a temporary
531 * pipe before reading.
532 */
533 int tmpfd[2];
534 int r;
535
536 assert(!source_consumed);
537 assert(fds[0] >= 0);
538
539 if (::pipe(tmpfd) == -1) {
540 r = -errno;
541 bdout << "raw_pipe: error creating temp pipe: " << cpp_strerror(r)
542 << bendl;
543 throw error_code(r);
544 }
545 auto sg = make_scope_guard([=] { close_pipe(tmpfd); });
546 r = set_nonblocking(tmpfd);
547 if (r < 0) {
548 bdout << "raw_pipe: error setting nonblocking flag on temp pipe: "
549 << cpp_strerror(r) << bendl;
550 throw error_code(r);
551 }
552 r = set_pipe_size(tmpfd, len);
553 if (r < 0) {
554 bdout << "raw_pipe: error setting pipe size on temp pipe: "
555 << cpp_strerror(r) << bendl;
556 }
557 int flags = SPLICE_F_NONBLOCK;
558 if (::tee(fds[0], tmpfd[1], len, flags) == -1) {
559 r = errno;
560 bdout << "raw_pipe: error tee'ing into temp pipe: " << cpp_strerror(r)
561 << bendl;
562 throw error_code(r);
563 }
564 data = (char *)malloc(len);
565 if (!data) {
566 throw bad_alloc();
567 }
568 r = safe_read(tmpfd[0], data, len);
569 if (r < (ssize_t)len) {
570 bdout << "raw_pipe: error reading from temp pipe:" << cpp_strerror(r)
571 << bendl;
572 free(data);
573 data = NULL;
574 throw error_code(r);
575 }
576 return data;
577 }
578 bool source_consumed;
579 int pipefds[2];
580 };
581 #endif // CEPH_HAVE_SPLICE
582
583 /*
584 * primitive buffer types
585 */
586 class buffer::raw_char : public buffer::raw {
587 public:
588 MEMPOOL_CLASS_HELPERS();
589
590 explicit raw_char(unsigned l) : raw(l) {
591 if (len)
592 data = mempool::buffer_data::alloc_char.allocate(len);
593 else
594 data = 0;
595 inc_total_alloc(len);
596 inc_history_alloc(len);
597 bdout << "raw_char " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl;
598 }
599 raw_char(unsigned l, char *b) : raw(b, l) {
600 inc_total_alloc(len);
601 bdout << "raw_char " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl;
602 }
603 ~raw_char() override {
604 if (data)
605 mempool::buffer_data::alloc_char.deallocate(data, len);
606 dec_total_alloc(len);
607 bdout << "raw_char " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl;
608 }
609 raw* clone_empty() override {
610 return new raw_char(len);
611 }
612 };
613
614 class buffer::raw_unshareable : public buffer::raw {
615 public:
616 MEMPOOL_CLASS_HELPERS();
617
618 explicit raw_unshareable(unsigned l) : raw(l) {
619 if (len)
620 data = new char[len];
621 else
622 data = 0;
623 }
624 raw_unshareable(unsigned l, char *b) : raw(b, l) {
625 }
626 raw* clone_empty() override {
627 return new raw_char(len);
628 }
629 bool is_shareable() override {
630 return false; // !shareable, will force make_shareable()
631 }
632 ~raw_unshareable() override {
633 delete[] data;
634 }
635 };
636
637 class buffer::raw_static : public buffer::raw {
638 public:
639 MEMPOOL_CLASS_HELPERS();
640
641 raw_static(const char *d, unsigned l) : raw((char*)d, l) { }
642 ~raw_static() override {}
643 raw* clone_empty() override {
644 return new buffer::raw_char(len);
645 }
646 };
647
648 class buffer::raw_claim_buffer : public buffer::raw {
649 deleter del;
650 public:
651 raw_claim_buffer(const char *b, unsigned l, deleter d)
652 : raw((char*)b, l), del(std::move(d)) { }
653 ~raw_claim_buffer() override {}
654 raw* clone_empty() override {
655 return new buffer::raw_char(len);
656 }
657 };
658
659 #if defined(HAVE_XIO)
660 class buffer::xio_msg_buffer : public buffer::raw {
661 private:
662 XioDispatchHook* m_hook;
663 public:
664 xio_msg_buffer(XioDispatchHook* _m_hook, const char *d,
665 unsigned l) :
666 raw((char*)d, l), m_hook(_m_hook->get()) {}
667
668 bool is_shareable() { return false; }
669 static void operator delete(void *p)
670 {
671 xio_msg_buffer *buf = static_cast<xio_msg_buffer*>(p);
672 // return hook ref (counts against pool); it appears illegal
673 // to do this in our dtor, because this fires after that
674 buf->m_hook->put();
675 }
676 raw* clone_empty() {
677 return new buffer::raw_char(len);
678 }
679 };
680
681 class buffer::xio_mempool : public buffer::raw {
682 public:
683 struct xio_reg_mem *mp;
684 xio_mempool(struct xio_reg_mem *_mp, unsigned l) :
685 raw((char*)_mp->addr, l), mp(_mp)
686 { }
687 ~xio_mempool() {}
688 raw* clone_empty() {
689 return new buffer::raw_char(len);
690 }
691 };
692
693 struct xio_reg_mem* get_xio_mp(const buffer::ptr& bp)
694 {
695 buffer::xio_mempool *mb = dynamic_cast<buffer::xio_mempool*>(bp.get_raw());
696 if (mb) {
697 return mb->mp;
698 }
699 return NULL;
700 }
701
702 buffer::raw* buffer::create_msg(
703 unsigned len, char *buf, XioDispatchHook* m_hook) {
704 XioPool& pool = m_hook->get_pool();
705 buffer::raw* bp =
706 static_cast<buffer::raw*>(pool.alloc(sizeof(xio_msg_buffer)));
707 new (bp) xio_msg_buffer(m_hook, buf, len);
708 return bp;
709 }
710 #endif /* HAVE_XIO */
711
712 buffer::raw* buffer::copy(const char *c, unsigned len) {
713 raw* r = buffer::create_aligned(len, sizeof(size_t));
714 memcpy(r->data, c, len);
715 return r;
716 }
717
718 buffer::raw* buffer::create(unsigned len) {
719 return buffer::create_aligned(len, sizeof(size_t));
720 }
721 buffer::raw* buffer::claim_char(unsigned len, char *buf) {
722 return new raw_char(len, buf);
723 }
724 buffer::raw* buffer::create_malloc(unsigned len) {
725 return new raw_malloc(len);
726 }
727 buffer::raw* buffer::claim_malloc(unsigned len, char *buf) {
728 return new raw_malloc(len, buf);
729 }
730 buffer::raw* buffer::create_static(unsigned len, char *buf) {
731 return new raw_static(buf, len);
732 }
733 buffer::raw* buffer::claim_buffer(unsigned len, char *buf, deleter del) {
734 return new raw_claim_buffer(buf, len, std::move(del));
735 }
736
737 buffer::raw* buffer::create_aligned(unsigned len, unsigned align) {
738 // If alignment is a page multiple, use a separate buffer::raw to
739 // avoid fragmenting the heap.
740 //
741 // Somewhat unexpectedly, I see consistently better performance
742 // from raw_combined than from raw even when the allocation size is
743 // a page multiple (but alignment is not).
744 //
745 // I also see better performance from a separate buffer::raw once the
746 // size passes 8KB.
747 if ((align & ~CEPH_PAGE_MASK) == 0 ||
748 len >= CEPH_PAGE_SIZE * 2) {
749 #ifndef __CYGWIN__
750 return new raw_posix_aligned(len, align);
751 #else
752 return new raw_hack_aligned(len, align);
753 #endif
754 }
755 return raw_combined::create(len, align);
756 }
757
758 buffer::raw* buffer::create_page_aligned(unsigned len) {
759 return create_aligned(len, CEPH_PAGE_SIZE);
760 }
761
762 buffer::raw* buffer::create_zero_copy(unsigned len, int fd, int64_t *offset) {
763 #ifdef CEPH_HAVE_SPLICE
764 buffer::raw_pipe* buf = new raw_pipe(len);
765 int r = buf->set_source(fd, (loff_t*)offset);
766 if (r < 0) {
767 delete buf;
768 throw error_code(r);
769 }
770 return buf;
771 #else
772 throw error_code(-ENOTSUP);
773 #endif
774 }
775
776 buffer::raw* buffer::create_unshareable(unsigned len) {
777 return new raw_unshareable(len);
778 }
779
780 buffer::ptr::ptr(raw *r) : _raw(r), _off(0), _len(r->len) // no lock needed; this is an unref raw.
781 {
782 r->nref.inc();
783 bdout << "ptr " << this << " get " << _raw << bendl;
784 }
785 buffer::ptr::ptr(unsigned l) : _off(0), _len(l)
786 {
787 _raw = create(l);
788 _raw->nref.inc();
789 bdout << "ptr " << this << " get " << _raw << bendl;
790 }
791 buffer::ptr::ptr(const char *d, unsigned l) : _off(0), _len(l) // ditto.
792 {
793 _raw = copy(d, l);
794 _raw->nref.inc();
795 bdout << "ptr " << this << " get " << _raw << bendl;
796 }
797 buffer::ptr::ptr(const ptr& p) : _raw(p._raw), _off(p._off), _len(p._len)
798 {
799 if (_raw) {
800 _raw->nref.inc();
801 bdout << "ptr " << this << " get " << _raw << bendl;
802 }
803 }
804 buffer::ptr::ptr(ptr&& p) noexcept : _raw(p._raw), _off(p._off), _len(p._len)
805 {
806 p._raw = nullptr;
807 p._off = p._len = 0;
808 }
809 buffer::ptr::ptr(const ptr& p, unsigned o, unsigned l)
810 : _raw(p._raw), _off(p._off + o), _len(l)
811 {
812 assert(o+l <= p._len);
813 assert(_raw);
814 _raw->nref.inc();
815 bdout << "ptr " << this << " get " << _raw << bendl;
816 }
817 buffer::ptr& buffer::ptr::operator= (const ptr& p)
818 {
819 if (p._raw) {
820 p._raw->nref.inc();
821 bdout << "ptr " << this << " get " << _raw << bendl;
822 }
823 buffer::raw *raw = p._raw;
824 release();
825 if (raw) {
826 _raw = raw;
827 _off = p._off;
828 _len = p._len;
829 } else {
830 _off = _len = 0;
831 }
832 return *this;
833 }
834 buffer::ptr& buffer::ptr::operator= (ptr&& p) noexcept
835 {
836 release();
837 buffer::raw *raw = p._raw;
838 if (raw) {
839 _raw = raw;
840 _off = p._off;
841 _len = p._len;
842 p._raw = nullptr;
843 p._off = p._len = 0;
844 } else {
845 _off = _len = 0;
846 }
847 return *this;
848 }
849
850 buffer::raw *buffer::ptr::clone()
851 {
852 return _raw->clone();
853 }
854
855 buffer::ptr& buffer::ptr::make_shareable() {
856 if (_raw && !_raw->is_shareable()) {
857 buffer::raw *tr = _raw;
858 _raw = tr->clone();
859 _raw->nref.set(1);
860 if (unlikely(tr->nref.dec() == 0)) {
861 ANNOTATE_HAPPENS_AFTER(&tr->nref);
862 ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&tr->nref);
863 delete tr;
864 } else {
865 ANNOTATE_HAPPENS_BEFORE(&tr->nref);
866 }
867 }
868 return *this;
869 }
870
871 void buffer::ptr::swap(ptr& other)
872 {
873 raw *r = _raw;
874 unsigned o = _off;
875 unsigned l = _len;
876 _raw = other._raw;
877 _off = other._off;
878 _len = other._len;
879 other._raw = r;
880 other._off = o;
881 other._len = l;
882 }
883
884 void buffer::ptr::release()
885 {
886 if (_raw) {
887 bdout << "ptr " << this << " release " << _raw << bendl;
888 if (_raw->nref.dec() == 0) {
889 //cout << "hosing raw " << (void*)_raw << " len " << _raw->len << std::endl;
890 ANNOTATE_HAPPENS_AFTER(&_raw->nref);
891 ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&_raw->nref);
892 delete _raw; // dealloc old (if any)
893 } else {
894 ANNOTATE_HAPPENS_BEFORE(&_raw->nref);
895 }
896 _raw = 0;
897 }
898 }
899
900 bool buffer::ptr::at_buffer_tail() const { return _off + _len == _raw->len; }
901
902 const char *buffer::ptr::c_str() const {
903 assert(_raw);
904 if (buffer_track_c_str)
905 buffer_c_str_accesses.inc();
906 return _raw->get_data() + _off;
907 }
908 char *buffer::ptr::c_str() {
909 assert(_raw);
910 if (buffer_track_c_str)
911 buffer_c_str_accesses.inc();
912 return _raw->get_data() + _off;
913 }
914 const char *buffer::ptr::end_c_str() const {
915 assert(_raw);
916 if (buffer_track_c_str)
917 buffer_c_str_accesses.inc();
918 return _raw->get_data() + _off + _len;
919 }
920 char *buffer::ptr::end_c_str() {
921 assert(_raw);
922 if (buffer_track_c_str)
923 buffer_c_str_accesses.inc();
924 return _raw->get_data() + _off + _len;
925 }
926
927 unsigned buffer::ptr::unused_tail_length() const
928 {
929 if (_raw)
930 return _raw->len - (_off+_len);
931 else
932 return 0;
933 }
934 const char& buffer::ptr::operator[](unsigned n) const
935 {
936 assert(_raw);
937 assert(n < _len);
938 return _raw->get_data()[_off + n];
939 }
940 char& buffer::ptr::operator[](unsigned n)
941 {
942 assert(_raw);
943 assert(n < _len);
944 return _raw->get_data()[_off + n];
945 }
946
947 const char *buffer::ptr::raw_c_str() const { assert(_raw); return _raw->data; }
948 unsigned buffer::ptr::raw_length() const { assert(_raw); return _raw->len; }
949 int buffer::ptr::raw_nref() const { assert(_raw); return _raw->nref.read(); }
950
951 void buffer::ptr::copy_out(unsigned o, unsigned l, char *dest) const {
952 assert(_raw);
953 if (o+l > _len)
954 throw end_of_buffer();
955 char* src = _raw->data + _off + o;
956 maybe_inline_memcpy(dest, src, l, 8);
957 }
958
959 unsigned buffer::ptr::wasted()
960 {
961 assert(_raw);
962 return _raw->len - _len;
963 }
964
965 int buffer::ptr::cmp(const ptr& o) const
966 {
967 int l = _len < o._len ? _len : o._len;
968 if (l) {
969 int r = memcmp(c_str(), o.c_str(), l);
970 if (r)
971 return r;
972 }
973 if (_len < o._len)
974 return -1;
975 if (_len > o._len)
976 return 1;
977 return 0;
978 }
979
980 bool buffer::ptr::is_zero() const
981 {
982 return mem_is_zero(c_str(), _len);
983 }
984
985 unsigned buffer::ptr::append(char c)
986 {
987 assert(_raw);
988 assert(1 <= unused_tail_length());
989 char* ptr = _raw->data + _off + _len;
990 *ptr = c;
991 _len++;
992 return _len + _off;
993 }
994
995 unsigned buffer::ptr::append(const char *p, unsigned l)
996 {
997 assert(_raw);
998 assert(l <= unused_tail_length());
999 char* c = _raw->data + _off + _len;
1000 maybe_inline_memcpy(c, p, l, 32);
1001 _len += l;
1002 return _len + _off;
1003 }
1004
1005 void buffer::ptr::copy_in(unsigned o, unsigned l, const char *src)
1006 {
1007 copy_in(o, l, src, true);
1008 }
1009
1010 void buffer::ptr::copy_in(unsigned o, unsigned l, const char *src, bool crc_reset)
1011 {
1012 assert(_raw);
1013 assert(o <= _len);
1014 assert(o+l <= _len);
1015 char* dest = _raw->data + _off + o;
1016 if (crc_reset)
1017 _raw->invalidate_crc();
1018 maybe_inline_memcpy(dest, src, l, 64);
1019 }
1020
1021 void buffer::ptr::zero()
1022 {
1023 zero(true);
1024 }
1025
1026 void buffer::ptr::zero(bool crc_reset)
1027 {
1028 if (crc_reset)
1029 _raw->invalidate_crc();
1030 memset(c_str(), 0, _len);
1031 }
1032
1033 void buffer::ptr::zero(unsigned o, unsigned l)
1034 {
1035 zero(o, l, true);
1036 }
1037
1038 void buffer::ptr::zero(unsigned o, unsigned l, bool crc_reset)
1039 {
1040 assert(o+l <= _len);
1041 if (crc_reset)
1042 _raw->invalidate_crc();
1043 memset(c_str()+o, 0, l);
1044 }
1045 bool buffer::ptr::can_zero_copy() const
1046 {
1047 return _raw->can_zero_copy();
1048 }
1049
1050 int buffer::ptr::zero_copy_to_fd(int fd, int64_t *offset) const
1051 {
1052 return _raw->zero_copy_to_fd(fd, (loff_t*)offset);
1053 }
1054
1055 // -- buffer::list::iterator --
1056 /*
1057 buffer::list::iterator operator=(const buffer::list::iterator& other)
1058 {
1059 if (this != &other) {
1060 bl = other.bl;
1061 ls = other.ls;
1062 off = other.off;
1063 p = other.p;
1064 p_off = other.p_off;
1065 }
1066 return *this;
1067 }*/
1068
1069 template<bool is_const>
1070 buffer::list::iterator_impl<is_const>::iterator_impl(bl_t *l, unsigned o)
1071 : bl(l), ls(&bl->_buffers), off(0), p(ls->begin()), p_off(0)
1072 {
1073 advance(o);
1074 }
1075
1076 template<bool is_const>
1077 buffer::list::iterator_impl<is_const>::iterator_impl(const buffer::list::iterator& i)
1078 : iterator_impl<is_const>(i.bl, i.off, i.p, i.p_off) {}
1079
1080 template<bool is_const>
1081 void buffer::list::iterator_impl<is_const>::advance(int o)
1082 {
1083 //cout << this << " advance " << o << " from " << off << " (p_off " << p_off << " in " << p->length() << ")" << std::endl;
1084 if (o > 0) {
1085 p_off += o;
1086 while (p_off > 0) {
1087 if (p == ls->end())
1088 throw end_of_buffer();
1089 if (p_off >= p->length()) {
1090 // skip this buffer
1091 p_off -= p->length();
1092 p++;
1093 } else {
1094 // somewhere in this buffer!
1095 break;
1096 }
1097 }
1098 off += o;
1099 return;
1100 }
1101 while (o < 0) {
1102 if (p_off) {
1103 unsigned d = -o;
1104 if (d > p_off)
1105 d = p_off;
1106 p_off -= d;
1107 off -= d;
1108 o += d;
1109 } else if (off > 0) {
1110 assert(p != ls->begin());
1111 p--;
1112 p_off = p->length();
1113 } else {
1114 throw end_of_buffer();
1115 }
1116 }
1117 }
1118
1119 template<bool is_const>
1120 void buffer::list::iterator_impl<is_const>::seek(unsigned o)
1121 {
1122 p = ls->begin();
1123 off = p_off = 0;
1124 advance(o);
1125 }
1126
1127 template<bool is_const>
1128 char buffer::list::iterator_impl<is_const>::operator*() const
1129 {
1130 if (p == ls->end())
1131 throw end_of_buffer();
1132 return (*p)[p_off];
1133 }
1134
1135 template<bool is_const>
1136 buffer::list::iterator_impl<is_const>&
1137 buffer::list::iterator_impl<is_const>::operator++()
1138 {
1139 if (p == ls->end())
1140 throw end_of_buffer();
1141 advance(1);
1142 return *this;
1143 }
1144
1145 template<bool is_const>
1146 buffer::ptr buffer::list::iterator_impl<is_const>::get_current_ptr() const
1147 {
1148 if (p == ls->end())
1149 throw end_of_buffer();
1150 return ptr(*p, p_off, p->length() - p_off);
1151 }
1152
1153 // copy data out.
1154 // note that these all _append_ to dest!
1155 template<bool is_const>
1156 void buffer::list::iterator_impl<is_const>::copy(unsigned len, char *dest)
1157 {
1158 if (p == ls->end()) seek(off);
1159 while (len > 0) {
1160 if (p == ls->end())
1161 throw end_of_buffer();
1162 assert(p->length() > 0);
1163
1164 unsigned howmuch = p->length() - p_off;
1165 if (len < howmuch) howmuch = len;
1166 p->copy_out(p_off, howmuch, dest);
1167 dest += howmuch;
1168
1169 len -= howmuch;
1170 advance(howmuch);
1171 }
1172 }
1173
1174 template<bool is_const>
1175 void buffer::list::iterator_impl<is_const>::copy(unsigned len, ptr &dest)
1176 {
1177 copy_deep(len, dest);
1178 }
1179
1180 template<bool is_const>
1181 void buffer::list::iterator_impl<is_const>::copy_deep(unsigned len, ptr &dest)
1182 {
1183 if (!len) {
1184 return;
1185 }
1186 if (p == ls->end())
1187 throw end_of_buffer();
1188 assert(p->length() > 0);
1189 dest = create(len);
1190 copy(len, dest.c_str());
1191 }
1192 template<bool is_const>
1193 void buffer::list::iterator_impl<is_const>::copy_shallow(unsigned len,
1194 ptr &dest)
1195 {
1196 if (!len) {
1197 return;
1198 }
1199 if (p == ls->end())
1200 throw end_of_buffer();
1201 assert(p->length() > 0);
1202 unsigned howmuch = p->length() - p_off;
1203 if (howmuch < len) {
1204 dest = create(len);
1205 copy(len, dest.c_str());
1206 } else {
1207 dest = ptr(*p, p_off, len);
1208 advance(len);
1209 }
1210 }
1211
1212 template<bool is_const>
1213 void buffer::list::iterator_impl<is_const>::copy(unsigned len, list &dest)
1214 {
1215 if (p == ls->end())
1216 seek(off);
1217 while (len > 0) {
1218 if (p == ls->end())
1219 throw end_of_buffer();
1220
1221 unsigned howmuch = p->length() - p_off;
1222 if (len < howmuch)
1223 howmuch = len;
1224 dest.append(*p, p_off, howmuch);
1225
1226 len -= howmuch;
1227 advance(howmuch);
1228 }
1229 }
1230
1231 template<bool is_const>
1232 void buffer::list::iterator_impl<is_const>::copy(unsigned len, std::string &dest)
1233 {
1234 if (p == ls->end())
1235 seek(off);
1236 while (len > 0) {
1237 if (p == ls->end())
1238 throw end_of_buffer();
1239
1240 unsigned howmuch = p->length() - p_off;
1241 const char *c_str = p->c_str();
1242 if (len < howmuch)
1243 howmuch = len;
1244 dest.append(c_str + p_off, howmuch);
1245
1246 len -= howmuch;
1247 advance(howmuch);
1248 }
1249 }
1250
1251 template<bool is_const>
1252 void buffer::list::iterator_impl<is_const>::copy_all(list &dest)
1253 {
1254 if (p == ls->end())
1255 seek(off);
1256 while (1) {
1257 if (p == ls->end())
1258 return;
1259 assert(p->length() > 0);
1260
1261 unsigned howmuch = p->length() - p_off;
1262 const char *c_str = p->c_str();
1263 dest.append(c_str + p_off, howmuch);
1264
1265 advance(howmuch);
1266 }
1267 }
1268
1269 template<bool is_const>
1270 size_t buffer::list::iterator_impl<is_const>::get_ptr_and_advance(
1271 size_t want, const char **data)
1272 {
1273 if (p == ls->end()) {
1274 seek(off);
1275 if (p == ls->end()) {
1276 return 0;
1277 }
1278 }
1279 *data = p->c_str() + p_off;
1280 size_t l = MIN(p->length() - p_off, want);
1281 p_off += l;
1282 if (p_off == p->length()) {
1283 ++p;
1284 p_off = 0;
1285 }
1286 off += l;
1287 return l;
1288 }
1289
1290 template<bool is_const>
1291 uint32_t buffer::list::iterator_impl<is_const>::crc32c(
1292 size_t length, uint32_t crc)
1293 {
1294 length = MIN( length, get_remaining());
1295 while (length > 0) {
1296 const char *p;
1297 size_t l = get_ptr_and_advance(length, &p);
1298 crc = ceph_crc32c(crc, (unsigned char*)p, l);
1299 length -= l;
1300 }
1301 return crc;
1302 }
1303
1304 // explicitly instantiate only the iterator types we need, so we can hide the
1305 // details in this compilation unit without introducing unnecessary link time
1306 // dependencies.
1307 template class buffer::list::iterator_impl<true>;
1308 template class buffer::list::iterator_impl<false>;
1309
1310 buffer::list::iterator::iterator(bl_t *l, unsigned o)
1311 : iterator_impl(l, o)
1312 {}
1313
1314 buffer::list::iterator::iterator(bl_t *l, unsigned o, list_iter_t ip, unsigned po)
1315 : iterator_impl(l, o, ip, po)
1316 {}
1317
1318 void buffer::list::iterator::advance(int o)
1319 {
1320 buffer::list::iterator_impl<false>::advance(o);
1321 }
1322
1323 void buffer::list::iterator::seek(unsigned o)
1324 {
1325 buffer::list::iterator_impl<false>::seek(o);
1326 }
1327
1328 char buffer::list::iterator::operator*()
1329 {
1330 if (p == ls->end()) {
1331 throw end_of_buffer();
1332 }
1333 return (*p)[p_off];
1334 }
1335
1336 buffer::list::iterator& buffer::list::iterator::operator++()
1337 {
1338 buffer::list::iterator_impl<false>::operator++();
1339 return *this;
1340 }
1341
1342 buffer::ptr buffer::list::iterator::get_current_ptr()
1343 {
1344 if (p == ls->end()) {
1345 throw end_of_buffer();
1346 }
1347 return ptr(*p, p_off, p->length() - p_off);
1348 }
1349
1350 void buffer::list::iterator::copy(unsigned len, char *dest)
1351 {
1352 return buffer::list::iterator_impl<false>::copy(len, dest);
1353 }
1354
1355 void buffer::list::iterator::copy(unsigned len, ptr &dest)
1356 {
1357 return buffer::list::iterator_impl<false>::copy_deep(len, dest);
1358 }
1359
1360 void buffer::list::iterator::copy_deep(unsigned len, ptr &dest)
1361 {
1362 buffer::list::iterator_impl<false>::copy_deep(len, dest);
1363 }
1364
1365 void buffer::list::iterator::copy_shallow(unsigned len, ptr &dest)
1366 {
1367 buffer::list::iterator_impl<false>::copy_shallow(len, dest);
1368 }
1369
1370 void buffer::list::iterator::copy(unsigned len, list &dest)
1371 {
1372 buffer::list::iterator_impl<false>::copy(len, dest);
1373 }
1374
1375 void buffer::list::iterator::copy(unsigned len, std::string &dest)
1376 {
1377 buffer::list::iterator_impl<false>::copy(len, dest);
1378 }
1379
1380 void buffer::list::iterator::copy_all(list &dest)
1381 {
1382 buffer::list::iterator_impl<false>::copy_all(dest);
1383 }
1384
1385 void buffer::list::iterator::copy_in(unsigned len, const char *src)
1386 {
1387 copy_in(len, src, true);
1388 }
1389
1390 // copy data in
1391 void buffer::list::iterator::copy_in(unsigned len, const char *src, bool crc_reset)
1392 {
1393 // copy
1394 if (p == ls->end())
1395 seek(off);
1396 while (len > 0) {
1397 if (p == ls->end())
1398 throw end_of_buffer();
1399
1400 unsigned howmuch = p->length() - p_off;
1401 if (len < howmuch)
1402 howmuch = len;
1403 p->copy_in(p_off, howmuch, src, crc_reset);
1404
1405 src += howmuch;
1406 len -= howmuch;
1407 advance(howmuch);
1408 }
1409 }
1410
1411 void buffer::list::iterator::copy_in(unsigned len, const list& otherl)
1412 {
1413 if (p == ls->end())
1414 seek(off);
1415 unsigned left = len;
1416 for (std::list<ptr>::const_iterator i = otherl._buffers.begin();
1417 i != otherl._buffers.end();
1418 ++i) {
1419 unsigned l = (*i).length();
1420 if (left < l)
1421 l = left;
1422 copy_in(l, i->c_str());
1423 left -= l;
1424 if (left == 0)
1425 break;
1426 }
1427 }
1428
1429 // -- buffer::list --
1430
1431 buffer::list::list(list&& other)
1432 : _buffers(std::move(other._buffers)),
1433 _len(other._len),
1434 _memcopy_count(other._memcopy_count),
1435 last_p(this) {
1436 append_buffer.swap(other.append_buffer);
1437 other.clear();
1438 }
1439
1440 void buffer::list::swap(list& other)
1441 {
1442 std::swap(_len, other._len);
1443 std::swap(_memcopy_count, other._memcopy_count);
1444 _buffers.swap(other._buffers);
1445 append_buffer.swap(other.append_buffer);
1446 //last_p.swap(other.last_p);
1447 last_p = begin();
1448 other.last_p = other.begin();
1449 }
1450
1451 bool buffer::list::contents_equal(buffer::list& other)
1452 {
1453 return static_cast<const buffer::list*>(this)->contents_equal(other);
1454 }
1455
1456 bool buffer::list::contents_equal(const ceph::buffer::list& other) const
1457 {
1458 if (length() != other.length())
1459 return false;
1460
1461 // buffer-wise comparison
1462 if (true) {
1463 std::list<ptr>::const_iterator a = _buffers.begin();
1464 std::list<ptr>::const_iterator b = other._buffers.begin();
1465 unsigned aoff = 0, boff = 0;
1466 while (a != _buffers.end()) {
1467 unsigned len = a->length() - aoff;
1468 if (len > b->length() - boff)
1469 len = b->length() - boff;
1470 if (memcmp(a->c_str() + aoff, b->c_str() + boff, len) != 0)
1471 return false;
1472 aoff += len;
1473 if (aoff == a->length()) {
1474 aoff = 0;
1475 ++a;
1476 }
1477 boff += len;
1478 if (boff == b->length()) {
1479 boff = 0;
1480 ++b;
1481 }
1482 }
1483 assert(b == other._buffers.end());
1484 return true;
1485 }
1486
1487 // byte-wise comparison
1488 if (false) {
1489 bufferlist::const_iterator me = begin();
1490 bufferlist::const_iterator him = other.begin();
1491 while (!me.end()) {
1492 if (*me != *him)
1493 return false;
1494 ++me;
1495 ++him;
1496 }
1497 return true;
1498 }
1499 }
1500
1501 bool buffer::list::can_zero_copy() const
1502 {
1503 for (std::list<ptr>::const_iterator it = _buffers.begin();
1504 it != _buffers.end();
1505 ++it)
1506 if (!it->can_zero_copy())
1507 return false;
1508 return true;
1509 }
1510
1511 bool buffer::list::is_provided_buffer(const char *dst) const
1512 {
1513 if (_buffers.empty())
1514 return false;
1515 return (is_contiguous() && (_buffers.front().c_str() == dst));
1516 }
1517
1518 bool buffer::list::is_aligned(unsigned align) const
1519 {
1520 for (std::list<ptr>::const_iterator it = _buffers.begin();
1521 it != _buffers.end();
1522 ++it)
1523 if (!it->is_aligned(align))
1524 return false;
1525 return true;
1526 }
1527
1528 bool buffer::list::is_n_align_sized(unsigned align) const
1529 {
1530 for (std::list<ptr>::const_iterator it = _buffers.begin();
1531 it != _buffers.end();
1532 ++it)
1533 if (!it->is_n_align_sized(align))
1534 return false;
1535 return true;
1536 }
1537
1538 bool buffer::list::is_aligned_size_and_memory(unsigned align_size,
1539 unsigned align_memory) const
1540 {
1541 for (std::list<ptr>::const_iterator it = _buffers.begin();
1542 it != _buffers.end();
1543 ++it) {
1544 if (!it->is_aligned(align_memory) || !it->is_n_align_sized(align_size))
1545 return false;
1546 }
1547 return true;
1548 }
1549
1550 bool buffer::list::is_zero() const {
1551 for (std::list<ptr>::const_iterator it = _buffers.begin();
1552 it != _buffers.end();
1553 ++it) {
1554 if (!it->is_zero()) {
1555 return false;
1556 }
1557 }
1558 return true;
1559 }
1560
1561 void buffer::list::zero()
1562 {
1563 for (std::list<ptr>::iterator it = _buffers.begin();
1564 it != _buffers.end();
1565 ++it)
1566 it->zero();
1567 }
1568
1569 void buffer::list::zero(unsigned o, unsigned l)
1570 {
1571 assert(o+l <= _len);
1572 unsigned p = 0;
1573 for (std::list<ptr>::iterator it = _buffers.begin();
1574 it != _buffers.end();
1575 ++it) {
1576 if (p + it->length() > o) {
1577 if (p >= o && p+it->length() <= o+l) {
1578 // 'o'------------- l -----------|
1579 // 'p'-- it->length() --|
1580 it->zero();
1581 } else if (p >= o) {
1582 // 'o'------------- l -----------|
1583 // 'p'------- it->length() -------|
1584 it->zero(0, o+l-p);
1585 } else if (p + it->length() <= o+l) {
1586 // 'o'------------- l -----------|
1587 // 'p'------- it->length() -------|
1588 it->zero(o-p, it->length()-(o-p));
1589 } else {
1590 // 'o'----------- l -----------|
1591 // 'p'---------- it->length() ----------|
1592 it->zero(o-p, l);
1593 }
1594 }
1595 p += it->length();
1596 if (o+l <= p)
1597 break; // done
1598 }
1599 }
1600
1601 bool buffer::list::is_contiguous() const
1602 {
1603 return &(*_buffers.begin()) == &(*_buffers.rbegin());
1604 }
1605
1606 bool buffer::list::is_n_page_sized() const
1607 {
1608 return is_n_align_sized(CEPH_PAGE_SIZE);
1609 }
1610
1611 bool buffer::list::is_page_aligned() const
1612 {
1613 return is_aligned(CEPH_PAGE_SIZE);
1614 }
1615
1616 void buffer::list::rebuild()
1617 {
1618 if (_len == 0) {
1619 _buffers.clear();
1620 return;
1621 }
1622 ptr nb;
1623 if ((_len & ~CEPH_PAGE_MASK) == 0)
1624 nb = buffer::create_page_aligned(_len);
1625 else
1626 nb = buffer::create(_len);
1627 rebuild(nb);
1628 }
1629
1630 void buffer::list::rebuild(ptr& nb)
1631 {
1632 unsigned pos = 0;
1633 for (std::list<ptr>::iterator it = _buffers.begin();
1634 it != _buffers.end();
1635 ++it) {
1636 nb.copy_in(pos, it->length(), it->c_str(), false);
1637 pos += it->length();
1638 }
1639 _memcopy_count += pos;
1640 _buffers.clear();
1641 if (nb.length())
1642 _buffers.push_back(nb);
1643 invalidate_crc();
1644 last_p = begin();
1645 }
1646
1647 bool buffer::list::rebuild_aligned(unsigned align)
1648 {
1649 return rebuild_aligned_size_and_memory(align, align);
1650 }
1651
1652 bool buffer::list::rebuild_aligned_size_and_memory(unsigned align_size,
1653 unsigned align_memory)
1654 {
1655 unsigned old_memcopy_count = _memcopy_count;
1656 std::list<ptr>::iterator p = _buffers.begin();
1657 while (p != _buffers.end()) {
1658 // keep anything that's already align and sized aligned
1659 if (p->is_aligned(align_memory) && p->is_n_align_sized(align_size)) {
1660 /*cout << " segment " << (void*)p->c_str()
1661 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1662 << " length " << p->length()
1663 << " " << (p->length() & (align - 1)) << " ok" << std::endl;
1664 */
1665 ++p;
1666 continue;
1667 }
1668
1669 // consolidate unaligned items, until we get something that is sized+aligned
1670 list unaligned;
1671 unsigned offset = 0;
1672 do {
1673 /*cout << " segment " << (void*)p->c_str()
1674 << " offset " << ((unsigned long)p->c_str() & (align - 1))
1675 << " length " << p->length() << " " << (p->length() & (align - 1))
1676 << " overall offset " << offset << " " << (offset & (align - 1))
1677 << " not ok" << std::endl;
1678 */
1679 offset += p->length();
1680 unaligned.push_back(*p);
1681 _buffers.erase(p++);
1682 } while (p != _buffers.end() &&
1683 (!p->is_aligned(align_memory) ||
1684 !p->is_n_align_sized(align_size) ||
1685 (offset % align_size)));
1686 if (!(unaligned.is_contiguous() && unaligned._buffers.front().is_aligned(align_memory))) {
1687 ptr nb(buffer::create_aligned(unaligned._len, align_memory));
1688 unaligned.rebuild(nb);
1689 _memcopy_count += unaligned._len;
1690 }
1691 _buffers.insert(p, unaligned._buffers.front());
1692 }
1693 last_p = begin();
1694
1695 return (old_memcopy_count != _memcopy_count);
1696 }
1697
1698 bool buffer::list::rebuild_page_aligned()
1699 {
1700 return rebuild_aligned(CEPH_PAGE_SIZE);
1701 }
1702
1703 // sort-of-like-assignment-op
1704 void buffer::list::claim(list& bl, unsigned int flags)
1705 {
1706 // free my buffers
1707 clear();
1708 claim_append(bl, flags);
1709 }
1710
1711 void buffer::list::claim_append(list& bl, unsigned int flags)
1712 {
1713 // steal the other guy's buffers
1714 _len += bl._len;
1715 if (!(flags & CLAIM_ALLOW_NONSHAREABLE))
1716 bl.make_shareable();
1717 _buffers.splice(_buffers.end(), bl._buffers );
1718 bl._len = 0;
1719 bl.last_p = bl.begin();
1720 }
1721
1722 void buffer::list::claim_prepend(list& bl, unsigned int flags)
1723 {
1724 // steal the other guy's buffers
1725 _len += bl._len;
1726 if (!(flags & CLAIM_ALLOW_NONSHAREABLE))
1727 bl.make_shareable();
1728 _buffers.splice(_buffers.begin(), bl._buffers );
1729 bl._len = 0;
1730 bl.last_p = bl.begin();
1731 }
1732
1733 void buffer::list::copy(unsigned off, unsigned len, char *dest) const
1734 {
1735 if (off + len > length())
1736 throw end_of_buffer();
1737 if (last_p.get_off() != off)
1738 last_p.seek(off);
1739 last_p.copy(len, dest);
1740 }
1741
1742 void buffer::list::copy(unsigned off, unsigned len, list &dest) const
1743 {
1744 if (off + len > length())
1745 throw end_of_buffer();
1746 if (last_p.get_off() != off)
1747 last_p.seek(off);
1748 last_p.copy(len, dest);
1749 }
1750
1751 void buffer::list::copy(unsigned off, unsigned len, std::string& dest) const
1752 {
1753 if (last_p.get_off() != off)
1754 last_p.seek(off);
1755 return last_p.copy(len, dest);
1756 }
1757
1758 void buffer::list::copy_in(unsigned off, unsigned len, const char *src)
1759 {
1760 copy_in(off, len, src, true);
1761 }
1762
1763 void buffer::list::copy_in(unsigned off, unsigned len, const char *src, bool crc_reset)
1764 {
1765 if (off + len > length())
1766 throw end_of_buffer();
1767
1768 if (last_p.get_off() != off)
1769 last_p.seek(off);
1770 last_p.copy_in(len, src, crc_reset);
1771 }
1772
1773 void buffer::list::copy_in(unsigned off, unsigned len, const list& src)
1774 {
1775 if (last_p.get_off() != off)
1776 last_p.seek(off);
1777 last_p.copy_in(len, src);
1778 }
1779
1780 void buffer::list::append(char c)
1781 {
1782 // put what we can into the existing append_buffer.
1783 unsigned gap = append_buffer.unused_tail_length();
1784 if (!gap) {
1785 // make a new append_buffer!
1786 append_buffer = raw_combined::create(CEPH_BUFFER_APPEND_SIZE);
1787 append_buffer.set_length(0); // unused, so far.
1788 }
1789 append(append_buffer, append_buffer.append(c) - 1, 1); // add segment to the list
1790 }
1791
1792 void buffer::list::append(const char *data, unsigned len)
1793 {
1794 while (len > 0) {
1795 // put what we can into the existing append_buffer.
1796 unsigned gap = append_buffer.unused_tail_length();
1797 if (gap > 0) {
1798 if (gap > len) gap = len;
1799 //cout << "append first char is " << data[0] << ", last char is " << data[len-1] << std::endl;
1800 append_buffer.append(data, gap);
1801 append(append_buffer, append_buffer.length() - gap, gap); // add segment to the list
1802 len -= gap;
1803 data += gap;
1804 }
1805 if (len == 0)
1806 break; // done!
1807
1808 // make a new append_buffer. fill out a complete page, factoring in the
1809 // raw_combined overhead.
1810 size_t need = ROUND_UP_TO(len, sizeof(size_t)) + sizeof(raw_combined);
1811 size_t alen = ROUND_UP_TO(need, CEPH_BUFFER_ALLOC_UNIT) -
1812 sizeof(raw_combined);
1813 append_buffer = raw_combined::create(alen);
1814 append_buffer.set_length(0); // unused, so far.
1815 }
1816 }
1817
1818 void buffer::list::append(const ptr& bp)
1819 {
1820 if (bp.length())
1821 push_back(bp);
1822 }
1823
1824 void buffer::list::append(ptr&& bp)
1825 {
1826 if (bp.length())
1827 push_back(std::move(bp));
1828 }
1829
1830 void buffer::list::append(const ptr& bp, unsigned off, unsigned len)
1831 {
1832 assert(len+off <= bp.length());
1833 if (!_buffers.empty()) {
1834 ptr &l = _buffers.back();
1835 if (l.get_raw() == bp.get_raw() &&
1836 l.end() == bp.start() + off) {
1837 // yay contiguous with tail bp!
1838 l.set_length(l.length()+len);
1839 _len += len;
1840 return;
1841 }
1842 }
1843 // add new item to list
1844 push_back(ptr(bp, off, len));
1845 }
1846
1847 void buffer::list::append(const list& bl)
1848 {
1849 _len += bl._len;
1850 for (std::list<ptr>::const_iterator p = bl._buffers.begin();
1851 p != bl._buffers.end();
1852 ++p)
1853 _buffers.push_back(*p);
1854 }
1855
1856 void buffer::list::append(std::istream& in)
1857 {
1858 while (!in.eof()) {
1859 std::string s;
1860 getline(in, s);
1861 append(s.c_str(), s.length());
1862 if (s.length())
1863 append("\n", 1);
1864 }
1865 }
1866
1867 void buffer::list::prepend_zero(unsigned len)
1868 {
1869 ptr bp(len);
1870 bp.zero(false);
1871 _len += len;
1872 _buffers.emplace_front(std::move(bp));
1873 }
1874
1875 void buffer::list::append_zero(unsigned len)
1876 {
1877 ptr bp(len);
1878 bp.zero(false);
1879 append(std::move(bp));
1880 }
1881
1882
1883 /*
1884 * get a char
1885 */
1886 const char& buffer::list::operator[](unsigned n) const
1887 {
1888 if (n >= _len)
1889 throw end_of_buffer();
1890
1891 for (std::list<ptr>::const_iterator p = _buffers.begin();
1892 p != _buffers.end();
1893 ++p) {
1894 if (n >= p->length()) {
1895 n -= p->length();
1896 continue;
1897 }
1898 return (*p)[n];
1899 }
1900 ceph_abort();
1901 }
1902
1903 /*
1904 * return a contiguous ptr to whole bufferlist contents.
1905 */
1906 char *buffer::list::c_str()
1907 {
1908 if (_buffers.empty())
1909 return 0; // no buffers
1910
1911 std::list<ptr>::const_iterator iter = _buffers.begin();
1912 ++iter;
1913
1914 if (iter != _buffers.end())
1915 rebuild();
1916 return _buffers.front().c_str(); // good, we're already contiguous.
1917 }
1918
1919 string buffer::list::to_str() const {
1920 string s;
1921 s.reserve(length());
1922 for (std::list<ptr>::const_iterator p = _buffers.begin();
1923 p != _buffers.end();
1924 ++p) {
1925 if (p->length()) {
1926 s.append(p->c_str(), p->length());
1927 }
1928 }
1929 return s;
1930 }
1931
1932 char *buffer::list::get_contiguous(unsigned orig_off, unsigned len)
1933 {
1934 if (orig_off + len > length())
1935 throw end_of_buffer();
1936
1937 if (len == 0) {
1938 return 0;
1939 }
1940
1941 unsigned off = orig_off;
1942 std::list<ptr>::iterator curbuf = _buffers.begin();
1943 while (off > 0 && off >= curbuf->length()) {
1944 off -= curbuf->length();
1945 ++curbuf;
1946 }
1947
1948 if (off + len > curbuf->length()) {
1949 bufferlist tmp;
1950 unsigned l = off + len;
1951
1952 do {
1953 if (l >= curbuf->length())
1954 l -= curbuf->length();
1955 else
1956 l = 0;
1957 tmp.append(*curbuf);
1958 curbuf = _buffers.erase(curbuf);
1959
1960 } while (curbuf != _buffers.end() && l > 0);
1961
1962 assert(l == 0);
1963
1964 tmp.rebuild();
1965 _buffers.insert(curbuf, tmp._buffers.front());
1966 return tmp.c_str() + off;
1967 }
1968
1969 last_p = begin(); // we modified _buffers
1970
1971 return curbuf->c_str() + off;
1972 }
1973
1974 void buffer::list::substr_of(const list& other, unsigned off, unsigned len)
1975 {
1976 if (off + len > other.length())
1977 throw end_of_buffer();
1978
1979 clear();
1980
1981 // skip off
1982 std::list<ptr>::const_iterator curbuf = other._buffers.begin();
1983 while (off > 0 &&
1984 off >= curbuf->length()) {
1985 // skip this buffer
1986 //cout << "skipping over " << *curbuf << std::endl;
1987 off -= (*curbuf).length();
1988 ++curbuf;
1989 }
1990 assert(len == 0 || curbuf != other._buffers.end());
1991
1992 while (len > 0) {
1993 // partial?
1994 if (off + len < curbuf->length()) {
1995 //cout << "copying partial of " << *curbuf << std::endl;
1996 _buffers.push_back( ptr( *curbuf, off, len ) );
1997 _len += len;
1998 break;
1999 }
2000
2001 // through end
2002 //cout << "copying end (all?) of " << *curbuf << std::endl;
2003 unsigned howmuch = curbuf->length() - off;
2004 _buffers.push_back( ptr( *curbuf, off, howmuch ) );
2005 _len += howmuch;
2006 len -= howmuch;
2007 off = 0;
2008 ++curbuf;
2009 }
2010 }
2011
2012 // funky modifer
2013 void buffer::list::splice(unsigned off, unsigned len, list *claim_by /*, bufferlist& replace_with */)
2014 { // fixme?
2015 if (len == 0)
2016 return;
2017
2018 if (off >= length())
2019 throw end_of_buffer();
2020
2021 assert(len > 0);
2022 //cout << "splice off " << off << " len " << len << " ... mylen = " << length() << std::endl;
2023
2024 // skip off
2025 std::list<ptr>::iterator curbuf = _buffers.begin();
2026 while (off > 0) {
2027 assert(curbuf != _buffers.end());
2028 if (off >= (*curbuf).length()) {
2029 // skip this buffer
2030 //cout << "off = " << off << " skipping over " << *curbuf << std::endl;
2031 off -= (*curbuf).length();
2032 ++curbuf;
2033 } else {
2034 // somewhere in this buffer!
2035 //cout << "off = " << off << " somewhere in " << *curbuf << std::endl;
2036 break;
2037 }
2038 }
2039
2040 if (off) {
2041 // add a reference to the front bit
2042 // insert it before curbuf (which we'll hose)
2043 //cout << "keeping front " << off << " of " << *curbuf << std::endl;
2044 _buffers.insert( curbuf, ptr( *curbuf, 0, off ) );
2045 _len += off;
2046 }
2047
2048 while (len > 0) {
2049 // partial?
2050 if (off + len < (*curbuf).length()) {
2051 //cout << "keeping end of " << *curbuf << ", losing first " << off+len << std::endl;
2052 if (claim_by)
2053 claim_by->append( *curbuf, off, len );
2054 (*curbuf).set_offset( off+len + (*curbuf).offset() ); // ignore beginning big
2055 (*curbuf).set_length( (*curbuf).length() - (len+off) );
2056 _len -= off+len;
2057 //cout << " now " << *curbuf << std::endl;
2058 break;
2059 }
2060
2061 // hose though the end
2062 unsigned howmuch = (*curbuf).length() - off;
2063 //cout << "discarding " << howmuch << " of " << *curbuf << std::endl;
2064 if (claim_by)
2065 claim_by->append( *curbuf, off, howmuch );
2066 _len -= (*curbuf).length();
2067 _buffers.erase( curbuf++ );
2068 len -= howmuch;
2069 off = 0;
2070 }
2071
2072 // splice in *replace (implement me later?)
2073
2074 last_p = begin(); // just in case we were in the removed region.
2075 }
2076
2077 void buffer::list::write(int off, int len, std::ostream& out) const
2078 {
2079 list s;
2080 s.substr_of(*this, off, len);
2081 for (std::list<ptr>::const_iterator it = s._buffers.begin();
2082 it != s._buffers.end();
2083 ++it)
2084 if (it->length())
2085 out.write(it->c_str(), it->length());
2086 /*iterator p(this, off);
2087 while (len > 0 && !p.end()) {
2088 int l = p.left_in_this_buf();
2089 if (l > len)
2090 l = len;
2091 out.write(p.c_str(), l);
2092 len -= l;
2093 }*/
2094 }
2095
2096 void buffer::list::encode_base64(buffer::list& o)
2097 {
2098 bufferptr bp(length() * 4 / 3 + 3);
2099 int l = ceph_armor(bp.c_str(), bp.c_str() + bp.length(), c_str(), c_str() + length());
2100 bp.set_length(l);
2101 o.push_back(std::move(bp));
2102 }
2103
2104 void buffer::list::decode_base64(buffer::list& e)
2105 {
2106 bufferptr bp(4 + ((e.length() * 3) / 4));
2107 int l = ceph_unarmor(bp.c_str(), bp.c_str() + bp.length(), e.c_str(), e.c_str() + e.length());
2108 if (l < 0) {
2109 std::ostringstream oss;
2110 oss << "decode_base64: decoding failed:\n";
2111 hexdump(oss);
2112 throw buffer::malformed_input(oss.str().c_str());
2113 }
2114 assert(l <= (int)bp.length());
2115 bp.set_length(l);
2116 push_back(std::move(bp));
2117 }
2118
2119
2120
2121 int buffer::list::read_file(const char *fn, std::string *error)
2122 {
2123 int fd = TEMP_FAILURE_RETRY(::open(fn, O_RDONLY));
2124 if (fd < 0) {
2125 int err = errno;
2126 std::ostringstream oss;
2127 oss << "can't open " << fn << ": " << cpp_strerror(err);
2128 *error = oss.str();
2129 return -err;
2130 }
2131
2132 struct stat st;
2133 memset(&st, 0, sizeof(st));
2134 if (::fstat(fd, &st) < 0) {
2135 int err = errno;
2136 std::ostringstream oss;
2137 oss << "bufferlist::read_file(" << fn << "): stat error: "
2138 << cpp_strerror(err);
2139 *error = oss.str();
2140 VOID_TEMP_FAILURE_RETRY(::close(fd));
2141 return -err;
2142 }
2143
2144 ssize_t ret = read_fd(fd, st.st_size);
2145 if (ret < 0) {
2146 std::ostringstream oss;
2147 oss << "bufferlist::read_file(" << fn << "): read error:"
2148 << cpp_strerror(ret);
2149 *error = oss.str();
2150 VOID_TEMP_FAILURE_RETRY(::close(fd));
2151 return ret;
2152 }
2153 else if (ret != st.st_size) {
2154 // Premature EOF.
2155 // Perhaps the file changed between stat() and read()?
2156 std::ostringstream oss;
2157 oss << "bufferlist::read_file(" << fn << "): warning: got premature EOF.";
2158 *error = oss.str();
2159 // not actually an error, but weird
2160 }
2161 VOID_TEMP_FAILURE_RETRY(::close(fd));
2162 return 0;
2163 }
2164
2165 ssize_t buffer::list::read_fd(int fd, size_t len)
2166 {
2167 // try zero copy first
2168 if (false && read_fd_zero_copy(fd, len) == 0) {
2169 // TODO fix callers to not require correct read size, which is not
2170 // available for raw_pipe until we actually inspect the data
2171 return 0;
2172 }
2173 bufferptr bp = buffer::create(len);
2174 ssize_t ret = safe_read(fd, (void*)bp.c_str(), len);
2175 if (ret >= 0) {
2176 bp.set_length(ret);
2177 append(std::move(bp));
2178 }
2179 return ret;
2180 }
2181
2182 int buffer::list::read_fd_zero_copy(int fd, size_t len)
2183 {
2184 #ifdef CEPH_HAVE_SPLICE
2185 try {
2186 append(buffer::create_zero_copy(len, fd, NULL));
2187 } catch (buffer::error_code &e) {
2188 return e.code;
2189 } catch (buffer::malformed_input &e) {
2190 return -EIO;
2191 }
2192 return 0;
2193 #else
2194 return -ENOTSUP;
2195 #endif
2196 }
2197
2198 int buffer::list::write_file(const char *fn, int mode)
2199 {
2200 int fd = TEMP_FAILURE_RETRY(::open(fn, O_WRONLY|O_CREAT|O_TRUNC, mode));
2201 if (fd < 0) {
2202 int err = errno;
2203 cerr << "bufferlist::write_file(" << fn << "): failed to open file: "
2204 << cpp_strerror(err) << std::endl;
2205 return -err;
2206 }
2207 int ret = write_fd(fd);
2208 if (ret) {
2209 cerr << "bufferlist::write_fd(" << fn << "): write_fd error: "
2210 << cpp_strerror(ret) << std::endl;
2211 VOID_TEMP_FAILURE_RETRY(::close(fd));
2212 return ret;
2213 }
2214 if (TEMP_FAILURE_RETRY(::close(fd))) {
2215 int err = errno;
2216 cerr << "bufferlist::write_file(" << fn << "): close error: "
2217 << cpp_strerror(err) << std::endl;
2218 return -err;
2219 }
2220 return 0;
2221 }
2222
// Write the veclen entries of vec, bytes bytes in total, to fd at the given
// file offset, retrying after EINTR and after short writes.  The vec
// entries are modified while recovering from a short write.
// Returns 0 on success or -errno on failure.
static int do_writev(int fd, struct iovec *vec, uint64_t offset, unsigned veclen, unsigned bytes)
{
  ssize_t r = 0;
  while (bytes > 0) {
#ifdef HAVE_PWRITEV
    r = ::pwritev(fd, vec, veclen, offset);
#else
    // no pwritev: position the fd explicitly, then use plain writev
    r = ::lseek64(fd, offset, SEEK_SET);
    if (r != offset) {
      r = -errno;
      return r;
    }
    r = ::writev(fd, vec, veclen);
#endif
    if (r < 0) {
      if (errno == EINTR)
	continue;
      return -errno;
    }

    bytes -= r;
    offset += r;
    if (bytes == 0)
      break;

    // Short write: step past the entries that were written completely and
    // trim the one that was written partially.
    while (r > 0) {
      if (vec->iov_len > (size_t)r) {
	vec->iov_base = (char *)vec->iov_base + r;
	vec->iov_len -= r;
	break;
      }
      // drain this whole item
      r -= vec->iov_len;
      ++vec;
      --veclen;
    }
  }
  return 0;
}
2262
2263 int buffer::list::write_fd(int fd) const
2264 {
2265 if (can_zero_copy())
2266 return write_fd_zero_copy(fd);
2267
2268 // use writev!
2269 iovec iov[IOV_MAX];
2270 int iovlen = 0;
2271 ssize_t bytes = 0;
2272
2273 std::list<ptr>::const_iterator p = _buffers.begin();
2274 while (p != _buffers.end()) {
2275 if (p->length() > 0) {
2276 iov[iovlen].iov_base = (void *)p->c_str();
2277 iov[iovlen].iov_len = p->length();
2278 bytes += p->length();
2279 iovlen++;
2280 }
2281 ++p;
2282
2283 if (iovlen == IOV_MAX-1 ||
2284 p == _buffers.end()) {
2285 iovec *start = iov;
2286 int num = iovlen;
2287 ssize_t wrote;
2288 retry:
2289 wrote = ::writev(fd, start, num);
2290 if (wrote < 0) {
2291 int err = errno;
2292 if (err == EINTR)
2293 goto retry;
2294 return -err;
2295 }
2296 if (wrote < bytes) {
2297 // partial write, recover!
2298 while ((size_t)wrote >= start[0].iov_len) {
2299 wrote -= start[0].iov_len;
2300 bytes -= start[0].iov_len;
2301 start++;
2302 num--;
2303 }
2304 if (wrote > 0) {
2305 start[0].iov_len -= wrote;
2306 start[0].iov_base = (char *)start[0].iov_base + wrote;
2307 bytes -= wrote;
2308 }
2309 goto retry;
2310 }
2311 iovlen = 0;
2312 bytes = 0;
2313 }
2314 }
2315 return 0;
2316 }
2317
2318 int buffer::list::write_fd(int fd, uint64_t offset) const
2319 {
2320 iovec iov[IOV_MAX];
2321
2322 std::list<ptr>::const_iterator p = _buffers.begin();
2323 uint64_t left_pbrs = _buffers.size();
2324 while (left_pbrs) {
2325 ssize_t bytes = 0;
2326 unsigned iovlen = 0;
2327 uint64_t size = MIN(left_pbrs, IOV_MAX);
2328 left_pbrs -= size;
2329 while (size > 0) {
2330 iov[iovlen].iov_base = (void *)p->c_str();
2331 iov[iovlen].iov_len = p->length();
2332 iovlen++;
2333 bytes += p->length();
2334 ++p;
2335 size--;
2336 }
2337
2338 int r = do_writev(fd, iov, offset, iovlen, bytes);
2339 if (r < 0)
2340 return r;
2341 offset += bytes;
2342 }
2343 return 0;
2344 }
2345
2346 int buffer::list::write_fd_zero_copy(int fd) const
2347 {
2348 if (!can_zero_copy())
2349 return -ENOTSUP;
2350 /* pass offset to each call to avoid races updating the fd seek
2351 * position, since the I/O may be non-blocking
2352 */
2353 int64_t offset = ::lseek(fd, 0, SEEK_CUR);
2354 int64_t *off_p = &offset;
2355 if (offset < 0 && errno != ESPIPE)
2356 return -errno;
2357 if (errno == ESPIPE)
2358 off_p = NULL;
2359 for (std::list<ptr>::const_iterator it = _buffers.begin();
2360 it != _buffers.end(); ++it) {
2361 int r = it->zero_copy_to_fd(fd, off_p);
2362 if (r < 0)
2363 return r;
2364 if (off_p)
2365 offset += it->length();
2366 }
2367 return 0;
2368 }
2369
2370 __u32 buffer::list::crc32c(__u32 crc) const
2371 {
2372 for (std::list<ptr>::const_iterator it = _buffers.begin();
2373 it != _buffers.end();
2374 ++it) {
2375 if (it->length()) {
2376 raw *r = it->get_raw();
2377 pair<size_t, size_t> ofs(it->offset(), it->offset() + it->length());
2378 pair<uint32_t, uint32_t> ccrc;
2379 if (r->get_crc(ofs, &ccrc)) {
2380 if (ccrc.first == crc) {
2381 // got it already
2382 crc = ccrc.second;
2383 if (buffer_track_crc)
2384 buffer_cached_crc.inc();
2385 } else {
2386 /* If we have cached crc32c(buf, v) for initial value v,
2387 * we can convert this to a different initial value v' by:
2388 * crc32c(buf, v') = crc32c(buf, v) ^ adjustment
2389 * where adjustment = crc32c(0*len(buf), v ^ v')
2390 *
2391 * http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
2392 * note, u for our crc32c implementation is 0
2393 */
2394 crc = ccrc.second ^ ceph_crc32c(ccrc.first ^ crc, NULL, it->length());
2395 if (buffer_track_crc)
2396 buffer_cached_crc_adjusted.inc();
2397 }
2398 } else {
2399 if (buffer_track_crc)
2400 buffer_missed_crc.inc();
2401 uint32_t base = crc;
2402 crc = ceph_crc32c(crc, (unsigned char*)it->c_str(), it->length());
2403 r->set_crc(ofs, make_pair(base, crc));
2404 }
2405 }
2406 }
2407 return crc;
2408 }
2409
2410 void buffer::list::invalidate_crc()
2411 {
2412 for (std::list<ptr>::const_iterator p = _buffers.begin(); p != _buffers.end(); ++p) {
2413 raw *r = p->get_raw();
2414 if (r) {
2415 r->invalidate_crc();
2416 }
2417 }
2418 }
2419
2420 /**
2421 * Binary write all contents to a C++ stream
2422 */
2423 void buffer::list::write_stream(std::ostream &out) const
2424 {
2425 for (std::list<ptr>::const_iterator p = _buffers.begin(); p != _buffers.end(); ++p) {
2426 if (p->length() > 0) {
2427 out.write(p->c_str(), p->length());
2428 }
2429 }
2430 }
2431
2432
2433 void buffer::list::hexdump(std::ostream &out, bool trailing_newline) const
2434 {
2435 if (!length())
2436 return;
2437
2438 std::ios_base::fmtflags original_flags = out.flags();
2439
2440 // do our best to match the output of hexdump -C, for better
2441 // diff'ing!
2442
2443 out.setf(std::ios::right);
2444 out.fill('0');
2445
2446 unsigned per = 16;
2447 bool was_zeros = false, did_star = false;
2448 for (unsigned o=0; o<length(); o += per) {
2449 bool row_is_zeros = false;
2450 if (o + per < length()) {
2451 row_is_zeros = true;
2452 for (unsigned i=0; i<per && o+i<length(); i++) {
2453 if ((*this)[o+i]) {
2454 row_is_zeros = false;
2455 }
2456 }
2457 if (row_is_zeros) {
2458 if (was_zeros) {
2459 if (!did_star) {
2460 out << "\n*";
2461 did_star = true;
2462 }
2463 continue;
2464 }
2465 was_zeros = true;
2466 } else {
2467 was_zeros = false;
2468 did_star = false;
2469 }
2470 }
2471 if (o)
2472 out << "\n";
2473 out << std::hex << std::setw(8) << o << " ";
2474
2475 unsigned i;
2476 for (i=0; i<per && o+i<length(); i++) {
2477 if (i == 8)
2478 out << ' ';
2479 out << " " << std::setw(2) << ((unsigned)(*this)[o+i] & 0xff);
2480 }
2481 for (; i<per; i++) {
2482 if (i == 8)
2483 out << ' ';
2484 out << " ";
2485 }
2486
2487 out << " |";
2488 for (i=0; i<per && o+i<length(); i++) {
2489 char c = (*this)[o+i];
2490 if (isupper(c) || islower(c) || isdigit(c) || c == ' ' || ispunct(c))
2491 out << c;
2492 else
2493 out << '.';
2494 }
2495 out << '|' << std::dec;
2496 }
2497 if (trailing_newline) {
2498 out << "\n" << std::hex << std::setw(8) << length();
2499 out << "\n";
2500 }
2501
2502 out.flags(original_flags);
2503 }
2504
2505 std::ostream& buffer::operator<<(std::ostream& out, const buffer::raw &r) {
2506 return out << "buffer::raw(" << (void*)r.data << " len " << r.len << " nref " << r.nref.read() << ")";
2507 }
2508
2509 std::ostream& buffer::operator<<(std::ostream& out, const buffer::ptr& bp) {
2510 if (bp.have_raw())
2511 out << "buffer::ptr(" << bp.offset() << "~" << bp.length()
2512 << " " << (void*)bp.c_str()
2513 << " in raw " << (void*)bp.raw_c_str()
2514 << " len " << bp.raw_length()
2515 << " nref " << bp.raw_nref() << ")";
2516 else
2517 out << "buffer:ptr(" << bp.offset() << "~" << bp.length() << " no raw)";
2518 return out;
2519 }
2520
2521 std::ostream& buffer::operator<<(std::ostream& out, const buffer::list& bl) {
2522 out << "buffer::list(len=" << bl.length() << "," << std::endl;
2523
2524 std::list<buffer::ptr>::const_iterator it = bl.buffers().begin();
2525 while (it != bl.buffers().end()) {
2526 out << "\t" << *it;
2527 if (++it == bl.buffers().end()) break;
2528 out << "," << std::endl;
2529 }
2530 out << std::endl << ")";
2531 return out;
2532 }
2533
2534 std::ostream& buffer::operator<<(std::ostream& out, const buffer::error& e)
2535 {
2536 return out << e.what();
2537 }
2538
2539 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_malloc, buffer_raw_malloc,
2540 buffer_meta);
2541 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_mmap_pages, buffer_raw_mmap_pagse,
2542 buffer_meta);
2543 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_posix_aligned,
2544 buffer_raw_posix_aligned, buffer_meta);
2545 #ifdef CEPH_HAVE_SPLICE
2546 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_pipe, buffer_raw_pipe, buffer_meta);
2547 #endif
2548 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_char, buffer_raw_char, buffer_meta);
2549 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_unshareable, buffer_raw_unshareable,
2550 buffer_meta);
2551 MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_static, buffer_raw_static,
2552 buffer_meta);
2553