]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
31f18b77 FG |
15 | #include <atomic> |
16 | #include <errno.h> | |
17 | #include <limits.h> | |
18 | ||
19 | #include <sys/uio.h> | |
20 | ||
7c673cae FG |
21 | #include "include/compat.h" |
22 | #include "include/mempool.h" | |
23 | #include "armor.h" | |
24 | #include "common/environment.h" | |
25 | #include "common/errno.h" | |
26 | #include "common/safe_io.h" | |
27 | #include "common/simple_spin.h" | |
28 | #include "common/strtol.h" | |
29 | #include "common/likely.h" | |
30 | #include "common/valgrind.h" | |
31 | #include "common/deleter.h" | |
7c673cae FG |
32 | #include "common/RWLock.h" |
33 | #include "include/types.h" | |
7c673cae | 34 | #include "include/scope_guard.h" |
31f18b77 | 35 | |
7c673cae FG |
36 | #if defined(HAVE_XIO) |
37 | #include "msg/xio/XioMsg.h" | |
38 | #endif | |
39 | ||
31f18b77 | 40 | using namespace ceph; |
7c673cae FG |
41 | |
42 | #define CEPH_BUFFER_ALLOC_UNIT (MIN(CEPH_PAGE_SIZE, 4096)) | |
43 | #define CEPH_BUFFER_APPEND_SIZE (CEPH_BUFFER_ALLOC_UNIT - sizeof(raw_combined)) | |
44 | ||
45 | #ifdef BUFFER_DEBUG | |
46 | static std::atomic_flag buffer_debug_lock = ATOMIC_FLAG_INIT; | |
47 | # define bdout { simple_spin_lock(&buffer_debug_lock); std::cout | |
48 | # define bendl std::endl; simple_spin_unlock(&buffer_debug_lock); } | |
49 | #else | |
50 | # define bdout if (0) { std::cout | |
51 | # define bendl std::endl; } | |
52 | #endif | |
53 | ||
31f18b77 FG |
54 | static std::atomic<uint64_t> buffer_total_alloc { 0 }; |
55 | static std::atomic<uint64_t> buffer_history_alloc_bytes { 0 }; | |
56 | static std::atomic<uint64_t> buffer_history_alloc_num { 0 }; | |
57 | ||
7c673cae FG |
58 | const bool buffer_track_alloc = get_env_bool("CEPH_BUFFER_TRACK"); |
59 | ||
60 | namespace { | |
61 | void inc_total_alloc(unsigned len) { | |
62 | if (buffer_track_alloc) | |
31f18b77 | 63 | buffer_total_alloc += len; |
7c673cae FG |
64 | } |
65 | ||
66 | void dec_total_alloc(unsigned len) { | |
67 | if (buffer_track_alloc) | |
31f18b77 | 68 | buffer_total_alloc -= len; |
7c673cae FG |
69 | } |
70 | ||
71 | void inc_history_alloc(uint64_t len) { | |
72 | if (buffer_track_alloc) { | |
31f18b77 FG |
73 | buffer_history_alloc_bytes += len; |
74 | buffer_history_alloc_num++; | |
7c673cae FG |
75 | } |
76 | } | |
31f18b77 | 77 | } // namespace |
7c673cae FG |
78 | |
79 | int buffer::get_total_alloc() { | |
31f18b77 | 80 | return buffer_total_alloc; |
7c673cae FG |
81 | } |
82 | uint64_t buffer::get_history_alloc_bytes() { | |
31f18b77 | 83 | return buffer_history_alloc_bytes; |
7c673cae FG |
84 | } |
85 | uint64_t buffer::get_history_alloc_num() { | |
31f18b77 | 86 | return buffer_history_alloc_num; |
7c673cae FG |
87 | } |
88 | ||
31f18b77 FG |
89 | static std::atomic<unsigned> buffer_cached_crc { 0 }; |
90 | static std::atomic<unsigned> buffer_cached_crc_adjusted { 0 }; | |
91 | static std::atomic<unsigned> buffer_missed_crc { 0 }; | |
92 | ||
7c673cae FG |
93 | static bool buffer_track_crc = get_env_bool("CEPH_BUFFER_TRACK"); |
94 | ||
95 | void buffer::track_cached_crc(bool b) { | |
96 | buffer_track_crc = b; | |
97 | } | |
98 | int buffer::get_cached_crc() { | |
31f18b77 | 99 | return buffer_cached_crc; |
7c673cae FG |
100 | } |
101 | int buffer::get_cached_crc_adjusted() { | |
31f18b77 | 102 | return buffer_cached_crc_adjusted; |
7c673cae FG |
103 | } |
104 | ||
105 | int buffer::get_missed_crc() { | |
31f18b77 | 106 | return buffer_missed_crc; |
7c673cae FG |
107 | } |
108 | ||
31f18b77 FG |
109 | static std::atomic<unsigned> buffer_c_str_accesses { 0 }; |
110 | ||
7c673cae FG |
111 | static bool buffer_track_c_str = get_env_bool("CEPH_BUFFER_TRACK"); |
112 | ||
113 | void buffer::track_c_str(bool b) { | |
114 | buffer_track_c_str = b; | |
115 | } | |
116 | int buffer::get_c_str_accesses() { | |
31f18b77 | 117 | return buffer_c_str_accesses; |
7c673cae FG |
118 | } |
119 | ||
31f18b77 | 120 | static std::atomic<unsigned> buffer_max_pipe_size { 0 }; |
7c673cae FG |
// Refresh buffer_max_pipe_size from /proc/sys/fs/pipe-max-size.
// Returns 0 on success (or when CEPH_HAVE_SETPIPE_SZ is not defined),
// a negative errno if the file cannot be stat'ed or read, and -EIO if
// its contents do not parse as a base-10 integer.
int update_max_pipe_size() {
#ifdef CEPH_HAVE_SETPIPE_SZ
  struct stat st;
  if (::stat(PROCPREFIX "/proc/sys/fs/pipe-max-size", &st) == -1)
    return -errno;

  char text[32];
  // leave room for the terminating NUL added below
  const int nread = safe_read_file(PROCPREFIX "/proc/sys/fs/", "pipe-max-size",
				   text, sizeof(text) - 1);
  if (nread < 0)
    return nread;
  text[nread] = '\0';

  std::string parse_err;
  const size_t parsed = strict_strtol(text, 10, &parse_err);
  if (!parse_err.empty())
    return -EIO;
  buffer_max_pipe_size = parsed;
#endif
  return 0;
}
141 | ||
// Return the maximum pipe size to use.  With CEPH_HAVE_SETPIPE_SZ the
// cached value is returned if non-zero, otherwise it is refreshed via
// update_max_pipe_size(); if that fails (or the feature is compiled
// out) the fixed fallback below is returned.
size_t get_max_pipe_size() {
#ifdef CEPH_HAVE_SETPIPE_SZ
  const size_t cached = buffer_max_pipe_size;
  if (cached != 0)
    return cached;
  const int rc = update_max_pipe_size();
  if (rc == 0)
    return buffer_max_pipe_size;
#endif
  // this is the max size hardcoded in linux before 2.6.35
  return 65536;
}
153 | ||
154 | const char * buffer::error::what() const throw () { | |
155 | return "buffer::exception"; | |
156 | } | |
157 | const char * buffer::bad_alloc::what() const throw () { | |
158 | return "buffer::bad_alloc"; | |
159 | } | |
160 | const char * buffer::end_of_buffer::what() const throw () { | |
161 | return "buffer::end_of_buffer"; | |
162 | } | |
163 | const char * buffer::malformed_input::what() const throw () { | |
164 | return buf; | |
165 | } | |
166 | buffer::error_code::error_code(int error) : | |
167 | buffer::malformed_input(cpp_strerror(error).c_str()), code(error) {} | |
168 | ||
169 | class buffer::raw { | |
170 | public: | |
171 | char *data; | |
172 | unsigned len; | |
31f18b77 FG |
173 | std::atomic<unsigned> nref { 0 }; |
174 | int mempool = mempool::mempool_buffer_anon; | |
7c673cae FG |
175 | |
176 | mutable std::atomic_flag crc_spinlock = ATOMIC_FLAG_INIT; | |
177 | map<pair<size_t, size_t>, pair<uint32_t, uint32_t> > crc_map; | |
178 | ||
179 | explicit raw(unsigned l) | |
31f18b77 FG |
180 | : data(NULL), len(l), nref(0) { |
181 | mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count(1, len); | |
182 | } | |
7c673cae | 183 | raw(char *c, unsigned l) |
31f18b77 FG |
184 | : data(c), len(l), nref(0) { |
185 | mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count(1, len); | |
186 | } | |
187 | virtual ~raw() { | |
188 | mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count( | |
189 | -1, -(int)len); | |
190 | } | |
191 | ||
192 | void _set_len(unsigned l) { | |
193 | mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count( | |
194 | -1, -(int)len); | |
195 | len = l; | |
196 | mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count(1, len); | |
197 | } | |
198 | ||
199 | void reassign_to_mempool(int pool) { | |
200 | if (pool == mempool) { | |
201 | return; | |
202 | } | |
203 | mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count( | |
204 | -1, -(int)len); | |
205 | mempool = pool; | |
206 | mempool::get_pool(mempool::pool_index_t(pool)).adjust_count(1, len); | |
207 | } | |
208 | ||
209 | void try_assign_to_mempool(int pool) { | |
210 | if (mempool == mempool::mempool_buffer_anon) { | |
211 | reassign_to_mempool(pool); | |
212 | } | |
213 | } | |
7c673cae FG |
214 | |
215 | // no copying. | |
216 | // cppcheck-suppress noExplicitConstructor | |
217 | raw(const raw &other); | |
218 | const raw& operator=(const raw &other); | |
219 | ||
220 | virtual char *get_data() { | |
221 | return data; | |
222 | } | |
223 | virtual raw* clone_empty() = 0; | |
224 | raw *clone() { | |
225 | raw *c = clone_empty(); | |
226 | memcpy(c->data, data, len); | |
227 | return c; | |
228 | } | |
229 | virtual bool can_zero_copy() const { | |
230 | return false; | |
231 | } | |
232 | virtual int zero_copy_to_fd(int fd, loff_t *offset) { | |
233 | return -ENOTSUP; | |
234 | } | |
235 | virtual bool is_page_aligned() { | |
236 | return ((long)data & ~CEPH_PAGE_MASK) == 0; | |
237 | } | |
238 | bool is_n_page_sized() { | |
239 | return (len & ~CEPH_PAGE_MASK) == 0; | |
240 | } | |
241 | virtual bool is_shareable() { | |
242 | // true if safe to reference/share the existing buffer copy | |
243 | // false if it is not safe to share the buffer, e.g., due to special | |
244 | // and/or registered memory that is scarce | |
245 | return true; | |
246 | } | |
247 | bool get_crc(const pair<size_t, size_t> &fromto, | |
248 | pair<uint32_t, uint32_t> *crc) const { | |
249 | simple_spin_lock(&crc_spinlock); | |
250 | map<pair<size_t, size_t>, pair<uint32_t, uint32_t> >::const_iterator i = | |
251 | crc_map.find(fromto); | |
252 | if (i == crc_map.end()) { | |
253 | simple_spin_unlock(&crc_spinlock); | |
254 | return false; | |
255 | } | |
256 | *crc = i->second; | |
257 | simple_spin_unlock(&crc_spinlock); | |
258 | return true; | |
259 | } | |
260 | void set_crc(const pair<size_t, size_t> &fromto, | |
261 | const pair<uint32_t, uint32_t> &crc) { | |
262 | simple_spin_lock(&crc_spinlock); | |
263 | crc_map[fromto] = crc; | |
264 | simple_spin_unlock(&crc_spinlock); | |
265 | } | |
266 | void invalidate_crc() { | |
267 | simple_spin_lock(&crc_spinlock); | |
268 | if (crc_map.size() != 0) { | |
269 | crc_map.clear(); | |
270 | } | |
271 | simple_spin_unlock(&crc_spinlock); | |
272 | } | |
273 | }; | |
274 | ||
7c673cae FG |
275 | /* |
276 | * raw_combined is always placed within a single allocation along | |
277 | * with the data buffer. the data goes at the beginning, and | |
278 | * raw_combined at the end. | |
279 | */ | |
280 | class buffer::raw_combined : public buffer::raw { | |
281 | size_t alignment; | |
282 | public: | |
283 | raw_combined(char *dataptr, unsigned l, unsigned align=0) | |
284 | : raw(dataptr, l), | |
285 | alignment(align) { | |
286 | inc_total_alloc(len); | |
287 | inc_history_alloc(len); | |
288 | } | |
289 | ~raw_combined() override { | |
290 | dec_total_alloc(len); | |
291 | } | |
292 | raw* clone_empty() override { | |
293 | return create(len, alignment); | |
294 | } | |
295 | ||
296 | static raw_combined *create(unsigned len, unsigned align=0) { | |
297 | if (!align) | |
298 | align = sizeof(size_t); | |
299 | size_t rawlen = ROUND_UP_TO(sizeof(buffer::raw_combined), | |
300 | alignof(buffer::raw_combined)); | |
301 | size_t datalen = ROUND_UP_TO(len, alignof(buffer::raw_combined)); | |
302 | ||
31f18b77 FG |
303 | #ifdef DARWIN |
304 | char *ptr = (char *) valloc(rawlen + datalen); | |
305 | #else | |
306 | char *ptr = 0; | |
307 | int r = ::posix_memalign((void**)(void*)&ptr, align, rawlen + datalen); | |
308 | if (r) | |
309 | throw bad_alloc(); | |
310 | #endif /* DARWIN */ | |
7c673cae FG |
311 | if (!ptr) |
312 | throw bad_alloc(); | |
313 | ||
314 | // actual data first, since it has presumably larger alignment restriction | |
315 | // then put the raw_combined at the end | |
316 | return new (ptr + datalen) raw_combined(ptr, len, align); | |
317 | } | |
318 | ||
319 | static void operator delete(void *ptr) { | |
320 | raw_combined *raw = (raw_combined *)ptr; | |
31f18b77 | 321 | ::free((void *)raw->data); |
7c673cae FG |
322 | } |
323 | }; | |
324 | ||
325 | class buffer::raw_malloc : public buffer::raw { | |
326 | public: | |
327 | MEMPOOL_CLASS_HELPERS(); | |
328 | ||
329 | explicit raw_malloc(unsigned l) : raw(l) { | |
330 | if (len) { | |
331 | data = (char *)malloc(len); | |
332 | if (!data) | |
333 | throw bad_alloc(); | |
334 | } else { | |
335 | data = 0; | |
336 | } | |
337 | inc_total_alloc(len); | |
338 | inc_history_alloc(len); | |
339 | bdout << "raw_malloc " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl; | |
340 | } | |
341 | raw_malloc(unsigned l, char *b) : raw(b, l) { | |
342 | inc_total_alloc(len); | |
343 | bdout << "raw_malloc " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl; | |
344 | } | |
345 | ~raw_malloc() override { | |
346 | free(data); | |
347 | dec_total_alloc(len); | |
348 | bdout << "raw_malloc " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl; | |
349 | } | |
350 | raw* clone_empty() override { | |
351 | return new raw_malloc(len); | |
352 | } | |
353 | }; | |
354 | ||
355 | #ifndef __CYGWIN__ | |
356 | class buffer::raw_mmap_pages : public buffer::raw { | |
357 | public: | |
358 | MEMPOOL_CLASS_HELPERS(); | |
359 | ||
360 | explicit raw_mmap_pages(unsigned l) : raw(l) { | |
361 | data = (char*)::mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); | |
362 | if (!data) | |
363 | throw bad_alloc(); | |
364 | inc_total_alloc(len); | |
365 | inc_history_alloc(len); | |
366 | bdout << "raw_mmap " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl; | |
367 | } | |
368 | ~raw_mmap_pages() override { | |
369 | ::munmap(data, len); | |
370 | dec_total_alloc(len); | |
371 | bdout << "raw_mmap " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl; | |
372 | } | |
373 | raw* clone_empty() override { | |
374 | return new raw_mmap_pages(len); | |
375 | } | |
376 | }; | |
377 | ||
378 | class buffer::raw_posix_aligned : public buffer::raw { | |
379 | unsigned align; | |
380 | public: | |
381 | MEMPOOL_CLASS_HELPERS(); | |
382 | ||
383 | raw_posix_aligned(unsigned l, unsigned _align) : raw(l) { | |
384 | align = _align; | |
385 | assert((align >= sizeof(void *)) && (align & (align - 1)) == 0); | |
31f18b77 FG |
386 | #ifdef DARWIN |
387 | data = (char *) valloc(len); | |
388 | #else | |
389 | int r = ::posix_memalign((void**)(void*)&data, align, len); | |
390 | if (r) | |
391 | throw bad_alloc(); | |
392 | #endif /* DARWIN */ | |
7c673cae FG |
393 | if (!data) |
394 | throw bad_alloc(); | |
395 | inc_total_alloc(len); | |
396 | inc_history_alloc(len); | |
397 | bdout << "raw_posix_aligned " << this << " alloc " << (void *)data << " l=" << l << ", align=" << align << " total_alloc=" << buffer::get_total_alloc() << bendl; | |
398 | } | |
399 | ~raw_posix_aligned() override { | |
31f18b77 | 400 | ::free(data); |
7c673cae FG |
401 | dec_total_alloc(len); |
402 | bdout << "raw_posix_aligned " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl; | |
403 | } | |
404 | raw* clone_empty() override { | |
405 | return new raw_posix_aligned(len, align); | |
406 | } | |
407 | }; | |
408 | #endif | |
409 | ||
410 | #ifdef __CYGWIN__ | |
411 | class buffer::raw_hack_aligned : public buffer::raw { | |
412 | unsigned align; | |
413 | char *realdata; | |
414 | public: | |
415 | raw_hack_aligned(unsigned l, unsigned _align) : raw(l) { | |
416 | align = _align; | |
417 | realdata = new char[len+align-1]; | |
418 | unsigned off = ((unsigned)realdata) & (align-1); | |
419 | if (off) | |
420 | data = realdata + align - off; | |
421 | else | |
422 | data = realdata; | |
423 | inc_total_alloc(len+align-1); | |
424 | inc_history_alloc(len+align-1); | |
425 | //cout << "hack aligned " << (unsigned)data | |
426 | //<< " in raw " << (unsigned)realdata | |
427 | //<< " off " << off << std::endl; | |
428 | assert(((unsigned)data & (align-1)) == 0); | |
429 | } | |
430 | ~raw_hack_aligned() { | |
431 | delete[] realdata; | |
432 | dec_total_alloc(len+align-1); | |
433 | } | |
434 | raw* clone_empty() { | |
435 | return new raw_hack_aligned(len, align); | |
436 | } | |
437 | }; | |
438 | #endif | |
439 | ||
440 | #ifdef CEPH_HAVE_SPLICE | |
441 | class buffer::raw_pipe : public buffer::raw { | |
442 | public: | |
443 | MEMPOOL_CLASS_HELPERS(); | |
444 | ||
445 | explicit raw_pipe(unsigned len) : raw(len), source_consumed(false) { | |
446 | size_t max = get_max_pipe_size(); | |
447 | if (len > max) { | |
448 | bdout << "raw_pipe: requested length " << len | |
449 | << " > max length " << max << bendl; | |
450 | throw malformed_input("length larger than max pipe size"); | |
451 | } | |
452 | pipefds[0] = -1; | |
453 | pipefds[1] = -1; | |
454 | ||
455 | int r; | |
456 | if (::pipe(pipefds) == -1) { | |
457 | r = -errno; | |
458 | bdout << "raw_pipe: error creating pipe: " << cpp_strerror(r) << bendl; | |
459 | throw error_code(r); | |
460 | } | |
461 | ||
462 | r = set_nonblocking(pipefds); | |
463 | if (r < 0) { | |
464 | bdout << "raw_pipe: error setting nonblocking flag on temp pipe: " | |
465 | << cpp_strerror(r) << bendl; | |
466 | throw error_code(r); | |
467 | } | |
468 | ||
469 | r = set_pipe_size(pipefds, len); | |
470 | if (r < 0) { | |
471 | bdout << "raw_pipe: could not set pipe size" << bendl; | |
472 | // continue, since the pipe should become large enough as needed | |
473 | } | |
474 | ||
475 | inc_total_alloc(len); | |
476 | inc_history_alloc(len); | |
477 | bdout << "raw_pipe " << this << " alloc " << len << " " | |
478 | << buffer::get_total_alloc() << bendl; | |
479 | } | |
480 | ||
481 | ~raw_pipe() override { | |
482 | if (data) | |
483 | free(data); | |
484 | close_pipe(pipefds); | |
485 | dec_total_alloc(len); | |
486 | bdout << "raw_pipe " << this << " free " << (void *)data << " " | |
487 | << buffer::get_total_alloc() << bendl; | |
488 | } | |
489 | ||
490 | bool can_zero_copy() const override { | |
491 | return true; | |
492 | } | |
493 | ||
494 | int set_source(int fd, loff_t *off) { | |
495 | int flags = SPLICE_F_NONBLOCK; | |
496 | ssize_t r = safe_splice(fd, off, pipefds[1], NULL, len, flags); | |
497 | if (r < 0) { | |
498 | bdout << "raw_pipe: error splicing into pipe: " << cpp_strerror(r) | |
499 | << bendl; | |
500 | return r; | |
501 | } | |
502 | // update length with actual amount read | |
31f18b77 | 503 | _set_len(r); |
7c673cae FG |
504 | return 0; |
505 | } | |
506 | ||
507 | int zero_copy_to_fd(int fd, loff_t *offset) override { | |
508 | assert(!source_consumed); | |
509 | int flags = SPLICE_F_NONBLOCK; | |
510 | ssize_t r = safe_splice_exact(pipefds[0], NULL, fd, offset, len, flags); | |
511 | if (r < 0) { | |
512 | bdout << "raw_pipe: error splicing from pipe to fd: " | |
513 | << cpp_strerror(r) << bendl; | |
514 | return r; | |
515 | } | |
516 | source_consumed = true; | |
517 | return 0; | |
518 | } | |
519 | ||
520 | buffer::raw* clone_empty() override { | |
521 | // cloning doesn't make sense for pipe-based buffers, | |
522 | // and is only used by unit tests for other types of buffers | |
523 | return NULL; | |
524 | } | |
525 | ||
526 | char *get_data() override { | |
527 | if (data) | |
528 | return data; | |
529 | return copy_pipe(pipefds); | |
530 | } | |
531 | ||
532 | private: | |
533 | int set_pipe_size(int *fds, long length) { | |
534 | #ifdef CEPH_HAVE_SETPIPE_SZ | |
535 | if (::fcntl(fds[1], F_SETPIPE_SZ, length) == -1) { | |
536 | int r = -errno; | |
537 | if (r == -EPERM) { | |
538 | // pipe limit must have changed - EPERM means we requested | |
539 | // more than the maximum size as an unprivileged user | |
540 | update_max_pipe_size(); | |
541 | throw malformed_input("length larger than new max pipe size"); | |
542 | } | |
543 | return r; | |
544 | } | |
545 | #endif | |
546 | return 0; | |
547 | } | |
548 | ||
549 | int set_nonblocking(int *fds) { | |
550 | if (::fcntl(fds[0], F_SETFL, O_NONBLOCK) == -1) | |
551 | return -errno; | |
552 | if (::fcntl(fds[1], F_SETFL, O_NONBLOCK) == -1) | |
553 | return -errno; | |
554 | return 0; | |
555 | } | |
556 | ||
557 | static void close_pipe(const int *fds) { | |
558 | if (fds[0] >= 0) | |
559 | VOID_TEMP_FAILURE_RETRY(::close(fds[0])); | |
560 | if (fds[1] >= 0) | |
561 | VOID_TEMP_FAILURE_RETRY(::close(fds[1])); | |
562 | } | |
563 | char *copy_pipe(int *fds) { | |
564 | /* preserve original pipe contents by copying into a temporary | |
565 | * pipe before reading. | |
566 | */ | |
567 | int tmpfd[2]; | |
568 | int r; | |
569 | ||
570 | assert(!source_consumed); | |
571 | assert(fds[0] >= 0); | |
572 | ||
573 | if (::pipe(tmpfd) == -1) { | |
574 | r = -errno; | |
575 | bdout << "raw_pipe: error creating temp pipe: " << cpp_strerror(r) | |
576 | << bendl; | |
577 | throw error_code(r); | |
578 | } | |
579 | auto sg = make_scope_guard([=] { close_pipe(tmpfd); }); | |
580 | r = set_nonblocking(tmpfd); | |
581 | if (r < 0) { | |
582 | bdout << "raw_pipe: error setting nonblocking flag on temp pipe: " | |
583 | << cpp_strerror(r) << bendl; | |
584 | throw error_code(r); | |
585 | } | |
586 | r = set_pipe_size(tmpfd, len); | |
587 | if (r < 0) { | |
588 | bdout << "raw_pipe: error setting pipe size on temp pipe: " | |
589 | << cpp_strerror(r) << bendl; | |
590 | } | |
591 | int flags = SPLICE_F_NONBLOCK; | |
592 | if (::tee(fds[0], tmpfd[1], len, flags) == -1) { | |
593 | r = errno; | |
594 | bdout << "raw_pipe: error tee'ing into temp pipe: " << cpp_strerror(r) | |
595 | << bendl; | |
596 | throw error_code(r); | |
597 | } | |
598 | data = (char *)malloc(len); | |
599 | if (!data) { | |
600 | throw bad_alloc(); | |
601 | } | |
602 | r = safe_read(tmpfd[0], data, len); | |
603 | if (r < (ssize_t)len) { | |
604 | bdout << "raw_pipe: error reading from temp pipe:" << cpp_strerror(r) | |
605 | << bendl; | |
606 | free(data); | |
607 | data = NULL; | |
608 | throw error_code(r); | |
609 | } | |
610 | return data; | |
611 | } | |
612 | bool source_consumed; | |
613 | int pipefds[2]; | |
614 | }; | |
615 | #endif // CEPH_HAVE_SPLICE | |
616 | ||
617 | /* | |
618 | * primitive buffer types | |
619 | */ | |
620 | class buffer::raw_char : public buffer::raw { | |
621 | public: | |
622 | MEMPOOL_CLASS_HELPERS(); | |
623 | ||
624 | explicit raw_char(unsigned l) : raw(l) { | |
625 | if (len) | |
31f18b77 | 626 | data = new char[len]; |
7c673cae FG |
627 | else |
628 | data = 0; | |
629 | inc_total_alloc(len); | |
630 | inc_history_alloc(len); | |
631 | bdout << "raw_char " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl; | |
632 | } | |
633 | raw_char(unsigned l, char *b) : raw(b, l) { | |
634 | inc_total_alloc(len); | |
635 | bdout << "raw_char " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl; | |
636 | } | |
637 | ~raw_char() override { | |
31f18b77 | 638 | delete[] data; |
7c673cae FG |
639 | dec_total_alloc(len); |
640 | bdout << "raw_char " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl; | |
641 | } | |
642 | raw* clone_empty() override { | |
643 | return new raw_char(len); | |
644 | } | |
645 | }; | |
646 | ||
31f18b77 FG |
647 | class buffer::raw_claimed_char : public buffer::raw { |
648 | public: | |
649 | MEMPOOL_CLASS_HELPERS(); | |
650 | ||
651 | explicit raw_claimed_char(unsigned l, char *b) : raw(b, l) { | |
652 | inc_total_alloc(len); | |
653 | bdout << "raw_claimed_char " << this << " alloc " << (void *)data | |
654 | << " " << l << " " << buffer::get_total_alloc() << bendl; | |
655 | } | |
656 | ~raw_claimed_char() override { | |
657 | dec_total_alloc(len); | |
658 | bdout << "raw_claimed_char " << this << " free " << (void *)data | |
659 | << " " << buffer::get_total_alloc() << bendl; | |
660 | } | |
661 | raw* clone_empty() override { | |
662 | return new raw_char(len); | |
663 | } | |
664 | }; | |
665 | ||
7c673cae FG |
666 | class buffer::raw_unshareable : public buffer::raw { |
667 | public: | |
668 | MEMPOOL_CLASS_HELPERS(); | |
669 | ||
670 | explicit raw_unshareable(unsigned l) : raw(l) { | |
671 | if (len) | |
672 | data = new char[len]; | |
673 | else | |
674 | data = 0; | |
675 | } | |
676 | raw_unshareable(unsigned l, char *b) : raw(b, l) { | |
677 | } | |
678 | raw* clone_empty() override { | |
679 | return new raw_char(len); | |
680 | } | |
681 | bool is_shareable() override { | |
682 | return false; // !shareable, will force make_shareable() | |
683 | } | |
684 | ~raw_unshareable() override { | |
685 | delete[] data; | |
686 | } | |
687 | }; | |
688 | ||
689 | class buffer::raw_static : public buffer::raw { | |
690 | public: | |
691 | MEMPOOL_CLASS_HELPERS(); | |
692 | ||
693 | raw_static(const char *d, unsigned l) : raw((char*)d, l) { } | |
694 | ~raw_static() override {} | |
695 | raw* clone_empty() override { | |
696 | return new buffer::raw_char(len); | |
697 | } | |
698 | }; | |
699 | ||
700 | class buffer::raw_claim_buffer : public buffer::raw { | |
701 | deleter del; | |
702 | public: | |
703 | raw_claim_buffer(const char *b, unsigned l, deleter d) | |
704 | : raw((char*)b, l), del(std::move(d)) { } | |
705 | ~raw_claim_buffer() override {} | |
706 | raw* clone_empty() override { | |
707 | return new buffer::raw_char(len); | |
708 | } | |
709 | }; | |
710 | ||
711 | #if defined(HAVE_XIO) | |
712 | class buffer::xio_msg_buffer : public buffer::raw { | |
713 | private: | |
714 | XioDispatchHook* m_hook; | |
715 | public: | |
716 | xio_msg_buffer(XioDispatchHook* _m_hook, const char *d, | |
717 | unsigned l) : | |
718 | raw((char*)d, l), m_hook(_m_hook->get()) {} | |
719 | ||
720 | bool is_shareable() { return false; } | |
721 | static void operator delete(void *p) | |
722 | { | |
723 | xio_msg_buffer *buf = static_cast<xio_msg_buffer*>(p); | |
724 | // return hook ref (counts against pool); it appears illegal | |
725 | // to do this in our dtor, because this fires after that | |
726 | buf->m_hook->put(); | |
727 | } | |
728 | raw* clone_empty() { | |
729 | return new buffer::raw_char(len); | |
730 | } | |
731 | }; | |
732 | ||
733 | class buffer::xio_mempool : public buffer::raw { | |
734 | public: | |
735 | struct xio_reg_mem *mp; | |
736 | xio_mempool(struct xio_reg_mem *_mp, unsigned l) : | |
737 | raw((char*)_mp->addr, l), mp(_mp) | |
738 | { } | |
739 | ~xio_mempool() {} | |
740 | raw* clone_empty() { | |
741 | return new buffer::raw_char(len); | |
742 | } | |
743 | }; | |
744 | ||
745 | struct xio_reg_mem* get_xio_mp(const buffer::ptr& bp) | |
746 | { | |
747 | buffer::xio_mempool *mb = dynamic_cast<buffer::xio_mempool*>(bp.get_raw()); | |
748 | if (mb) { | |
749 | return mb->mp; | |
750 | } | |
751 | return NULL; | |
752 | } | |
753 | ||
754 | buffer::raw* buffer::create_msg( | |
755 | unsigned len, char *buf, XioDispatchHook* m_hook) { | |
756 | XioPool& pool = m_hook->get_pool(); | |
757 | buffer::raw* bp = | |
758 | static_cast<buffer::raw*>(pool.alloc(sizeof(xio_msg_buffer))); | |
759 | new (bp) xio_msg_buffer(m_hook, buf, len); | |
760 | return bp; | |
761 | } | |
762 | #endif /* HAVE_XIO */ | |
763 | ||
764 | buffer::raw* buffer::copy(const char *c, unsigned len) { | |
765 | raw* r = buffer::create_aligned(len, sizeof(size_t)); | |
766 | memcpy(r->data, c, len); | |
767 | return r; | |
768 | } | |
769 | ||
770 | buffer::raw* buffer::create(unsigned len) { | |
771 | return buffer::create_aligned(len, sizeof(size_t)); | |
772 | } | |
773 | buffer::raw* buffer::claim_char(unsigned len, char *buf) { | |
31f18b77 | 774 | return new raw_claimed_char(len, buf); |
7c673cae FG |
775 | } |
776 | buffer::raw* buffer::create_malloc(unsigned len) { | |
777 | return new raw_malloc(len); | |
778 | } | |
779 | buffer::raw* buffer::claim_malloc(unsigned len, char *buf) { | |
780 | return new raw_malloc(len, buf); | |
781 | } | |
782 | buffer::raw* buffer::create_static(unsigned len, char *buf) { | |
783 | return new raw_static(buf, len); | |
784 | } | |
785 | buffer::raw* buffer::claim_buffer(unsigned len, char *buf, deleter del) { | |
786 | return new raw_claim_buffer(buf, len, std::move(del)); | |
787 | } | |
788 | ||
789 | buffer::raw* buffer::create_aligned(unsigned len, unsigned align) { | |
790 | // If alignment is a page multiple, use a separate buffer::raw to | |
791 | // avoid fragmenting the heap. | |
792 | // | |
793 | // Somewhat unexpectedly, I see consistently better performance | |
794 | // from raw_combined than from raw even when the allocation size is | |
795 | // a page multiple (but alignment is not). | |
796 | // | |
797 | // I also see better performance from a separate buffer::raw once the | |
798 | // size passes 8KB. | |
799 | if ((align & ~CEPH_PAGE_MASK) == 0 || | |
800 | len >= CEPH_PAGE_SIZE * 2) { | |
801 | #ifndef __CYGWIN__ | |
802 | return new raw_posix_aligned(len, align); | |
803 | #else | |
804 | return new raw_hack_aligned(len, align); | |
805 | #endif | |
806 | } | |
807 | return raw_combined::create(len, align); | |
808 | } | |
809 | ||
810 | buffer::raw* buffer::create_page_aligned(unsigned len) { | |
811 | return create_aligned(len, CEPH_PAGE_SIZE); | |
812 | } | |
813 | ||
814 | buffer::raw* buffer::create_zero_copy(unsigned len, int fd, int64_t *offset) { | |
815 | #ifdef CEPH_HAVE_SPLICE | |
816 | buffer::raw_pipe* buf = new raw_pipe(len); | |
817 | int r = buf->set_source(fd, (loff_t*)offset); | |
818 | if (r < 0) { | |
819 | delete buf; | |
820 | throw error_code(r); | |
821 | } | |
822 | return buf; | |
823 | #else | |
824 | throw error_code(-ENOTSUP); | |
825 | #endif | |
826 | } | |
827 | ||
828 | buffer::raw* buffer::create_unshareable(unsigned len) { | |
829 | return new raw_unshareable(len); | |
830 | } | |
831 | ||
832 | buffer::ptr::ptr(raw *r) : _raw(r), _off(0), _len(r->len) // no lock needed; this is an unref raw. | |
833 | { | |
31f18b77 | 834 | r->nref++; |
7c673cae FG |
835 | bdout << "ptr " << this << " get " << _raw << bendl; |
836 | } | |
837 | buffer::ptr::ptr(unsigned l) : _off(0), _len(l) | |
838 | { | |
839 | _raw = create(l); | |
31f18b77 | 840 | _raw->nref++; |
7c673cae FG |
841 | bdout << "ptr " << this << " get " << _raw << bendl; |
842 | } | |
843 | buffer::ptr::ptr(const char *d, unsigned l) : _off(0), _len(l) // ditto. | |
844 | { | |
845 | _raw = copy(d, l); | |
31f18b77 | 846 | _raw->nref++; |
7c673cae FG |
847 | bdout << "ptr " << this << " get " << _raw << bendl; |
848 | } | |
849 | buffer::ptr::ptr(const ptr& p) : _raw(p._raw), _off(p._off), _len(p._len) | |
850 | { | |
851 | if (_raw) { | |
31f18b77 | 852 | _raw->nref++; |
7c673cae FG |
853 | bdout << "ptr " << this << " get " << _raw << bendl; |
854 | } | |
855 | } | |
856 | buffer::ptr::ptr(ptr&& p) noexcept : _raw(p._raw), _off(p._off), _len(p._len) | |
857 | { | |
858 | p._raw = nullptr; | |
859 | p._off = p._len = 0; | |
860 | } | |
861 | buffer::ptr::ptr(const ptr& p, unsigned o, unsigned l) | |
862 | : _raw(p._raw), _off(p._off + o), _len(l) | |
863 | { | |
864 | assert(o+l <= p._len); | |
865 | assert(_raw); | |
31f18b77 | 866 | _raw->nref++; |
7c673cae FG |
867 | bdout << "ptr " << this << " get " << _raw << bendl; |
868 | } | |
869 | buffer::ptr& buffer::ptr::operator= (const ptr& p) | |
870 | { | |
871 | if (p._raw) { | |
31f18b77 | 872 | p._raw->nref++; |
7c673cae FG |
873 | bdout << "ptr " << this << " get " << _raw << bendl; |
874 | } | |
875 | buffer::raw *raw = p._raw; | |
876 | release(); | |
877 | if (raw) { | |
878 | _raw = raw; | |
879 | _off = p._off; | |
880 | _len = p._len; | |
881 | } else { | |
882 | _off = _len = 0; | |
883 | } | |
884 | return *this; | |
885 | } | |
886 | buffer::ptr& buffer::ptr::operator= (ptr&& p) noexcept | |
887 | { | |
888 | release(); | |
889 | buffer::raw *raw = p._raw; | |
890 | if (raw) { | |
891 | _raw = raw; | |
892 | _off = p._off; | |
893 | _len = p._len; | |
894 | p._raw = nullptr; | |
895 | p._off = p._len = 0; | |
896 | } else { | |
897 | _off = _len = 0; | |
898 | } | |
899 | return *this; | |
900 | } | |
901 | ||
902 | buffer::raw *buffer::ptr::clone() | |
903 | { | |
904 | return _raw->clone(); | |
905 | } | |
906 | ||
907 | buffer::ptr& buffer::ptr::make_shareable() { | |
908 | if (_raw && !_raw->is_shareable()) { | |
909 | buffer::raw *tr = _raw; | |
910 | _raw = tr->clone(); | |
31f18b77 FG |
911 | _raw->nref = 1; |
912 | if (unlikely(--tr->nref == 0)) { | |
7c673cae FG |
913 | ANNOTATE_HAPPENS_AFTER(&tr->nref); |
914 | ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&tr->nref); | |
915 | delete tr; | |
916 | } else { | |
917 | ANNOTATE_HAPPENS_BEFORE(&tr->nref); | |
918 | } | |
919 | } | |
920 | return *this; | |
921 | } | |
922 | ||
923 | void buffer::ptr::swap(ptr& other) | |
924 | { | |
925 | raw *r = _raw; | |
926 | unsigned o = _off; | |
927 | unsigned l = _len; | |
928 | _raw = other._raw; | |
929 | _off = other._off; | |
930 | _len = other._len; | |
931 | other._raw = r; | |
932 | other._off = o; | |
933 | other._len = l; | |
934 | } | |
935 | ||
936 | void buffer::ptr::release() | |
937 | { | |
938 | if (_raw) { | |
939 | bdout << "ptr " << this << " release " << _raw << bendl; | |
31f18b77 | 940 | if (--_raw->nref == 0) { |
7c673cae FG |
941 | //cout << "hosing raw " << (void*)_raw << " len " << _raw->len << std::endl; |
942 | ANNOTATE_HAPPENS_AFTER(&_raw->nref); | |
943 | ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&_raw->nref); | |
944 | delete _raw; // dealloc old (if any) | |
945 | } else { | |
946 | ANNOTATE_HAPPENS_BEFORE(&_raw->nref); | |
947 | } | |
948 | _raw = 0; | |
949 | } | |
950 | } | |
951 | ||
952 | bool buffer::ptr::at_buffer_tail() const { return _off + _len == _raw->len; } | |
953 | ||
954 | const char *buffer::ptr::c_str() const { | |
955 | assert(_raw); | |
956 | if (buffer_track_c_str) | |
31f18b77 | 957 | buffer_c_str_accesses++; |
7c673cae FG |
958 | return _raw->get_data() + _off; |
959 | } | |
960 | char *buffer::ptr::c_str() { | |
961 | assert(_raw); | |
962 | if (buffer_track_c_str) | |
31f18b77 | 963 | buffer_c_str_accesses++; |
7c673cae FG |
964 | return _raw->get_data() + _off; |
965 | } | |
966 | const char *buffer::ptr::end_c_str() const { | |
967 | assert(_raw); | |
968 | if (buffer_track_c_str) | |
31f18b77 | 969 | buffer_c_str_accesses++; |
7c673cae FG |
970 | return _raw->get_data() + _off + _len; |
971 | } | |
972 | char *buffer::ptr::end_c_str() { | |
973 | assert(_raw); | |
974 | if (buffer_track_c_str) | |
31f18b77 | 975 | buffer_c_str_accesses++; |
7c673cae FG |
976 | return _raw->get_data() + _off + _len; |
977 | } | |
978 | ||
979 | unsigned buffer::ptr::unused_tail_length() const | |
980 | { | |
981 | if (_raw) | |
982 | return _raw->len - (_off+_len); | |
983 | else | |
984 | return 0; | |
985 | } | |
986 | const char& buffer::ptr::operator[](unsigned n) const | |
987 | { | |
988 | assert(_raw); | |
989 | assert(n < _len); | |
990 | return _raw->get_data()[_off + n]; | |
991 | } | |
992 | char& buffer::ptr::operator[](unsigned n) | |
993 | { | |
994 | assert(_raw); | |
995 | assert(n < _len); | |
996 | return _raw->get_data()[_off + n]; | |
997 | } | |
998 | ||
999 | const char *buffer::ptr::raw_c_str() const { assert(_raw); return _raw->data; } | |
1000 | unsigned buffer::ptr::raw_length() const { assert(_raw); return _raw->len; } | |
31f18b77 | 1001 | int buffer::ptr::raw_nref() const { assert(_raw); return _raw->nref; } |
7c673cae FG |
1002 | |
1003 | void buffer::ptr::copy_out(unsigned o, unsigned l, char *dest) const { | |
1004 | assert(_raw); | |
1005 | if (o+l > _len) | |
1006 | throw end_of_buffer(); | |
1007 | char* src = _raw->data + _off + o; | |
1008 | maybe_inline_memcpy(dest, src, l, 8); | |
1009 | } | |
1010 | ||
31f18b77 | 1011 | unsigned buffer::ptr::wasted() const |
7c673cae | 1012 | { |
7c673cae FG |
1013 | return _raw->len - _len; |
1014 | } | |
1015 | ||
1016 | int buffer::ptr::cmp(const ptr& o) const | |
1017 | { | |
1018 | int l = _len < o._len ? _len : o._len; | |
1019 | if (l) { | |
1020 | int r = memcmp(c_str(), o.c_str(), l); | |
1021 | if (r) | |
1022 | return r; | |
1023 | } | |
1024 | if (_len < o._len) | |
1025 | return -1; | |
1026 | if (_len > o._len) | |
1027 | return 1; | |
1028 | return 0; | |
1029 | } | |
1030 | ||
1031 | bool buffer::ptr::is_zero() const | |
1032 | { | |
1033 | return mem_is_zero(c_str(), _len); | |
1034 | } | |
1035 | ||
1036 | unsigned buffer::ptr::append(char c) | |
1037 | { | |
1038 | assert(_raw); | |
1039 | assert(1 <= unused_tail_length()); | |
1040 | char* ptr = _raw->data + _off + _len; | |
1041 | *ptr = c; | |
1042 | _len++; | |
1043 | return _len + _off; | |
1044 | } | |
1045 | ||
1046 | unsigned buffer::ptr::append(const char *p, unsigned l) | |
1047 | { | |
1048 | assert(_raw); | |
1049 | assert(l <= unused_tail_length()); | |
1050 | char* c = _raw->data + _off + _len; | |
1051 | maybe_inline_memcpy(c, p, l, 32); | |
1052 | _len += l; | |
1053 | return _len + _off; | |
1054 | } | |
1055 | ||
1056 | void buffer::ptr::copy_in(unsigned o, unsigned l, const char *src) | |
1057 | { | |
1058 | copy_in(o, l, src, true); | |
1059 | } | |
1060 | ||
1061 | void buffer::ptr::copy_in(unsigned o, unsigned l, const char *src, bool crc_reset) | |
1062 | { | |
1063 | assert(_raw); | |
1064 | assert(o <= _len); | |
1065 | assert(o+l <= _len); | |
1066 | char* dest = _raw->data + _off + o; | |
1067 | if (crc_reset) | |
1068 | _raw->invalidate_crc(); | |
1069 | maybe_inline_memcpy(dest, src, l, 64); | |
1070 | } | |
1071 | ||
1072 | void buffer::ptr::zero() | |
1073 | { | |
1074 | zero(true); | |
1075 | } | |
1076 | ||
1077 | void buffer::ptr::zero(bool crc_reset) | |
1078 | { | |
1079 | if (crc_reset) | |
1080 | _raw->invalidate_crc(); | |
1081 | memset(c_str(), 0, _len); | |
1082 | } | |
1083 | ||
1084 | void buffer::ptr::zero(unsigned o, unsigned l) | |
1085 | { | |
1086 | zero(o, l, true); | |
1087 | } | |
1088 | ||
1089 | void buffer::ptr::zero(unsigned o, unsigned l, bool crc_reset) | |
1090 | { | |
1091 | assert(o+l <= _len); | |
1092 | if (crc_reset) | |
1093 | _raw->invalidate_crc(); | |
1094 | memset(c_str()+o, 0, l); | |
1095 | } | |
1096 | bool buffer::ptr::can_zero_copy() const | |
1097 | { | |
1098 | return _raw->can_zero_copy(); | |
1099 | } | |
1100 | ||
1101 | int buffer::ptr::zero_copy_to_fd(int fd, int64_t *offset) const | |
1102 | { | |
1103 | return _raw->zero_copy_to_fd(fd, (loff_t*)offset); | |
1104 | } | |
1105 | ||
1106 | // -- buffer::list::iterator -- | |
1107 | /* | |
1108 | buffer::list::iterator operator=(const buffer::list::iterator& other) | |
1109 | { | |
1110 | if (this != &other) { | |
1111 | bl = other.bl; | |
1112 | ls = other.ls; | |
1113 | off = other.off; | |
1114 | p = other.p; | |
1115 | p_off = other.p_off; | |
1116 | } | |
1117 | return *this; | |
1118 | }*/ | |
1119 | ||
1120 | template<bool is_const> | |
1121 | buffer::list::iterator_impl<is_const>::iterator_impl(bl_t *l, unsigned o) | |
1122 | : bl(l), ls(&bl->_buffers), off(0), p(ls->begin()), p_off(0) | |
1123 | { | |
1124 | advance(o); | |
1125 | } | |
1126 | ||
1127 | template<bool is_const> | |
1128 | buffer::list::iterator_impl<is_const>::iterator_impl(const buffer::list::iterator& i) | |
1129 | : iterator_impl<is_const>(i.bl, i.off, i.p, i.p_off) {} | |
1130 | ||
1131 | template<bool is_const> | |
1132 | void buffer::list::iterator_impl<is_const>::advance(int o) | |
1133 | { | |
1134 | //cout << this << " advance " << o << " from " << off << " (p_off " << p_off << " in " << p->length() << ")" << std::endl; | |
1135 | if (o > 0) { | |
1136 | p_off += o; | |
1137 | while (p_off > 0) { | |
1138 | if (p == ls->end()) | |
1139 | throw end_of_buffer(); | |
1140 | if (p_off >= p->length()) { | |
1141 | // skip this buffer | |
1142 | p_off -= p->length(); | |
1143 | p++; | |
1144 | } else { | |
1145 | // somewhere in this buffer! | |
1146 | break; | |
1147 | } | |
1148 | } | |
1149 | off += o; | |
1150 | return; | |
1151 | } | |
1152 | while (o < 0) { | |
1153 | if (p_off) { | |
1154 | unsigned d = -o; | |
1155 | if (d > p_off) | |
1156 | d = p_off; | |
1157 | p_off -= d; | |
1158 | off -= d; | |
1159 | o += d; | |
1160 | } else if (off > 0) { | |
1161 | assert(p != ls->begin()); | |
1162 | p--; | |
1163 | p_off = p->length(); | |
1164 | } else { | |
1165 | throw end_of_buffer(); | |
1166 | } | |
1167 | } | |
1168 | } | |
1169 | ||
1170 | template<bool is_const> | |
1171 | void buffer::list::iterator_impl<is_const>::seek(unsigned o) | |
1172 | { | |
1173 | p = ls->begin(); | |
1174 | off = p_off = 0; | |
1175 | advance(o); | |
1176 | } | |
1177 | ||
1178 | template<bool is_const> | |
1179 | char buffer::list::iterator_impl<is_const>::operator*() const | |
1180 | { | |
1181 | if (p == ls->end()) | |
1182 | throw end_of_buffer(); | |
1183 | return (*p)[p_off]; | |
1184 | } | |
1185 | ||
1186 | template<bool is_const> | |
1187 | buffer::list::iterator_impl<is_const>& | |
1188 | buffer::list::iterator_impl<is_const>::operator++() | |
1189 | { | |
1190 | if (p == ls->end()) | |
1191 | throw end_of_buffer(); | |
1192 | advance(1); | |
1193 | return *this; | |
1194 | } | |
1195 | ||
1196 | template<bool is_const> | |
1197 | buffer::ptr buffer::list::iterator_impl<is_const>::get_current_ptr() const | |
1198 | { | |
1199 | if (p == ls->end()) | |
1200 | throw end_of_buffer(); | |
1201 | return ptr(*p, p_off, p->length() - p_off); | |
1202 | } | |
1203 | ||
1204 | // copy data out. | |
1205 | // note that these all _append_ to dest! | |
1206 | template<bool is_const> | |
1207 | void buffer::list::iterator_impl<is_const>::copy(unsigned len, char *dest) | |
1208 | { | |
1209 | if (p == ls->end()) seek(off); | |
1210 | while (len > 0) { | |
1211 | if (p == ls->end()) | |
1212 | throw end_of_buffer(); | |
1213 | assert(p->length() > 0); | |
1214 | ||
1215 | unsigned howmuch = p->length() - p_off; | |
1216 | if (len < howmuch) howmuch = len; | |
1217 | p->copy_out(p_off, howmuch, dest); | |
1218 | dest += howmuch; | |
1219 | ||
1220 | len -= howmuch; | |
1221 | advance(howmuch); | |
1222 | } | |
1223 | } | |
1224 | ||
1225 | template<bool is_const> | |
1226 | void buffer::list::iterator_impl<is_const>::copy(unsigned len, ptr &dest) | |
1227 | { | |
1228 | copy_deep(len, dest); | |
1229 | } | |
1230 | ||
1231 | template<bool is_const> | |
1232 | void buffer::list::iterator_impl<is_const>::copy_deep(unsigned len, ptr &dest) | |
1233 | { | |
1234 | if (!len) { | |
1235 | return; | |
1236 | } | |
1237 | if (p == ls->end()) | |
1238 | throw end_of_buffer(); | |
1239 | assert(p->length() > 0); | |
1240 | dest = create(len); | |
1241 | copy(len, dest.c_str()); | |
1242 | } | |
1243 | template<bool is_const> | |
1244 | void buffer::list::iterator_impl<is_const>::copy_shallow(unsigned len, | |
1245 | ptr &dest) | |
1246 | { | |
1247 | if (!len) { | |
1248 | return; | |
1249 | } | |
1250 | if (p == ls->end()) | |
1251 | throw end_of_buffer(); | |
1252 | assert(p->length() > 0); | |
1253 | unsigned howmuch = p->length() - p_off; | |
1254 | if (howmuch < len) { | |
1255 | dest = create(len); | |
1256 | copy(len, dest.c_str()); | |
1257 | } else { | |
1258 | dest = ptr(*p, p_off, len); | |
1259 | advance(len); | |
1260 | } | |
1261 | } | |
1262 | ||
1263 | template<bool is_const> | |
1264 | void buffer::list::iterator_impl<is_const>::copy(unsigned len, list &dest) | |
1265 | { | |
1266 | if (p == ls->end()) | |
1267 | seek(off); | |
1268 | while (len > 0) { | |
1269 | if (p == ls->end()) | |
1270 | throw end_of_buffer(); | |
1271 | ||
1272 | unsigned howmuch = p->length() - p_off; | |
1273 | if (len < howmuch) | |
1274 | howmuch = len; | |
1275 | dest.append(*p, p_off, howmuch); | |
1276 | ||
1277 | len -= howmuch; | |
1278 | advance(howmuch); | |
1279 | } | |
1280 | } | |
1281 | ||
1282 | template<bool is_const> | |
1283 | void buffer::list::iterator_impl<is_const>::copy(unsigned len, std::string &dest) | |
1284 | { | |
1285 | if (p == ls->end()) | |
1286 | seek(off); | |
1287 | while (len > 0) { | |
1288 | if (p == ls->end()) | |
1289 | throw end_of_buffer(); | |
1290 | ||
1291 | unsigned howmuch = p->length() - p_off; | |
1292 | const char *c_str = p->c_str(); | |
1293 | if (len < howmuch) | |
1294 | howmuch = len; | |
1295 | dest.append(c_str + p_off, howmuch); | |
1296 | ||
1297 | len -= howmuch; | |
1298 | advance(howmuch); | |
1299 | } | |
1300 | } | |
1301 | ||
1302 | template<bool is_const> | |
1303 | void buffer::list::iterator_impl<is_const>::copy_all(list &dest) | |
1304 | { | |
1305 | if (p == ls->end()) | |
1306 | seek(off); | |
1307 | while (1) { | |
1308 | if (p == ls->end()) | |
1309 | return; | |
1310 | assert(p->length() > 0); | |
1311 | ||
1312 | unsigned howmuch = p->length() - p_off; | |
1313 | const char *c_str = p->c_str(); | |
1314 | dest.append(c_str + p_off, howmuch); | |
1315 | ||
1316 | advance(howmuch); | |
1317 | } | |
1318 | } | |
1319 | ||
1320 | template<bool is_const> | |
1321 | size_t buffer::list::iterator_impl<is_const>::get_ptr_and_advance( | |
1322 | size_t want, const char **data) | |
1323 | { | |
1324 | if (p == ls->end()) { | |
1325 | seek(off); | |
1326 | if (p == ls->end()) { | |
1327 | return 0; | |
1328 | } | |
1329 | } | |
1330 | *data = p->c_str() + p_off; | |
1331 | size_t l = MIN(p->length() - p_off, want); | |
1332 | p_off += l; | |
1333 | if (p_off == p->length()) { | |
1334 | ++p; | |
1335 | p_off = 0; | |
1336 | } | |
1337 | off += l; | |
1338 | return l; | |
1339 | } | |
1340 | ||
1341 | template<bool is_const> | |
1342 | uint32_t buffer::list::iterator_impl<is_const>::crc32c( | |
1343 | size_t length, uint32_t crc) | |
1344 | { | |
1345 | length = MIN( length, get_remaining()); | |
1346 | while (length > 0) { | |
1347 | const char *p; | |
1348 | size_t l = get_ptr_and_advance(length, &p); | |
1349 | crc = ceph_crc32c(crc, (unsigned char*)p, l); | |
1350 | length -= l; | |
1351 | } | |
1352 | return crc; | |
1353 | } | |
1354 | ||
1355 | // explicitly instantiate only the iterator types we need, so we can hide the | |
1356 | // details in this compilation unit without introducing unnecessary link time | |
1357 | // dependencies. | |
1358 | template class buffer::list::iterator_impl<true>; | |
1359 | template class buffer::list::iterator_impl<false>; | |
1360 | ||
1361 | buffer::list::iterator::iterator(bl_t *l, unsigned o) | |
1362 | : iterator_impl(l, o) | |
1363 | {} | |
1364 | ||
1365 | buffer::list::iterator::iterator(bl_t *l, unsigned o, list_iter_t ip, unsigned po) | |
1366 | : iterator_impl(l, o, ip, po) | |
1367 | {} | |
1368 | ||
1369 | void buffer::list::iterator::advance(int o) | |
1370 | { | |
1371 | buffer::list::iterator_impl<false>::advance(o); | |
1372 | } | |
1373 | ||
1374 | void buffer::list::iterator::seek(unsigned o) | |
1375 | { | |
1376 | buffer::list::iterator_impl<false>::seek(o); | |
1377 | } | |
1378 | ||
1379 | char buffer::list::iterator::operator*() | |
1380 | { | |
1381 | if (p == ls->end()) { | |
1382 | throw end_of_buffer(); | |
1383 | } | |
1384 | return (*p)[p_off]; | |
1385 | } | |
1386 | ||
1387 | buffer::list::iterator& buffer::list::iterator::operator++() | |
1388 | { | |
1389 | buffer::list::iterator_impl<false>::operator++(); | |
1390 | return *this; | |
1391 | } | |
1392 | ||
1393 | buffer::ptr buffer::list::iterator::get_current_ptr() | |
1394 | { | |
1395 | if (p == ls->end()) { | |
1396 | throw end_of_buffer(); | |
1397 | } | |
1398 | return ptr(*p, p_off, p->length() - p_off); | |
1399 | } | |
1400 | ||
1401 | void buffer::list::iterator::copy(unsigned len, char *dest) | |
1402 | { | |
1403 | return buffer::list::iterator_impl<false>::copy(len, dest); | |
1404 | } | |
1405 | ||
1406 | void buffer::list::iterator::copy(unsigned len, ptr &dest) | |
1407 | { | |
1408 | return buffer::list::iterator_impl<false>::copy_deep(len, dest); | |
1409 | } | |
1410 | ||
1411 | void buffer::list::iterator::copy_deep(unsigned len, ptr &dest) | |
1412 | { | |
1413 | buffer::list::iterator_impl<false>::copy_deep(len, dest); | |
1414 | } | |
1415 | ||
1416 | void buffer::list::iterator::copy_shallow(unsigned len, ptr &dest) | |
1417 | { | |
1418 | buffer::list::iterator_impl<false>::copy_shallow(len, dest); | |
1419 | } | |
1420 | ||
1421 | void buffer::list::iterator::copy(unsigned len, list &dest) | |
1422 | { | |
1423 | buffer::list::iterator_impl<false>::copy(len, dest); | |
1424 | } | |
1425 | ||
1426 | void buffer::list::iterator::copy(unsigned len, std::string &dest) | |
1427 | { | |
1428 | buffer::list::iterator_impl<false>::copy(len, dest); | |
1429 | } | |
1430 | ||
1431 | void buffer::list::iterator::copy_all(list &dest) | |
1432 | { | |
1433 | buffer::list::iterator_impl<false>::copy_all(dest); | |
1434 | } | |
1435 | ||
1436 | void buffer::list::iterator::copy_in(unsigned len, const char *src) | |
1437 | { | |
1438 | copy_in(len, src, true); | |
1439 | } | |
1440 | ||
1441 | // copy data in | |
1442 | void buffer::list::iterator::copy_in(unsigned len, const char *src, bool crc_reset) | |
1443 | { | |
1444 | // copy | |
1445 | if (p == ls->end()) | |
1446 | seek(off); | |
1447 | while (len > 0) { | |
1448 | if (p == ls->end()) | |
1449 | throw end_of_buffer(); | |
1450 | ||
1451 | unsigned howmuch = p->length() - p_off; | |
1452 | if (len < howmuch) | |
1453 | howmuch = len; | |
1454 | p->copy_in(p_off, howmuch, src, crc_reset); | |
1455 | ||
1456 | src += howmuch; | |
1457 | len -= howmuch; | |
1458 | advance(howmuch); | |
1459 | } | |
1460 | } | |
1461 | ||
1462 | void buffer::list::iterator::copy_in(unsigned len, const list& otherl) | |
1463 | { | |
1464 | if (p == ls->end()) | |
1465 | seek(off); | |
1466 | unsigned left = len; | |
1467 | for (std::list<ptr>::const_iterator i = otherl._buffers.begin(); | |
1468 | i != otherl._buffers.end(); | |
1469 | ++i) { | |
1470 | unsigned l = (*i).length(); | |
1471 | if (left < l) | |
1472 | l = left; | |
1473 | copy_in(l, i->c_str()); | |
1474 | left -= l; | |
1475 | if (left == 0) | |
1476 | break; | |
1477 | } | |
1478 | } | |
1479 | ||
1480 | // -- buffer::list -- | |
1481 | ||
1482 | buffer::list::list(list&& other) | |
1483 | : _buffers(std::move(other._buffers)), | |
1484 | _len(other._len), | |
1485 | _memcopy_count(other._memcopy_count), | |
1486 | last_p(this) { | |
1487 | append_buffer.swap(other.append_buffer); | |
1488 | other.clear(); | |
1489 | } | |
1490 | ||
1491 | void buffer::list::swap(list& other) | |
1492 | { | |
1493 | std::swap(_len, other._len); | |
1494 | std::swap(_memcopy_count, other._memcopy_count); | |
31f18b77 | 1495 | std::swap(_mempool, other._mempool); |
7c673cae FG |
1496 | _buffers.swap(other._buffers); |
1497 | append_buffer.swap(other.append_buffer); | |
1498 | //last_p.swap(other.last_p); | |
1499 | last_p = begin(); | |
1500 | other.last_p = other.begin(); | |
1501 | } | |
1502 | ||
1503 | bool buffer::list::contents_equal(buffer::list& other) | |
1504 | { | |
1505 | return static_cast<const buffer::list*>(this)->contents_equal(other); | |
1506 | } | |
1507 | ||
1508 | bool buffer::list::contents_equal(const ceph::buffer::list& other) const | |
1509 | { | |
1510 | if (length() != other.length()) | |
1511 | return false; | |
1512 | ||
1513 | // buffer-wise comparison | |
1514 | if (true) { | |
1515 | std::list<ptr>::const_iterator a = _buffers.begin(); | |
1516 | std::list<ptr>::const_iterator b = other._buffers.begin(); | |
1517 | unsigned aoff = 0, boff = 0; | |
1518 | while (a != _buffers.end()) { | |
1519 | unsigned len = a->length() - aoff; | |
1520 | if (len > b->length() - boff) | |
1521 | len = b->length() - boff; | |
1522 | if (memcmp(a->c_str() + aoff, b->c_str() + boff, len) != 0) | |
1523 | return false; | |
1524 | aoff += len; | |
1525 | if (aoff == a->length()) { | |
1526 | aoff = 0; | |
1527 | ++a; | |
1528 | } | |
1529 | boff += len; | |
1530 | if (boff == b->length()) { | |
1531 | boff = 0; | |
1532 | ++b; | |
1533 | } | |
1534 | } | |
1535 | assert(b == other._buffers.end()); | |
1536 | return true; | |
1537 | } | |
1538 | ||
1539 | // byte-wise comparison | |
1540 | if (false) { | |
1541 | bufferlist::const_iterator me = begin(); | |
1542 | bufferlist::const_iterator him = other.begin(); | |
1543 | while (!me.end()) { | |
1544 | if (*me != *him) | |
1545 | return false; | |
1546 | ++me; | |
1547 | ++him; | |
1548 | } | |
1549 | return true; | |
1550 | } | |
1551 | } | |
1552 | ||
1553 | bool buffer::list::can_zero_copy() const | |
1554 | { | |
1555 | for (std::list<ptr>::const_iterator it = _buffers.begin(); | |
1556 | it != _buffers.end(); | |
1557 | ++it) | |
1558 | if (!it->can_zero_copy()) | |
1559 | return false; | |
1560 | return true; | |
1561 | } | |
1562 | ||
1563 | bool buffer::list::is_provided_buffer(const char *dst) const | |
1564 | { | |
1565 | if (_buffers.empty()) | |
1566 | return false; | |
1567 | return (is_contiguous() && (_buffers.front().c_str() == dst)); | |
1568 | } | |
1569 | ||
1570 | bool buffer::list::is_aligned(unsigned align) const | |
1571 | { | |
1572 | for (std::list<ptr>::const_iterator it = _buffers.begin(); | |
1573 | it != _buffers.end(); | |
1574 | ++it) | |
1575 | if (!it->is_aligned(align)) | |
1576 | return false; | |
1577 | return true; | |
1578 | } | |
1579 | ||
1580 | bool buffer::list::is_n_align_sized(unsigned align) const | |
1581 | { | |
1582 | for (std::list<ptr>::const_iterator it = _buffers.begin(); | |
1583 | it != _buffers.end(); | |
1584 | ++it) | |
1585 | if (!it->is_n_align_sized(align)) | |
1586 | return false; | |
1587 | return true; | |
1588 | } | |
1589 | ||
1590 | bool buffer::list::is_aligned_size_and_memory(unsigned align_size, | |
1591 | unsigned align_memory) const | |
1592 | { | |
1593 | for (std::list<ptr>::const_iterator it = _buffers.begin(); | |
1594 | it != _buffers.end(); | |
1595 | ++it) { | |
1596 | if (!it->is_aligned(align_memory) || !it->is_n_align_sized(align_size)) | |
1597 | return false; | |
1598 | } | |
1599 | return true; | |
1600 | } | |
1601 | ||
1602 | bool buffer::list::is_zero() const { | |
1603 | for (std::list<ptr>::const_iterator it = _buffers.begin(); | |
1604 | it != _buffers.end(); | |
1605 | ++it) { | |
1606 | if (!it->is_zero()) { | |
1607 | return false; | |
1608 | } | |
1609 | } | |
1610 | return true; | |
1611 | } | |
1612 | ||
1613 | void buffer::list::zero() | |
1614 | { | |
1615 | for (std::list<ptr>::iterator it = _buffers.begin(); | |
1616 | it != _buffers.end(); | |
1617 | ++it) | |
1618 | it->zero(); | |
1619 | } | |
1620 | ||
1621 | void buffer::list::zero(unsigned o, unsigned l) | |
1622 | { | |
1623 | assert(o+l <= _len); | |
1624 | unsigned p = 0; | |
1625 | for (std::list<ptr>::iterator it = _buffers.begin(); | |
1626 | it != _buffers.end(); | |
1627 | ++it) { | |
1628 | if (p + it->length() > o) { | |
1629 | if (p >= o && p+it->length() <= o+l) { | |
1630 | // 'o'------------- l -----------| | |
1631 | // 'p'-- it->length() --| | |
1632 | it->zero(); | |
1633 | } else if (p >= o) { | |
1634 | // 'o'------------- l -----------| | |
1635 | // 'p'------- it->length() -------| | |
1636 | it->zero(0, o+l-p); | |
1637 | } else if (p + it->length() <= o+l) { | |
1638 | // 'o'------------- l -----------| | |
1639 | // 'p'------- it->length() -------| | |
1640 | it->zero(o-p, it->length()-(o-p)); | |
1641 | } else { | |
1642 | // 'o'----------- l -----------| | |
1643 | // 'p'---------- it->length() ----------| | |
1644 | it->zero(o-p, l); | |
1645 | } | |
1646 | } | |
1647 | p += it->length(); | |
1648 | if (o+l <= p) | |
1649 | break; // done | |
1650 | } | |
1651 | } | |
1652 | ||
1653 | bool buffer::list::is_contiguous() const | |
1654 | { | |
1655 | return &(*_buffers.begin()) == &(*_buffers.rbegin()); | |
1656 | } | |
1657 | ||
1658 | bool buffer::list::is_n_page_sized() const | |
1659 | { | |
1660 | return is_n_align_sized(CEPH_PAGE_SIZE); | |
1661 | } | |
1662 | ||
1663 | bool buffer::list::is_page_aligned() const | |
1664 | { | |
1665 | return is_aligned(CEPH_PAGE_SIZE); | |
1666 | } | |
1667 | ||
31f18b77 FG |
1668 | void buffer::list::reassign_to_mempool(int pool) |
1669 | { | |
1670 | _mempool = pool; | |
1671 | if (append_buffer.get_raw()) { | |
1672 | append_buffer.get_raw()->reassign_to_mempool(pool); | |
1673 | } | |
1674 | for (auto& p : _buffers) { | |
1675 | p.get_raw()->reassign_to_mempool(pool); | |
1676 | } | |
1677 | } | |
1678 | ||
1679 | void buffer::list::try_assign_to_mempool(int pool) | |
1680 | { | |
1681 | _mempool = pool; | |
1682 | if (append_buffer.get_raw()) { | |
1683 | append_buffer.get_raw()->try_assign_to_mempool(pool); | |
1684 | } | |
1685 | for (auto& p : _buffers) { | |
1686 | p.get_raw()->try_assign_to_mempool(pool); | |
1687 | } | |
1688 | } | |
1689 | ||
7c673cae FG |
1690 | void buffer::list::rebuild() |
1691 | { | |
1692 | if (_len == 0) { | |
1693 | _buffers.clear(); | |
1694 | return; | |
1695 | } | |
1696 | ptr nb; | |
1697 | if ((_len & ~CEPH_PAGE_MASK) == 0) | |
1698 | nb = buffer::create_page_aligned(_len); | |
1699 | else | |
1700 | nb = buffer::create(_len); | |
1701 | rebuild(nb); | |
1702 | } | |
1703 | ||
1704 | void buffer::list::rebuild(ptr& nb) | |
1705 | { | |
1706 | unsigned pos = 0; | |
1707 | for (std::list<ptr>::iterator it = _buffers.begin(); | |
1708 | it != _buffers.end(); | |
1709 | ++it) { | |
1710 | nb.copy_in(pos, it->length(), it->c_str(), false); | |
1711 | pos += it->length(); | |
1712 | } | |
1713 | _memcopy_count += pos; | |
1714 | _buffers.clear(); | |
1715 | if (nb.length()) | |
1716 | _buffers.push_back(nb); | |
1717 | invalidate_crc(); | |
1718 | last_p = begin(); | |
1719 | } | |
1720 | ||
1721 | bool buffer::list::rebuild_aligned(unsigned align) | |
1722 | { | |
1723 | return rebuild_aligned_size_and_memory(align, align); | |
1724 | } | |
1725 | ||
1726 | bool buffer::list::rebuild_aligned_size_and_memory(unsigned align_size, | |
1727 | unsigned align_memory) | |
1728 | { | |
1729 | unsigned old_memcopy_count = _memcopy_count; | |
1730 | std::list<ptr>::iterator p = _buffers.begin(); | |
1731 | while (p != _buffers.end()) { | |
1732 | // keep anything that's already align and sized aligned | |
1733 | if (p->is_aligned(align_memory) && p->is_n_align_sized(align_size)) { | |
1734 | /*cout << " segment " << (void*)p->c_str() | |
1735 | << " offset " << ((unsigned long)p->c_str() & (align - 1)) | |
1736 | << " length " << p->length() | |
1737 | << " " << (p->length() & (align - 1)) << " ok" << std::endl; | |
1738 | */ | |
1739 | ++p; | |
1740 | continue; | |
1741 | } | |
1742 | ||
1743 | // consolidate unaligned items, until we get something that is sized+aligned | |
1744 | list unaligned; | |
1745 | unsigned offset = 0; | |
1746 | do { | |
1747 | /*cout << " segment " << (void*)p->c_str() | |
1748 | << " offset " << ((unsigned long)p->c_str() & (align - 1)) | |
1749 | << " length " << p->length() << " " << (p->length() & (align - 1)) | |
1750 | << " overall offset " << offset << " " << (offset & (align - 1)) | |
1751 | << " not ok" << std::endl; | |
1752 | */ | |
1753 | offset += p->length(); | |
1754 | unaligned.push_back(*p); | |
1755 | _buffers.erase(p++); | |
1756 | } while (p != _buffers.end() && | |
1757 | (!p->is_aligned(align_memory) || | |
1758 | !p->is_n_align_sized(align_size) || | |
1759 | (offset % align_size))); | |
1760 | if (!(unaligned.is_contiguous() && unaligned._buffers.front().is_aligned(align_memory))) { | |
1761 | ptr nb(buffer::create_aligned(unaligned._len, align_memory)); | |
1762 | unaligned.rebuild(nb); | |
1763 | _memcopy_count += unaligned._len; | |
1764 | } | |
1765 | _buffers.insert(p, unaligned._buffers.front()); | |
1766 | } | |
1767 | last_p = begin(); | |
1768 | ||
1769 | return (old_memcopy_count != _memcopy_count); | |
1770 | } | |
1771 | ||
1772 | bool buffer::list::rebuild_page_aligned() | |
1773 | { | |
1774 | return rebuild_aligned(CEPH_PAGE_SIZE); | |
1775 | } | |
1776 | ||
31f18b77 FG |
1777 | void buffer::list::reserve(size_t prealloc) |
1778 | { | |
1779 | if (append_buffer.unused_tail_length() < prealloc) { | |
1780 | append_buffer = buffer::create(prealloc); | |
1781 | if (_mempool >= 0) { | |
1782 | append_buffer.get_raw()->reassign_to_mempool(_mempool); | |
1783 | } | |
1784 | append_buffer.set_length(0); // unused, so far. | |
1785 | } | |
1786 | } | |
1787 | ||
7c673cae FG |
1788 | // sort-of-like-assignment-op |
1789 | void buffer::list::claim(list& bl, unsigned int flags) | |
1790 | { | |
1791 | // free my buffers | |
1792 | clear(); | |
1793 | claim_append(bl, flags); | |
1794 | } | |
1795 | ||
1796 | void buffer::list::claim_append(list& bl, unsigned int flags) | |
1797 | { | |
1798 | // steal the other guy's buffers | |
1799 | _len += bl._len; | |
1800 | if (!(flags & CLAIM_ALLOW_NONSHAREABLE)) | |
1801 | bl.make_shareable(); | |
1802 | _buffers.splice(_buffers.end(), bl._buffers ); | |
1803 | bl._len = 0; | |
1804 | bl.last_p = bl.begin(); | |
1805 | } | |
1806 | ||
1807 | void buffer::list::claim_prepend(list& bl, unsigned int flags) | |
1808 | { | |
1809 | // steal the other guy's buffers | |
1810 | _len += bl._len; | |
1811 | if (!(flags & CLAIM_ALLOW_NONSHAREABLE)) | |
1812 | bl.make_shareable(); | |
1813 | _buffers.splice(_buffers.begin(), bl._buffers ); | |
1814 | bl._len = 0; | |
1815 | bl.last_p = bl.begin(); | |
1816 | } | |
1817 | ||
31f18b77 FG |
1818 | void buffer::list::claim_append_piecewise(list& bl) |
1819 | { | |
1820 | // steal the other guy's buffers | |
1821 | for (std::list<buffer::ptr>::const_iterator i = bl.buffers().begin(); | |
1822 | i != bl.buffers().end(); i++) { | |
1823 | append(*i, 0, i->length()); | |
1824 | } | |
1825 | bl.clear(); | |
1826 | } | |
1827 | ||
7c673cae FG |
1828 | void buffer::list::copy(unsigned off, unsigned len, char *dest) const |
1829 | { | |
1830 | if (off + len > length()) | |
1831 | throw end_of_buffer(); | |
1832 | if (last_p.get_off() != off) | |
1833 | last_p.seek(off); | |
1834 | last_p.copy(len, dest); | |
1835 | } | |
1836 | ||
1837 | void buffer::list::copy(unsigned off, unsigned len, list &dest) const | |
1838 | { | |
1839 | if (off + len > length()) | |
1840 | throw end_of_buffer(); | |
1841 | if (last_p.get_off() != off) | |
1842 | last_p.seek(off); | |
1843 | last_p.copy(len, dest); | |
1844 | } | |
1845 | ||
1846 | void buffer::list::copy(unsigned off, unsigned len, std::string& dest) const | |
1847 | { | |
1848 | if (last_p.get_off() != off) | |
1849 | last_p.seek(off); | |
1850 | return last_p.copy(len, dest); | |
1851 | } | |
1852 | ||
1853 | void buffer::list::copy_in(unsigned off, unsigned len, const char *src) | |
1854 | { | |
1855 | copy_in(off, len, src, true); | |
1856 | } | |
1857 | ||
1858 | void buffer::list::copy_in(unsigned off, unsigned len, const char *src, bool crc_reset) | |
1859 | { | |
1860 | if (off + len > length()) | |
1861 | throw end_of_buffer(); | |
1862 | ||
1863 | if (last_p.get_off() != off) | |
1864 | last_p.seek(off); | |
1865 | last_p.copy_in(len, src, crc_reset); | |
1866 | } | |
1867 | ||
1868 | void buffer::list::copy_in(unsigned off, unsigned len, const list& src) | |
1869 | { | |
1870 | if (last_p.get_off() != off) | |
1871 | last_p.seek(off); | |
1872 | last_p.copy_in(len, src); | |
1873 | } | |
1874 | ||
1875 | void buffer::list::append(char c) | |
1876 | { | |
1877 | // put what we can into the existing append_buffer. | |
1878 | unsigned gap = append_buffer.unused_tail_length(); | |
1879 | if (!gap) { | |
1880 | // make a new append_buffer! | |
1881 | append_buffer = raw_combined::create(CEPH_BUFFER_APPEND_SIZE); | |
1882 | append_buffer.set_length(0); // unused, so far. | |
31f18b77 FG |
1883 | if (_mempool >= 0) { |
1884 | append_buffer.get_raw()->reassign_to_mempool(_mempool); | |
1885 | } | |
7c673cae FG |
1886 | } |
1887 | append(append_buffer, append_buffer.append(c) - 1, 1); // add segment to the list | |
1888 | } | |
1889 | ||
1890 | void buffer::list::append(const char *data, unsigned len) | |
1891 | { | |
1892 | while (len > 0) { | |
1893 | // put what we can into the existing append_buffer. | |
1894 | unsigned gap = append_buffer.unused_tail_length(); | |
1895 | if (gap > 0) { | |
1896 | if (gap > len) gap = len; | |
1897 | //cout << "append first char is " << data[0] << ", last char is " << data[len-1] << std::endl; | |
1898 | append_buffer.append(data, gap); | |
1899 | append(append_buffer, append_buffer.length() - gap, gap); // add segment to the list | |
1900 | len -= gap; | |
1901 | data += gap; | |
1902 | } | |
1903 | if (len == 0) | |
1904 | break; // done! | |
1905 | ||
1906 | // make a new append_buffer. fill out a complete page, factoring in the | |
1907 | // raw_combined overhead. | |
1908 | size_t need = ROUND_UP_TO(len, sizeof(size_t)) + sizeof(raw_combined); | |
1909 | size_t alen = ROUND_UP_TO(need, CEPH_BUFFER_ALLOC_UNIT) - | |
1910 | sizeof(raw_combined); | |
1911 | append_buffer = raw_combined::create(alen); | |
1912 | append_buffer.set_length(0); // unused, so far. | |
31f18b77 FG |
1913 | if (_mempool >= 0) { |
1914 | append_buffer.get_raw()->reassign_to_mempool(_mempool); | |
1915 | } | |
7c673cae FG |
1916 | } |
1917 | } | |
1918 | ||
1919 | void buffer::list::append(const ptr& bp) | |
1920 | { | |
1921 | if (bp.length()) | |
1922 | push_back(bp); | |
1923 | } | |
1924 | ||
1925 | void buffer::list::append(ptr&& bp) | |
1926 | { | |
1927 | if (bp.length()) | |
1928 | push_back(std::move(bp)); | |
1929 | } | |
1930 | ||
1931 | void buffer::list::append(const ptr& bp, unsigned off, unsigned len) | |
1932 | { | |
1933 | assert(len+off <= bp.length()); | |
1934 | if (!_buffers.empty()) { | |
1935 | ptr &l = _buffers.back(); | |
1936 | if (l.get_raw() == bp.get_raw() && | |
1937 | l.end() == bp.start() + off) { | |
1938 | // yay contiguous with tail bp! | |
1939 | l.set_length(l.length()+len); | |
1940 | _len += len; | |
1941 | return; | |
1942 | } | |
1943 | } | |
1944 | // add new item to list | |
1945 | push_back(ptr(bp, off, len)); | |
1946 | } | |
1947 | ||
1948 | void buffer::list::append(const list& bl) | |
1949 | { | |
1950 | _len += bl._len; | |
1951 | for (std::list<ptr>::const_iterator p = bl._buffers.begin(); | |
1952 | p != bl._buffers.end(); | |
1953 | ++p) | |
1954 | _buffers.push_back(*p); | |
1955 | } | |
1956 | ||
1957 | void buffer::list::append(std::istream& in) | |
1958 | { | |
1959 | while (!in.eof()) { | |
1960 | std::string s; | |
1961 | getline(in, s); | |
1962 | append(s.c_str(), s.length()); | |
1963 | if (s.length()) | |
1964 | append("\n", 1); | |
1965 | } | |
1966 | } | |
1967 | ||
1968 | void buffer::list::prepend_zero(unsigned len) | |
1969 | { | |
1970 | ptr bp(len); | |
1971 | bp.zero(false); | |
1972 | _len += len; | |
1973 | _buffers.emplace_front(std::move(bp)); | |
1974 | } | |
1975 | ||
1976 | void buffer::list::append_zero(unsigned len) | |
1977 | { | |
1978 | ptr bp(len); | |
1979 | bp.zero(false); | |
1980 | append(std::move(bp)); | |
1981 | } | |
1982 | ||
1983 | ||
1984 | /* | |
1985 | * get a char | |
1986 | */ | |
1987 | const char& buffer::list::operator[](unsigned n) const | |
1988 | { | |
1989 | if (n >= _len) | |
1990 | throw end_of_buffer(); | |
1991 | ||
1992 | for (std::list<ptr>::const_iterator p = _buffers.begin(); | |
1993 | p != _buffers.end(); | |
1994 | ++p) { | |
1995 | if (n >= p->length()) { | |
1996 | n -= p->length(); | |
1997 | continue; | |
1998 | } | |
1999 | return (*p)[n]; | |
2000 | } | |
2001 | ceph_abort(); | |
2002 | } | |
2003 | ||
2004 | /* | |
2005 | * return a contiguous ptr to whole bufferlist contents. | |
2006 | */ | |
2007 | char *buffer::list::c_str() | |
2008 | { | |
2009 | if (_buffers.empty()) | |
2010 | return 0; // no buffers | |
2011 | ||
2012 | std::list<ptr>::const_iterator iter = _buffers.begin(); | |
2013 | ++iter; | |
2014 | ||
2015 | if (iter != _buffers.end()) | |
2016 | rebuild(); | |
2017 | return _buffers.front().c_str(); // good, we're already contiguous. | |
2018 | } | |
2019 | ||
2020 | string buffer::list::to_str() const { | |
2021 | string s; | |
2022 | s.reserve(length()); | |
2023 | for (std::list<ptr>::const_iterator p = _buffers.begin(); | |
2024 | p != _buffers.end(); | |
2025 | ++p) { | |
2026 | if (p->length()) { | |
2027 | s.append(p->c_str(), p->length()); | |
2028 | } | |
2029 | } | |
2030 | return s; | |
2031 | } | |
2032 | ||
2033 | char *buffer::list::get_contiguous(unsigned orig_off, unsigned len) | |
2034 | { | |
2035 | if (orig_off + len > length()) | |
2036 | throw end_of_buffer(); | |
2037 | ||
2038 | if (len == 0) { | |
2039 | return 0; | |
2040 | } | |
2041 | ||
2042 | unsigned off = orig_off; | |
2043 | std::list<ptr>::iterator curbuf = _buffers.begin(); | |
2044 | while (off > 0 && off >= curbuf->length()) { | |
2045 | off -= curbuf->length(); | |
2046 | ++curbuf; | |
2047 | } | |
2048 | ||
2049 | if (off + len > curbuf->length()) { | |
2050 | bufferlist tmp; | |
2051 | unsigned l = off + len; | |
2052 | ||
2053 | do { | |
2054 | if (l >= curbuf->length()) | |
2055 | l -= curbuf->length(); | |
2056 | else | |
2057 | l = 0; | |
2058 | tmp.append(*curbuf); | |
2059 | curbuf = _buffers.erase(curbuf); | |
2060 | ||
2061 | } while (curbuf != _buffers.end() && l > 0); | |
2062 | ||
2063 | assert(l == 0); | |
2064 | ||
2065 | tmp.rebuild(); | |
2066 | _buffers.insert(curbuf, tmp._buffers.front()); | |
2067 | return tmp.c_str() + off; | |
2068 | } | |
2069 | ||
2070 | last_p = begin(); // we modified _buffers | |
2071 | ||
2072 | return curbuf->c_str() + off; | |
2073 | } | |
2074 | ||
2075 | void buffer::list::substr_of(const list& other, unsigned off, unsigned len) | |
2076 | { | |
2077 | if (off + len > other.length()) | |
2078 | throw end_of_buffer(); | |
2079 | ||
2080 | clear(); | |
2081 | ||
2082 | // skip off | |
2083 | std::list<ptr>::const_iterator curbuf = other._buffers.begin(); | |
2084 | while (off > 0 && | |
2085 | off >= curbuf->length()) { | |
2086 | // skip this buffer | |
2087 | //cout << "skipping over " << *curbuf << std::endl; | |
2088 | off -= (*curbuf).length(); | |
2089 | ++curbuf; | |
2090 | } | |
2091 | assert(len == 0 || curbuf != other._buffers.end()); | |
2092 | ||
2093 | while (len > 0) { | |
2094 | // partial? | |
2095 | if (off + len < curbuf->length()) { | |
2096 | //cout << "copying partial of " << *curbuf << std::endl; | |
2097 | _buffers.push_back( ptr( *curbuf, off, len ) ); | |
2098 | _len += len; | |
2099 | break; | |
2100 | } | |
2101 | ||
2102 | // through end | |
2103 | //cout << "copying end (all?) of " << *curbuf << std::endl; | |
2104 | unsigned howmuch = curbuf->length() - off; | |
2105 | _buffers.push_back( ptr( *curbuf, off, howmuch ) ); | |
2106 | _len += howmuch; | |
2107 | len -= howmuch; | |
2108 | off = 0; | |
2109 | ++curbuf; | |
2110 | } | |
2111 | } | |
2112 | ||
2113 | // funky modifer | |
2114 | void buffer::list::splice(unsigned off, unsigned len, list *claim_by /*, bufferlist& replace_with */) | |
2115 | { // fixme? | |
2116 | if (len == 0) | |
2117 | return; | |
2118 | ||
2119 | if (off >= length()) | |
2120 | throw end_of_buffer(); | |
2121 | ||
2122 | assert(len > 0); | |
2123 | //cout << "splice off " << off << " len " << len << " ... mylen = " << length() << std::endl; | |
2124 | ||
2125 | // skip off | |
2126 | std::list<ptr>::iterator curbuf = _buffers.begin(); | |
2127 | while (off > 0) { | |
2128 | assert(curbuf != _buffers.end()); | |
2129 | if (off >= (*curbuf).length()) { | |
2130 | // skip this buffer | |
2131 | //cout << "off = " << off << " skipping over " << *curbuf << std::endl; | |
2132 | off -= (*curbuf).length(); | |
2133 | ++curbuf; | |
2134 | } else { | |
2135 | // somewhere in this buffer! | |
2136 | //cout << "off = " << off << " somewhere in " << *curbuf << std::endl; | |
2137 | break; | |
2138 | } | |
2139 | } | |
2140 | ||
2141 | if (off) { | |
2142 | // add a reference to the front bit | |
2143 | // insert it before curbuf (which we'll hose) | |
2144 | //cout << "keeping front " << off << " of " << *curbuf << std::endl; | |
2145 | _buffers.insert( curbuf, ptr( *curbuf, 0, off ) ); | |
2146 | _len += off; | |
2147 | } | |
2148 | ||
2149 | while (len > 0) { | |
2150 | // partial? | |
2151 | if (off + len < (*curbuf).length()) { | |
2152 | //cout << "keeping end of " << *curbuf << ", losing first " << off+len << std::endl; | |
2153 | if (claim_by) | |
2154 | claim_by->append( *curbuf, off, len ); | |
2155 | (*curbuf).set_offset( off+len + (*curbuf).offset() ); // ignore beginning big | |
2156 | (*curbuf).set_length( (*curbuf).length() - (len+off) ); | |
2157 | _len -= off+len; | |
2158 | //cout << " now " << *curbuf << std::endl; | |
2159 | break; | |
2160 | } | |
2161 | ||
2162 | // hose though the end | |
2163 | unsigned howmuch = (*curbuf).length() - off; | |
2164 | //cout << "discarding " << howmuch << " of " << *curbuf << std::endl; | |
2165 | if (claim_by) | |
2166 | claim_by->append( *curbuf, off, howmuch ); | |
2167 | _len -= (*curbuf).length(); | |
2168 | _buffers.erase( curbuf++ ); | |
2169 | len -= howmuch; | |
2170 | off = 0; | |
2171 | } | |
2172 | ||
2173 | // splice in *replace (implement me later?) | |
2174 | ||
2175 | last_p = begin(); // just in case we were in the removed region. | |
2176 | } | |
2177 | ||
2178 | void buffer::list::write(int off, int len, std::ostream& out) const | |
2179 | { | |
2180 | list s; | |
2181 | s.substr_of(*this, off, len); | |
2182 | for (std::list<ptr>::const_iterator it = s._buffers.begin(); | |
2183 | it != s._buffers.end(); | |
2184 | ++it) | |
2185 | if (it->length()) | |
2186 | out.write(it->c_str(), it->length()); | |
2187 | /*iterator p(this, off); | |
2188 | while (len > 0 && !p.end()) { | |
2189 | int l = p.left_in_this_buf(); | |
2190 | if (l > len) | |
2191 | l = len; | |
2192 | out.write(p.c_str(), l); | |
2193 | len -= l; | |
2194 | }*/ | |
2195 | } | |
2196 | ||
2197 | void buffer::list::encode_base64(buffer::list& o) | |
2198 | { | |
2199 | bufferptr bp(length() * 4 / 3 + 3); | |
2200 | int l = ceph_armor(bp.c_str(), bp.c_str() + bp.length(), c_str(), c_str() + length()); | |
2201 | bp.set_length(l); | |
2202 | o.push_back(std::move(bp)); | |
2203 | } | |
2204 | ||
2205 | void buffer::list::decode_base64(buffer::list& e) | |
2206 | { | |
2207 | bufferptr bp(4 + ((e.length() * 3) / 4)); | |
2208 | int l = ceph_unarmor(bp.c_str(), bp.c_str() + bp.length(), e.c_str(), e.c_str() + e.length()); | |
2209 | if (l < 0) { | |
2210 | std::ostringstream oss; | |
2211 | oss << "decode_base64: decoding failed:\n"; | |
2212 | hexdump(oss); | |
2213 | throw buffer::malformed_input(oss.str().c_str()); | |
2214 | } | |
2215 | assert(l <= (int)bp.length()); | |
2216 | bp.set_length(l); | |
2217 | push_back(std::move(bp)); | |
2218 | } | |
2219 | ||
2220 | ||
2221 | ||
2222 | int buffer::list::read_file(const char *fn, std::string *error) | |
2223 | { | |
2224 | int fd = TEMP_FAILURE_RETRY(::open(fn, O_RDONLY)); | |
2225 | if (fd < 0) { | |
2226 | int err = errno; | |
2227 | std::ostringstream oss; | |
2228 | oss << "can't open " << fn << ": " << cpp_strerror(err); | |
2229 | *error = oss.str(); | |
2230 | return -err; | |
2231 | } | |
2232 | ||
2233 | struct stat st; | |
2234 | memset(&st, 0, sizeof(st)); | |
2235 | if (::fstat(fd, &st) < 0) { | |
2236 | int err = errno; | |
2237 | std::ostringstream oss; | |
2238 | oss << "bufferlist::read_file(" << fn << "): stat error: " | |
2239 | << cpp_strerror(err); | |
2240 | *error = oss.str(); | |
2241 | VOID_TEMP_FAILURE_RETRY(::close(fd)); | |
2242 | return -err; | |
2243 | } | |
2244 | ||
2245 | ssize_t ret = read_fd(fd, st.st_size); | |
2246 | if (ret < 0) { | |
2247 | std::ostringstream oss; | |
2248 | oss << "bufferlist::read_file(" << fn << "): read error:" | |
2249 | << cpp_strerror(ret); | |
2250 | *error = oss.str(); | |
2251 | VOID_TEMP_FAILURE_RETRY(::close(fd)); | |
2252 | return ret; | |
2253 | } | |
2254 | else if (ret != st.st_size) { | |
2255 | // Premature EOF. | |
2256 | // Perhaps the file changed between stat() and read()? | |
2257 | std::ostringstream oss; | |
2258 | oss << "bufferlist::read_file(" << fn << "): warning: got premature EOF."; | |
2259 | *error = oss.str(); | |
2260 | // not actually an error, but weird | |
2261 | } | |
2262 | VOID_TEMP_FAILURE_RETRY(::close(fd)); | |
2263 | return 0; | |
2264 | } | |
2265 | ||
2266 | ssize_t buffer::list::read_fd(int fd, size_t len) | |
2267 | { | |
2268 | // try zero copy first | |
2269 | if (false && read_fd_zero_copy(fd, len) == 0) { | |
2270 | // TODO fix callers to not require correct read size, which is not | |
2271 | // available for raw_pipe until we actually inspect the data | |
2272 | return 0; | |
2273 | } | |
2274 | bufferptr bp = buffer::create(len); | |
2275 | ssize_t ret = safe_read(fd, (void*)bp.c_str(), len); | |
2276 | if (ret >= 0) { | |
2277 | bp.set_length(ret); | |
2278 | append(std::move(bp)); | |
2279 | } | |
2280 | return ret; | |
2281 | } | |
2282 | ||
2283 | int buffer::list::read_fd_zero_copy(int fd, size_t len) | |
2284 | { | |
2285 | #ifdef CEPH_HAVE_SPLICE | |
2286 | try { | |
2287 | append(buffer::create_zero_copy(len, fd, NULL)); | |
2288 | } catch (buffer::error_code &e) { | |
2289 | return e.code; | |
2290 | } catch (buffer::malformed_input &e) { | |
2291 | return -EIO; | |
2292 | } | |
2293 | return 0; | |
2294 | #else | |
2295 | return -ENOTSUP; | |
2296 | #endif | |
2297 | } | |
2298 | ||
2299 | int buffer::list::write_file(const char *fn, int mode) | |
2300 | { | |
2301 | int fd = TEMP_FAILURE_RETRY(::open(fn, O_WRONLY|O_CREAT|O_TRUNC, mode)); | |
2302 | if (fd < 0) { | |
2303 | int err = errno; | |
2304 | cerr << "bufferlist::write_file(" << fn << "): failed to open file: " | |
2305 | << cpp_strerror(err) << std::endl; | |
2306 | return -err; | |
2307 | } | |
2308 | int ret = write_fd(fd); | |
2309 | if (ret) { | |
2310 | cerr << "bufferlist::write_fd(" << fn << "): write_fd error: " | |
2311 | << cpp_strerror(ret) << std::endl; | |
2312 | VOID_TEMP_FAILURE_RETRY(::close(fd)); | |
2313 | return ret; | |
2314 | } | |
2315 | if (TEMP_FAILURE_RETRY(::close(fd))) { | |
2316 | int err = errno; | |
2317 | cerr << "bufferlist::write_file(" << fn << "): close error: " | |
2318 | << cpp_strerror(err) << std::endl; | |
2319 | return -err; | |
2320 | } | |
2321 | return 0; | |
2322 | } | |
2323 | ||
// Write `bytes` bytes described by vec[0..veclen) to fd at file position
// `offset`, retrying on EINTR and continuing after short writes.  vec is
// modified in place while partial writes are consumed.
// Returns 0 on success or a negative errno.
static int do_writev(int fd, struct iovec *vec, uint64_t offset, unsigned veclen, unsigned bytes)
{
  ssize_t written = 0;
  while (bytes > 0) {
#ifdef HAVE_PWRITEV
    written = ::pwritev(fd, vec, veclen, offset);
#else
    // no pwritev: position the fd explicitly, then use plain writev
    written = ::lseek64(fd, offset, SEEK_SET);
    if (written != offset) {
      return -errno;
    }
    written = ::writev(fd, vec, veclen);
#endif
    if (written < 0) {
      if (errno == EINTR)
        continue;
      return -errno;
    }

    bytes -= written;
    offset += written;
    if (bytes == 0)
      break;

    // short write: advance vec past what was written
    while (written > 0) {
      if (vec->iov_len > (size_t)written) {
        // this entry was only partly written; trim its front and stop
        vec->iov_base = (char *)vec->iov_base + written;
        vec->iov_len -= written;
        break;
      }
      // this entry was written completely; drop it
      written -= vec->iov_len;
      ++vec;
      --veclen;
    }
  }
  return 0;
}
2363 | ||
2364 | int buffer::list::write_fd(int fd) const | |
2365 | { | |
2366 | if (can_zero_copy()) | |
2367 | return write_fd_zero_copy(fd); | |
2368 | ||
2369 | // use writev! | |
2370 | iovec iov[IOV_MAX]; | |
2371 | int iovlen = 0; | |
2372 | ssize_t bytes = 0; | |
2373 | ||
2374 | std::list<ptr>::const_iterator p = _buffers.begin(); | |
2375 | while (p != _buffers.end()) { | |
2376 | if (p->length() > 0) { | |
2377 | iov[iovlen].iov_base = (void *)p->c_str(); | |
2378 | iov[iovlen].iov_len = p->length(); | |
2379 | bytes += p->length(); | |
2380 | iovlen++; | |
2381 | } | |
2382 | ++p; | |
2383 | ||
2384 | if (iovlen == IOV_MAX-1 || | |
2385 | p == _buffers.end()) { | |
2386 | iovec *start = iov; | |
2387 | int num = iovlen; | |
2388 | ssize_t wrote; | |
2389 | retry: | |
2390 | wrote = ::writev(fd, start, num); | |
2391 | if (wrote < 0) { | |
2392 | int err = errno; | |
2393 | if (err == EINTR) | |
2394 | goto retry; | |
2395 | return -err; | |
2396 | } | |
2397 | if (wrote < bytes) { | |
2398 | // partial write, recover! | |
2399 | while ((size_t)wrote >= start[0].iov_len) { | |
2400 | wrote -= start[0].iov_len; | |
2401 | bytes -= start[0].iov_len; | |
2402 | start++; | |
2403 | num--; | |
2404 | } | |
2405 | if (wrote > 0) { | |
2406 | start[0].iov_len -= wrote; | |
2407 | start[0].iov_base = (char *)start[0].iov_base + wrote; | |
2408 | bytes -= wrote; | |
2409 | } | |
2410 | goto retry; | |
2411 | } | |
2412 | iovlen = 0; | |
2413 | bytes = 0; | |
2414 | } | |
2415 | } | |
2416 | return 0; | |
2417 | } | |
2418 | ||
2419 | int buffer::list::write_fd(int fd, uint64_t offset) const | |
2420 | { | |
2421 | iovec iov[IOV_MAX]; | |
2422 | ||
2423 | std::list<ptr>::const_iterator p = _buffers.begin(); | |
2424 | uint64_t left_pbrs = _buffers.size(); | |
2425 | while (left_pbrs) { | |
2426 | ssize_t bytes = 0; | |
2427 | unsigned iovlen = 0; | |
2428 | uint64_t size = MIN(left_pbrs, IOV_MAX); | |
2429 | left_pbrs -= size; | |
2430 | while (size > 0) { | |
2431 | iov[iovlen].iov_base = (void *)p->c_str(); | |
2432 | iov[iovlen].iov_len = p->length(); | |
2433 | iovlen++; | |
2434 | bytes += p->length(); | |
2435 | ++p; | |
2436 | size--; | |
2437 | } | |
2438 | ||
2439 | int r = do_writev(fd, iov, offset, iovlen, bytes); | |
2440 | if (r < 0) | |
2441 | return r; | |
2442 | offset += bytes; | |
2443 | } | |
2444 | return 0; | |
2445 | } | |
2446 | ||
2447 | int buffer::list::write_fd_zero_copy(int fd) const | |
2448 | { | |
2449 | if (!can_zero_copy()) | |
2450 | return -ENOTSUP; | |
2451 | /* pass offset to each call to avoid races updating the fd seek | |
2452 | * position, since the I/O may be non-blocking | |
2453 | */ | |
2454 | int64_t offset = ::lseek(fd, 0, SEEK_CUR); | |
2455 | int64_t *off_p = &offset; | |
2456 | if (offset < 0 && errno != ESPIPE) | |
2457 | return -errno; | |
2458 | if (errno == ESPIPE) | |
2459 | off_p = NULL; | |
2460 | for (std::list<ptr>::const_iterator it = _buffers.begin(); | |
2461 | it != _buffers.end(); ++it) { | |
2462 | int r = it->zero_copy_to_fd(fd, off_p); | |
2463 | if (r < 0) | |
2464 | return r; | |
2465 | if (off_p) | |
2466 | offset += it->length(); | |
2467 | } | |
2468 | return 0; | |
2469 | } | |
2470 | ||
2471 | __u32 buffer::list::crc32c(__u32 crc) const | |
2472 | { | |
2473 | for (std::list<ptr>::const_iterator it = _buffers.begin(); | |
2474 | it != _buffers.end(); | |
2475 | ++it) { | |
2476 | if (it->length()) { | |
2477 | raw *r = it->get_raw(); | |
2478 | pair<size_t, size_t> ofs(it->offset(), it->offset() + it->length()); | |
2479 | pair<uint32_t, uint32_t> ccrc; | |
2480 | if (r->get_crc(ofs, &ccrc)) { | |
2481 | if (ccrc.first == crc) { | |
2482 | // got it already | |
2483 | crc = ccrc.second; | |
2484 | if (buffer_track_crc) | |
31f18b77 | 2485 | buffer_cached_crc++; |
7c673cae FG |
2486 | } else { |
2487 | /* If we have cached crc32c(buf, v) for initial value v, | |
2488 | * we can convert this to a different initial value v' by: | |
2489 | * crc32c(buf, v') = crc32c(buf, v) ^ adjustment | |
2490 | * where adjustment = crc32c(0*len(buf), v ^ v') | |
2491 | * | |
2492 | * http://crcutil.googlecode.com/files/crc-doc.1.0.pdf | |
2493 | * note, u for our crc32c implementation is 0 | |
2494 | */ | |
2495 | crc = ccrc.second ^ ceph_crc32c(ccrc.first ^ crc, NULL, it->length()); | |
2496 | if (buffer_track_crc) | |
31f18b77 | 2497 | buffer_cached_crc_adjusted++; |
7c673cae FG |
2498 | } |
2499 | } else { | |
2500 | if (buffer_track_crc) | |
31f18b77 | 2501 | buffer_missed_crc++; |
7c673cae FG |
2502 | uint32_t base = crc; |
2503 | crc = ceph_crc32c(crc, (unsigned char*)it->c_str(), it->length()); | |
2504 | r->set_crc(ofs, make_pair(base, crc)); | |
2505 | } | |
2506 | } | |
2507 | } | |
2508 | return crc; | |
2509 | } | |
2510 | ||
2511 | void buffer::list::invalidate_crc() | |
2512 | { | |
2513 | for (std::list<ptr>::const_iterator p = _buffers.begin(); p != _buffers.end(); ++p) { | |
2514 | raw *r = p->get_raw(); | |
2515 | if (r) { | |
2516 | r->invalidate_crc(); | |
2517 | } | |
2518 | } | |
2519 | } | |
2520 | ||
2521 | /** | |
2522 | * Binary write all contents to a C++ stream | |
2523 | */ | |
2524 | void buffer::list::write_stream(std::ostream &out) const | |
2525 | { | |
2526 | for (std::list<ptr>::const_iterator p = _buffers.begin(); p != _buffers.end(); ++p) { | |
2527 | if (p->length() > 0) { | |
2528 | out.write(p->c_str(), p->length()); | |
2529 | } | |
2530 | } | |
2531 | } | |
2532 | ||
2533 | ||
2534 | void buffer::list::hexdump(std::ostream &out, bool trailing_newline) const | |
2535 | { | |
2536 | if (!length()) | |
2537 | return; | |
2538 | ||
2539 | std::ios_base::fmtflags original_flags = out.flags(); | |
2540 | ||
2541 | // do our best to match the output of hexdump -C, for better | |
2542 | // diff'ing! | |
2543 | ||
2544 | out.setf(std::ios::right); | |
2545 | out.fill('0'); | |
2546 | ||
2547 | unsigned per = 16; | |
2548 | bool was_zeros = false, did_star = false; | |
2549 | for (unsigned o=0; o<length(); o += per) { | |
2550 | bool row_is_zeros = false; | |
2551 | if (o + per < length()) { | |
2552 | row_is_zeros = true; | |
2553 | for (unsigned i=0; i<per && o+i<length(); i++) { | |
2554 | if ((*this)[o+i]) { | |
2555 | row_is_zeros = false; | |
2556 | } | |
2557 | } | |
2558 | if (row_is_zeros) { | |
2559 | if (was_zeros) { | |
2560 | if (!did_star) { | |
2561 | out << "\n*"; | |
2562 | did_star = true; | |
2563 | } | |
2564 | continue; | |
2565 | } | |
2566 | was_zeros = true; | |
2567 | } else { | |
2568 | was_zeros = false; | |
2569 | did_star = false; | |
2570 | } | |
2571 | } | |
2572 | if (o) | |
2573 | out << "\n"; | |
2574 | out << std::hex << std::setw(8) << o << " "; | |
2575 | ||
2576 | unsigned i; | |
2577 | for (i=0; i<per && o+i<length(); i++) { | |
2578 | if (i == 8) | |
2579 | out << ' '; | |
2580 | out << " " << std::setw(2) << ((unsigned)(*this)[o+i] & 0xff); | |
2581 | } | |
2582 | for (; i<per; i++) { | |
2583 | if (i == 8) | |
2584 | out << ' '; | |
2585 | out << " "; | |
2586 | } | |
2587 | ||
2588 | out << " |"; | |
2589 | for (i=0; i<per && o+i<length(); i++) { | |
2590 | char c = (*this)[o+i]; | |
2591 | if (isupper(c) || islower(c) || isdigit(c) || c == ' ' || ispunct(c)) | |
2592 | out << c; | |
2593 | else | |
2594 | out << '.'; | |
2595 | } | |
2596 | out << '|' << std::dec; | |
2597 | } | |
2598 | if (trailing_newline) { | |
2599 | out << "\n" << std::hex << std::setw(8) << length(); | |
2600 | out << "\n"; | |
2601 | } | |
2602 | ||
2603 | out.flags(original_flags); | |
2604 | } | |
2605 | ||
31f18b77 FG |
2606 | |
2607 | buffer::list buffer::list::static_from_mem(char* c, size_t l) { | |
2608 | list bl; | |
2609 | bl.push_back(ptr(create_static(l, c))); | |
2610 | return bl; | |
2611 | } | |
2612 | ||
2613 | buffer::list buffer::list::static_from_cstring(char* c) { | |
2614 | return static_from_mem(c, std::strlen(c)); | |
2615 | } | |
2616 | ||
2617 | buffer::list buffer::list::static_from_string(string& s) { | |
2618 | // C++14 just has string::data return a char* from a non-const | |
2619 | // string. | |
2620 | return static_from_mem(const_cast<char*>(s.data()), s.length()); | |
2621 | // But the way buffer::list mostly doesn't work in a sane way with | |
2622 | // const makes me generally sad. | |
2623 | } | |
2624 | ||
7c673cae | 2625 | std::ostream& buffer::operator<<(std::ostream& out, const buffer::raw &r) { |
31f18b77 | 2626 | return out << "buffer::raw(" << (void*)r.data << " len " << r.len << " nref " << r.nref.load() << ")"; |
7c673cae FG |
2627 | } |
2628 | ||
2629 | std::ostream& buffer::operator<<(std::ostream& out, const buffer::ptr& bp) { | |
2630 | if (bp.have_raw()) | |
2631 | out << "buffer::ptr(" << bp.offset() << "~" << bp.length() | |
2632 | << " " << (void*)bp.c_str() | |
2633 | << " in raw " << (void*)bp.raw_c_str() | |
2634 | << " len " << bp.raw_length() | |
2635 | << " nref " << bp.raw_nref() << ")"; | |
2636 | else | |
2637 | out << "buffer:ptr(" << bp.offset() << "~" << bp.length() << " no raw)"; | |
2638 | return out; | |
2639 | } | |
2640 | ||
2641 | std::ostream& buffer::operator<<(std::ostream& out, const buffer::list& bl) { | |
2642 | out << "buffer::list(len=" << bl.length() << "," << std::endl; | |
2643 | ||
2644 | std::list<buffer::ptr>::const_iterator it = bl.buffers().begin(); | |
2645 | while (it != bl.buffers().end()) { | |
2646 | out << "\t" << *it; | |
2647 | if (++it == bl.buffers().end()) break; | |
2648 | out << "," << std::endl; | |
2649 | } | |
2650 | out << std::endl << ")"; | |
2651 | return out; | |
2652 | } | |
2653 | ||
2654 | std::ostream& buffer::operator<<(std::ostream& out, const buffer::error& e) | |
2655 | { | |
2656 | return out << e.what(); | |
2657 | } | |
2658 | ||
2659 | MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_malloc, buffer_raw_malloc, | |
2660 | buffer_meta); | |
2661 | MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_mmap_pages, buffer_raw_mmap_pagse, | |
2662 | buffer_meta); | |
2663 | MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_posix_aligned, | |
2664 | buffer_raw_posix_aligned, buffer_meta); | |
2665 | #ifdef CEPH_HAVE_SPLICE | |
2666 | MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_pipe, buffer_raw_pipe, buffer_meta); | |
2667 | #endif | |
2668 | MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_char, buffer_raw_char, buffer_meta); | |
31f18b77 FG |
2669 | MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_claimed_char, buffer_raw_claimed_char, |
2670 | buffer_meta); | |
7c673cae FG |
2671 | MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_unshareable, buffer_raw_unshareable, |
2672 | buffer_meta); | |
2673 | MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_static, buffer_raw_static, | |
2674 | buffer_meta); | |
2675 |