]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
31f18b77 FG |
15 | #include <atomic> |
16 | #include <errno.h> | |
17 | #include <limits.h> | |
18 | ||
19 | #include <sys/uio.h> | |
20 | ||
7c673cae FG |
21 | #include "include/compat.h" |
22 | #include "include/mempool.h" | |
23 | #include "armor.h" | |
24 | #include "common/environment.h" | |
25 | #include "common/errno.h" | |
26 | #include "common/safe_io.h" | |
27 | #include "common/simple_spin.h" | |
28 | #include "common/strtol.h" | |
29 | #include "common/likely.h" | |
30 | #include "common/valgrind.h" | |
31 | #include "common/deleter.h" | |
7c673cae FG |
32 | #include "common/RWLock.h" |
33 | #include "include/types.h" | |
7c673cae | 34 | #include "include/scope_guard.h" |
31f18b77 | 35 | |
7c673cae FG |
36 | #if defined(HAVE_XIO) |
37 | #include "msg/xio/XioMsg.h" | |
38 | #endif | |
39 | ||
31f18b77 | 40 | using namespace ceph; |
7c673cae FG |
41 | |
42 | #define CEPH_BUFFER_ALLOC_UNIT (MIN(CEPH_PAGE_SIZE, 4096)) | |
43 | #define CEPH_BUFFER_APPEND_SIZE (CEPH_BUFFER_ALLOC_UNIT - sizeof(raw_combined)) | |
44 | ||
45 | #ifdef BUFFER_DEBUG | |
46 | static std::atomic_flag buffer_debug_lock = ATOMIC_FLAG_INIT; | |
47 | # define bdout { simple_spin_lock(&buffer_debug_lock); std::cout | |
48 | # define bendl std::endl; simple_spin_unlock(&buffer_debug_lock); } | |
49 | #else | |
50 | # define bdout if (0) { std::cout | |
51 | # define bendl std::endl; } | |
52 | #endif | |
53 | ||
31f18b77 FG |
54 | static std::atomic<uint64_t> buffer_total_alloc { 0 }; |
55 | static std::atomic<uint64_t> buffer_history_alloc_bytes { 0 }; | |
56 | static std::atomic<uint64_t> buffer_history_alloc_num { 0 }; | |
57 | ||
7c673cae FG |
58 | const bool buffer_track_alloc = get_env_bool("CEPH_BUFFER_TRACK"); |
59 | ||
60 | namespace { | |
61 | void inc_total_alloc(unsigned len) { | |
62 | if (buffer_track_alloc) | |
31f18b77 | 63 | buffer_total_alloc += len; |
7c673cae FG |
64 | } |
65 | ||
66 | void dec_total_alloc(unsigned len) { | |
67 | if (buffer_track_alloc) | |
31f18b77 | 68 | buffer_total_alloc -= len; |
7c673cae FG |
69 | } |
70 | ||
71 | void inc_history_alloc(uint64_t len) { | |
72 | if (buffer_track_alloc) { | |
31f18b77 FG |
73 | buffer_history_alloc_bytes += len; |
74 | buffer_history_alloc_num++; | |
7c673cae FG |
75 | } |
76 | } | |
31f18b77 | 77 | } // namespace |
7c673cae FG |
78 | |
79 | int buffer::get_total_alloc() { | |
31f18b77 | 80 | return buffer_total_alloc; |
7c673cae FG |
81 | } |
82 | uint64_t buffer::get_history_alloc_bytes() { | |
31f18b77 | 83 | return buffer_history_alloc_bytes; |
7c673cae FG |
84 | } |
85 | uint64_t buffer::get_history_alloc_num() { | |
31f18b77 | 86 | return buffer_history_alloc_num; |
7c673cae FG |
87 | } |
88 | ||
31f18b77 FG |
89 | static std::atomic<unsigned> buffer_cached_crc { 0 }; |
90 | static std::atomic<unsigned> buffer_cached_crc_adjusted { 0 }; | |
91 | static std::atomic<unsigned> buffer_missed_crc { 0 }; | |
92 | ||
7c673cae FG |
93 | static bool buffer_track_crc = get_env_bool("CEPH_BUFFER_TRACK"); |
94 | ||
95 | void buffer::track_cached_crc(bool b) { | |
96 | buffer_track_crc = b; | |
97 | } | |
98 | int buffer::get_cached_crc() { | |
31f18b77 | 99 | return buffer_cached_crc; |
7c673cae FG |
100 | } |
101 | int buffer::get_cached_crc_adjusted() { | |
31f18b77 | 102 | return buffer_cached_crc_adjusted; |
7c673cae FG |
103 | } |
104 | ||
105 | int buffer::get_missed_crc() { | |
31f18b77 | 106 | return buffer_missed_crc; |
7c673cae FG |
107 | } |
108 | ||
31f18b77 FG |
109 | static std::atomic<unsigned> buffer_c_str_accesses { 0 }; |
110 | ||
7c673cae FG |
111 | static bool buffer_track_c_str = get_env_bool("CEPH_BUFFER_TRACK"); |
112 | ||
113 | void buffer::track_c_str(bool b) { | |
114 | buffer_track_c_str = b; | |
115 | } | |
116 | int buffer::get_c_str_accesses() { | |
31f18b77 | 117 | return buffer_c_str_accesses; |
7c673cae FG |
118 | } |
119 | ||
c07f9fc5 | 120 | #ifdef CEPH_HAVE_SETPIPE_SZ |
31f18b77 | 121 | static std::atomic<unsigned> buffer_max_pipe_size { 0 }; |
7c673cae | 122 | int update_max_pipe_size() { |
7c673cae FG |
123 | char buf[32]; |
124 | int r; | |
125 | std::string err; | |
126 | struct stat stat_result; | |
127 | if (::stat(PROCPREFIX "/proc/sys/fs/pipe-max-size", &stat_result) == -1) | |
128 | return -errno; | |
129 | r = safe_read_file(PROCPREFIX "/proc/sys/fs/", "pipe-max-size", | |
130 | buf, sizeof(buf) - 1); | |
131 | if (r < 0) | |
132 | return r; | |
133 | buf[r] = '\0'; | |
134 | size_t size = strict_strtol(buf, 10, &err); | |
135 | if (!err.empty()) | |
136 | return -EIO; | |
31f18b77 | 137 | buffer_max_pipe_size = size; |
7c673cae FG |
138 | return 0; |
139 | } | |
140 | ||
141 | size_t get_max_pipe_size() { | |
31f18b77 | 142 | size_t size = buffer_max_pipe_size; |
7c673cae FG |
143 | if (size) |
144 | return size; | |
145 | if (update_max_pipe_size() == 0) | |
31f18b77 | 146 | return buffer_max_pipe_size; |
7c673cae FG |
147 | // this is the max size hardcoded in linux before 2.6.35 |
148 | return 65536; | |
149 | } | |
c07f9fc5 FG |
150 | #else |
// Fallback used when CEPH_HAVE_SETPIPE_SZ is not defined: always reports
// 65536 bytes.
size_t get_max_pipe_size() {
  return 64 * 1024;
}
152 | #endif | |
153 | ||
7c673cae FG |
154 | |
155 | const char * buffer::error::what() const throw () { | |
156 | return "buffer::exception"; | |
157 | } | |
158 | const char * buffer::bad_alloc::what() const throw () { | |
159 | return "buffer::bad_alloc"; | |
160 | } | |
161 | const char * buffer::end_of_buffer::what() const throw () { | |
162 | return "buffer::end_of_buffer"; | |
163 | } | |
164 | const char * buffer::malformed_input::what() const throw () { | |
165 | return buf; | |
166 | } | |
167 | buffer::error_code::error_code(int error) : | |
168 | buffer::malformed_input(cpp_strerror(error).c_str()), code(error) {} | |
169 | ||
170 | class buffer::raw { | |
171 | public: | |
172 | char *data; | |
173 | unsigned len; | |
31f18b77 | 174 | std::atomic<unsigned> nref { 0 }; |
3efd9988 | 175 | int mempool; |
7c673cae FG |
176 | |
177 | mutable std::atomic_flag crc_spinlock = ATOMIC_FLAG_INIT; | |
178 | map<pair<size_t, size_t>, pair<uint32_t, uint32_t> > crc_map; | |
179 | ||
3efd9988 FG |
180 | explicit raw(unsigned l, int mempool=mempool::mempool_buffer_anon) |
181 | : data(NULL), len(l), nref(0), mempool(mempool) { | |
31f18b77 FG |
182 | mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count(1, len); |
183 | } | |
3efd9988 FG |
184 | raw(char *c, unsigned l, int mempool=mempool::mempool_buffer_anon) |
185 | : data(c), len(l), nref(0), mempool(mempool) { | |
31f18b77 FG |
186 | mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count(1, len); |
187 | } | |
188 | virtual ~raw() { | |
189 | mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count( | |
190 | -1, -(int)len); | |
191 | } | |
192 | ||
193 | void _set_len(unsigned l) { | |
194 | mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count( | |
195 | -1, -(int)len); | |
196 | len = l; | |
197 | mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count(1, len); | |
198 | } | |
199 | ||
200 | void reassign_to_mempool(int pool) { | |
201 | if (pool == mempool) { | |
202 | return; | |
203 | } | |
204 | mempool::get_pool(mempool::pool_index_t(mempool)).adjust_count( | |
205 | -1, -(int)len); | |
206 | mempool = pool; | |
207 | mempool::get_pool(mempool::pool_index_t(pool)).adjust_count(1, len); | |
208 | } | |
209 | ||
210 | void try_assign_to_mempool(int pool) { | |
211 | if (mempool == mempool::mempool_buffer_anon) { | |
212 | reassign_to_mempool(pool); | |
213 | } | |
214 | } | |
7c673cae FG |
215 | |
216 | // no copying. | |
217 | // cppcheck-suppress noExplicitConstructor | |
218 | raw(const raw &other); | |
219 | const raw& operator=(const raw &other); | |
220 | ||
221 | virtual char *get_data() { | |
222 | return data; | |
223 | } | |
224 | virtual raw* clone_empty() = 0; | |
225 | raw *clone() { | |
226 | raw *c = clone_empty(); | |
227 | memcpy(c->data, data, len); | |
228 | return c; | |
229 | } | |
230 | virtual bool can_zero_copy() const { | |
231 | return false; | |
232 | } | |
233 | virtual int zero_copy_to_fd(int fd, loff_t *offset) { | |
234 | return -ENOTSUP; | |
235 | } | |
236 | virtual bool is_page_aligned() { | |
237 | return ((long)data & ~CEPH_PAGE_MASK) == 0; | |
238 | } | |
239 | bool is_n_page_sized() { | |
240 | return (len & ~CEPH_PAGE_MASK) == 0; | |
241 | } | |
242 | virtual bool is_shareable() { | |
243 | // true if safe to reference/share the existing buffer copy | |
244 | // false if it is not safe to share the buffer, e.g., due to special | |
245 | // and/or registered memory that is scarce | |
246 | return true; | |
247 | } | |
248 | bool get_crc(const pair<size_t, size_t> &fromto, | |
249 | pair<uint32_t, uint32_t> *crc) const { | |
250 | simple_spin_lock(&crc_spinlock); | |
251 | map<pair<size_t, size_t>, pair<uint32_t, uint32_t> >::const_iterator i = | |
252 | crc_map.find(fromto); | |
253 | if (i == crc_map.end()) { | |
254 | simple_spin_unlock(&crc_spinlock); | |
255 | return false; | |
256 | } | |
257 | *crc = i->second; | |
258 | simple_spin_unlock(&crc_spinlock); | |
259 | return true; | |
260 | } | |
261 | void set_crc(const pair<size_t, size_t> &fromto, | |
262 | const pair<uint32_t, uint32_t> &crc) { | |
263 | simple_spin_lock(&crc_spinlock); | |
264 | crc_map[fromto] = crc; | |
265 | simple_spin_unlock(&crc_spinlock); | |
266 | } | |
267 | void invalidate_crc() { | |
268 | simple_spin_lock(&crc_spinlock); | |
269 | if (crc_map.size() != 0) { | |
270 | crc_map.clear(); | |
271 | } | |
272 | simple_spin_unlock(&crc_spinlock); | |
273 | } | |
274 | }; | |
275 | ||
7c673cae FG |
276 | /* |
277 | * raw_combined is always placed within a single allocation along | |
278 | * with the data buffer. the data goes at the beginning, and | |
279 | * raw_combined at the end. | |
280 | */ | |
281 | class buffer::raw_combined : public buffer::raw { | |
282 | size_t alignment; | |
283 | public: | |
3efd9988 FG |
284 | raw_combined(char *dataptr, unsigned l, unsigned align, |
285 | int mempool) | |
286 | : raw(dataptr, l, mempool), | |
7c673cae FG |
287 | alignment(align) { |
288 | inc_total_alloc(len); | |
289 | inc_history_alloc(len); | |
290 | } | |
291 | ~raw_combined() override { | |
292 | dec_total_alloc(len); | |
293 | } | |
294 | raw* clone_empty() override { | |
295 | return create(len, alignment); | |
296 | } | |
297 | ||
3efd9988 FG |
298 | static raw_combined *create(unsigned len, |
299 | unsigned align, | |
300 | int mempool = mempool::mempool_buffer_anon) { | |
7c673cae FG |
301 | if (!align) |
302 | align = sizeof(size_t); | |
303 | size_t rawlen = ROUND_UP_TO(sizeof(buffer::raw_combined), | |
304 | alignof(buffer::raw_combined)); | |
305 | size_t datalen = ROUND_UP_TO(len, alignof(buffer::raw_combined)); | |
306 | ||
31f18b77 FG |
307 | #ifdef DARWIN |
308 | char *ptr = (char *) valloc(rawlen + datalen); | |
309 | #else | |
310 | char *ptr = 0; | |
311 | int r = ::posix_memalign((void**)(void*)&ptr, align, rawlen + datalen); | |
312 | if (r) | |
313 | throw bad_alloc(); | |
314 | #endif /* DARWIN */ | |
7c673cae FG |
315 | if (!ptr) |
316 | throw bad_alloc(); | |
317 | ||
318 | // actual data first, since it has presumably larger alignment restriction | |
319 | // then put the raw_combined at the end | |
3efd9988 | 320 | return new (ptr + datalen) raw_combined(ptr, len, align, mempool); |
7c673cae FG |
321 | } |
322 | ||
323 | static void operator delete(void *ptr) { | |
324 | raw_combined *raw = (raw_combined *)ptr; | |
31f18b77 | 325 | ::free((void *)raw->data); |
7c673cae FG |
326 | } |
327 | }; | |
328 | ||
329 | class buffer::raw_malloc : public buffer::raw { | |
330 | public: | |
331 | MEMPOOL_CLASS_HELPERS(); | |
332 | ||
333 | explicit raw_malloc(unsigned l) : raw(l) { | |
334 | if (len) { | |
335 | data = (char *)malloc(len); | |
336 | if (!data) | |
337 | throw bad_alloc(); | |
338 | } else { | |
339 | data = 0; | |
340 | } | |
341 | inc_total_alloc(len); | |
342 | inc_history_alloc(len); | |
343 | bdout << "raw_malloc " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl; | |
344 | } | |
345 | raw_malloc(unsigned l, char *b) : raw(b, l) { | |
346 | inc_total_alloc(len); | |
347 | bdout << "raw_malloc " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl; | |
348 | } | |
349 | ~raw_malloc() override { | |
350 | free(data); | |
351 | dec_total_alloc(len); | |
352 | bdout << "raw_malloc " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl; | |
353 | } | |
354 | raw* clone_empty() override { | |
355 | return new raw_malloc(len); | |
356 | } | |
357 | }; | |
358 | ||
359 | #ifndef __CYGWIN__ | |
360 | class buffer::raw_mmap_pages : public buffer::raw { | |
361 | public: | |
362 | MEMPOOL_CLASS_HELPERS(); | |
363 | ||
364 | explicit raw_mmap_pages(unsigned l) : raw(l) { | |
365 | data = (char*)::mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); | |
366 | if (!data) | |
367 | throw bad_alloc(); | |
368 | inc_total_alloc(len); | |
369 | inc_history_alloc(len); | |
370 | bdout << "raw_mmap " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl; | |
371 | } | |
372 | ~raw_mmap_pages() override { | |
373 | ::munmap(data, len); | |
374 | dec_total_alloc(len); | |
375 | bdout << "raw_mmap " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl; | |
376 | } | |
377 | raw* clone_empty() override { | |
378 | return new raw_mmap_pages(len); | |
379 | } | |
380 | }; | |
381 | ||
382 | class buffer::raw_posix_aligned : public buffer::raw { | |
383 | unsigned align; | |
384 | public: | |
385 | MEMPOOL_CLASS_HELPERS(); | |
386 | ||
387 | raw_posix_aligned(unsigned l, unsigned _align) : raw(l) { | |
388 | align = _align; | |
389 | assert((align >= sizeof(void *)) && (align & (align - 1)) == 0); | |
31f18b77 FG |
390 | #ifdef DARWIN |
391 | data = (char *) valloc(len); | |
392 | #else | |
393 | int r = ::posix_memalign((void**)(void*)&data, align, len); | |
394 | if (r) | |
395 | throw bad_alloc(); | |
396 | #endif /* DARWIN */ | |
7c673cae FG |
397 | if (!data) |
398 | throw bad_alloc(); | |
399 | inc_total_alloc(len); | |
400 | inc_history_alloc(len); | |
401 | bdout << "raw_posix_aligned " << this << " alloc " << (void *)data << " l=" << l << ", align=" << align << " total_alloc=" << buffer::get_total_alloc() << bendl; | |
402 | } | |
403 | ~raw_posix_aligned() override { | |
31f18b77 | 404 | ::free(data); |
7c673cae FG |
405 | dec_total_alloc(len); |
406 | bdout << "raw_posix_aligned " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl; | |
407 | } | |
408 | raw* clone_empty() override { | |
409 | return new raw_posix_aligned(len, align); | |
410 | } | |
411 | }; | |
412 | #endif | |
413 | ||
414 | #ifdef __CYGWIN__ | |
415 | class buffer::raw_hack_aligned : public buffer::raw { | |
416 | unsigned align; | |
417 | char *realdata; | |
418 | public: | |
419 | raw_hack_aligned(unsigned l, unsigned _align) : raw(l) { | |
420 | align = _align; | |
421 | realdata = new char[len+align-1]; | |
422 | unsigned off = ((unsigned)realdata) & (align-1); | |
423 | if (off) | |
424 | data = realdata + align - off; | |
425 | else | |
426 | data = realdata; | |
427 | inc_total_alloc(len+align-1); | |
428 | inc_history_alloc(len+align-1); | |
429 | //cout << "hack aligned " << (unsigned)data | |
430 | //<< " in raw " << (unsigned)realdata | |
431 | //<< " off " << off << std::endl; | |
432 | assert(((unsigned)data & (align-1)) == 0); | |
433 | } | |
434 | ~raw_hack_aligned() { | |
435 | delete[] realdata; | |
436 | dec_total_alloc(len+align-1); | |
437 | } | |
438 | raw* clone_empty() { | |
439 | return new raw_hack_aligned(len, align); | |
440 | } | |
441 | }; | |
442 | #endif | |
443 | ||
444 | #ifdef CEPH_HAVE_SPLICE | |
445 | class buffer::raw_pipe : public buffer::raw { | |
446 | public: | |
447 | MEMPOOL_CLASS_HELPERS(); | |
448 | ||
449 | explicit raw_pipe(unsigned len) : raw(len), source_consumed(false) { | |
450 | size_t max = get_max_pipe_size(); | |
451 | if (len > max) { | |
452 | bdout << "raw_pipe: requested length " << len | |
453 | << " > max length " << max << bendl; | |
454 | throw malformed_input("length larger than max pipe size"); | |
455 | } | |
456 | pipefds[0] = -1; | |
457 | pipefds[1] = -1; | |
458 | ||
459 | int r; | |
460 | if (::pipe(pipefds) == -1) { | |
461 | r = -errno; | |
462 | bdout << "raw_pipe: error creating pipe: " << cpp_strerror(r) << bendl; | |
463 | throw error_code(r); | |
464 | } | |
465 | ||
466 | r = set_nonblocking(pipefds); | |
467 | if (r < 0) { | |
468 | bdout << "raw_pipe: error setting nonblocking flag on temp pipe: " | |
469 | << cpp_strerror(r) << bendl; | |
470 | throw error_code(r); | |
471 | } | |
472 | ||
473 | r = set_pipe_size(pipefds, len); | |
474 | if (r < 0) { | |
475 | bdout << "raw_pipe: could not set pipe size" << bendl; | |
476 | // continue, since the pipe should become large enough as needed | |
477 | } | |
478 | ||
479 | inc_total_alloc(len); | |
480 | inc_history_alloc(len); | |
481 | bdout << "raw_pipe " << this << " alloc " << len << " " | |
482 | << buffer::get_total_alloc() << bendl; | |
483 | } | |
484 | ||
485 | ~raw_pipe() override { | |
486 | if (data) | |
487 | free(data); | |
488 | close_pipe(pipefds); | |
489 | dec_total_alloc(len); | |
490 | bdout << "raw_pipe " << this << " free " << (void *)data << " " | |
491 | << buffer::get_total_alloc() << bendl; | |
492 | } | |
493 | ||
494 | bool can_zero_copy() const override { | |
495 | return true; | |
496 | } | |
497 | ||
498 | int set_source(int fd, loff_t *off) { | |
499 | int flags = SPLICE_F_NONBLOCK; | |
500 | ssize_t r = safe_splice(fd, off, pipefds[1], NULL, len, flags); | |
501 | if (r < 0) { | |
502 | bdout << "raw_pipe: error splicing into pipe: " << cpp_strerror(r) | |
503 | << bendl; | |
504 | return r; | |
505 | } | |
506 | // update length with actual amount read | |
31f18b77 | 507 | _set_len(r); |
7c673cae FG |
508 | return 0; |
509 | } | |
510 | ||
511 | int zero_copy_to_fd(int fd, loff_t *offset) override { | |
512 | assert(!source_consumed); | |
513 | int flags = SPLICE_F_NONBLOCK; | |
514 | ssize_t r = safe_splice_exact(pipefds[0], NULL, fd, offset, len, flags); | |
515 | if (r < 0) { | |
516 | bdout << "raw_pipe: error splicing from pipe to fd: " | |
517 | << cpp_strerror(r) << bendl; | |
518 | return r; | |
519 | } | |
520 | source_consumed = true; | |
521 | return 0; | |
522 | } | |
523 | ||
524 | buffer::raw* clone_empty() override { | |
525 | // cloning doesn't make sense for pipe-based buffers, | |
526 | // and is only used by unit tests for other types of buffers | |
527 | return NULL; | |
528 | } | |
529 | ||
530 | char *get_data() override { | |
531 | if (data) | |
532 | return data; | |
533 | return copy_pipe(pipefds); | |
534 | } | |
535 | ||
536 | private: | |
537 | int set_pipe_size(int *fds, long length) { | |
538 | #ifdef CEPH_HAVE_SETPIPE_SZ | |
539 | if (::fcntl(fds[1], F_SETPIPE_SZ, length) == -1) { | |
540 | int r = -errno; | |
541 | if (r == -EPERM) { | |
542 | // pipe limit must have changed - EPERM means we requested | |
543 | // more than the maximum size as an unprivileged user | |
544 | update_max_pipe_size(); | |
545 | throw malformed_input("length larger than new max pipe size"); | |
546 | } | |
547 | return r; | |
548 | } | |
549 | #endif | |
550 | return 0; | |
551 | } | |
552 | ||
553 | int set_nonblocking(int *fds) { | |
554 | if (::fcntl(fds[0], F_SETFL, O_NONBLOCK) == -1) | |
555 | return -errno; | |
556 | if (::fcntl(fds[1], F_SETFL, O_NONBLOCK) == -1) | |
557 | return -errno; | |
558 | return 0; | |
559 | } | |
560 | ||
561 | static void close_pipe(const int *fds) { | |
562 | if (fds[0] >= 0) | |
563 | VOID_TEMP_FAILURE_RETRY(::close(fds[0])); | |
564 | if (fds[1] >= 0) | |
565 | VOID_TEMP_FAILURE_RETRY(::close(fds[1])); | |
566 | } | |
567 | char *copy_pipe(int *fds) { | |
568 | /* preserve original pipe contents by copying into a temporary | |
569 | * pipe before reading. | |
570 | */ | |
571 | int tmpfd[2]; | |
572 | int r; | |
573 | ||
574 | assert(!source_consumed); | |
575 | assert(fds[0] >= 0); | |
576 | ||
577 | if (::pipe(tmpfd) == -1) { | |
578 | r = -errno; | |
579 | bdout << "raw_pipe: error creating temp pipe: " << cpp_strerror(r) | |
580 | << bendl; | |
581 | throw error_code(r); | |
582 | } | |
583 | auto sg = make_scope_guard([=] { close_pipe(tmpfd); }); | |
584 | r = set_nonblocking(tmpfd); | |
585 | if (r < 0) { | |
586 | bdout << "raw_pipe: error setting nonblocking flag on temp pipe: " | |
587 | << cpp_strerror(r) << bendl; | |
588 | throw error_code(r); | |
589 | } | |
590 | r = set_pipe_size(tmpfd, len); | |
591 | if (r < 0) { | |
592 | bdout << "raw_pipe: error setting pipe size on temp pipe: " | |
593 | << cpp_strerror(r) << bendl; | |
594 | } | |
595 | int flags = SPLICE_F_NONBLOCK; | |
596 | if (::tee(fds[0], tmpfd[1], len, flags) == -1) { | |
597 | r = errno; | |
598 | bdout << "raw_pipe: error tee'ing into temp pipe: " << cpp_strerror(r) | |
599 | << bendl; | |
600 | throw error_code(r); | |
601 | } | |
602 | data = (char *)malloc(len); | |
603 | if (!data) { | |
604 | throw bad_alloc(); | |
605 | } | |
606 | r = safe_read(tmpfd[0], data, len); | |
607 | if (r < (ssize_t)len) { | |
608 | bdout << "raw_pipe: error reading from temp pipe:" << cpp_strerror(r) | |
609 | << bendl; | |
610 | free(data); | |
611 | data = NULL; | |
612 | throw error_code(r); | |
613 | } | |
614 | return data; | |
615 | } | |
616 | bool source_consumed; | |
617 | int pipefds[2]; | |
618 | }; | |
619 | #endif // CEPH_HAVE_SPLICE | |
620 | ||
621 | /* | |
622 | * primitive buffer types | |
623 | */ | |
624 | class buffer::raw_char : public buffer::raw { | |
625 | public: | |
626 | MEMPOOL_CLASS_HELPERS(); | |
627 | ||
628 | explicit raw_char(unsigned l) : raw(l) { | |
629 | if (len) | |
31f18b77 | 630 | data = new char[len]; |
7c673cae FG |
631 | else |
632 | data = 0; | |
633 | inc_total_alloc(len); | |
634 | inc_history_alloc(len); | |
635 | bdout << "raw_char " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl; | |
636 | } | |
637 | raw_char(unsigned l, char *b) : raw(b, l) { | |
638 | inc_total_alloc(len); | |
639 | bdout << "raw_char " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl; | |
640 | } | |
641 | ~raw_char() override { | |
31f18b77 | 642 | delete[] data; |
7c673cae FG |
643 | dec_total_alloc(len); |
644 | bdout << "raw_char " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl; | |
645 | } | |
646 | raw* clone_empty() override { | |
647 | return new raw_char(len); | |
648 | } | |
649 | }; | |
650 | ||
31f18b77 FG |
651 | class buffer::raw_claimed_char : public buffer::raw { |
652 | public: | |
653 | MEMPOOL_CLASS_HELPERS(); | |
654 | ||
655 | explicit raw_claimed_char(unsigned l, char *b) : raw(b, l) { | |
656 | inc_total_alloc(len); | |
657 | bdout << "raw_claimed_char " << this << " alloc " << (void *)data | |
658 | << " " << l << " " << buffer::get_total_alloc() << bendl; | |
659 | } | |
660 | ~raw_claimed_char() override { | |
661 | dec_total_alloc(len); | |
662 | bdout << "raw_claimed_char " << this << " free " << (void *)data | |
663 | << " " << buffer::get_total_alloc() << bendl; | |
664 | } | |
665 | raw* clone_empty() override { | |
666 | return new raw_char(len); | |
667 | } | |
668 | }; | |
669 | ||
7c673cae FG |
670 | class buffer::raw_unshareable : public buffer::raw { |
671 | public: | |
672 | MEMPOOL_CLASS_HELPERS(); | |
673 | ||
674 | explicit raw_unshareable(unsigned l) : raw(l) { | |
675 | if (len) | |
676 | data = new char[len]; | |
677 | else | |
678 | data = 0; | |
679 | } | |
680 | raw_unshareable(unsigned l, char *b) : raw(b, l) { | |
681 | } | |
682 | raw* clone_empty() override { | |
683 | return new raw_char(len); | |
684 | } | |
685 | bool is_shareable() override { | |
686 | return false; // !shareable, will force make_shareable() | |
687 | } | |
688 | ~raw_unshareable() override { | |
689 | delete[] data; | |
690 | } | |
691 | }; | |
692 | ||
693 | class buffer::raw_static : public buffer::raw { | |
694 | public: | |
695 | MEMPOOL_CLASS_HELPERS(); | |
696 | ||
697 | raw_static(const char *d, unsigned l) : raw((char*)d, l) { } | |
698 | ~raw_static() override {} | |
699 | raw* clone_empty() override { | |
700 | return new buffer::raw_char(len); | |
701 | } | |
702 | }; | |
703 | ||
704 | class buffer::raw_claim_buffer : public buffer::raw { | |
705 | deleter del; | |
706 | public: | |
707 | raw_claim_buffer(const char *b, unsigned l, deleter d) | |
708 | : raw((char*)b, l), del(std::move(d)) { } | |
709 | ~raw_claim_buffer() override {} | |
710 | raw* clone_empty() override { | |
711 | return new buffer::raw_char(len); | |
712 | } | |
713 | }; | |
714 | ||
715 | #if defined(HAVE_XIO) | |
716 | class buffer::xio_msg_buffer : public buffer::raw { | |
717 | private: | |
718 | XioDispatchHook* m_hook; | |
719 | public: | |
720 | xio_msg_buffer(XioDispatchHook* _m_hook, const char *d, | |
721 | unsigned l) : | |
722 | raw((char*)d, l), m_hook(_m_hook->get()) {} | |
723 | ||
724 | bool is_shareable() { return false; } | |
725 | static void operator delete(void *p) | |
726 | { | |
727 | xio_msg_buffer *buf = static_cast<xio_msg_buffer*>(p); | |
728 | // return hook ref (counts against pool); it appears illegal | |
729 | // to do this in our dtor, because this fires after that | |
730 | buf->m_hook->put(); | |
731 | } | |
732 | raw* clone_empty() { | |
733 | return new buffer::raw_char(len); | |
734 | } | |
735 | }; | |
736 | ||
737 | class buffer::xio_mempool : public buffer::raw { | |
738 | public: | |
739 | struct xio_reg_mem *mp; | |
740 | xio_mempool(struct xio_reg_mem *_mp, unsigned l) : | |
741 | raw((char*)_mp->addr, l), mp(_mp) | |
742 | { } | |
743 | ~xio_mempool() {} | |
744 | raw* clone_empty() { | |
745 | return new buffer::raw_char(len); | |
746 | } | |
747 | }; | |
748 | ||
749 | struct xio_reg_mem* get_xio_mp(const buffer::ptr& bp) | |
750 | { | |
751 | buffer::xio_mempool *mb = dynamic_cast<buffer::xio_mempool*>(bp.get_raw()); | |
752 | if (mb) { | |
753 | return mb->mp; | |
754 | } | |
755 | return NULL; | |
756 | } | |
757 | ||
758 | buffer::raw* buffer::create_msg( | |
759 | unsigned len, char *buf, XioDispatchHook* m_hook) { | |
760 | XioPool& pool = m_hook->get_pool(); | |
761 | buffer::raw* bp = | |
762 | static_cast<buffer::raw*>(pool.alloc(sizeof(xio_msg_buffer))); | |
763 | new (bp) xio_msg_buffer(m_hook, buf, len); | |
764 | return bp; | |
765 | } | |
766 | #endif /* HAVE_XIO */ | |
767 | ||
768 | buffer::raw* buffer::copy(const char *c, unsigned len) { | |
769 | raw* r = buffer::create_aligned(len, sizeof(size_t)); | |
770 | memcpy(r->data, c, len); | |
771 | return r; | |
772 | } | |
773 | ||
774 | buffer::raw* buffer::create(unsigned len) { | |
775 | return buffer::create_aligned(len, sizeof(size_t)); | |
776 | } | |
3efd9988 FG |
777 | buffer::raw* buffer::create_in_mempool(unsigned len, int mempool) { |
778 | return buffer::create_aligned_in_mempool(len, sizeof(size_t), mempool); | |
779 | } | |
7c673cae | 780 | buffer::raw* buffer::claim_char(unsigned len, char *buf) { |
31f18b77 | 781 | return new raw_claimed_char(len, buf); |
7c673cae FG |
782 | } |
783 | buffer::raw* buffer::create_malloc(unsigned len) { | |
784 | return new raw_malloc(len); | |
785 | } | |
786 | buffer::raw* buffer::claim_malloc(unsigned len, char *buf) { | |
787 | return new raw_malloc(len, buf); | |
788 | } | |
789 | buffer::raw* buffer::create_static(unsigned len, char *buf) { | |
790 | return new raw_static(buf, len); | |
791 | } | |
792 | buffer::raw* buffer::claim_buffer(unsigned len, char *buf, deleter del) { | |
793 | return new raw_claim_buffer(buf, len, std::move(del)); | |
794 | } | |
795 | ||
3efd9988 FG |
796 | buffer::raw* buffer::create_aligned_in_mempool( |
797 | unsigned len, unsigned align, int mempool) { | |
7c673cae FG |
798 | // If alignment is a page multiple, use a separate buffer::raw to |
799 | // avoid fragmenting the heap. | |
800 | // | |
801 | // Somewhat unexpectedly, I see consistently better performance | |
802 | // from raw_combined than from raw even when the allocation size is | |
803 | // a page multiple (but alignment is not). | |
804 | // | |
805 | // I also see better performance from a separate buffer::raw once the | |
806 | // size passes 8KB. | |
807 | if ((align & ~CEPH_PAGE_MASK) == 0 || | |
808 | len >= CEPH_PAGE_SIZE * 2) { | |
809 | #ifndef __CYGWIN__ | |
810 | return new raw_posix_aligned(len, align); | |
811 | #else | |
812 | return new raw_hack_aligned(len, align); | |
813 | #endif | |
814 | } | |
3efd9988 FG |
815 | return raw_combined::create(len, align, mempool); |
816 | } | |
817 | buffer::raw* buffer::create_aligned( | |
818 | unsigned len, unsigned align) { | |
819 | return create_aligned_in_mempool(len, align, | |
820 | mempool::mempool_buffer_anon); | |
7c673cae FG |
821 | } |
822 | ||
823 | buffer::raw* buffer::create_page_aligned(unsigned len) { | |
824 | return create_aligned(len, CEPH_PAGE_SIZE); | |
825 | } | |
826 | ||
827 | buffer::raw* buffer::create_zero_copy(unsigned len, int fd, int64_t *offset) { | |
828 | #ifdef CEPH_HAVE_SPLICE | |
829 | buffer::raw_pipe* buf = new raw_pipe(len); | |
830 | int r = buf->set_source(fd, (loff_t*)offset); | |
831 | if (r < 0) { | |
832 | delete buf; | |
833 | throw error_code(r); | |
834 | } | |
835 | return buf; | |
836 | #else | |
837 | throw error_code(-ENOTSUP); | |
838 | #endif | |
839 | } | |
840 | ||
841 | buffer::raw* buffer::create_unshareable(unsigned len) { | |
842 | return new raw_unshareable(len); | |
843 | } | |
844 | ||
845 | buffer::ptr::ptr(raw *r) : _raw(r), _off(0), _len(r->len) // no lock needed; this is an unref raw. | |
846 | { | |
31f18b77 | 847 | r->nref++; |
7c673cae FG |
848 | bdout << "ptr " << this << " get " << _raw << bendl; |
849 | } | |
850 | buffer::ptr::ptr(unsigned l) : _off(0), _len(l) | |
851 | { | |
852 | _raw = create(l); | |
31f18b77 | 853 | _raw->nref++; |
7c673cae FG |
854 | bdout << "ptr " << this << " get " << _raw << bendl; |
855 | } | |
856 | buffer::ptr::ptr(const char *d, unsigned l) : _off(0), _len(l) // ditto. | |
857 | { | |
858 | _raw = copy(d, l); | |
31f18b77 | 859 | _raw->nref++; |
7c673cae FG |
860 | bdout << "ptr " << this << " get " << _raw << bendl; |
861 | } | |
862 | buffer::ptr::ptr(const ptr& p) : _raw(p._raw), _off(p._off), _len(p._len) | |
863 | { | |
864 | if (_raw) { | |
31f18b77 | 865 | _raw->nref++; |
7c673cae FG |
866 | bdout << "ptr " << this << " get " << _raw << bendl; |
867 | } | |
868 | } | |
869 | buffer::ptr::ptr(ptr&& p) noexcept : _raw(p._raw), _off(p._off), _len(p._len) | |
870 | { | |
871 | p._raw = nullptr; | |
872 | p._off = p._len = 0; | |
873 | } | |
874 | buffer::ptr::ptr(const ptr& p, unsigned o, unsigned l) | |
875 | : _raw(p._raw), _off(p._off + o), _len(l) | |
876 | { | |
877 | assert(o+l <= p._len); | |
878 | assert(_raw); | |
31f18b77 | 879 | _raw->nref++; |
7c673cae FG |
880 | bdout << "ptr " << this << " get " << _raw << bendl; |
881 | } | |
882 | buffer::ptr& buffer::ptr::operator= (const ptr& p) | |
883 | { | |
884 | if (p._raw) { | |
31f18b77 | 885 | p._raw->nref++; |
7c673cae FG |
886 | bdout << "ptr " << this << " get " << _raw << bendl; |
887 | } | |
888 | buffer::raw *raw = p._raw; | |
889 | release(); | |
890 | if (raw) { | |
891 | _raw = raw; | |
892 | _off = p._off; | |
893 | _len = p._len; | |
894 | } else { | |
895 | _off = _len = 0; | |
896 | } | |
897 | return *this; | |
898 | } | |
899 | buffer::ptr& buffer::ptr::operator= (ptr&& p) noexcept | |
900 | { | |
901 | release(); | |
902 | buffer::raw *raw = p._raw; | |
903 | if (raw) { | |
904 | _raw = raw; | |
905 | _off = p._off; | |
906 | _len = p._len; | |
907 | p._raw = nullptr; | |
908 | p._off = p._len = 0; | |
909 | } else { | |
910 | _off = _len = 0; | |
911 | } | |
912 | return *this; | |
913 | } | |
914 | ||
915 | buffer::raw *buffer::ptr::clone() | |
916 | { | |
917 | return _raw->clone(); | |
918 | } | |
919 | ||
920 | buffer::ptr& buffer::ptr::make_shareable() { | |
921 | if (_raw && !_raw->is_shareable()) { | |
922 | buffer::raw *tr = _raw; | |
923 | _raw = tr->clone(); | |
31f18b77 FG |
924 | _raw->nref = 1; |
925 | if (unlikely(--tr->nref == 0)) { | |
7c673cae FG |
926 | ANNOTATE_HAPPENS_AFTER(&tr->nref); |
927 | ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&tr->nref); | |
928 | delete tr; | |
929 | } else { | |
930 | ANNOTATE_HAPPENS_BEFORE(&tr->nref); | |
931 | } | |
932 | } | |
933 | return *this; | |
934 | } | |
935 | ||
936 | void buffer::ptr::swap(ptr& other) | |
937 | { | |
938 | raw *r = _raw; | |
939 | unsigned o = _off; | |
940 | unsigned l = _len; | |
941 | _raw = other._raw; | |
942 | _off = other._off; | |
943 | _len = other._len; | |
944 | other._raw = r; | |
945 | other._off = o; | |
946 | other._len = l; | |
947 | } | |
948 | ||
949 | void buffer::ptr::release() | |
950 | { | |
951 | if (_raw) { | |
952 | bdout << "ptr " << this << " release " << _raw << bendl; | |
31f18b77 | 953 | if (--_raw->nref == 0) { |
7c673cae FG |
954 | //cout << "hosing raw " << (void*)_raw << " len " << _raw->len << std::endl; |
955 | ANNOTATE_HAPPENS_AFTER(&_raw->nref); | |
956 | ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&_raw->nref); | |
957 | delete _raw; // dealloc old (if any) | |
958 | } else { | |
959 | ANNOTATE_HAPPENS_BEFORE(&_raw->nref); | |
960 | } | |
961 | _raw = 0; | |
962 | } | |
963 | } | |
964 | ||
965 | bool buffer::ptr::at_buffer_tail() const { return _off + _len == _raw->len; } | |
966 | ||
3efd9988 FG |
967 | int buffer::ptr::get_mempool() const { |
968 | if (_raw) { | |
969 | return _raw->mempool; | |
970 | } | |
971 | return mempool::mempool_buffer_anon; | |
972 | } | |
973 | ||
974 | void buffer::ptr::reassign_to_mempool(int pool) { | |
975 | if (_raw) { | |
976 | _raw->reassign_to_mempool(pool); | |
977 | } | |
978 | } | |
979 | void buffer::ptr::try_assign_to_mempool(int pool) { | |
980 | if (_raw) { | |
981 | _raw->try_assign_to_mempool(pool); | |
982 | } | |
983 | } | |
984 | ||
7c673cae FG |
985 | const char *buffer::ptr::c_str() const { |
986 | assert(_raw); | |
987 | if (buffer_track_c_str) | |
31f18b77 | 988 | buffer_c_str_accesses++; |
7c673cae FG |
989 | return _raw->get_data() + _off; |
990 | } | |
991 | char *buffer::ptr::c_str() { | |
992 | assert(_raw); | |
993 | if (buffer_track_c_str) | |
31f18b77 | 994 | buffer_c_str_accesses++; |
7c673cae FG |
995 | return _raw->get_data() + _off; |
996 | } | |
997 | const char *buffer::ptr::end_c_str() const { | |
998 | assert(_raw); | |
999 | if (buffer_track_c_str) | |
31f18b77 | 1000 | buffer_c_str_accesses++; |
7c673cae FG |
1001 | return _raw->get_data() + _off + _len; |
1002 | } | |
1003 | char *buffer::ptr::end_c_str() { | |
1004 | assert(_raw); | |
1005 | if (buffer_track_c_str) | |
31f18b77 | 1006 | buffer_c_str_accesses++; |
7c673cae FG |
1007 | return _raw->get_data() + _off + _len; |
1008 | } | |
1009 | ||
1010 | unsigned buffer::ptr::unused_tail_length() const | |
1011 | { | |
1012 | if (_raw) | |
1013 | return _raw->len - (_off+_len); | |
1014 | else | |
1015 | return 0; | |
1016 | } | |
1017 | const char& buffer::ptr::operator[](unsigned n) const | |
1018 | { | |
1019 | assert(_raw); | |
1020 | assert(n < _len); | |
1021 | return _raw->get_data()[_off + n]; | |
1022 | } | |
1023 | char& buffer::ptr::operator[](unsigned n) | |
1024 | { | |
1025 | assert(_raw); | |
1026 | assert(n < _len); | |
1027 | return _raw->get_data()[_off + n]; | |
1028 | } | |
1029 | ||
1030 | const char *buffer::ptr::raw_c_str() const { assert(_raw); return _raw->data; } | |
1031 | unsigned buffer::ptr::raw_length() const { assert(_raw); return _raw->len; } | |
31f18b77 | 1032 | int buffer::ptr::raw_nref() const { assert(_raw); return _raw->nref; } |
7c673cae FG |
1033 | |
1034 | void buffer::ptr::copy_out(unsigned o, unsigned l, char *dest) const { | |
1035 | assert(_raw); | |
1036 | if (o+l > _len) | |
1037 | throw end_of_buffer(); | |
1038 | char* src = _raw->data + _off + o; | |
1039 | maybe_inline_memcpy(dest, src, l, 8); | |
1040 | } | |
1041 | ||
31f18b77 | 1042 | unsigned buffer::ptr::wasted() const |
7c673cae | 1043 | { |
7c673cae FG |
1044 | return _raw->len - _len; |
1045 | } | |
1046 | ||
1047 | int buffer::ptr::cmp(const ptr& o) const | |
1048 | { | |
1049 | int l = _len < o._len ? _len : o._len; | |
1050 | if (l) { | |
1051 | int r = memcmp(c_str(), o.c_str(), l); | |
1052 | if (r) | |
1053 | return r; | |
1054 | } | |
1055 | if (_len < o._len) | |
1056 | return -1; | |
1057 | if (_len > o._len) | |
1058 | return 1; | |
1059 | return 0; | |
1060 | } | |
1061 | ||
1062 | bool buffer::ptr::is_zero() const | |
1063 | { | |
1064 | return mem_is_zero(c_str(), _len); | |
1065 | } | |
1066 | ||
1067 | unsigned buffer::ptr::append(char c) | |
1068 | { | |
1069 | assert(_raw); | |
1070 | assert(1 <= unused_tail_length()); | |
1071 | char* ptr = _raw->data + _off + _len; | |
1072 | *ptr = c; | |
1073 | _len++; | |
1074 | return _len + _off; | |
1075 | } | |
1076 | ||
1077 | unsigned buffer::ptr::append(const char *p, unsigned l) | |
1078 | { | |
1079 | assert(_raw); | |
1080 | assert(l <= unused_tail_length()); | |
1081 | char* c = _raw->data + _off + _len; | |
1082 | maybe_inline_memcpy(c, p, l, 32); | |
1083 | _len += l; | |
1084 | return _len + _off; | |
1085 | } | |
1086 | ||
1087 | void buffer::ptr::copy_in(unsigned o, unsigned l, const char *src) | |
1088 | { | |
1089 | copy_in(o, l, src, true); | |
1090 | } | |
1091 | ||
1092 | void buffer::ptr::copy_in(unsigned o, unsigned l, const char *src, bool crc_reset) | |
1093 | { | |
1094 | assert(_raw); | |
1095 | assert(o <= _len); | |
1096 | assert(o+l <= _len); | |
1097 | char* dest = _raw->data + _off + o; | |
1098 | if (crc_reset) | |
1099 | _raw->invalidate_crc(); | |
1100 | maybe_inline_memcpy(dest, src, l, 64); | |
1101 | } | |
1102 | ||
1103 | void buffer::ptr::zero() | |
1104 | { | |
1105 | zero(true); | |
1106 | } | |
1107 | ||
1108 | void buffer::ptr::zero(bool crc_reset) | |
1109 | { | |
1110 | if (crc_reset) | |
1111 | _raw->invalidate_crc(); | |
1112 | memset(c_str(), 0, _len); | |
1113 | } | |
1114 | ||
1115 | void buffer::ptr::zero(unsigned o, unsigned l) | |
1116 | { | |
1117 | zero(o, l, true); | |
1118 | } | |
1119 | ||
1120 | void buffer::ptr::zero(unsigned o, unsigned l, bool crc_reset) | |
1121 | { | |
1122 | assert(o+l <= _len); | |
1123 | if (crc_reset) | |
1124 | _raw->invalidate_crc(); | |
1125 | memset(c_str()+o, 0, l); | |
1126 | } | |
1127 | bool buffer::ptr::can_zero_copy() const | |
1128 | { | |
1129 | return _raw->can_zero_copy(); | |
1130 | } | |
1131 | ||
1132 | int buffer::ptr::zero_copy_to_fd(int fd, int64_t *offset) const | |
1133 | { | |
1134 | return _raw->zero_copy_to_fd(fd, (loff_t*)offset); | |
1135 | } | |
1136 | ||
1137 | // -- buffer::list::iterator -- | |
1138 | /* | |
1139 | buffer::list::iterator operator=(const buffer::list::iterator& other) | |
1140 | { | |
1141 | if (this != &other) { | |
1142 | bl = other.bl; | |
1143 | ls = other.ls; | |
1144 | off = other.off; | |
1145 | p = other.p; | |
1146 | p_off = other.p_off; | |
1147 | } | |
1148 | return *this; | |
1149 | }*/ | |
1150 | ||
1151 | template<bool is_const> | |
1152 | buffer::list::iterator_impl<is_const>::iterator_impl(bl_t *l, unsigned o) | |
1153 | : bl(l), ls(&bl->_buffers), off(0), p(ls->begin()), p_off(0) | |
1154 | { | |
1155 | advance(o); | |
1156 | } | |
1157 | ||
1158 | template<bool is_const> | |
1159 | buffer::list::iterator_impl<is_const>::iterator_impl(const buffer::list::iterator& i) | |
1160 | : iterator_impl<is_const>(i.bl, i.off, i.p, i.p_off) {} | |
1161 | ||
1162 | template<bool is_const> | |
1163 | void buffer::list::iterator_impl<is_const>::advance(int o) | |
1164 | { | |
1165 | //cout << this << " advance " << o << " from " << off << " (p_off " << p_off << " in " << p->length() << ")" << std::endl; | |
1166 | if (o > 0) { | |
1167 | p_off += o; | |
1168 | while (p_off > 0) { | |
1169 | if (p == ls->end()) | |
1170 | throw end_of_buffer(); | |
1171 | if (p_off >= p->length()) { | |
1172 | // skip this buffer | |
1173 | p_off -= p->length(); | |
1174 | p++; | |
1175 | } else { | |
1176 | // somewhere in this buffer! | |
1177 | break; | |
1178 | } | |
1179 | } | |
1180 | off += o; | |
1181 | return; | |
1182 | } | |
1183 | while (o < 0) { | |
1184 | if (p_off) { | |
1185 | unsigned d = -o; | |
1186 | if (d > p_off) | |
1187 | d = p_off; | |
1188 | p_off -= d; | |
1189 | off -= d; | |
1190 | o += d; | |
1191 | } else if (off > 0) { | |
1192 | assert(p != ls->begin()); | |
1193 | p--; | |
1194 | p_off = p->length(); | |
1195 | } else { | |
1196 | throw end_of_buffer(); | |
1197 | } | |
1198 | } | |
1199 | } | |
1200 | ||
1201 | template<bool is_const> | |
1202 | void buffer::list::iterator_impl<is_const>::seek(unsigned o) | |
1203 | { | |
1204 | p = ls->begin(); | |
1205 | off = p_off = 0; | |
1206 | advance(o); | |
1207 | } | |
1208 | ||
1209 | template<bool is_const> | |
1210 | char buffer::list::iterator_impl<is_const>::operator*() const | |
1211 | { | |
1212 | if (p == ls->end()) | |
1213 | throw end_of_buffer(); | |
1214 | return (*p)[p_off]; | |
1215 | } | |
1216 | ||
1217 | template<bool is_const> | |
1218 | buffer::list::iterator_impl<is_const>& | |
1219 | buffer::list::iterator_impl<is_const>::operator++() | |
1220 | { | |
1221 | if (p == ls->end()) | |
1222 | throw end_of_buffer(); | |
1223 | advance(1); | |
1224 | return *this; | |
1225 | } | |
1226 | ||
1227 | template<bool is_const> | |
1228 | buffer::ptr buffer::list::iterator_impl<is_const>::get_current_ptr() const | |
1229 | { | |
1230 | if (p == ls->end()) | |
1231 | throw end_of_buffer(); | |
1232 | return ptr(*p, p_off, p->length() - p_off); | |
1233 | } | |
1234 | ||
1235 | // copy data out. | |
1236 | // note that these all _append_ to dest! | |
1237 | template<bool is_const> | |
1238 | void buffer::list::iterator_impl<is_const>::copy(unsigned len, char *dest) | |
1239 | { | |
1240 | if (p == ls->end()) seek(off); | |
1241 | while (len > 0) { | |
1242 | if (p == ls->end()) | |
1243 | throw end_of_buffer(); | |
1244 | assert(p->length() > 0); | |
1245 | ||
1246 | unsigned howmuch = p->length() - p_off; | |
1247 | if (len < howmuch) howmuch = len; | |
1248 | p->copy_out(p_off, howmuch, dest); | |
1249 | dest += howmuch; | |
1250 | ||
1251 | len -= howmuch; | |
1252 | advance(howmuch); | |
1253 | } | |
1254 | } | |
1255 | ||
1256 | template<bool is_const> | |
1257 | void buffer::list::iterator_impl<is_const>::copy(unsigned len, ptr &dest) | |
1258 | { | |
1259 | copy_deep(len, dest); | |
1260 | } | |
1261 | ||
1262 | template<bool is_const> | |
1263 | void buffer::list::iterator_impl<is_const>::copy_deep(unsigned len, ptr &dest) | |
1264 | { | |
1265 | if (!len) { | |
1266 | return; | |
1267 | } | |
1268 | if (p == ls->end()) | |
1269 | throw end_of_buffer(); | |
1270 | assert(p->length() > 0); | |
1271 | dest = create(len); | |
1272 | copy(len, dest.c_str()); | |
1273 | } | |
1274 | template<bool is_const> | |
1275 | void buffer::list::iterator_impl<is_const>::copy_shallow(unsigned len, | |
1276 | ptr &dest) | |
1277 | { | |
1278 | if (!len) { | |
1279 | return; | |
1280 | } | |
1281 | if (p == ls->end()) | |
1282 | throw end_of_buffer(); | |
1283 | assert(p->length() > 0); | |
1284 | unsigned howmuch = p->length() - p_off; | |
1285 | if (howmuch < len) { | |
1286 | dest = create(len); | |
1287 | copy(len, dest.c_str()); | |
1288 | } else { | |
1289 | dest = ptr(*p, p_off, len); | |
1290 | advance(len); | |
1291 | } | |
1292 | } | |
1293 | ||
1294 | template<bool is_const> | |
1295 | void buffer::list::iterator_impl<is_const>::copy(unsigned len, list &dest) | |
1296 | { | |
1297 | if (p == ls->end()) | |
1298 | seek(off); | |
1299 | while (len > 0) { | |
1300 | if (p == ls->end()) | |
1301 | throw end_of_buffer(); | |
1302 | ||
1303 | unsigned howmuch = p->length() - p_off; | |
1304 | if (len < howmuch) | |
1305 | howmuch = len; | |
1306 | dest.append(*p, p_off, howmuch); | |
1307 | ||
1308 | len -= howmuch; | |
1309 | advance(howmuch); | |
1310 | } | |
1311 | } | |
1312 | ||
1313 | template<bool is_const> | |
1314 | void buffer::list::iterator_impl<is_const>::copy(unsigned len, std::string &dest) | |
1315 | { | |
1316 | if (p == ls->end()) | |
1317 | seek(off); | |
1318 | while (len > 0) { | |
1319 | if (p == ls->end()) | |
1320 | throw end_of_buffer(); | |
1321 | ||
1322 | unsigned howmuch = p->length() - p_off; | |
1323 | const char *c_str = p->c_str(); | |
1324 | if (len < howmuch) | |
1325 | howmuch = len; | |
1326 | dest.append(c_str + p_off, howmuch); | |
1327 | ||
1328 | len -= howmuch; | |
1329 | advance(howmuch); | |
1330 | } | |
1331 | } | |
1332 | ||
1333 | template<bool is_const> | |
1334 | void buffer::list::iterator_impl<is_const>::copy_all(list &dest) | |
1335 | { | |
1336 | if (p == ls->end()) | |
1337 | seek(off); | |
1338 | while (1) { | |
1339 | if (p == ls->end()) | |
1340 | return; | |
1341 | assert(p->length() > 0); | |
1342 | ||
1343 | unsigned howmuch = p->length() - p_off; | |
1344 | const char *c_str = p->c_str(); | |
1345 | dest.append(c_str + p_off, howmuch); | |
1346 | ||
1347 | advance(howmuch); | |
1348 | } | |
1349 | } | |
1350 | ||
1351 | template<bool is_const> | |
1352 | size_t buffer::list::iterator_impl<is_const>::get_ptr_and_advance( | |
1353 | size_t want, const char **data) | |
1354 | { | |
1355 | if (p == ls->end()) { | |
1356 | seek(off); | |
1357 | if (p == ls->end()) { | |
1358 | return 0; | |
1359 | } | |
1360 | } | |
1361 | *data = p->c_str() + p_off; | |
1362 | size_t l = MIN(p->length() - p_off, want); | |
1363 | p_off += l; | |
1364 | if (p_off == p->length()) { | |
1365 | ++p; | |
1366 | p_off = 0; | |
1367 | } | |
1368 | off += l; | |
1369 | return l; | |
1370 | } | |
1371 | ||
1372 | template<bool is_const> | |
1373 | uint32_t buffer::list::iterator_impl<is_const>::crc32c( | |
1374 | size_t length, uint32_t crc) | |
1375 | { | |
1376 | length = MIN( length, get_remaining()); | |
1377 | while (length > 0) { | |
1378 | const char *p; | |
1379 | size_t l = get_ptr_and_advance(length, &p); | |
1380 | crc = ceph_crc32c(crc, (unsigned char*)p, l); | |
1381 | length -= l; | |
1382 | } | |
1383 | return crc; | |
1384 | } | |
1385 | ||
1386 | // explicitly instantiate only the iterator types we need, so we can hide the | |
1387 | // details in this compilation unit without introducing unnecessary link time | |
1388 | // dependencies. | |
1389 | template class buffer::list::iterator_impl<true>; | |
1390 | template class buffer::list::iterator_impl<false>; | |
1391 | ||
1392 | buffer::list::iterator::iterator(bl_t *l, unsigned o) | |
1393 | : iterator_impl(l, o) | |
1394 | {} | |
1395 | ||
1396 | buffer::list::iterator::iterator(bl_t *l, unsigned o, list_iter_t ip, unsigned po) | |
1397 | : iterator_impl(l, o, ip, po) | |
1398 | {} | |
1399 | ||
1400 | void buffer::list::iterator::advance(int o) | |
1401 | { | |
1402 | buffer::list::iterator_impl<false>::advance(o); | |
1403 | } | |
1404 | ||
1405 | void buffer::list::iterator::seek(unsigned o) | |
1406 | { | |
1407 | buffer::list::iterator_impl<false>::seek(o); | |
1408 | } | |
1409 | ||
1410 | char buffer::list::iterator::operator*() | |
1411 | { | |
1412 | if (p == ls->end()) { | |
1413 | throw end_of_buffer(); | |
1414 | } | |
1415 | return (*p)[p_off]; | |
1416 | } | |
1417 | ||
1418 | buffer::list::iterator& buffer::list::iterator::operator++() | |
1419 | { | |
1420 | buffer::list::iterator_impl<false>::operator++(); | |
1421 | return *this; | |
1422 | } | |
1423 | ||
1424 | buffer::ptr buffer::list::iterator::get_current_ptr() | |
1425 | { | |
1426 | if (p == ls->end()) { | |
1427 | throw end_of_buffer(); | |
1428 | } | |
1429 | return ptr(*p, p_off, p->length() - p_off); | |
1430 | } | |
1431 | ||
1432 | void buffer::list::iterator::copy(unsigned len, char *dest) | |
1433 | { | |
1434 | return buffer::list::iterator_impl<false>::copy(len, dest); | |
1435 | } | |
1436 | ||
1437 | void buffer::list::iterator::copy(unsigned len, ptr &dest) | |
1438 | { | |
1439 | return buffer::list::iterator_impl<false>::copy_deep(len, dest); | |
1440 | } | |
1441 | ||
1442 | void buffer::list::iterator::copy_deep(unsigned len, ptr &dest) | |
1443 | { | |
1444 | buffer::list::iterator_impl<false>::copy_deep(len, dest); | |
1445 | } | |
1446 | ||
1447 | void buffer::list::iterator::copy_shallow(unsigned len, ptr &dest) | |
1448 | { | |
1449 | buffer::list::iterator_impl<false>::copy_shallow(len, dest); | |
1450 | } | |
1451 | ||
1452 | void buffer::list::iterator::copy(unsigned len, list &dest) | |
1453 | { | |
1454 | buffer::list::iterator_impl<false>::copy(len, dest); | |
1455 | } | |
1456 | ||
1457 | void buffer::list::iterator::copy(unsigned len, std::string &dest) | |
1458 | { | |
1459 | buffer::list::iterator_impl<false>::copy(len, dest); | |
1460 | } | |
1461 | ||
1462 | void buffer::list::iterator::copy_all(list &dest) | |
1463 | { | |
1464 | buffer::list::iterator_impl<false>::copy_all(dest); | |
1465 | } | |
1466 | ||
1467 | void buffer::list::iterator::copy_in(unsigned len, const char *src) | |
1468 | { | |
1469 | copy_in(len, src, true); | |
1470 | } | |
1471 | ||
1472 | // copy data in | |
1473 | void buffer::list::iterator::copy_in(unsigned len, const char *src, bool crc_reset) | |
1474 | { | |
1475 | // copy | |
1476 | if (p == ls->end()) | |
1477 | seek(off); | |
1478 | while (len > 0) { | |
1479 | if (p == ls->end()) | |
1480 | throw end_of_buffer(); | |
1481 | ||
1482 | unsigned howmuch = p->length() - p_off; | |
1483 | if (len < howmuch) | |
1484 | howmuch = len; | |
1485 | p->copy_in(p_off, howmuch, src, crc_reset); | |
1486 | ||
1487 | src += howmuch; | |
1488 | len -= howmuch; | |
1489 | advance(howmuch); | |
1490 | } | |
1491 | } | |
1492 | ||
1493 | void buffer::list::iterator::copy_in(unsigned len, const list& otherl) | |
1494 | { | |
1495 | if (p == ls->end()) | |
1496 | seek(off); | |
1497 | unsigned left = len; | |
1498 | for (std::list<ptr>::const_iterator i = otherl._buffers.begin(); | |
1499 | i != otherl._buffers.end(); | |
1500 | ++i) { | |
1501 | unsigned l = (*i).length(); | |
1502 | if (left < l) | |
1503 | l = left; | |
1504 | copy_in(l, i->c_str()); | |
1505 | left -= l; | |
1506 | if (left == 0) | |
1507 | break; | |
1508 | } | |
1509 | } | |
1510 | ||
1511 | // -- buffer::list -- | |
1512 | ||
1513 | buffer::list::list(list&& other) | |
1514 | : _buffers(std::move(other._buffers)), | |
1515 | _len(other._len), | |
1516 | _memcopy_count(other._memcopy_count), | |
1517 | last_p(this) { | |
1518 | append_buffer.swap(other.append_buffer); | |
1519 | other.clear(); | |
1520 | } | |
1521 | ||
1522 | void buffer::list::swap(list& other) | |
1523 | { | |
1524 | std::swap(_len, other._len); | |
1525 | std::swap(_memcopy_count, other._memcopy_count); | |
1526 | _buffers.swap(other._buffers); | |
1527 | append_buffer.swap(other.append_buffer); | |
1528 | //last_p.swap(other.last_p); | |
1529 | last_p = begin(); | |
1530 | other.last_p = other.begin(); | |
1531 | } | |
1532 | ||
1533 | bool buffer::list::contents_equal(buffer::list& other) | |
1534 | { | |
1535 | return static_cast<const buffer::list*>(this)->contents_equal(other); | |
1536 | } | |
1537 | ||
1538 | bool buffer::list::contents_equal(const ceph::buffer::list& other) const | |
1539 | { | |
1540 | if (length() != other.length()) | |
1541 | return false; | |
1542 | ||
1543 | // buffer-wise comparison | |
1544 | if (true) { | |
1545 | std::list<ptr>::const_iterator a = _buffers.begin(); | |
1546 | std::list<ptr>::const_iterator b = other._buffers.begin(); | |
1547 | unsigned aoff = 0, boff = 0; | |
1548 | while (a != _buffers.end()) { | |
1549 | unsigned len = a->length() - aoff; | |
1550 | if (len > b->length() - boff) | |
1551 | len = b->length() - boff; | |
1552 | if (memcmp(a->c_str() + aoff, b->c_str() + boff, len) != 0) | |
1553 | return false; | |
1554 | aoff += len; | |
1555 | if (aoff == a->length()) { | |
1556 | aoff = 0; | |
1557 | ++a; | |
1558 | } | |
1559 | boff += len; | |
1560 | if (boff == b->length()) { | |
1561 | boff = 0; | |
1562 | ++b; | |
1563 | } | |
1564 | } | |
1565 | assert(b == other._buffers.end()); | |
1566 | return true; | |
1567 | } | |
1568 | ||
1569 | // byte-wise comparison | |
1570 | if (false) { | |
1571 | bufferlist::const_iterator me = begin(); | |
1572 | bufferlist::const_iterator him = other.begin(); | |
1573 | while (!me.end()) { | |
1574 | if (*me != *him) | |
1575 | return false; | |
1576 | ++me; | |
1577 | ++him; | |
1578 | } | |
1579 | return true; | |
1580 | } | |
1581 | } | |
1582 | ||
1583 | bool buffer::list::can_zero_copy() const | |
1584 | { | |
1585 | for (std::list<ptr>::const_iterator it = _buffers.begin(); | |
1586 | it != _buffers.end(); | |
1587 | ++it) | |
1588 | if (!it->can_zero_copy()) | |
1589 | return false; | |
1590 | return true; | |
1591 | } | |
1592 | ||
1593 | bool buffer::list::is_provided_buffer(const char *dst) const | |
1594 | { | |
1595 | if (_buffers.empty()) | |
1596 | return false; | |
1597 | return (is_contiguous() && (_buffers.front().c_str() == dst)); | |
1598 | } | |
1599 | ||
1600 | bool buffer::list::is_aligned(unsigned align) const | |
1601 | { | |
1602 | for (std::list<ptr>::const_iterator it = _buffers.begin(); | |
1603 | it != _buffers.end(); | |
1604 | ++it) | |
1605 | if (!it->is_aligned(align)) | |
1606 | return false; | |
1607 | return true; | |
1608 | } | |
1609 | ||
1610 | bool buffer::list::is_n_align_sized(unsigned align) const | |
1611 | { | |
1612 | for (std::list<ptr>::const_iterator it = _buffers.begin(); | |
1613 | it != _buffers.end(); | |
1614 | ++it) | |
1615 | if (!it->is_n_align_sized(align)) | |
1616 | return false; | |
1617 | return true; | |
1618 | } | |
1619 | ||
1620 | bool buffer::list::is_aligned_size_and_memory(unsigned align_size, | |
1621 | unsigned align_memory) const | |
1622 | { | |
1623 | for (std::list<ptr>::const_iterator it = _buffers.begin(); | |
1624 | it != _buffers.end(); | |
1625 | ++it) { | |
1626 | if (!it->is_aligned(align_memory) || !it->is_n_align_sized(align_size)) | |
1627 | return false; | |
1628 | } | |
1629 | return true; | |
1630 | } | |
1631 | ||
1632 | bool buffer::list::is_zero() const { | |
1633 | for (std::list<ptr>::const_iterator it = _buffers.begin(); | |
1634 | it != _buffers.end(); | |
1635 | ++it) { | |
1636 | if (!it->is_zero()) { | |
1637 | return false; | |
1638 | } | |
1639 | } | |
1640 | return true; | |
1641 | } | |
1642 | ||
1643 | void buffer::list::zero() | |
1644 | { | |
1645 | for (std::list<ptr>::iterator it = _buffers.begin(); | |
1646 | it != _buffers.end(); | |
1647 | ++it) | |
1648 | it->zero(); | |
1649 | } | |
1650 | ||
1651 | void buffer::list::zero(unsigned o, unsigned l) | |
1652 | { | |
1653 | assert(o+l <= _len); | |
1654 | unsigned p = 0; | |
1655 | for (std::list<ptr>::iterator it = _buffers.begin(); | |
1656 | it != _buffers.end(); | |
1657 | ++it) { | |
1658 | if (p + it->length() > o) { | |
1659 | if (p >= o && p+it->length() <= o+l) { | |
1660 | // 'o'------------- l -----------| | |
1661 | // 'p'-- it->length() --| | |
1662 | it->zero(); | |
1663 | } else if (p >= o) { | |
1664 | // 'o'------------- l -----------| | |
1665 | // 'p'------- it->length() -------| | |
1666 | it->zero(0, o+l-p); | |
1667 | } else if (p + it->length() <= o+l) { | |
1668 | // 'o'------------- l -----------| | |
1669 | // 'p'------- it->length() -------| | |
1670 | it->zero(o-p, it->length()-(o-p)); | |
1671 | } else { | |
1672 | // 'o'----------- l -----------| | |
1673 | // 'p'---------- it->length() ----------| | |
1674 | it->zero(o-p, l); | |
1675 | } | |
1676 | } | |
1677 | p += it->length(); | |
1678 | if (o+l <= p) | |
1679 | break; // done | |
1680 | } | |
1681 | } | |
1682 | ||
1683 | bool buffer::list::is_contiguous() const | |
1684 | { | |
1685 | return &(*_buffers.begin()) == &(*_buffers.rbegin()); | |
1686 | } | |
1687 | ||
1688 | bool buffer::list::is_n_page_sized() const | |
1689 | { | |
1690 | return is_n_align_sized(CEPH_PAGE_SIZE); | |
1691 | } | |
1692 | ||
1693 | bool buffer::list::is_page_aligned() const | |
1694 | { | |
1695 | return is_aligned(CEPH_PAGE_SIZE); | |
1696 | } | |
1697 | ||
3efd9988 FG |
1698 | int buffer::list::get_mempool() const |
1699 | { | |
1700 | if (_buffers.empty()) { | |
1701 | return mempool::mempool_buffer_anon; | |
1702 | } | |
1703 | return _buffers.back().get_mempool(); | |
1704 | } | |
1705 | ||
31f18b77 FG |
1706 | void buffer::list::reassign_to_mempool(int pool) |
1707 | { | |
31f18b77 FG |
1708 | if (append_buffer.get_raw()) { |
1709 | append_buffer.get_raw()->reassign_to_mempool(pool); | |
1710 | } | |
1711 | for (auto& p : _buffers) { | |
1712 | p.get_raw()->reassign_to_mempool(pool); | |
1713 | } | |
1714 | } | |
1715 | ||
1716 | void buffer::list::try_assign_to_mempool(int pool) | |
1717 | { | |
31f18b77 FG |
1718 | if (append_buffer.get_raw()) { |
1719 | append_buffer.get_raw()->try_assign_to_mempool(pool); | |
1720 | } | |
1721 | for (auto& p : _buffers) { | |
1722 | p.get_raw()->try_assign_to_mempool(pool); | |
1723 | } | |
1724 | } | |
1725 | ||
7c673cae FG |
1726 | void buffer::list::rebuild() |
1727 | { | |
1728 | if (_len == 0) { | |
1729 | _buffers.clear(); | |
1730 | return; | |
1731 | } | |
1732 | ptr nb; | |
1733 | if ((_len & ~CEPH_PAGE_MASK) == 0) | |
1734 | nb = buffer::create_page_aligned(_len); | |
1735 | else | |
1736 | nb = buffer::create(_len); | |
1737 | rebuild(nb); | |
1738 | } | |
1739 | ||
1740 | void buffer::list::rebuild(ptr& nb) | |
1741 | { | |
1742 | unsigned pos = 0; | |
1743 | for (std::list<ptr>::iterator it = _buffers.begin(); | |
1744 | it != _buffers.end(); | |
1745 | ++it) { | |
1746 | nb.copy_in(pos, it->length(), it->c_str(), false); | |
1747 | pos += it->length(); | |
1748 | } | |
1749 | _memcopy_count += pos; | |
1750 | _buffers.clear(); | |
1751 | if (nb.length()) | |
1752 | _buffers.push_back(nb); | |
1753 | invalidate_crc(); | |
1754 | last_p = begin(); | |
1755 | } | |
1756 | ||
1757 | bool buffer::list::rebuild_aligned(unsigned align) | |
1758 | { | |
1759 | return rebuild_aligned_size_and_memory(align, align); | |
1760 | } | |
1761 | ||
1762 | bool buffer::list::rebuild_aligned_size_and_memory(unsigned align_size, | |
b32b8144 FG |
1763 | unsigned align_memory, |
1764 | unsigned max_buffers) | |
7c673cae FG |
1765 | { |
1766 | unsigned old_memcopy_count = _memcopy_count; | |
b32b8144 FG |
1767 | |
1768 | if (max_buffers && _buffers.size() > max_buffers | |
1769 | && _len > (max_buffers * align_size)) { | |
1770 | align_size = ROUND_UP_TO(ROUND_UP_TO(_len, max_buffers) / max_buffers, align_size); | |
1771 | } | |
7c673cae FG |
1772 | std::list<ptr>::iterator p = _buffers.begin(); |
1773 | while (p != _buffers.end()) { | |
1774 | // keep anything that's already align and sized aligned | |
1775 | if (p->is_aligned(align_memory) && p->is_n_align_sized(align_size)) { | |
1776 | /*cout << " segment " << (void*)p->c_str() | |
1777 | << " offset " << ((unsigned long)p->c_str() & (align - 1)) | |
1778 | << " length " << p->length() | |
1779 | << " " << (p->length() & (align - 1)) << " ok" << std::endl; | |
1780 | */ | |
1781 | ++p; | |
1782 | continue; | |
1783 | } | |
1784 | ||
1785 | // consolidate unaligned items, until we get something that is sized+aligned | |
1786 | list unaligned; | |
1787 | unsigned offset = 0; | |
1788 | do { | |
1789 | /*cout << " segment " << (void*)p->c_str() | |
1790 | << " offset " << ((unsigned long)p->c_str() & (align - 1)) | |
1791 | << " length " << p->length() << " " << (p->length() & (align - 1)) | |
1792 | << " overall offset " << offset << " " << (offset & (align - 1)) | |
1793 | << " not ok" << std::endl; | |
1794 | */ | |
1795 | offset += p->length(); | |
1796 | unaligned.push_back(*p); | |
1797 | _buffers.erase(p++); | |
1798 | } while (p != _buffers.end() && | |
1799 | (!p->is_aligned(align_memory) || | |
1800 | !p->is_n_align_sized(align_size) || | |
1801 | (offset % align_size))); | |
1802 | if (!(unaligned.is_contiguous() && unaligned._buffers.front().is_aligned(align_memory))) { | |
1803 | ptr nb(buffer::create_aligned(unaligned._len, align_memory)); | |
1804 | unaligned.rebuild(nb); | |
1805 | _memcopy_count += unaligned._len; | |
1806 | } | |
1807 | _buffers.insert(p, unaligned._buffers.front()); | |
1808 | } | |
1809 | last_p = begin(); | |
1810 | ||
1811 | return (old_memcopy_count != _memcopy_count); | |
1812 | } | |
1813 | ||
1814 | bool buffer::list::rebuild_page_aligned() | |
1815 | { | |
1816 | return rebuild_aligned(CEPH_PAGE_SIZE); | |
1817 | } | |
1818 | ||
31f18b77 FG |
1819 | void buffer::list::reserve(size_t prealloc) |
1820 | { | |
1821 | if (append_buffer.unused_tail_length() < prealloc) { | |
3efd9988 | 1822 | append_buffer = buffer::create_in_mempool(prealloc, get_mempool()); |
31f18b77 FG |
1823 | append_buffer.set_length(0); // unused, so far. |
1824 | } | |
1825 | } | |
1826 | ||
7c673cae FG |
1827 | // sort-of-like-assignment-op |
1828 | void buffer::list::claim(list& bl, unsigned int flags) | |
1829 | { | |
1830 | // free my buffers | |
1831 | clear(); | |
1832 | claim_append(bl, flags); | |
1833 | } | |
1834 | ||
1835 | void buffer::list::claim_append(list& bl, unsigned int flags) | |
1836 | { | |
1837 | // steal the other guy's buffers | |
1838 | _len += bl._len; | |
1839 | if (!(flags & CLAIM_ALLOW_NONSHAREABLE)) | |
1840 | bl.make_shareable(); | |
1841 | _buffers.splice(_buffers.end(), bl._buffers ); | |
1842 | bl._len = 0; | |
1843 | bl.last_p = bl.begin(); | |
1844 | } | |
1845 | ||
1846 | void buffer::list::claim_prepend(list& bl, unsigned int flags) | |
1847 | { | |
1848 | // steal the other guy's buffers | |
1849 | _len += bl._len; | |
1850 | if (!(flags & CLAIM_ALLOW_NONSHAREABLE)) | |
1851 | bl.make_shareable(); | |
1852 | _buffers.splice(_buffers.begin(), bl._buffers ); | |
1853 | bl._len = 0; | |
1854 | bl.last_p = bl.begin(); | |
1855 | } | |
1856 | ||
31f18b77 FG |
1857 | void buffer::list::claim_append_piecewise(list& bl) |
1858 | { | |
1859 | // steal the other guy's buffers | |
1860 | for (std::list<buffer::ptr>::const_iterator i = bl.buffers().begin(); | |
1861 | i != bl.buffers().end(); i++) { | |
1862 | append(*i, 0, i->length()); | |
1863 | } | |
1864 | bl.clear(); | |
1865 | } | |
1866 | ||
7c673cae FG |
1867 | void buffer::list::copy(unsigned off, unsigned len, char *dest) const |
1868 | { | |
1869 | if (off + len > length()) | |
1870 | throw end_of_buffer(); | |
1871 | if (last_p.get_off() != off) | |
1872 | last_p.seek(off); | |
1873 | last_p.copy(len, dest); | |
1874 | } | |
1875 | ||
1876 | void buffer::list::copy(unsigned off, unsigned len, list &dest) const | |
1877 | { | |
1878 | if (off + len > length()) | |
1879 | throw end_of_buffer(); | |
1880 | if (last_p.get_off() != off) | |
1881 | last_p.seek(off); | |
1882 | last_p.copy(len, dest); | |
1883 | } | |
1884 | ||
1885 | void buffer::list::copy(unsigned off, unsigned len, std::string& dest) const | |
1886 | { | |
1887 | if (last_p.get_off() != off) | |
1888 | last_p.seek(off); | |
1889 | return last_p.copy(len, dest); | |
1890 | } | |
1891 | ||
1892 | void buffer::list::copy_in(unsigned off, unsigned len, const char *src) | |
1893 | { | |
1894 | copy_in(off, len, src, true); | |
1895 | } | |
1896 | ||
1897 | void buffer::list::copy_in(unsigned off, unsigned len, const char *src, bool crc_reset) | |
1898 | { | |
1899 | if (off + len > length()) | |
1900 | throw end_of_buffer(); | |
1901 | ||
1902 | if (last_p.get_off() != off) | |
1903 | last_p.seek(off); | |
1904 | last_p.copy_in(len, src, crc_reset); | |
1905 | } | |
1906 | ||
1907 | void buffer::list::copy_in(unsigned off, unsigned len, const list& src) | |
1908 | { | |
1909 | if (last_p.get_off() != off) | |
1910 | last_p.seek(off); | |
1911 | last_p.copy_in(len, src); | |
1912 | } | |
1913 | ||
1914 | void buffer::list::append(char c) | |
1915 | { | |
1916 | // put what we can into the existing append_buffer. | |
1917 | unsigned gap = append_buffer.unused_tail_length(); | |
1918 | if (!gap) { | |
1919 | // make a new append_buffer! | |
3efd9988 FG |
1920 | append_buffer = raw_combined::create(CEPH_BUFFER_APPEND_SIZE, 0, |
1921 | get_mempool()); | |
7c673cae FG |
1922 | append_buffer.set_length(0); // unused, so far. |
1923 | } | |
1924 | append(append_buffer, append_buffer.append(c) - 1, 1); // add segment to the list | |
1925 | } | |
1926 | ||
1927 | void buffer::list::append(const char *data, unsigned len) | |
1928 | { | |
1929 | while (len > 0) { | |
1930 | // put what we can into the existing append_buffer. | |
1931 | unsigned gap = append_buffer.unused_tail_length(); | |
1932 | if (gap > 0) { | |
1933 | if (gap > len) gap = len; | |
1934 | //cout << "append first char is " << data[0] << ", last char is " << data[len-1] << std::endl; | |
1935 | append_buffer.append(data, gap); | |
1936 | append(append_buffer, append_buffer.length() - gap, gap); // add segment to the list | |
1937 | len -= gap; | |
1938 | data += gap; | |
1939 | } | |
1940 | if (len == 0) | |
1941 | break; // done! | |
1942 | ||
1943 | // make a new append_buffer. fill out a complete page, factoring in the | |
1944 | // raw_combined overhead. | |
1945 | size_t need = ROUND_UP_TO(len, sizeof(size_t)) + sizeof(raw_combined); | |
1946 | size_t alen = ROUND_UP_TO(need, CEPH_BUFFER_ALLOC_UNIT) - | |
1947 | sizeof(raw_combined); | |
3efd9988 | 1948 | append_buffer = raw_combined::create(alen, 0, get_mempool()); |
7c673cae FG |
1949 | append_buffer.set_length(0); // unused, so far. |
1950 | } | |
1951 | } | |
1952 | ||
1953 | void buffer::list::append(const ptr& bp) | |
1954 | { | |
1955 | if (bp.length()) | |
1956 | push_back(bp); | |
1957 | } | |
1958 | ||
1959 | void buffer::list::append(ptr&& bp) | |
1960 | { | |
1961 | if (bp.length()) | |
1962 | push_back(std::move(bp)); | |
1963 | } | |
1964 | ||
1965 | void buffer::list::append(const ptr& bp, unsigned off, unsigned len) | |
1966 | { | |
1967 | assert(len+off <= bp.length()); | |
1968 | if (!_buffers.empty()) { | |
1969 | ptr &l = _buffers.back(); | |
1970 | if (l.get_raw() == bp.get_raw() && | |
1971 | l.end() == bp.start() + off) { | |
1972 | // yay contiguous with tail bp! | |
1973 | l.set_length(l.length()+len); | |
1974 | _len += len; | |
1975 | return; | |
1976 | } | |
1977 | } | |
1978 | // add new item to list | |
1979 | push_back(ptr(bp, off, len)); | |
1980 | } | |
1981 | ||
1982 | void buffer::list::append(const list& bl) | |
1983 | { | |
1984 | _len += bl._len; | |
1985 | for (std::list<ptr>::const_iterator p = bl._buffers.begin(); | |
1986 | p != bl._buffers.end(); | |
1987 | ++p) | |
1988 | _buffers.push_back(*p); | |
1989 | } | |
1990 | ||
1991 | void buffer::list::append(std::istream& in) | |
1992 | { | |
1993 | while (!in.eof()) { | |
1994 | std::string s; | |
1995 | getline(in, s); | |
1996 | append(s.c_str(), s.length()); | |
1997 | if (s.length()) | |
1998 | append("\n", 1); | |
1999 | } | |
2000 | } | |
2001 | ||
2002 | void buffer::list::prepend_zero(unsigned len) | |
2003 | { | |
2004 | ptr bp(len); | |
2005 | bp.zero(false); | |
2006 | _len += len; | |
2007 | _buffers.emplace_front(std::move(bp)); | |
2008 | } | |
2009 | ||
2010 | void buffer::list::append_zero(unsigned len) | |
2011 | { | |
2012 | ptr bp(len); | |
2013 | bp.zero(false); | |
2014 | append(std::move(bp)); | |
2015 | } | |
2016 | ||
2017 | ||
2018 | /* | |
2019 | * get a char | |
2020 | */ | |
2021 | const char& buffer::list::operator[](unsigned n) const | |
2022 | { | |
2023 | if (n >= _len) | |
2024 | throw end_of_buffer(); | |
2025 | ||
2026 | for (std::list<ptr>::const_iterator p = _buffers.begin(); | |
2027 | p != _buffers.end(); | |
2028 | ++p) { | |
2029 | if (n >= p->length()) { | |
2030 | n -= p->length(); | |
2031 | continue; | |
2032 | } | |
2033 | return (*p)[n]; | |
2034 | } | |
2035 | ceph_abort(); | |
2036 | } | |
2037 | ||
2038 | /* | |
2039 | * return a contiguous ptr to whole bufferlist contents. | |
2040 | */ | |
2041 | char *buffer::list::c_str() | |
2042 | { | |
2043 | if (_buffers.empty()) | |
2044 | return 0; // no buffers | |
2045 | ||
2046 | std::list<ptr>::const_iterator iter = _buffers.begin(); | |
2047 | ++iter; | |
2048 | ||
2049 | if (iter != _buffers.end()) | |
2050 | rebuild(); | |
2051 | return _buffers.front().c_str(); // good, we're already contiguous. | |
2052 | } | |
2053 | ||
2054 | string buffer::list::to_str() const { | |
2055 | string s; | |
2056 | s.reserve(length()); | |
2057 | for (std::list<ptr>::const_iterator p = _buffers.begin(); | |
2058 | p != _buffers.end(); | |
2059 | ++p) { | |
2060 | if (p->length()) { | |
2061 | s.append(p->c_str(), p->length()); | |
2062 | } | |
2063 | } | |
2064 | return s; | |
2065 | } | |
2066 | ||
2067 | char *buffer::list::get_contiguous(unsigned orig_off, unsigned len) | |
2068 | { | |
2069 | if (orig_off + len > length()) | |
2070 | throw end_of_buffer(); | |
2071 | ||
2072 | if (len == 0) { | |
2073 | return 0; | |
2074 | } | |
2075 | ||
2076 | unsigned off = orig_off; | |
2077 | std::list<ptr>::iterator curbuf = _buffers.begin(); | |
2078 | while (off > 0 && off >= curbuf->length()) { | |
2079 | off -= curbuf->length(); | |
2080 | ++curbuf; | |
2081 | } | |
2082 | ||
2083 | if (off + len > curbuf->length()) { | |
2084 | bufferlist tmp; | |
2085 | unsigned l = off + len; | |
2086 | ||
2087 | do { | |
2088 | if (l >= curbuf->length()) | |
2089 | l -= curbuf->length(); | |
2090 | else | |
2091 | l = 0; | |
2092 | tmp.append(*curbuf); | |
2093 | curbuf = _buffers.erase(curbuf); | |
2094 | ||
2095 | } while (curbuf != _buffers.end() && l > 0); | |
2096 | ||
2097 | assert(l == 0); | |
2098 | ||
2099 | tmp.rebuild(); | |
2100 | _buffers.insert(curbuf, tmp._buffers.front()); | |
2101 | return tmp.c_str() + off; | |
2102 | } | |
2103 | ||
2104 | last_p = begin(); // we modified _buffers | |
2105 | ||
2106 | return curbuf->c_str() + off; | |
2107 | } | |
2108 | ||
2109 | void buffer::list::substr_of(const list& other, unsigned off, unsigned len) | |
2110 | { | |
2111 | if (off + len > other.length()) | |
2112 | throw end_of_buffer(); | |
2113 | ||
2114 | clear(); | |
2115 | ||
2116 | // skip off | |
2117 | std::list<ptr>::const_iterator curbuf = other._buffers.begin(); | |
2118 | while (off > 0 && | |
2119 | off >= curbuf->length()) { | |
2120 | // skip this buffer | |
2121 | //cout << "skipping over " << *curbuf << std::endl; | |
2122 | off -= (*curbuf).length(); | |
2123 | ++curbuf; | |
2124 | } | |
2125 | assert(len == 0 || curbuf != other._buffers.end()); | |
2126 | ||
2127 | while (len > 0) { | |
2128 | // partial? | |
2129 | if (off + len < curbuf->length()) { | |
2130 | //cout << "copying partial of " << *curbuf << std::endl; | |
2131 | _buffers.push_back( ptr( *curbuf, off, len ) ); | |
2132 | _len += len; | |
2133 | break; | |
2134 | } | |
2135 | ||
2136 | // through end | |
2137 | //cout << "copying end (all?) of " << *curbuf << std::endl; | |
2138 | unsigned howmuch = curbuf->length() - off; | |
2139 | _buffers.push_back( ptr( *curbuf, off, howmuch ) ); | |
2140 | _len += howmuch; | |
2141 | len -= howmuch; | |
2142 | off = 0; | |
2143 | ++curbuf; | |
2144 | } | |
2145 | } | |
2146 | ||
2147 | // funky modifer | |
2148 | void buffer::list::splice(unsigned off, unsigned len, list *claim_by /*, bufferlist& replace_with */) | |
2149 | { // fixme? | |
2150 | if (len == 0) | |
2151 | return; | |
2152 | ||
2153 | if (off >= length()) | |
2154 | throw end_of_buffer(); | |
2155 | ||
2156 | assert(len > 0); | |
2157 | //cout << "splice off " << off << " len " << len << " ... mylen = " << length() << std::endl; | |
2158 | ||
2159 | // skip off | |
2160 | std::list<ptr>::iterator curbuf = _buffers.begin(); | |
2161 | while (off > 0) { | |
2162 | assert(curbuf != _buffers.end()); | |
2163 | if (off >= (*curbuf).length()) { | |
2164 | // skip this buffer | |
2165 | //cout << "off = " << off << " skipping over " << *curbuf << std::endl; | |
2166 | off -= (*curbuf).length(); | |
2167 | ++curbuf; | |
2168 | } else { | |
2169 | // somewhere in this buffer! | |
2170 | //cout << "off = " << off << " somewhere in " << *curbuf << std::endl; | |
2171 | break; | |
2172 | } | |
2173 | } | |
2174 | ||
2175 | if (off) { | |
2176 | // add a reference to the front bit | |
2177 | // insert it before curbuf (which we'll hose) | |
2178 | //cout << "keeping front " << off << " of " << *curbuf << std::endl; | |
2179 | _buffers.insert( curbuf, ptr( *curbuf, 0, off ) ); | |
2180 | _len += off; | |
2181 | } | |
2182 | ||
2183 | while (len > 0) { | |
2184 | // partial? | |
2185 | if (off + len < (*curbuf).length()) { | |
2186 | //cout << "keeping end of " << *curbuf << ", losing first " << off+len << std::endl; | |
2187 | if (claim_by) | |
2188 | claim_by->append( *curbuf, off, len ); | |
2189 | (*curbuf).set_offset( off+len + (*curbuf).offset() ); // ignore beginning big | |
2190 | (*curbuf).set_length( (*curbuf).length() - (len+off) ); | |
2191 | _len -= off+len; | |
2192 | //cout << " now " << *curbuf << std::endl; | |
2193 | break; | |
2194 | } | |
2195 | ||
2196 | // hose though the end | |
2197 | unsigned howmuch = (*curbuf).length() - off; | |
2198 | //cout << "discarding " << howmuch << " of " << *curbuf << std::endl; | |
2199 | if (claim_by) | |
2200 | claim_by->append( *curbuf, off, howmuch ); | |
2201 | _len -= (*curbuf).length(); | |
2202 | _buffers.erase( curbuf++ ); | |
2203 | len -= howmuch; | |
2204 | off = 0; | |
2205 | } | |
2206 | ||
2207 | // splice in *replace (implement me later?) | |
2208 | ||
2209 | last_p = begin(); // just in case we were in the removed region. | |
2210 | } | |
2211 | ||
2212 | void buffer::list::write(int off, int len, std::ostream& out) const | |
2213 | { | |
2214 | list s; | |
2215 | s.substr_of(*this, off, len); | |
2216 | for (std::list<ptr>::const_iterator it = s._buffers.begin(); | |
2217 | it != s._buffers.end(); | |
2218 | ++it) | |
2219 | if (it->length()) | |
2220 | out.write(it->c_str(), it->length()); | |
2221 | /*iterator p(this, off); | |
2222 | while (len > 0 && !p.end()) { | |
2223 | int l = p.left_in_this_buf(); | |
2224 | if (l > len) | |
2225 | l = len; | |
2226 | out.write(p.c_str(), l); | |
2227 | len -= l; | |
2228 | }*/ | |
2229 | } | |
2230 | ||
2231 | void buffer::list::encode_base64(buffer::list& o) | |
2232 | { | |
2233 | bufferptr bp(length() * 4 / 3 + 3); | |
2234 | int l = ceph_armor(bp.c_str(), bp.c_str() + bp.length(), c_str(), c_str() + length()); | |
2235 | bp.set_length(l); | |
2236 | o.push_back(std::move(bp)); | |
2237 | } | |
2238 | ||
2239 | void buffer::list::decode_base64(buffer::list& e) | |
2240 | { | |
2241 | bufferptr bp(4 + ((e.length() * 3) / 4)); | |
2242 | int l = ceph_unarmor(bp.c_str(), bp.c_str() + bp.length(), e.c_str(), e.c_str() + e.length()); | |
2243 | if (l < 0) { | |
2244 | std::ostringstream oss; | |
2245 | oss << "decode_base64: decoding failed:\n"; | |
2246 | hexdump(oss); | |
2247 | throw buffer::malformed_input(oss.str().c_str()); | |
2248 | } | |
2249 | assert(l <= (int)bp.length()); | |
2250 | bp.set_length(l); | |
2251 | push_back(std::move(bp)); | |
2252 | } | |
2253 | ||
2254 | ||
2255 | ||
2256 | int buffer::list::read_file(const char *fn, std::string *error) | |
2257 | { | |
91327a77 | 2258 | int fd = TEMP_FAILURE_RETRY(::open(fn, O_RDONLY|O_CLOEXEC)); |
7c673cae FG |
2259 | if (fd < 0) { |
2260 | int err = errno; | |
2261 | std::ostringstream oss; | |
2262 | oss << "can't open " << fn << ": " << cpp_strerror(err); | |
2263 | *error = oss.str(); | |
2264 | return -err; | |
2265 | } | |
2266 | ||
2267 | struct stat st; | |
2268 | memset(&st, 0, sizeof(st)); | |
2269 | if (::fstat(fd, &st) < 0) { | |
2270 | int err = errno; | |
2271 | std::ostringstream oss; | |
2272 | oss << "bufferlist::read_file(" << fn << "): stat error: " | |
2273 | << cpp_strerror(err); | |
2274 | *error = oss.str(); | |
2275 | VOID_TEMP_FAILURE_RETRY(::close(fd)); | |
2276 | return -err; | |
2277 | } | |
2278 | ||
2279 | ssize_t ret = read_fd(fd, st.st_size); | |
2280 | if (ret < 0) { | |
2281 | std::ostringstream oss; | |
2282 | oss << "bufferlist::read_file(" << fn << "): read error:" | |
2283 | << cpp_strerror(ret); | |
2284 | *error = oss.str(); | |
2285 | VOID_TEMP_FAILURE_RETRY(::close(fd)); | |
2286 | return ret; | |
2287 | } | |
2288 | else if (ret != st.st_size) { | |
2289 | // Premature EOF. | |
2290 | // Perhaps the file changed between stat() and read()? | |
2291 | std::ostringstream oss; | |
2292 | oss << "bufferlist::read_file(" << fn << "): warning: got premature EOF."; | |
2293 | *error = oss.str(); | |
2294 | // not actually an error, but weird | |
2295 | } | |
2296 | VOID_TEMP_FAILURE_RETRY(::close(fd)); | |
2297 | return 0; | |
2298 | } | |
2299 | ||
2300 | ssize_t buffer::list::read_fd(int fd, size_t len) | |
2301 | { | |
2302 | // try zero copy first | |
2303 | if (false && read_fd_zero_copy(fd, len) == 0) { | |
2304 | // TODO fix callers to not require correct read size, which is not | |
2305 | // available for raw_pipe until we actually inspect the data | |
2306 | return 0; | |
2307 | } | |
2308 | bufferptr bp = buffer::create(len); | |
2309 | ssize_t ret = safe_read(fd, (void*)bp.c_str(), len); | |
2310 | if (ret >= 0) { | |
2311 | bp.set_length(ret); | |
2312 | append(std::move(bp)); | |
2313 | } | |
2314 | return ret; | |
2315 | } | |
2316 | ||
2317 | int buffer::list::read_fd_zero_copy(int fd, size_t len) | |
2318 | { | |
2319 | #ifdef CEPH_HAVE_SPLICE | |
2320 | try { | |
2321 | append(buffer::create_zero_copy(len, fd, NULL)); | |
2322 | } catch (buffer::error_code &e) { | |
2323 | return e.code; | |
2324 | } catch (buffer::malformed_input &e) { | |
2325 | return -EIO; | |
2326 | } | |
2327 | return 0; | |
2328 | #else | |
2329 | return -ENOTSUP; | |
2330 | #endif | |
2331 | } | |
2332 | ||
2333 | int buffer::list::write_file(const char *fn, int mode) | |
2334 | { | |
91327a77 | 2335 | int fd = TEMP_FAILURE_RETRY(::open(fn, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode)); |
7c673cae FG |
2336 | if (fd < 0) { |
2337 | int err = errno; | |
2338 | cerr << "bufferlist::write_file(" << fn << "): failed to open file: " | |
2339 | << cpp_strerror(err) << std::endl; | |
2340 | return -err; | |
2341 | } | |
2342 | int ret = write_fd(fd); | |
2343 | if (ret) { | |
2344 | cerr << "bufferlist::write_fd(" << fn << "): write_fd error: " | |
2345 | << cpp_strerror(ret) << std::endl; | |
2346 | VOID_TEMP_FAILURE_RETRY(::close(fd)); | |
2347 | return ret; | |
2348 | } | |
2349 | if (TEMP_FAILURE_RETRY(::close(fd))) { | |
2350 | int err = errno; | |
2351 | cerr << "bufferlist::write_file(" << fn << "): close error: " | |
2352 | << cpp_strerror(err) << std::endl; | |
2353 | return -err; | |
2354 | } | |
2355 | return 0; | |
2356 | } | |
2357 | ||
// Write `bytes` bytes described by the `veclen` entries of `vec` to `fd`
// starting at file offset `offset`, repeating after short writes and EINTR.
// Uses pwritev() when HAVE_PWRITEV is defined, otherwise lseek64()+writev().
// The iovec array is modified in place while advancing past written data.
// Returns 0 on success or -errno on failure.
static int do_writev(int fd, struct iovec *vec, uint64_t offset, unsigned veclen, unsigned bytes)
{
  while (bytes > 0) {
    ssize_t written = 0;
#ifdef HAVE_PWRITEV
    written = ::pwritev(fd, vec, veclen, offset);
#else
    written = ::lseek64(fd, offset, SEEK_SET);
    if (written != offset) {
      return -errno;
    }
    written = ::writev(fd, vec, veclen);
#endif
    if (written < 0) {
      if (errno == EINTR) {
	continue;   // interrupted: try the same write again
      }
      return -errno;
    }

    bytes -= written;
    offset += written;
    if (bytes == 0) {
      break;
    }

    // short write: step past what was written
    while (written > 0) {
      if (vec->iov_len > (size_t)written) {
	// this entry was only partly written; trim its front and retry
	vec->iov_base = (char *)vec->iov_base + written;
	vec->iov_len -= written;
	break;
      }
      // this entry was written completely; move on to the next one
      written -= vec->iov_len;
      ++vec;
      --veclen;
    }
  }
  return 0;
}
2397 | ||
2398 | int buffer::list::write_fd(int fd) const | |
2399 | { | |
2400 | if (can_zero_copy()) | |
2401 | return write_fd_zero_copy(fd); | |
2402 | ||
2403 | // use writev! | |
2404 | iovec iov[IOV_MAX]; | |
2405 | int iovlen = 0; | |
2406 | ssize_t bytes = 0; | |
2407 | ||
2408 | std::list<ptr>::const_iterator p = _buffers.begin(); | |
2409 | while (p != _buffers.end()) { | |
2410 | if (p->length() > 0) { | |
2411 | iov[iovlen].iov_base = (void *)p->c_str(); | |
2412 | iov[iovlen].iov_len = p->length(); | |
2413 | bytes += p->length(); | |
2414 | iovlen++; | |
2415 | } | |
2416 | ++p; | |
2417 | ||
d2e6a577 | 2418 | if (iovlen == IOV_MAX || |
7c673cae FG |
2419 | p == _buffers.end()) { |
2420 | iovec *start = iov; | |
2421 | int num = iovlen; | |
2422 | ssize_t wrote; | |
2423 | retry: | |
2424 | wrote = ::writev(fd, start, num); | |
2425 | if (wrote < 0) { | |
2426 | int err = errno; | |
2427 | if (err == EINTR) | |
2428 | goto retry; | |
2429 | return -err; | |
2430 | } | |
2431 | if (wrote < bytes) { | |
2432 | // partial write, recover! | |
2433 | while ((size_t)wrote >= start[0].iov_len) { | |
2434 | wrote -= start[0].iov_len; | |
2435 | bytes -= start[0].iov_len; | |
2436 | start++; | |
2437 | num--; | |
2438 | } | |
2439 | if (wrote > 0) { | |
2440 | start[0].iov_len -= wrote; | |
2441 | start[0].iov_base = (char *)start[0].iov_base + wrote; | |
2442 | bytes -= wrote; | |
2443 | } | |
2444 | goto retry; | |
2445 | } | |
2446 | iovlen = 0; | |
2447 | bytes = 0; | |
2448 | } | |
2449 | } | |
2450 | return 0; | |
2451 | } | |
2452 | ||
2453 | int buffer::list::write_fd(int fd, uint64_t offset) const | |
2454 | { | |
2455 | iovec iov[IOV_MAX]; | |
2456 | ||
2457 | std::list<ptr>::const_iterator p = _buffers.begin(); | |
2458 | uint64_t left_pbrs = _buffers.size(); | |
2459 | while (left_pbrs) { | |
2460 | ssize_t bytes = 0; | |
2461 | unsigned iovlen = 0; | |
2462 | uint64_t size = MIN(left_pbrs, IOV_MAX); | |
2463 | left_pbrs -= size; | |
2464 | while (size > 0) { | |
2465 | iov[iovlen].iov_base = (void *)p->c_str(); | |
2466 | iov[iovlen].iov_len = p->length(); | |
2467 | iovlen++; | |
2468 | bytes += p->length(); | |
2469 | ++p; | |
2470 | size--; | |
2471 | } | |
2472 | ||
2473 | int r = do_writev(fd, iov, offset, iovlen, bytes); | |
2474 | if (r < 0) | |
2475 | return r; | |
2476 | offset += bytes; | |
2477 | } | |
2478 | return 0; | |
2479 | } | |
2480 | ||
2481 | int buffer::list::write_fd_zero_copy(int fd) const | |
2482 | { | |
2483 | if (!can_zero_copy()) | |
2484 | return -ENOTSUP; | |
2485 | /* pass offset to each call to avoid races updating the fd seek | |
2486 | * position, since the I/O may be non-blocking | |
2487 | */ | |
2488 | int64_t offset = ::lseek(fd, 0, SEEK_CUR); | |
2489 | int64_t *off_p = &offset; | |
2490 | if (offset < 0 && errno != ESPIPE) | |
2491 | return -errno; | |
2492 | if (errno == ESPIPE) | |
2493 | off_p = NULL; | |
2494 | for (std::list<ptr>::const_iterator it = _buffers.begin(); | |
2495 | it != _buffers.end(); ++it) { | |
2496 | int r = it->zero_copy_to_fd(fd, off_p); | |
2497 | if (r < 0) | |
2498 | return r; | |
2499 | if (off_p) | |
2500 | offset += it->length(); | |
2501 | } | |
2502 | return 0; | |
2503 | } | |
2504 | ||
2505 | __u32 buffer::list::crc32c(__u32 crc) const | |
2506 | { | |
2507 | for (std::list<ptr>::const_iterator it = _buffers.begin(); | |
2508 | it != _buffers.end(); | |
2509 | ++it) { | |
2510 | if (it->length()) { | |
2511 | raw *r = it->get_raw(); | |
2512 | pair<size_t, size_t> ofs(it->offset(), it->offset() + it->length()); | |
2513 | pair<uint32_t, uint32_t> ccrc; | |
2514 | if (r->get_crc(ofs, &ccrc)) { | |
2515 | if (ccrc.first == crc) { | |
2516 | // got it already | |
2517 | crc = ccrc.second; | |
2518 | if (buffer_track_crc) | |
31f18b77 | 2519 | buffer_cached_crc++; |
7c673cae FG |
2520 | } else { |
2521 | /* If we have cached crc32c(buf, v) for initial value v, | |
2522 | * we can convert this to a different initial value v' by: | |
2523 | * crc32c(buf, v') = crc32c(buf, v) ^ adjustment | |
2524 | * where adjustment = crc32c(0*len(buf), v ^ v') | |
2525 | * | |
2526 | * http://crcutil.googlecode.com/files/crc-doc.1.0.pdf | |
2527 | * note, u for our crc32c implementation is 0 | |
2528 | */ | |
2529 | crc = ccrc.second ^ ceph_crc32c(ccrc.first ^ crc, NULL, it->length()); | |
2530 | if (buffer_track_crc) | |
31f18b77 | 2531 | buffer_cached_crc_adjusted++; |
7c673cae FG |
2532 | } |
2533 | } else { | |
2534 | if (buffer_track_crc) | |
31f18b77 | 2535 | buffer_missed_crc++; |
7c673cae FG |
2536 | uint32_t base = crc; |
2537 | crc = ceph_crc32c(crc, (unsigned char*)it->c_str(), it->length()); | |
2538 | r->set_crc(ofs, make_pair(base, crc)); | |
2539 | } | |
2540 | } | |
2541 | } | |
2542 | return crc; | |
2543 | } | |
2544 | ||
2545 | void buffer::list::invalidate_crc() | |
2546 | { | |
2547 | for (std::list<ptr>::const_iterator p = _buffers.begin(); p != _buffers.end(); ++p) { | |
2548 | raw *r = p->get_raw(); | |
2549 | if (r) { | |
2550 | r->invalidate_crc(); | |
2551 | } | |
2552 | } | |
2553 | } | |
2554 | ||
2555 | /** | |
2556 | * Binary write all contents to a C++ stream | |
2557 | */ | |
2558 | void buffer::list::write_stream(std::ostream &out) const | |
2559 | { | |
2560 | for (std::list<ptr>::const_iterator p = _buffers.begin(); p != _buffers.end(); ++p) { | |
2561 | if (p->length() > 0) { | |
2562 | out.write(p->c_str(), p->length()); | |
2563 | } | |
2564 | } | |
2565 | } | |
2566 | ||
2567 | ||
2568 | void buffer::list::hexdump(std::ostream &out, bool trailing_newline) const | |
2569 | { | |
2570 | if (!length()) | |
2571 | return; | |
2572 | ||
2573 | std::ios_base::fmtflags original_flags = out.flags(); | |
2574 | ||
2575 | // do our best to match the output of hexdump -C, for better | |
2576 | // diff'ing! | |
2577 | ||
2578 | out.setf(std::ios::right); | |
2579 | out.fill('0'); | |
2580 | ||
2581 | unsigned per = 16; | |
2582 | bool was_zeros = false, did_star = false; | |
2583 | for (unsigned o=0; o<length(); o += per) { | |
2584 | bool row_is_zeros = false; | |
2585 | if (o + per < length()) { | |
2586 | row_is_zeros = true; | |
2587 | for (unsigned i=0; i<per && o+i<length(); i++) { | |
2588 | if ((*this)[o+i]) { | |
2589 | row_is_zeros = false; | |
2590 | } | |
2591 | } | |
2592 | if (row_is_zeros) { | |
2593 | if (was_zeros) { | |
2594 | if (!did_star) { | |
2595 | out << "\n*"; | |
2596 | did_star = true; | |
2597 | } | |
2598 | continue; | |
2599 | } | |
2600 | was_zeros = true; | |
2601 | } else { | |
2602 | was_zeros = false; | |
2603 | did_star = false; | |
2604 | } | |
2605 | } | |
2606 | if (o) | |
2607 | out << "\n"; | |
2608 | out << std::hex << std::setw(8) << o << " "; | |
2609 | ||
2610 | unsigned i; | |
2611 | for (i=0; i<per && o+i<length(); i++) { | |
2612 | if (i == 8) | |
2613 | out << ' '; | |
2614 | out << " " << std::setw(2) << ((unsigned)(*this)[o+i] & 0xff); | |
2615 | } | |
2616 | for (; i<per; i++) { | |
2617 | if (i == 8) | |
2618 | out << ' '; | |
2619 | out << " "; | |
2620 | } | |
2621 | ||
2622 | out << " |"; | |
2623 | for (i=0; i<per && o+i<length(); i++) { | |
2624 | char c = (*this)[o+i]; | |
2625 | if (isupper(c) || islower(c) || isdigit(c) || c == ' ' || ispunct(c)) | |
2626 | out << c; | |
2627 | else | |
2628 | out << '.'; | |
2629 | } | |
2630 | out << '|' << std::dec; | |
2631 | } | |
2632 | if (trailing_newline) { | |
2633 | out << "\n" << std::hex << std::setw(8) << length(); | |
2634 | out << "\n"; | |
2635 | } | |
2636 | ||
2637 | out.flags(original_flags); | |
2638 | } | |
2639 | ||
31f18b77 FG |
2640 | |
2641 | buffer::list buffer::list::static_from_mem(char* c, size_t l) { | |
2642 | list bl; | |
2643 | bl.push_back(ptr(create_static(l, c))); | |
2644 | return bl; | |
2645 | } | |
2646 | ||
2647 | buffer::list buffer::list::static_from_cstring(char* c) { | |
2648 | return static_from_mem(c, std::strlen(c)); | |
2649 | } | |
2650 | ||
2651 | buffer::list buffer::list::static_from_string(string& s) { | |
2652 | // C++14 just has string::data return a char* from a non-const | |
2653 | // string. | |
2654 | return static_from_mem(const_cast<char*>(s.data()), s.length()); | |
2655 | // But the way buffer::list mostly doesn't work in a sane way with | |
2656 | // const makes me generally sad. | |
2657 | } | |
2658 | ||
7c673cae | 2659 | std::ostream& buffer::operator<<(std::ostream& out, const buffer::raw &r) { |
31f18b77 | 2660 | return out << "buffer::raw(" << (void*)r.data << " len " << r.len << " nref " << r.nref.load() << ")"; |
7c673cae FG |
2661 | } |
2662 | ||
2663 | std::ostream& buffer::operator<<(std::ostream& out, const buffer::ptr& bp) { | |
2664 | if (bp.have_raw()) | |
2665 | out << "buffer::ptr(" << bp.offset() << "~" << bp.length() | |
2666 | << " " << (void*)bp.c_str() | |
2667 | << " in raw " << (void*)bp.raw_c_str() | |
2668 | << " len " << bp.raw_length() | |
2669 | << " nref " << bp.raw_nref() << ")"; | |
2670 | else | |
2671 | out << "buffer:ptr(" << bp.offset() << "~" << bp.length() << " no raw)"; | |
2672 | return out; | |
2673 | } | |
2674 | ||
2675 | std::ostream& buffer::operator<<(std::ostream& out, const buffer::list& bl) { | |
2676 | out << "buffer::list(len=" << bl.length() << "," << std::endl; | |
2677 | ||
2678 | std::list<buffer::ptr>::const_iterator it = bl.buffers().begin(); | |
2679 | while (it != bl.buffers().end()) { | |
2680 | out << "\t" << *it; | |
2681 | if (++it == bl.buffers().end()) break; | |
2682 | out << "," << std::endl; | |
2683 | } | |
2684 | out << std::endl << ")"; | |
2685 | return out; | |
2686 | } | |
2687 | ||
2688 | std::ostream& buffer::operator<<(std::ostream& out, const buffer::error& e) | |
2689 | { | |
2690 | return out << e.what(); | |
2691 | } | |
2692 | ||
2693 | MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_malloc, buffer_raw_malloc, | |
2694 | buffer_meta); | |
2695 | MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_mmap_pages, buffer_raw_mmap_pagse, | |
2696 | buffer_meta); | |
2697 | MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_posix_aligned, | |
2698 | buffer_raw_posix_aligned, buffer_meta); | |
2699 | #ifdef CEPH_HAVE_SPLICE | |
2700 | MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_pipe, buffer_raw_pipe, buffer_meta); | |
2701 | #endif | |
2702 | MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_char, buffer_raw_char, buffer_meta); | |
31f18b77 FG |
2703 | MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_claimed_char, buffer_raw_claimed_char, |
2704 | buffer_meta); | |
7c673cae FG |
2705 | MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_unshareable, buffer_raw_unshareable, |
2706 | buffer_meta); | |
2707 | MEMPOOL_DEFINE_OBJECT_FACTORY(buffer::raw_static, buffer_raw_static, | |
2708 | buffer_meta); | |
2709 |