]> git.proxmox.com Git - ceph.git/blob - ceph/src/tools/rbd/action/Bench.cc
bump version to 18.2.4-pve3
[ceph.git] / ceph / src / tools / rbd / action / Bench.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "tools/rbd/ArgumentTypes.h"
5 #include "tools/rbd/Shell.h"
6 #include "tools/rbd/Utils.h"
7 #include "common/errno.h"
8 #include "common/strtol.h"
9 #include "common/ceph_mutex.h"
10 #include "include/types.h"
11 #include "global/signal_handler.h"
12 #include <atomic>
13 #include <chrono>
14 #include <iostream>
15 #include <boost/accumulators/accumulators.hpp>
16 #include <boost/accumulators/statistics/stats.hpp>
17 #include <boost/accumulators/statistics/rolling_sum.hpp>
18 #include <boost/program_options.hpp>
19
20 using namespace std::chrono;
21
22 static std::atomic<bool> terminating;
23 static void handle_signal(int signum)
24 {
25 ceph_assert(signum == SIGINT || signum == SIGTERM);
26 terminating = true;
27 }
28
29 namespace rbd {
30 namespace action {
31 namespace bench {
32
33 namespace at = argument_types;
34 namespace po = boost::program_options;
35
36 namespace {
37
// Kind of I/O a benchmark run issues.
enum io_type_t {
  IO_TYPE_READ  = 0,  // reads only
  IO_TYPE_WRITE = 1,  // writes only
  IO_TYPE_RW    = 2,  // mix of reads and writes

  IO_TYPE_NUM   = 3,  // count of valid types; also returned for unknown names
};
45
// How request offsets are chosen during a benchmark run.
enum io_pattern_t {
  IO_PATTERN_RAND     = 0,  // "rand"
  IO_PATTERN_SEQ      = 1,  // "seq"
  IO_PATTERN_FULL_SEQ = 2   // "full-seq"
};
51
// Empty tag types. They exist only to select the matching validate()
// overload below when used as po::value<...> option types.

// Tag for --io-type.
struct IOType {};

// Tag for size-valued options (--io-size, --io-total).
struct Size {};

// Tag for --io-pattern.
struct IOPattern {};
55
56 void validate(boost::any& v, const std::vector<std::string>& values,
57 Size *target_type, int) {
58 po::validators::check_first_occurrence(v);
59 const std::string &s = po::validators::get_single_string(values);
60
61 std::string parse_error;
62 uint64_t size = strict_iecstrtoll(s, &parse_error);
63 if (!parse_error.empty()) {
64 throw po::validation_error(po::validation_error::invalid_option_value);
65 }
66 v = boost::any(size);
67 }
68
69 void validate(boost::any& v, const std::vector<std::string>& values,
70 IOPattern *target_type, int) {
71 po::validators::check_first_occurrence(v);
72 const std::string &s = po::validators::get_single_string(values);
73 if (s == "rand") {
74 v = IO_PATTERN_RAND;
75 } else if (s == "seq") {
76 v = IO_PATTERN_SEQ;
77 } else if (s == "full-seq") {
78 v = IO_PATTERN_FULL_SEQ;
79 } else {
80 throw po::validation_error(po::validation_error::invalid_option_value);
81 }
82 }
83
84 io_type_t get_io_type(std::string io_type_string) {
85 if (io_type_string == "read")
86 return IO_TYPE_READ;
87 else if (io_type_string == "write")
88 return IO_TYPE_WRITE;
89 else if (io_type_string == "readwrite" || io_type_string == "rw")
90 return IO_TYPE_RW;
91 else
92 return IO_TYPE_NUM;
93 }
94
95 void validate(boost::any& v, const std::vector<std::string>& values,
96 IOType *target_type, int) {
97 po::validators::check_first_occurrence(v);
98 const std::string &s = po::validators::get_single_string(values);
99 io_type_t io_type = get_io_type(s);
100 if (io_type >= IO_TYPE_NUM)
101 throw po::validation_error(po::validation_error::invalid_option_value);
102 else
103 v = boost::any(io_type);
104 }
105
106 } // anonymous namespace
107
108 static void rbd_bencher_completion(void *c, void *pc);
109 struct rbd_bencher;
110
111 struct bencher_completer {
112 rbd_bencher *bencher;
113 bufferlist *bl;
114
115 public:
116 bencher_completer(rbd_bencher *bencher, bufferlist *bl)
117 : bencher(bencher), bl(bl)
118 { }
119
120 ~bencher_completer()
121 {
122 if (bl)
123 delete bl;
124 }
125 };
126
127 struct rbd_bencher {
128 librbd::Image *image;
129 ceph::mutex lock = ceph::make_mutex("rbd_bencher::lock");
130 ceph::condition_variable cond;
131 int in_flight;
132 io_type_t io_type;
133 uint64_t io_size;
134 bufferlist write_bl;
135
136 explicit rbd_bencher(librbd::Image *i, io_type_t io_type, uint64_t io_size)
137 : image(i),
138 in_flight(0),
139 io_type(io_type),
140 io_size(io_size)
141 {
142 if (io_type == IO_TYPE_WRITE || io_type == IO_TYPE_RW) {
143 bufferptr bp(io_size);
144 memset(bp.c_str(), rand() & 0xff, io_size);
145 write_bl.push_back(bp);
146 }
147 }
148
149 void start_io(int max, uint64_t off, uint64_t len, int op_flags, bool read_flag)
150 {
151 {
152 std::lock_guard l{lock};
153 in_flight++;
154 }
155
156 librbd::RBD::AioCompletion *c;
157 if (read_flag) {
158 bufferlist *read_bl = new bufferlist();
159 c = new librbd::RBD::AioCompletion((void *)(new bencher_completer(this, read_bl)),
160 rbd_bencher_completion);
161 image->aio_read2(off, len, *read_bl, c, op_flags);
162 } else {
163 c = new librbd::RBD::AioCompletion((void *)(new bencher_completer(this, NULL)),
164 rbd_bencher_completion);
165 image->aio_write2(off, len, write_bl, c, op_flags);
166 }
167 }
168
169 int wait_for(int max, bool interrupt_on_terminating) {
170 std::unique_lock l{lock};
171 while (in_flight > max && !(terminating && interrupt_on_terminating)) {
172 cond.wait_for(l, 200ms);
173 }
174
175 return terminating ? -EINTR : 0;
176 }
177
178 };
179
180 void rbd_bencher_completion(void *vc, void *pc)
181 {
182 librbd::RBD::AioCompletion *c = (librbd::RBD::AioCompletion *)vc;
183 bencher_completer *bc = static_cast<bencher_completer *>(pc);
184 rbd_bencher *b = bc->bencher;
185 //cout << "complete " << c << std::endl;
186 int ret = c->get_return_value();
187 if (b->io_type == IO_TYPE_WRITE && ret != 0) {
188 std::cout << "write error: " << cpp_strerror(ret) << std::endl;
189 exit(ret < 0 ? -ret : ret);
190 } else if (b->io_type == IO_TYPE_READ && (unsigned int)ret != b->io_size) {
191 std::cout << "read error: " << cpp_strerror(ret) << std::endl;
192 exit(ret < 0 ? -ret : ret);
193 }
194 b->lock.lock();
195 b->in_flight--;
196 b->cond.notify_all();
197 b->lock.unlock();
198 c->release();
199 delete bc;
200 }
201
// Decide at random whether the next request should be a read.
//
// @param read_proportion  percentage of requests that should be reads
// @return true when a value drawn from rand() % 100 falls below
//         read_proportion; 0 therefore never reads and 100 (or more)
//         always reads
bool should_read(uint64_t read_proportion)
{
  const uint64_t rand_num = rand() % 100;
  return rand_num < read_proportion;
}
211
212 int do_bench(librbd::Image& image, io_type_t io_type,
213 uint64_t io_size, uint64_t io_threads,
214 uint64_t io_bytes, io_pattern_t io_pattern,
215 uint64_t read_proportion)
216 {
217 uint64_t size = 0;
218 image.size(&size);
219 if (io_size > size) {
220 std::cerr << "rbd: io-size " << byte_u_t(io_size) << " "
221 << "larger than image size " << byte_u_t(size) << std::endl;
222 return -EINVAL;
223 }
224
225 if (io_size > std::numeric_limits<uint32_t>::max()) {
226 std::cerr << "rbd: io-size should be less than 4G" << std::endl;
227 return -EINVAL;
228 }
229
230 int r = image.flush();
231 if (r < 0 && (r != -EROFS || io_type != IO_TYPE_READ)) {
232 std::cerr << "rbd: failed to flush: " << cpp_strerror(r) << std::endl;
233 return r;
234 }
235
236 rbd_bencher b(&image, io_type, io_size);
237
238 std::cout << "bench "
239 << " type " << (io_type == IO_TYPE_READ ? "read" :
240 io_type == IO_TYPE_WRITE ? "write" : "readwrite")
241 << (io_type == IO_TYPE_RW ? " read:write=" +
242 std::to_string(read_proportion) + ":" +
243 std::to_string(100 - read_proportion) : "")
244 << " io_size " << io_size
245 << " io_threads " << io_threads
246 << " bytes " << io_bytes
247 << " pattern ";
248 switch (io_pattern) {
249 case IO_PATTERN_RAND:
250 std::cout << "random";
251 break;
252 case IO_PATTERN_SEQ:
253 std::cout << "sequential";
254 break;
255 case IO_PATTERN_FULL_SEQ:
256 std::cout << "full sequential";
257 break;
258 default:
259 ceph_assert(false);
260 break;
261 }
262 std::cout << std::endl;
263
264 srand(time(NULL) % (unsigned long) -1);
265
266 coarse_mono_time start = coarse_mono_clock::now();
267 std::chrono::duration<double> last = std::chrono::duration<double>::zero();
268 uint64_t ios = 0;
269
270 std::vector<uint64_t> thread_offset;
271 uint64_t i;
272 uint64_t seq_chunk_length = (size / io_size / io_threads) * io_size;;
273
274 // disturb all thread's offset
275 for (i = 0; i < io_threads; i++) {
276 uint64_t start_pos = 0;
277 switch (io_pattern) {
278 case IO_PATTERN_RAND:
279 start_pos = (rand() % (size / io_size)) * io_size;
280 break;
281 case IO_PATTERN_SEQ:
282 start_pos = seq_chunk_length * i;
283 break;
284 case IO_PATTERN_FULL_SEQ:
285 start_pos = i * io_size;
286 break;
287 default:
288 break;
289 }
290 thread_offset.push_back(start_pos);
291 }
292
293 const int WINDOW_SIZE = 5;
294 typedef boost::accumulators::accumulator_set<
295 double, boost::accumulators::stats<
296 boost::accumulators::tag::rolling_sum> > RollingSum;
297
298 RollingSum time_acc(
299 boost::accumulators::tag::rolling_window::window_size = WINDOW_SIZE);
300 RollingSum ios_acc(
301 boost::accumulators::tag::rolling_window::window_size = WINDOW_SIZE);
302 RollingSum off_acc(
303 boost::accumulators::tag::rolling_window::window_size = WINDOW_SIZE);
304 uint64_t cur_ios = 0;
305 uint64_t cur_off = 0;
306
307 int op_flags;
308 if (io_pattern == IO_PATTERN_RAND) {
309 op_flags = LIBRADOS_OP_FLAG_FADVISE_RANDOM;
310 } else {
311 op_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL;
312 }
313
314 printf(" SEC OPS OPS/SEC BYTES/SEC\n");
315 uint64_t off;
316 int read_ops = 0;
317 int write_ops = 0;
318
319 for (off = 0; off < io_bytes; ) {
320 // Issue I/O
321 i = 0;
322 int r = 0;
323 while (i < io_threads && off < io_bytes) {
324 bool read_flag = should_read(read_proportion);
325
326 r = b.wait_for(io_threads - 1, true);
327 if (r < 0) {
328 break;
329 }
330 b.start_io(io_threads, thread_offset[i], io_size, op_flags, read_flag);
331
332 ++i;
333 ++ios;
334 off += io_size;
335
336 ++cur_ios;
337 cur_off += io_size;
338
339 if (read_flag)
340 read_ops++;
341 else
342 write_ops++;
343 }
344
345 if (r < 0) {
346 break;
347 }
348
349 // Set the thread_offsets of next I/O
350 for (i = 0; i < io_threads; ++i) {
351 switch (io_pattern) {
352 case IO_PATTERN_RAND:
353 thread_offset[i] = (rand() % (size / io_size)) * io_size;
354 continue;
355 case IO_PATTERN_SEQ:
356 if (off < (seq_chunk_length * io_threads)) {
357 thread_offset[i] += io_size;
358 } else {
359 // thread_offset is adjusted to the chunks unassigned to threads.
360 thread_offset[i] = off + (i * io_size);
361 }
362 if (thread_offset[i] + io_size > size) {
363 thread_offset[i] = seq_chunk_length * i;
364 }
365 break;
366 case IO_PATTERN_FULL_SEQ:
367 thread_offset[i] += (io_size * io_threads);
368 if (thread_offset[i] >= size) {
369 thread_offset[i] = i * io_size;
370 }
371 break;
372 default:
373 break;
374 }
375 }
376
377 coarse_mono_time now = coarse_mono_clock::now();
378 std::chrono::duration<double> elapsed = now - start;
379 if (last == std::chrono::duration<double>::zero()) {
380 last = elapsed;
381 } else if ((int)elapsed.count() != (int)last.count()) {
382 time_acc((elapsed - last).count());
383 ios_acc(static_cast<double>(cur_ios));
384 off_acc(static_cast<double>(cur_off));
385 cur_ios = 0;
386 cur_off = 0;
387
388 double time_sum = boost::accumulators::rolling_sum(time_acc);
389 std::cout.width(5);
390 std::cout << (int)elapsed.count();
391 std::cout.width(10);
392 std::cout << ios - io_threads;
393 std::cout.width(10);
394 std::cout << boost::accumulators::rolling_sum(ios_acc) / time_sum;
395 std::cout.width(10);
396 std::cout << byte_u_t(boost::accumulators::rolling_sum(off_acc) / time_sum) << "/s"
397 << std::endl;
398 last = elapsed;
399 }
400 }
401 b.wait_for(0, false);
402
403 if (io_type != IO_TYPE_READ) {
404 r = image.flush();
405 if (r < 0) {
406 std::cerr << "rbd: failed to flush at the end: " << cpp_strerror(r)
407 << std::endl;
408 }
409 }
410
411 coarse_mono_time now = coarse_mono_clock::now();
412 std::chrono::duration<double> elapsed = now - start;
413
414 std::cout << "elapsed: " << (int)elapsed.count() << " "
415 << "ops: " << ios << " "
416 << "ops/sec: " << (double)ios / elapsed.count() << " "
417 << "bytes/sec: " << byte_u_t((double)off / elapsed.count()) << "/s"
418 << std::endl;
419
420 if (io_type == IO_TYPE_RW) {
421 std::cout << "read_ops: " << read_ops << " "
422 << "read_ops/sec: " << (double)read_ops / elapsed.count() << " "
423 << "read_bytes/sec: " << byte_u_t((double)read_ops * io_size / elapsed.count()) << "/s"
424 << std::endl;
425
426 std::cout << "write_ops: " << write_ops << " "
427 << "write_ops/sec: " << (double)write_ops / elapsed.count() << " "
428 << "write_bytes/sec: " << byte_u_t((double)write_ops * io_size / elapsed.count()) << "/s"
429 << std::endl;
430
431 }
432
433 return 0;
434 }
435
436 void add_bench_common_options(po::options_description *positional,
437 po::options_description *options) {
438 at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
439
440 options->add_options()
441 ("io-size", po::value<Size>(), "IO size (in B/K/M/G) (< 4G) [default: 4K]")
442 ("io-threads", po::value<uint32_t>(), "ios in flight [default: 16]")
443 ("io-total", po::value<Size>(), "total size for IO (in B/K/M/G/T) [default: 1G]")
444 ("io-pattern", po::value<IOPattern>(), "IO pattern (rand, seq, or full-seq) [default: seq]")
445 ("rw-mix-read", po::value<uint64_t>(), "read proportion in readwrite (<= 100) [default: 50]");
446 }
447
448 void get_arguments_for_write(po::options_description *positional,
449 po::options_description *options) {
450 add_bench_common_options(positional, options);
451 }
452
453 void get_arguments_for_bench(po::options_description *positional,
454 po::options_description *options) {
455 add_bench_common_options(positional, options);
456
457 options->add_options()
458 ("io-type", po::value<IOType>()->required(), "IO type (read, write, or readwrite(rw))");
459 }
460
461 int bench_execute(const po::variables_map &vm, io_type_t bench_io_type) {
462 size_t arg_index = 0;
463 std::string pool_name;
464 std::string namespace_name;
465 std::string image_name;
466 std::string snap_name;
467 utils::SnapshotPresence snap_presence = utils::SNAPSHOT_PRESENCE_NONE;
468 if (bench_io_type == IO_TYPE_READ)
469 snap_presence = utils::SNAPSHOT_PRESENCE_PERMITTED;
470
471 int r = utils::get_pool_image_snapshot_names(
472 vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
473 &image_name, &snap_name, true, snap_presence, utils::SPEC_VALIDATION_NONE);
474 if (r < 0) {
475 return r;
476 }
477
478 uint64_t bench_io_size;
479 if (vm.count("io-size")) {
480 bench_io_size = vm["io-size"].as<uint64_t>();
481 } else {
482 bench_io_size = 4096;
483 }
484 if (bench_io_size == 0) {
485 std::cerr << "rbd: --io-size should be greater than zero." << std::endl;
486 return -EINVAL;
487 }
488
489 uint32_t bench_io_threads;
490 if (vm.count("io-threads")) {
491 bench_io_threads = vm["io-threads"].as<uint32_t>();
492 } else {
493 bench_io_threads = 16;
494 }
495 if (bench_io_threads == 0) {
496 std::cerr << "rbd: --io-threads should be greater than zero." << std::endl;
497 return -EINVAL;
498 }
499
500 uint64_t bench_bytes;
501 if (vm.count("io-total")) {
502 bench_bytes = vm["io-total"].as<uint64_t>();
503 } else {
504 bench_bytes = 1 << 30;
505 }
506
507 io_pattern_t bench_pattern;
508 if (vm.count("io-pattern")) {
509 bench_pattern = vm["io-pattern"].as<io_pattern_t>();
510 } else {
511 bench_pattern = IO_PATTERN_SEQ;
512 }
513
514 uint64_t bench_read_proportion;
515 if (bench_io_type == IO_TYPE_READ) {
516 bench_read_proportion = 100;
517 } else if (bench_io_type == IO_TYPE_WRITE) {
518 bench_read_proportion = 0;
519 } else {
520 if (vm.count("rw-mix-read")) {
521 bench_read_proportion = vm["rw-mix-read"].as<uint64_t>();
522 } else {
523 bench_read_proportion = 50;
524 }
525
526 if (bench_read_proportion > 100) {
527 std::cerr << "rbd: --rw-mix-read should not be larger than 100." << std::endl;
528 return -EINVAL;
529 }
530 }
531
532 librados::Rados rados;
533 librados::IoCtx io_ctx;
534 librbd::Image image;
535 r = utils::init_and_open_image(pool_name, namespace_name, image_name, "",
536 snap_name, false, &rados, &io_ctx, &image);
537 if (r < 0) {
538 return r;
539 }
540
541 init_async_signal_handler();
542 register_async_signal_handler(SIGHUP, sighup_handler);
543 register_async_signal_handler_oneshot(SIGINT, handle_signal);
544 register_async_signal_handler_oneshot(SIGTERM, handle_signal);
545
546 r = do_bench(image, bench_io_type, bench_io_size, bench_io_threads,
547 bench_bytes, bench_pattern, bench_read_proportion);
548
549 unregister_async_signal_handler(SIGHUP, sighup_handler);
550 unregister_async_signal_handler(SIGINT, handle_signal);
551 unregister_async_signal_handler(SIGTERM, handle_signal);
552 shutdown_async_signal_handler();
553
554 if (r < 0) {
555 std::cerr << "bench failed: " << cpp_strerror(r) << std::endl;
556 return r;
557 }
558 return 0;
559 }
560
561 int execute_for_write(const po::variables_map &vm,
562 const std::vector<std::string> &ceph_global_init_args) {
563 std::cerr << "rbd: bench-write is deprecated, use rbd bench --io-type write ..." << std::endl;
564 return bench_execute(vm, IO_TYPE_WRITE);
565 }
566
567 int execute_for_bench(const po::variables_map &vm,
568 const std::vector<std::string> &ceph_global_init_args) {
569 io_type_t bench_io_type;
570 if (vm.count("io-type")) {
571 bench_io_type = vm["io-type"].as<io_type_t>();
572 } else {
573 std::cerr << "rbd: --io-type must be specified." << std::endl;
574 return -EINVAL;
575 }
576
577 return bench_execute(vm, bench_io_type);
578 }
579
580 Shell::Action action_write(
581 {"bench-write"}, {}, "Simple write benchmark. (Deprecated, please use `rbd bench --io-type write` instead.)",
582 "", &get_arguments_for_write, &execute_for_write, false);
583
584 Shell::Action action_bench(
585 {"bench"}, {}, "Simple benchmark.", "", &get_arguments_for_bench, &execute_for_bench);
586
587 } // namespace bench
588 } // namespace action
589 } // namespace rbd