git.proxmox.com Git - ceph.git/blob - ceph/src/tools/rbd/action/Bench.cc
import 15.2.0 Octopus source
[ceph.git] / ceph / src / tools / rbd / action / Bench.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "tools/rbd/ArgumentTypes.h"
5 #include "tools/rbd/Shell.h"
6 #include "tools/rbd/Utils.h"
7 #include "common/errno.h"
8 #include "common/strtol.h"
9 #include "common/ceph_mutex.h"
10 #include "include/types.h"
11 #include "global/signal_handler.h"
12 #include <iostream>
13 #include <boost/accumulators/accumulators.hpp>
14 #include <boost/accumulators/statistics/stats.hpp>
15 #include <boost/accumulators/statistics/rolling_sum.hpp>
16 #include <boost/program_options.hpp>
17
18 using namespace std::chrono;
19
20 static std::atomic<bool> terminating;
21 static void handle_signal(int signum)
22 {
23 ceph_assert(signum == SIGINT || signum == SIGTERM);
24 terminating = true;
25 }
26
27 namespace rbd {
28 namespace action {
29 namespace bench {
30
31 namespace at = argument_types;
32 namespace po = boost::program_options;
33
34 namespace {
35
36 enum io_type_t {
37 IO_TYPE_READ = 0,
38 IO_TYPE_WRITE,
39 IO_TYPE_RW,
40
41 IO_TYPE_NUM,
42 };
43
44 enum io_pattern_t {
45 IO_PATTERN_RAND,
46 IO_PATTERN_SEQ,
47 IO_PATTERN_FULL_SEQ
48 };
49
50 struct IOType {};
51 struct Size {};
52 struct IOPattern {};
53
54 void validate(boost::any& v, const std::vector<std::string>& values,
55 Size *target_type, int) {
56 po::validators::check_first_occurrence(v);
57 const std::string &s = po::validators::get_single_string(values);
58
59 std::string parse_error;
60 uint64_t size = strict_iecstrtoll(s.c_str(), &parse_error);
61 if (!parse_error.empty()) {
62 throw po::validation_error(po::validation_error::invalid_option_value);
63 }
64 v = boost::any(size);
65 }
66
67 void validate(boost::any& v, const std::vector<std::string>& values,
68 IOPattern *target_type, int) {
69 po::validators::check_first_occurrence(v);
70 const std::string &s = po::validators::get_single_string(values);
71 if (s == "rand") {
72 v = IO_PATTERN_RAND;
73 } else if (s == "seq") {
74 v = IO_PATTERN_SEQ;
75 } else if (s == "full-seq") {
76 v = IO_PATTERN_FULL_SEQ;
77 } else {
78 throw po::validation_error(po::validation_error::invalid_option_value);
79 }
80 }
81
82 io_type_t get_io_type(std::string io_type_string) {
83 if (io_type_string == "read")
84 return IO_TYPE_READ;
85 else if (io_type_string == "write")
86 return IO_TYPE_WRITE;
87 else if (io_type_string == "readwrite" || io_type_string == "rw")
88 return IO_TYPE_RW;
89 else
90 return IO_TYPE_NUM;
91 }
92
93 void validate(boost::any& v, const std::vector<std::string>& values,
94 IOType *target_type, int) {
95 po::validators::check_first_occurrence(v);
96 const std::string &s = po::validators::get_single_string(values);
97 io_type_t io_type = get_io_type(s);
98 if (io_type >= IO_TYPE_NUM)
99 throw po::validation_error(po::validation_error::invalid_option_value);
100 else
101 v = boost::any(io_type);
102 }
103
104 } // anonymous namespace
105
106 static void rbd_bencher_completion(void *c, void *pc);
107 struct rbd_bencher;
108
109 struct bencher_completer {
110 rbd_bencher *bencher;
111 bufferlist *bl;
112
113 public:
114 bencher_completer(rbd_bencher *bencher, bufferlist *bl)
115 : bencher(bencher), bl(bl)
116 { }
117
118 ~bencher_completer()
119 {
120 if (bl)
121 delete bl;
122 }
123 };
124
125 struct rbd_bencher {
126 librbd::Image *image;
127 ceph::mutex lock = ceph::make_mutex("rbd_bencher::lock");
128 ceph::condition_variable cond;
129 int in_flight;
130 io_type_t io_type;
131 uint64_t io_size;
132 bufferlist write_bl;
133
134 explicit rbd_bencher(librbd::Image *i, io_type_t io_type, uint64_t io_size)
135 : image(i),
136 in_flight(0),
137 io_type(io_type),
138 io_size(io_size)
139 {
140 if (io_type == IO_TYPE_WRITE || io_type == IO_TYPE_RW) {
141 bufferptr bp(io_size);
142 memset(bp.c_str(), rand() & 0xff, io_size);
143 write_bl.push_back(bp);
144 }
145 }
146
147 void start_io(int max, uint64_t off, uint64_t len, int op_flags, bool read_flag)
148 {
149 {
150 std::lock_guard l{lock};
151 in_flight++;
152 }
153
154 librbd::RBD::AioCompletion *c;
155 if (read_flag) {
156 bufferlist *read_bl = new bufferlist();
157 c = new librbd::RBD::AioCompletion((void *)(new bencher_completer(this, read_bl)),
158 rbd_bencher_completion);
159 image->aio_read2(off, len, *read_bl, c, op_flags);
160 } else {
161 c = new librbd::RBD::AioCompletion((void *)(new bencher_completer(this, NULL)),
162 rbd_bencher_completion);
163 image->aio_write2(off, len, write_bl, c, op_flags);
164 }
165 }
166
167 int wait_for(int max, bool interrupt_on_terminating) {
168 std::unique_lock l{lock};
169 while (in_flight > max && !(terminating && interrupt_on_terminating)) {
170 cond.wait_for(l, 200ms);
171 }
172
173 return terminating ? -EINTR : 0;
174 }
175
176 };
177
178 void rbd_bencher_completion(void *vc, void *pc)
179 {
180 librbd::RBD::AioCompletion *c = (librbd::RBD::AioCompletion *)vc;
181 bencher_completer *bc = static_cast<bencher_completer *>(pc);
182 rbd_bencher *b = bc->bencher;
183 //cout << "complete " << c << std::endl;
184 int ret = c->get_return_value();
185 if (b->io_type == IO_TYPE_WRITE && ret != 0) {
186 std::cout << "write error: " << cpp_strerror(ret) << std::endl;
187 exit(ret < 0 ? -ret : ret);
188 } else if (b->io_type == IO_TYPE_READ && (unsigned int)ret != b->io_size) {
189 cout << "read error: " << cpp_strerror(ret) << std::endl;
190 exit(ret < 0 ? -ret : ret);
191 }
192 b->lock.lock();
193 b->in_flight--;
194 b->cond.notify_all();
195 b->lock.unlock();
196 c->release();
197 delete bc;
198 }
199
200 bool should_read(uint64_t read_proportion)
201 {
202 uint64_t rand_num = rand() % 100;
203
204 if (rand_num < read_proportion)
205 return true;
206 else
207 return false;
208 }
209
210 int do_bench(librbd::Image& image, io_type_t io_type,
211 uint64_t io_size, uint64_t io_threads,
212 uint64_t io_bytes, io_pattern_t io_pattern,
213 uint64_t read_proportion)
214 {
215 uint64_t size = 0;
216 image.size(&size);
217 if (io_size > size) {
218 std::cerr << "rbd: io-size " << byte_u_t(io_size) << " "
219 << "larger than image size " << byte_u_t(size) << std::endl;
220 return -EINVAL;
221 }
222
223 if (io_size > std::numeric_limits<uint32_t>::max()) {
224 std::cerr << "rbd: io-size should be less than 4G" << std::endl;
225 return -EINVAL;
226 }
227
228 int r = image.flush();
229 if (r < 0 && (r != -EROFS || io_type != IO_TYPE_READ)) {
230 std::cerr << "rbd: failed to flush: " << cpp_strerror(r) << std::endl;
231 return r;
232 }
233
234 rbd_bencher b(&image, io_type, io_size);
235
236 std::cout << "bench "
237 << " type " << (io_type == IO_TYPE_READ ? "read" :
238 io_type == IO_TYPE_WRITE ? "write" : "readwrite")
239 << (io_type == IO_TYPE_RW ? " read:write=" +
240 to_string(read_proportion) + ":" + to_string(100 - read_proportion) : "")
241 << " io_size " << io_size
242 << " io_threads " << io_threads
243 << " bytes " << io_bytes
244 << " pattern ";
245 switch (io_pattern) {
246 case IO_PATTERN_RAND:
247 std::cout << "random";
248 break;
249 case IO_PATTERN_SEQ:
250 std::cout << "sequential";
251 break;
252 case IO_PATTERN_FULL_SEQ:
253 std::cout << "full sequential";
254 break;
255 default:
256 ceph_assert(false);
257 break;
258 }
259 std::cout << std::endl;
260
261 srand(time(NULL) % (unsigned long) -1);
262
263 coarse_mono_time start = coarse_mono_clock::now();
264 chrono::duration<double> last = chrono::duration<double>::zero();
265 unsigned ios = 0;
266
267 vector<uint64_t> thread_offset;
268 uint64_t i;
269 uint64_t seq_chunk_length = (size / io_size / io_threads) * io_size;;
270
271 // disturb all thread's offset
272 for (i = 0; i < io_threads; i++) {
273 uint64_t start_pos = 0;
274 switch (io_pattern) {
275 case IO_PATTERN_RAND:
276 start_pos = (rand() % (size / io_size)) * io_size;
277 break;
278 case IO_PATTERN_SEQ:
279 start_pos = seq_chunk_length * i;
280 break;
281 case IO_PATTERN_FULL_SEQ:
282 start_pos = i * io_size;
283 break;
284 default:
285 break;
286 }
287 thread_offset.push_back(start_pos);
288 }
289
290 const int WINDOW_SIZE = 5;
291 typedef boost::accumulators::accumulator_set<
292 double, boost::accumulators::stats<
293 boost::accumulators::tag::rolling_sum> > RollingSum;
294
295 RollingSum time_acc(
296 boost::accumulators::tag::rolling_window::window_size = WINDOW_SIZE);
297 RollingSum ios_acc(
298 boost::accumulators::tag::rolling_window::window_size = WINDOW_SIZE);
299 RollingSum off_acc(
300 boost::accumulators::tag::rolling_window::window_size = WINDOW_SIZE);
301 uint64_t cur_ios = 0;
302 uint64_t cur_off = 0;
303
304 int op_flags;
305 if (io_pattern == IO_PATTERN_RAND) {
306 op_flags = LIBRADOS_OP_FLAG_FADVISE_RANDOM;
307 } else {
308 op_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL;
309 }
310
311 printf(" SEC OPS OPS/SEC BYTES/SEC\n");
312 uint64_t off;
313 int read_ops = 0;
314 int write_ops = 0;
315
316 for (off = 0; off < io_bytes; ) {
317 // Issue I/O
318 i = 0;
319 int r = 0;
320 while (i < io_threads && off < io_bytes) {
321 bool read_flag = should_read(read_proportion);
322
323 r = b.wait_for(io_threads - 1, true);
324 if (r < 0) {
325 break;
326 }
327 b.start_io(io_threads, thread_offset[i], io_size, op_flags, read_flag);
328
329 ++i;
330 ++ios;
331 off += io_size;
332
333 ++cur_ios;
334 cur_off += io_size;
335
336 if (read_flag)
337 read_ops++;
338 else
339 write_ops++;
340 }
341
342 if (r < 0) {
343 break;
344 }
345
346 // Set the thread_offsets of next I/O
347 for (i = 0; i < io_threads; ++i) {
348 switch (io_pattern) {
349 case IO_PATTERN_RAND:
350 thread_offset[i] = (rand() % (size / io_size)) * io_size;
351 continue;
352 case IO_PATTERN_SEQ:
353 if (off < (seq_chunk_length * io_threads)) {
354 thread_offset[i] += io_size;
355 } else {
356 // thread_offset is adjusted to the chunks unassigned to threads.
357 thread_offset[i] = off + (i * io_size);
358 }
359 if (thread_offset[i] + io_size > size) {
360 thread_offset[i] = seq_chunk_length * i;
361 }
362 break;
363 case IO_PATTERN_FULL_SEQ:
364 thread_offset[i] += (io_size * io_threads);
365 if (thread_offset[i] >= size) {
366 thread_offset[i] = i * io_size;
367 }
368 break;
369 default:
370 break;
371 }
372 }
373
374 coarse_mono_time now = coarse_mono_clock::now();
375 chrono::duration<double> elapsed = now - start;
376 if (last == chrono::duration<double>::zero()) {
377 last = elapsed;
378 } else if ((int)elapsed.count() != (int)last.count()) {
379 time_acc((elapsed - last).count());
380 ios_acc(static_cast<double>(cur_ios));
381 off_acc(static_cast<double>(cur_off));
382 cur_ios = 0;
383 cur_off = 0;
384
385 double time_sum = boost::accumulators::rolling_sum(time_acc);
386 std::cout.width(5);
387 std::cout << (int)elapsed.count();
388 std::cout.width(10);
389 std::cout << (int)(ios - io_threads);
390 std::cout.width(10);
391 std::cout << boost::accumulators::rolling_sum(ios_acc) / time_sum;
392 std::cout.width(10);
393 std::cout << byte_u_t(boost::accumulators::rolling_sum(off_acc) / time_sum) << "/s"
394 << std::endl;
395 last = elapsed;
396 }
397 }
398 b.wait_for(0, false);
399
400 if (io_type != IO_TYPE_READ) {
401 r = image.flush();
402 if (r < 0) {
403 std::cerr << "rbd: failed to flush at the end: " << cpp_strerror(r)
404 << std::endl;
405 }
406 }
407
408 coarse_mono_time now = coarse_mono_clock::now();
409 chrono::duration<double> elapsed = now - start;
410
411 std::cout << "elapsed: " << (int)elapsed.count() << " "
412 << "ops: " << ios << " "
413 << "ops/sec: " << (double)ios / elapsed.count() << " "
414 << "bytes/sec: " << byte_u_t((double)off / elapsed.count()) << "/s"
415 << std::endl;
416
417 if (io_type == IO_TYPE_RW) {
418 std::cout << "read_ops: " << read_ops << " "
419 << "read_ops/sec: " << (double)read_ops / elapsed.count() << " "
420 << "read_bytes/sec: " << byte_u_t((double)read_ops * io_size / elapsed.count()) << "/s"
421 << std::endl;
422
423 std::cout << "write_ops: " << write_ops << " "
424 << "write_ops/sec: " << (double)write_ops / elapsed.count() << " "
425 << "write_bytes/sec: " << byte_u_t((double)write_ops * io_size / elapsed.count()) << "/s"
426 << std::endl;
427
428 }
429
430 return 0;
431 }
432
433 void add_bench_common_options(po::options_description *positional,
434 po::options_description *options) {
435 at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
436
437 options->add_options()
438 ("io-size", po::value<Size>(), "IO size (in B/K/M/G/T) [default: 4K]")
439 ("io-threads", po::value<uint32_t>(), "ios in flight [default: 16]")
440 ("io-total", po::value<Size>(), "total size for IO (in B/K/M/G/T) [default: 1G]")
441 ("io-pattern", po::value<IOPattern>(), "IO pattern (rand, seq, or full-seq) [default: seq]")
442 ("rw-mix-read", po::value<uint64_t>(), "read proportion in readwrite (<= 100) [default: 50]");
443 }
444
445 void get_arguments_for_write(po::options_description *positional,
446 po::options_description *options) {
447 add_bench_common_options(positional, options);
448 }
449
450 void get_arguments_for_bench(po::options_description *positional,
451 po::options_description *options) {
452 add_bench_common_options(positional, options);
453
454 options->add_options()
455 ("io-type", po::value<IOType>()->required(), "IO type (read, write, or readwrite(rw))");
456 }
457
458 int bench_execute(const po::variables_map &vm, io_type_t bench_io_type) {
459 size_t arg_index = 0;
460 std::string pool_name;
461 std::string namespace_name;
462 std::string image_name;
463 std::string snap_name;
464 utils::SnapshotPresence snap_presence = utils::SNAPSHOT_PRESENCE_NONE;
465 if (bench_io_type == IO_TYPE_READ)
466 snap_presence = utils::SNAPSHOT_PRESENCE_PERMITTED;
467
468 int r = utils::get_pool_image_snapshot_names(
469 vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
470 &image_name, &snap_name, true, snap_presence, utils::SPEC_VALIDATION_NONE);
471 if (r < 0) {
472 return r;
473 }
474
475 uint64_t bench_io_size;
476 if (vm.count("io-size")) {
477 bench_io_size = vm["io-size"].as<uint64_t>();
478 } else {
479 bench_io_size = 4096;
480 }
481 if (bench_io_size == 0) {
482 std::cerr << "rbd: --io-size should be greater than zero." << std::endl;
483 return -EINVAL;
484 }
485
486 uint32_t bench_io_threads;
487 if (vm.count("io-threads")) {
488 bench_io_threads = vm["io-threads"].as<uint32_t>();
489 } else {
490 bench_io_threads = 16;
491 }
492 if (bench_io_threads == 0) {
493 std::cerr << "rbd: --io-threads should be greater than zero." << std::endl;
494 return -EINVAL;
495 }
496
497 uint64_t bench_bytes;
498 if (vm.count("io-total")) {
499 bench_bytes = vm["io-total"].as<uint64_t>();
500 } else {
501 bench_bytes = 1 << 30;
502 }
503
504 io_pattern_t bench_pattern;
505 if (vm.count("io-pattern")) {
506 bench_pattern = vm["io-pattern"].as<io_pattern_t>();
507 } else {
508 bench_pattern = IO_PATTERN_SEQ;
509 }
510
511 uint64_t bench_read_proportion;
512 if (bench_io_type == IO_TYPE_READ) {
513 bench_read_proportion = 100;
514 } else if (bench_io_type == IO_TYPE_WRITE) {
515 bench_read_proportion = 0;
516 } else {
517 if (vm.count("rw-mix-read")) {
518 bench_read_proportion = vm["rw-mix-read"].as<uint64_t>();
519 } else {
520 bench_read_proportion = 50;
521 }
522
523 if (bench_read_proportion > 100) {
524 std::cerr << "rbd: --rw-mix-read should not be larger than 100." << std::endl;
525 return -EINVAL;
526 }
527 }
528
529 librados::Rados rados;
530 librados::IoCtx io_ctx;
531 librbd::Image image;
532 r = utils::init_and_open_image(pool_name, namespace_name, image_name, "",
533 snap_name, false, &rados, &io_ctx, &image);
534 if (r < 0) {
535 return r;
536 }
537
538 init_async_signal_handler();
539 register_async_signal_handler(SIGHUP, sighup_handler);
540 register_async_signal_handler_oneshot(SIGINT, handle_signal);
541 register_async_signal_handler_oneshot(SIGTERM, handle_signal);
542
543 r = do_bench(image, bench_io_type, bench_io_size, bench_io_threads,
544 bench_bytes, bench_pattern, bench_read_proportion);
545
546 unregister_async_signal_handler(SIGHUP, sighup_handler);
547 unregister_async_signal_handler(SIGINT, handle_signal);
548 unregister_async_signal_handler(SIGTERM, handle_signal);
549 shutdown_async_signal_handler();
550
551 if (r < 0) {
552 std::cerr << "bench failed: " << cpp_strerror(r) << std::endl;
553 return r;
554 }
555 return 0;
556 }
557
558 int execute_for_write(const po::variables_map &vm,
559 const std::vector<std::string> &ceph_global_init_args) {
560 std::cerr << "rbd: bench-write is deprecated, use rbd bench --io-type write ..." << std::endl;
561 return bench_execute(vm, IO_TYPE_WRITE);
562 }
563
564 int execute_for_bench(const po::variables_map &vm,
565 const std::vector<std::string> &ceph_global_init_args) {
566 io_type_t bench_io_type;
567 if (vm.count("io-type")) {
568 bench_io_type = vm["io-type"].as<io_type_t>();
569 } else {
570 std::cerr << "rbd: --io-type must be specified." << std::endl;
571 return -EINVAL;
572 }
573
574 return bench_execute(vm, bench_io_type);
575 }
576
577 Shell::Action action_write(
578 {"bench-write"}, {}, "Simple write benchmark. (Deprecated, please use `rbd bench --io-type write` instead.)",
579 "", &get_arguments_for_write, &execute_for_write, false);
580
581 Shell::Action action_bench(
582 {"bench"}, {}, "Simple benchmark.", "", &get_arguments_for_bench, &execute_for_bench);
583
584 } // namespace bench
585 } // namespace action
586 } // namespace rbd