1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "tools/rbd/ArgumentTypes.h"
5 #include "tools/rbd/Shell.h"
6 #include "tools/rbd/Utils.h"
7 #include "common/errno.h"
8 #include "common/strtol.h"
9 #include "common/ceph_mutex.h"
10 #include "include/types.h"
11 #include "global/signal_handler.h"
15 #include <boost/accumulators/accumulators.hpp>
16 #include <boost/accumulators/statistics/stats.hpp>
17 #include <boost/accumulators/statistics/rolling_sum.hpp>
18 #include <boost/program_options.hpp>
20 using namespace std::chrono
;
// Process-wide stop flag. rbd_bencher::wait_for() polls it and reports
// -EINTR once it is set.
// NOTE(review): presumably raised by handle_signal() on SIGINT/SIGTERM --
// confirm against the handler body.
// Initialised explicitly so the starting value does not depend on how a
// default-constructed std::atomic behaves under the language standard in use.
static std::atomic<bool> terminating{false};
// Signal callback that bench_execute() registers (one-shot) for SIGINT and
// SIGTERM. It asserts that it is never invoked for any other signal number.
23 static void handle_signal(int signum
)
25 ceph_assert(signum
== SIGINT
|| signum
== SIGTERM
);
33 namespace at
= argument_types
;
34 namespace po
= boost::program_options
;
// boost::program_options validator overload selected for options declared
// with the Size tag type ("io-size", "io-total").
// Takes the option's single string value, parses it with
// strict_iecstrtoll(), and throws
// po::validation_error(invalid_option_value) when the parser reports an
// error through parse_error.
// `target_type` and the trailing int only select this overload; they are not
// read in the visible code.
56 void validate(boost::any
& v
, const std::vector
<std::string
>& values
,
57 Size
*target_type
, int) {
58 po::validators::check_first_occurrence(v
);
59 const std::string
&s
= po::validators::get_single_string(values
);
61 std::string parse_error
;
62 uint64_t size
= strict_iecstrtoll(s
, &parse_error
);
63 if (!parse_error
.empty()) {
64 throw po::validation_error(po::validation_error::invalid_option_value
);
// boost::program_options validator overload selected for options declared
// with the IOPattern tag type ("io-pattern").
// Compares the option's single string value against the known pattern names
// ("seq" and "full-seq" are visible here; the option help also lists "rand")
// and stores the matching io_pattern_t in `v`, e.g. IO_PATTERN_FULL_SEQ for
// "full-seq".
// Throws po::validation_error(invalid_option_value).
// NOTE(review): presumably only for unrecognised names -- confirm.
69 void validate(boost::any
& v
, const std::vector
<std::string
>& values
,
70 IOPattern
*target_type
, int) {
71 po::validators::check_first_occurrence(v
);
72 const std::string
&s
= po::validators::get_single_string(values
);
75 } else if (s
== "seq") {
77 } else if (s
== "full-seq") {
78 v
= IO_PATTERN_FULL_SEQ
;
80 throw po::validation_error(po::validation_error::invalid_option_value
);
// Translates an --io-type string into an io_type_t.
// Recognised spellings: "read", "write", and "readwrite" or its alias "rw".
// The caller, validate(IOType), rejects any result >= IO_TYPE_NUM.
// NOTE(review): the string is taken by value; a const reference would avoid
// a copy.
84 io_type_t
get_io_type(std::string io_type_string
) {
85 if (io_type_string
== "read")
87 else if (io_type_string
== "write")
89 else if (io_type_string
== "readwrite" || io_type_string
== "rw")
95 void validate(boost::any
& v
, const std::vector
<std::string
>& values
,
96 IOType
*target_type
, int) {
97 po::validators::check_first_occurrence(v
);
98 const std::string
&s
= po::validators::get_single_string(values
);
99 io_type_t io_type
= get_io_type(s
);
100 if (io_type
>= IO_TYPE_NUM
)
101 throw po::validation_error(po::validation_error::invalid_option_value
);
103 v
= boost::any(io_type
);
106 } // anonymous namespace
108 static void rbd_bencher_completion(void *c
, void *pc
);
// Per-request context handed to rbd_bencher_completion() as its second
// argument. It pairs the rbd_bencher that issued the request with a
// bufferlist pointer: rbd_bencher::start_io() passes the freshly allocated
// read buffer for reads and NULL for writes.
111 struct bencher_completer
{
112 rbd_bencher
*bencher
;
116 bencher_completer(rbd_bencher
*bencher
, bufferlist
*bl
)
117 : bencher(bencher
), bl(bl
)
// Image the asynchronous reads and writes are issued against (see start_io).
128 librbd::Image
*image
;
// Mutex taken by start_io() and wait_for().
129 ceph::mutex lock
= ceph::make_mutex("rbd_bencher::lock");
// Waited on in wait_for(); notified from rbd_bencher_completion().
130 ceph::condition_variable cond
;
// Constructor. For write and readwrite runs it prepares the payload once:
// a single io_size-byte buffer filled with one random byte value
// (rand() & 0xff) is appended to write_bl, which start_io() hands to every
// aio_write2() call.
136 explicit rbd_bencher(librbd::Image
*i
, io_type_t io_type
, uint64_t io_size
)
142 if (io_type
== IO_TYPE_WRITE
|| io_type
== IO_TYPE_RW
) {
143 bufferptr
bp(io_size
);
144 memset(bp
.c_str(), rand() & 0xff, io_size
);
145 write_bl
.push_back(bp
);
// Issues one asynchronous request of `len` bytes at offset `off`, passing
// `op_flags` through to librbd. Runs with `lock` held.
// Two request shapes are visible:
//  - read: allocates a new bufferlist and a bencher_completer that carries
//    it, then calls aio_read2();
//  - write: allocates a bencher_completer with a NULL buffer and calls
//    aio_write2() with the shared write_bl payload.
// Both completions call rbd_bencher_completion().
// NOTE(review): presumably `read_flag` selects between the two -- confirm.
// NOTE(review): the completer and the read bufferlist are created with raw
// `new`; where they are released is not visible here.
149 void start_io(int max
, uint64_t off
, uint64_t len
, int op_flags
, bool read_flag
)
152 std::lock_guard l
{lock
};
156 librbd::RBD::AioCompletion
*c
;
158 bufferlist
*read_bl
= new bufferlist();
159 c
= new librbd::RBD::AioCompletion((void *)(new bencher_completer(this, read_bl
)),
160 rbd_bencher_completion
);
161 image
->aio_read2(off
, len
, *read_bl
, c
, op_flags
);
163 c
= new librbd::RBD::AioCompletion((void *)(new bencher_completer(this, NULL
)),
164 rbd_bencher_completion
);
165 image
->aio_write2(off
, len
, write_bl
, c
, op_flags
);
// Blocks while more than `max` requests are in flight.
// When `interrupt_on_terminating` is true the wait also ends as soon as the
// global `terminating` flag is set.
// Returns -EINTR if `terminating` is set when the wait ends (regardless of
// `interrupt_on_terminating`), otherwise 0.
169 int wait_for(int max
, bool interrupt_on_terminating
) {
170 std::unique_lock l
{lock
};
171 while (in_flight
> max
&& !(terminating
&& interrupt_on_terminating
)) {
// Timed wait so the loop condition, including `terminating`, is re-checked
// at least every 200ms even if no completion notifies `cond`.
172 cond
.wait_for(l
, 200ms
);
175 return terminating
? -EINTR
: 0;
// librbd completion callback for every request started by
// rbd_bencher::start_io().
//   vc - the librbd::RBD::AioCompletion that finished
//   pc - the bencher_completer allocated for the request
// Error policy visible here:
//   - write-only run: any non-zero return value is fatal;
//   - read-only run: a return value different from io_size is fatal.
// A fatal error prints a message to stdout and exits the process with |ret|.
// Afterwards the bencher's condition variable is notified so wait_for() can
// re-check its condition.
// NOTE(review): neither check matches IO_TYPE_RW, so failures in a readwrite
// run are not detected by the visible code.
// NOTE(review): on a short read `ret` is a non-negative byte count, so
// cpp_strerror(ret) and exit(ret) are fed a length rather than an errno.
180 void rbd_bencher_completion(void *vc
, void *pc
)
182 librbd::RBD::AioCompletion
*c
= (librbd::RBD::AioCompletion
*)vc
;
183 bencher_completer
*bc
= static_cast<bencher_completer
*>(pc
);
184 rbd_bencher
*b
= bc
->bencher
;
185 //cout << "complete " << c << std::endl;
186 int ret
= c
->get_return_value();
187 if (b
->io_type
== IO_TYPE_WRITE
&& ret
!= 0) {
188 std::cout
<< "write error: " << cpp_strerror(ret
) << std::endl
;
189 exit(ret
< 0 ? -ret
: ret
);
190 } else if (b
->io_type
== IO_TYPE_READ
&& (unsigned int)ret
!= b
->io_size
) {
191 std::cout
<< "read error: " << cpp_strerror(ret
) << std::endl
;
192 exit(ret
< 0 ? -ret
: ret
);
196 b
->cond
.notify_all();
// Decides whether the next request is a read. Draws rand() % 100 and
// compares it with `read_proportion`.
// NOTE(review): presumably returns true when the draw is below
// `read_proportion`, so 0 means never read and 100 means always read --
// confirm.
202 bool should_read(uint64_t read_proportion
)
204 uint64_t rand_num
= rand() % 100;
206 if (rand_num
< read_proportion
)
// Runs the benchmark against `image` and prints progress and a final summary
// to stdout.
//   io_type         - read, write or readwrite run
//   io_size         - size of each request in bytes
//   io_threads      - number of requests kept in flight; also the number of
//                     per-"thread" offsets tracked in thread_offset
//   io_bytes        - requests are issued until `off` reaches this value
//   io_pattern      - random, sequential or full sequential offset pattern
//   read_proportion - passed to should_read() for each request
// Error paths print a message to stderr.
// NOTE(review): the returned values are not visible here.
212 int do_bench(librbd::Image
& image
, io_type_t io_type
,
213 uint64_t io_size
, uint64_t io_threads
,
214 uint64_t io_bytes
, io_pattern_t io_pattern
,
215 uint64_t read_proportion
)
// Reject an I/O size larger than `size` (the image size, per the message).
219 if (io_size
> size
) {
220 std::cerr
<< "rbd: io-size " << byte_u_t(io_size
) << " "
221 << "larger than image size " << byte_u_t(size
) << std::endl
;
// Reject an I/O size that does not fit in 32 bits.
225 if (io_size
> std::numeric_limits
<uint32_t>::max()) {
226 std::cerr
<< "rbd: io-size should be less than 4G" << std::endl
;
// Flush the image before measuring. -EROFS is tolerated only for a
// read-only run.
230 int r
= image
.flush();
231 if (r
< 0 && (r
!= -EROFS
|| io_type
!= IO_TYPE_READ
)) {
232 std::cerr
<< "rbd: failed to flush: " << cpp_strerror(r
) << std::endl
;
236 rbd_bencher
b(&image
, io_type
, io_size
);
// Banner line describing the run parameters.
238 std::cout
<< "bench "
239 << " type " << (io_type
== IO_TYPE_READ
? "read" :
240 io_type
== IO_TYPE_WRITE
? "write" : "readwrite")
241 << (io_type
== IO_TYPE_RW
? " read:write=" +
242 std::to_string(read_proportion
) + ":" +
243 std::to_string(100 - read_proportion
) : "")
244 << " io_size " << io_size
245 << " io_threads " << io_threads
246 << " bytes " << io_bytes
248 switch (io_pattern
) {
249 case IO_PATTERN_RAND
:
250 std::cout
<< "random";
253 std::cout
<< "sequential";
255 case IO_PATTERN_FULL_SEQ
:
256 std::cout
<< "full sequential";
262 std::cout
<< std::endl
;
// Seed rand() from the wall clock; offsets and the read/write mix draw from
// it.
264 srand(time(NULL
) % (unsigned long) -1);
266 coarse_mono_time start
= coarse_mono_clock::now();
267 std::chrono::duration
<double> last
= std::chrono::duration
<double>::zero();
270 std::vector
<uint64_t> thread_offset
;
// Length of the region each thread gets in the sequential pattern: the image
// divided evenly between threads, in whole io_size units.
272 uint64_t seq_chunk_length
= (size
/ io_size
/ io_threads
) * io_size
;;
274 // disturb all thread's offset
275 for (i
= 0; i
< io_threads
; i
++) {
276 uint64_t start_pos
= 0;
277 switch (io_pattern
) {
278 case IO_PATTERN_RAND
:
279 start_pos
= (rand() % (size
/ io_size
)) * io_size
;
282 start_pos
= seq_chunk_length
* i
;
284 case IO_PATTERN_FULL_SEQ
:
285 start_pos
= i
* io_size
;
290 thread_offset
.push_back(start_pos
);
// Throughput is reported from rolling sums over the last WINDOW_SIZE samples
// of elapsed time, I/O count and byte count.
293 const int WINDOW_SIZE
= 5;
294 typedef boost::accumulators::accumulator_set
<
295 double, boost::accumulators::stats
<
296 boost::accumulators::tag::rolling_sum
> > RollingSum
;
299 boost::accumulators::tag::rolling_window::window_size
= WINDOW_SIZE
);
301 boost::accumulators::tag::rolling_window::window_size
= WINDOW_SIZE
);
303 boost::accumulators::tag::rolling_window::window_size
= WINDOW_SIZE
);
304 uint64_t cur_ios
= 0;
305 uint64_t cur_off
= 0;
// Advise librados of the access pattern.
308 if (io_pattern
== IO_PATTERN_RAND
) {
309 op_flags
= LIBRADOS_OP_FLAG_FADVISE_RANDOM
;
311 op_flags
= LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL
;
314 printf(" SEC OPS OPS/SEC BYTES/SEC\n");
// Main loop: each pass issues up to io_threads requests (one per tracked
// offset). Before each request it waits until fewer than io_threads are in
// flight; that wait can be interrupted by `terminating`.
319 for (off
= 0; off
< io_bytes
; ) {
323 while (i
< io_threads
&& off
< io_bytes
) {
324 bool read_flag
= should_read(read_proportion
);
326 r
= b
.wait_for(io_threads
- 1, true);
330 b
.start_io(io_threads
, thread_offset
[i
], io_size
, op_flags
, read_flag
);
349 // Set the thread_offsets of next I/O
350 for (i
= 0; i
< io_threads
; ++i
) {
351 switch (io_pattern
) {
352 case IO_PATTERN_RAND
:
353 thread_offset
[i
] = (rand() % (size
/ io_size
)) * io_size
;
356 if (off
< (seq_chunk_length
* io_threads
)) {
357 thread_offset
[i
] += io_size
;
359 // thread_offset is adjusted to the chunks unassigned to threads.
360 thread_offset
[i
] = off
+ (i
* io_size
);
362 if (thread_offset
[i
] + io_size
> size
) {
363 thread_offset
[i
] = seq_chunk_length
* i
;
366 case IO_PATTERN_FULL_SEQ
:
367 thread_offset
[i
] += (io_size
* io_threads
);
368 if (thread_offset
[i
] >= size
) {
369 thread_offset
[i
] = i
* io_size
;
// Progress reporting: a sample is recorded and a line printed whenever the
// whole-second part of the elapsed time changes.
377 coarse_mono_time now
= coarse_mono_clock::now();
378 std::chrono::duration
<double> elapsed
= now
- start
;
379 if (last
== std::chrono::duration
<double>::zero()) {
381 } else if ((int)elapsed
.count() != (int)last
.count()) {
382 time_acc((elapsed
- last
).count());
383 ios_acc(static_cast<double>(cur_ios
));
384 off_acc(static_cast<double>(cur_off
));
388 double time_sum
= boost::accumulators::rolling_sum(time_acc
);
390 std::cout
<< (int)elapsed
.count();
392 std::cout
<< ios
- io_threads
;
394 std::cout
<< boost::accumulators::rolling_sum(ios_acc
) / time_sum
;
396 std::cout
<< byte_u_t(boost::accumulators::rolling_sum(off_acc
) / time_sum
) << "/s"
// Drain: wait for every outstanding request; this wait is not interrupted by
// `terminating`.
401 b
.wait_for(0, false);
// A final-flush failure is reported for runs that are not read-only.
403 if (io_type
!= IO_TYPE_READ
) {
406 std::cerr
<< "rbd: failed to flush at the end: " << cpp_strerror(r
)
// Final summary over the whole run.
411 coarse_mono_time now
= coarse_mono_clock::now();
412 std::chrono::duration
<double> elapsed
= now
- start
;
414 std::cout
<< "elapsed: " << (int)elapsed
.count() << " "
415 << "ops: " << ios
<< " "
416 << "ops/sec: " << (double)ios
/ elapsed
.count() << " "
417 << "bytes/sec: " << byte_u_t((double)off
/ elapsed
.count()) << "/s"
// Readwrite runs additionally report reads and writes separately.
420 if (io_type
== IO_TYPE_RW
) {
421 std::cout
<< "read_ops: " << read_ops
<< " "
422 << "read_ops/sec: " << (double)read_ops
/ elapsed
.count() << " "
423 << "read_bytes/sec: " << byte_u_t((double)read_ops
* io_size
/ elapsed
.count()) << "/s"
426 std::cout
<< "write_ops: " << write_ops
<< " "
427 << "write_ops/sec: " << (double)write_ops
/ elapsed
.count() << " "
428 << "write_bytes/sec: " << byte_u_t((double)write_ops
* io_size
/ elapsed
.count()) << "/s"
436 void add_bench_common_options(po::options_description
*positional
,
437 po::options_description
*options
) {
438 at::add_image_spec_options(positional
, options
, at::ARGUMENT_MODIFIER_NONE
);
440 options
->add_options()
441 ("io-size", po::value
<Size
>(), "IO size (in B/K/M/G) (< 4G) [default: 4K]")
442 ("io-threads", po::value
<uint32_t>(), "ios in flight [default: 16]")
443 ("io-total", po::value
<Size
>(), "total size for IO (in B/K/M/G/T) [default: 1G]")
444 ("io-pattern", po::value
<IOPattern
>(), "IO pattern (rand, seq, or full-seq) [default: seq]")
445 ("rw-mix-read", po::value
<uint64_t>(), "read proportion in readwrite (<= 100) [default: 50]");
448 void get_arguments_for_write(po::options_description
*positional
,
449 po::options_description
*options
) {
450 add_bench_common_options(positional
, options
);
453 void get_arguments_for_bench(po::options_description
*positional
,
454 po::options_description
*options
) {
455 add_bench_common_options(positional
, options
);
457 options
->add_options()
458 ("io-type", po::value
<IOType
>()->required(), "IO type (read, write, or readwrite(rw))");
// Shared implementation behind `rbd bench` and `rbd bench-write`.
//   vm            - parsed command-line options
//   bench_io_type - kind of I/O to run
// Steps visible here: resolve the image spec, read the tuning options
// (falling back to defaults), open the image, install signal handlers, run
// do_bench(), remove the handlers, and report a failure on stderr.
// NOTE(review): the returned values are not visible here.
461 int bench_execute(const po::variables_map
&vm
, io_type_t bench_io_type
) {
462 size_t arg_index
= 0;
463 std::string pool_name
;
464 std::string namespace_name
;
465 std::string image_name
;
466 std::string snap_name
;
// A snapshot name is permitted in the image spec only for read benchmarks.
467 utils::SnapshotPresence snap_presence
= utils::SNAPSHOT_PRESENCE_NONE
;
468 if (bench_io_type
== IO_TYPE_READ
)
469 snap_presence
= utils::SNAPSHOT_PRESENCE_PERMITTED
;
471 int r
= utils::get_pool_image_snapshot_names(
472 vm
, at::ARGUMENT_MODIFIER_NONE
, &arg_index
, &pool_name
, &namespace_name
,
473 &image_name
, &snap_name
, true, snap_presence
, utils::SPEC_VALIDATION_NONE
);
// --io-size: default 4096 bytes; zero is reported as an error.
478 uint64_t bench_io_size
;
479 if (vm
.count("io-size")) {
480 bench_io_size
= vm
["io-size"].as
<uint64_t>();
482 bench_io_size
= 4096;
484 if (bench_io_size
== 0) {
485 std::cerr
<< "rbd: --io-size should be greater than zero." << std::endl
;
// --io-threads: default 16; zero is reported as an error.
489 uint32_t bench_io_threads
;
490 if (vm
.count("io-threads")) {
491 bench_io_threads
= vm
["io-threads"].as
<uint32_t>();
493 bench_io_threads
= 16;
495 if (bench_io_threads
== 0) {
496 std::cerr
<< "rbd: --io-threads should be greater than zero." << std::endl
;
// --io-total: default 1 << 30 bytes.
500 uint64_t bench_bytes
;
501 if (vm
.count("io-total")) {
502 bench_bytes
= vm
["io-total"].as
<uint64_t>();
504 bench_bytes
= 1 << 30;
// --io-pattern: default sequential.
507 io_pattern_t bench_pattern
;
508 if (vm
.count("io-pattern")) {
509 bench_pattern
= vm
["io-pattern"].as
<io_pattern_t
>();
511 bench_pattern
= IO_PATTERN_SEQ
;
// Read proportion: fixed at 100 for read runs and 0 for write runs;
// otherwise taken from --rw-mix-read (default 50) and reported as an error
// when above 100.
514 uint64_t bench_read_proportion
;
515 if (bench_io_type
== IO_TYPE_READ
) {
516 bench_read_proportion
= 100;
517 } else if (bench_io_type
== IO_TYPE_WRITE
) {
518 bench_read_proportion
= 0;
520 if (vm
.count("rw-mix-read")) {
521 bench_read_proportion
= vm
["rw-mix-read"].as
<uint64_t>();
523 bench_read_proportion
= 50;
526 if (bench_read_proportion
> 100) {
527 std::cerr
<< "rbd: --rw-mix-read should not be larger than 100." << std::endl
;
532 librados::Rados rados
;
533 librados::IoCtx io_ctx
;
535 r
= utils::init_and_open_image(pool_name
, namespace_name
, image_name
, "",
536 snap_name
, false, &rados
, &io_ctx
, &image
);
// SIGINT/SIGTERM are routed to handle_signal (one-shot) for the duration of
// the benchmark; all handlers are removed again afterwards.
541 init_async_signal_handler();
542 register_async_signal_handler(SIGHUP
, sighup_handler
);
543 register_async_signal_handler_oneshot(SIGINT
, handle_signal
);
544 register_async_signal_handler_oneshot(SIGTERM
, handle_signal
);
546 r
= do_bench(image
, bench_io_type
, bench_io_size
, bench_io_threads
,
547 bench_bytes
, bench_pattern
, bench_read_proportion
);
549 unregister_async_signal_handler(SIGHUP
, sighup_handler
);
550 unregister_async_signal_handler(SIGINT
, handle_signal
);
551 unregister_async_signal_handler(SIGTERM
, handle_signal
);
552 shutdown_async_signal_handler();
555 std::cerr
<< "bench failed: " << cpp_strerror(r
) << std::endl
;
561 int execute_for_write(const po::variables_map
&vm
,
562 const std::vector
<std::string
> &ceph_global_init_args
) {
563 std::cerr
<< "rbd: bench-write is deprecated, use rbd bench --io-type write ..." << std::endl
;
564 return bench_execute(vm
, IO_TYPE_WRITE
);
// Entry point for the `bench` command.
// Reads the I/O type from --io-type and hands off to bench_execute(); when
// the option is absent it prints an error to stderr instead.
//   vm                    - parsed command-line options
//   ceph_global_init_args - not read in the visible code
567 int execute_for_bench(const po::variables_map
&vm
,
568 const std::vector
<std::string
> &ceph_global_init_args
) {
569 io_type_t bench_io_type
;
570 if (vm
.count("io-type")) {
571 bench_io_type
= vm
["io-type"].as
<io_type_t
>();
573 std::cerr
<< "rbd: --io-type must be specified." << std::endl
;
577 return bench_execute(vm
, bench_io_type
);
580 Shell::Action
action_write(
581 {"bench-write"}, {}, "Simple write benchmark. (Deprecated, please use `rbd bench --io-type write` instead.)",
582 "", &get_arguments_for_write
, &execute_for_write
, false);
584 Shell::Action
action_bench(
585 {"bench"}, {}, "Simple benchmark.", "", &get_arguments_for_bench
, &execute_for_bench
);
588 } // namespace action