1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "tools/rbd/ArgumentTypes.h"
5 #include "tools/rbd/Shell.h"
6 #include "tools/rbd/Utils.h"
7 #include "common/errno.h"
8 #include "common/strtol.h"
9 #include "common/ceph_mutex.h"
10 #include "include/types.h"
11 #include "global/signal_handler.h"
13 #include <boost/accumulators/accumulators.hpp>
14 #include <boost/accumulators/statistics/stats.hpp>
15 #include <boost/accumulators/statistics/rolling_sum.hpp>
16 #include <boost/program_options.hpp>
// Bring the std::chrono names into file scope; the `200ms` duration literal
// used in wait_for() below presumably relies on this.
18 using namespace std::chrono
;
// Process-wide flag read by rbd_bencher::wait_for(): once it is true,
// wait_for() can stop waiting early and reports -EINTR.
20 static std::atomic
<bool> terminating
;
// Handler registered for SIGINT and SIGTERM by bench_execute(). Asserts that
// it was invoked for one of those two signals.
// NOTE(review): the remainder of the body is not visible in this listing
// (the embedded line numbering skips) -- presumably it sets `terminating`;
// confirm against the full file.
20 static void handle_signal(int signum
)
23 ceph_assert(signum
== SIGINT
|| signum
== SIGTERM
);
// Short aliases used throughout this file: `at` for the rbd argument-type
// helpers and `po` for boost::program_options.
31 namespace at
= argument_types
;
32 namespace po
= boost::program_options
;
// boost::program_options validator overload selected by the `Size` tag type
// (used by the "io-size" and "io-total" options declared further down).
//
// v           -- option storage; must not have been assigned already
//                (enforced by check_first_occurrence).
// values      -- raw tokens for the option; exactly one string is expected.
// target_type -- only used for overload selection.
//
// Throws po::validation_error(invalid_option_value) when
// strict_iecstrtoll() reports a parse error for the string.
// NOTE(review): the statement that stores the parsed size into `v` is not
// visible in this listing.
54 void validate(boost::any
& v
, const std::vector
<std::string
>& values
,
55 Size
*target_type
, int) {
56 po::validators::check_first_occurrence(v
);
57 const std::string
&s
= po::validators::get_single_string(values
);
// Parse the size string; parse_error is non-empty on failure.
59 std::string parse_error
;
60 uint64_t size
= strict_iecstrtoll(s
.c_str(), &parse_error
);
61 if (!parse_error
.empty()) {
62 throw po::validation_error(po::validation_error::invalid_option_value
);
// boost::program_options validator overload selected by the `IOPattern` tag
// type (used by the "io-pattern" option declared further down).
//
// Compares the single option string against the known pattern names and
// stores the matching io_pattern_t value in `v`; "full-seq" maps to
// IO_PATTERN_FULL_SEQ. A po::validation_error(invalid_option_value) is
// thrown at the end of the chain, presumably for unrecognised names.
// NOTE(review): the first branch of the chain and the assignment for "seq"
// are not visible in this listing; per the option help text the accepted
// names are rand, seq and full-seq.
67 void validate(boost::any
& v
, const std::vector
<std::string
>& values
,
68 IOPattern
*target_type
, int) {
69 po::validators::check_first_occurrence(v
);
70 const std::string
&s
= po::validators::get_single_string(values
);
73 } else if (s
== "seq") {
75 } else if (s
== "full-seq") {
76 v
= IO_PATTERN_FULL_SEQ
;
78 throw po::validation_error(po::validation_error::invalid_option_value
);
// Translate an --io-type string into an io_type_t value. The recognised
// spellings are "read", "write", and "readwrite" (or its alias "rw").
// NOTE(review): the return statements are not visible in this listing. The
// IOType validator rejects results >= IO_TYPE_NUM, so an unrecognised
// string presumably yields IO_TYPE_NUM -- confirm against the full file.
82 io_type_t
get_io_type(std::string io_type_string
) {
83 if (io_type_string
== "read")
85 else if (io_type_string
== "write")
87 else if (io_type_string
== "readwrite" || io_type_string
== "rw")
// boost::program_options validator overload selected by the `IOType` tag
// type (used by the "io-type" option of `rbd bench`).
//
// Converts the single option string with get_io_type(); a result that is
// >= IO_TYPE_NUM is treated as invalid and reported with
// po::validation_error(invalid_option_value). Otherwise the io_type_t value
// is stored in `v`, which is why callers read it back with as<io_type_t>().
93 void validate(boost::any
& v
, const std::vector
<std::string
>& values
,
94 IOType
*target_type
, int) {
95 po::validators::check_first_occurrence(v
);
96 const std::string
&s
= po::validators::get_single_string(values
);
97 io_type_t io_type
= get_io_type(s
);
98 if (io_type
>= IO_TYPE_NUM
)
99 throw po::validation_error(po::validation_error::invalid_option_value
);
// Store the enum value itself (not the tag type) in the option storage.
101 v
= boost::any(io_type
);
104 } // anonymous namespace
// Forward declaration of the AIO completion callback so that
// rbd_bencher::start_io() can hand it to librbd::RBD::AioCompletion before
// the callback is defined.
106 static void rbd_bencher_completion(void *c
, void *pc
);
// Per-request context passed to rbd_bencher_completion() as its callback
// argument. It carries the owning rbd_bencher and the bufferlist associated
// with the request (start_io() passes a freshly allocated bufferlist for
// reads and NULL for writes).
// NOTE(review): the declaration of `bl` and the rest of the struct are not
// visible in this listing.
109 struct bencher_completer
{
110 rbd_bencher
*bencher
;
114 bencher_completer(rbd_bencher
*bencher
, bufferlist
*bl
)
115 : bencher(bencher
), bl(bl
)
// Image that start_io() issues the asynchronous reads/writes against.
126 librbd::Image
*image
;
// Mutex taken by start_io() and wait_for().
127 ceph::mutex lock
= ceph::make_mutex("rbd_bencher::lock");
// Waited on in wait_for() and notified from rbd_bencher_completion().
128 ceph::condition_variable cond
;
// Construct a bencher for image `i`. For write and readwrite benchmarks a
// single io_size-byte buffer is filled with one rand()-chosen byte value and
// appended to write_bl, which start_io() then reuses for every write.
// NOTE(review): do_bench() constructs the bencher before its srand() call,
// so the fill byte appears to come from the default rand() seed -- confirm
// whether that matters.
// NOTE(review): the member-initialiser list is not visible in this listing.
134 explicit rbd_bencher(librbd::Image
*i
, io_type_t io_type
, uint64_t io_size
)
140 if (io_type
== IO_TYPE_WRITE
|| io_type
== IO_TYPE_RW
) {
141 bufferptr
bp(io_size
);
142 memset(bp
.c_str(), rand() & 0xff, io_size
);
143 write_bl
.push_back(bp
);
// Issue one asynchronous I/O of `len` bytes at offset `off` with the given
// librados op flags, holding `lock` for the duration of the call.
//
// Two paths are visible:
//  - read:  allocates a new bufferlist, wraps it in a bencher_completer and
//           calls aio_read2();
//  - write: uses a bencher_completer with a NULL bufferlist and calls
//           aio_write2() with the shared write_bl payload.
// In both cases rbd_bencher_completion is the completion callback.
// NOTE(review): the condition selecting between the two paths (presumably
// `read_flag`), the use of `max` and any in-flight accounting are not
// visible in this listing.
// NOTE(review): the completer and the read bufferlist are raw `new`
// allocations; their release is not visible here -- confirm the callback
// frees them.
147 void start_io(int max
, uint64_t off
, uint64_t len
, int op_flags
, bool read_flag
)
150 std::lock_guard l
{lock
};
154 librbd::RBD::AioCompletion
*c
;
// Read path: the completion owns a fresh destination buffer.
156 bufferlist
*read_bl
= new bufferlist();
157 c
= new librbd::RBD::AioCompletion((void *)(new bencher_completer(this, read_bl
)),
158 rbd_bencher_completion
);
159 image
->aio_read2(off
, len
, *read_bl
, c
, op_flags
);
// Write path: no per-request buffer, the payload is the shared write_bl.
161 c
= new librbd::RBD::AioCompletion((void *)(new bencher_completer(this, NULL
)),
162 rbd_bencher_completion
);
163 image
->aio_write2(off
, len
, write_bl
, c
, op_flags
);
// Block until at most `max` requests are in flight.
//
// The wait polls the condition variable with a 200ms timeout. When
// `interrupt_on_terminating` is true the loop also stops as soon as the
// global `terminating` flag is set.
//
// Returns -EINTR if `terminating` is set when the wait ends, 0 otherwise.
// Note that -EINTR is returned whenever the flag is set, even when
// `interrupt_on_terminating` is false and the wait ran to completion.
167 int wait_for(int max
, bool interrupt_on_terminating
) {
168 std::unique_lock l
{lock
};
169 while (in_flight
> max
&& !(terminating
&& interrupt_on_terminating
)) {
170 cond
.wait_for(l
, 200ms
);
173 return terminating
? -EINTR
: 0;
// AIO completion callback for requests started by rbd_bencher::start_io().
//
// vc -- the librbd::RBD::AioCompletion that finished.
// pc -- the bencher_completer created for the request.
//
// Visible behaviour: reads the completion's return value, then
//  - for a write benchmark, any non-zero result prints "write error: ..."
//    and terminates the process with exit();
//  - for a read benchmark, a result different from io_size prints
//    "read error: ..." and terminates the process with exit();
// and finally wakes every waiter on the bencher's condition variable.
//
// NOTE(review): neither visible check matches IO_TYPE_RW, so failures in
// readwrite mode do not appear to be detected here -- confirm.
// NOTE(review): both messages go to stdout rather than stderr, the read
// branch uses unqualified `cout`, and for a short read with a non-negative
// `ret` cpp_strerror() is given a byte count rather than an errno.
// NOTE(review): in-flight accounting and cleanup of `c` / `bc` are not
// visible in this listing.
178 void rbd_bencher_completion(void *vc
, void *pc
)
180 librbd::RBD::AioCompletion
*c
= (librbd::RBD::AioCompletion
*)vc
;
181 bencher_completer
*bc
= static_cast<bencher_completer
*>(pc
);
182 rbd_bencher
*b
= bc
->bencher
;
183 //cout << "complete " << c << std::endl;
184 int ret
= c
->get_return_value();
185 if (b
->io_type
== IO_TYPE_WRITE
&& ret
!= 0) {
186 std::cout
<< "write error: " << cpp_strerror(ret
) << std::endl
;
// Exit status is the absolute value of the error code.
187 exit(ret
< 0 ? -ret
: ret
);
188 } else if (b
->io_type
== IO_TYPE_READ
&& (unsigned int)ret
!= b
->io_size
) {
189 cout
<< "read error: " << cpp_strerror(ret
) << std::endl
;
190 exit(ret
< 0 ? -ret
: ret
);
// Wake wait_for() so it can re-check the in-flight count.
194 b
->cond
.notify_all();
// Decide whether the next request should be a read, given the desired read
// percentage. Draws rand() % 100 (a value in [0, 99]) and compares it with
// `read_proportion`, so 0 never selects the read branch and 100 always does.
// NOTE(review): the return statements are not visible in this listing;
// presumably the function returns true when the draw is below the
// proportion.
200 bool should_read(uint64_t read_proportion
)
202 uint64_t rand_num
= rand() % 100;
204 if (rand_num
< read_proportion
)
// Run the benchmark against an already opened image.
//
// image           -- image to read from / write to.
// io_type         -- read, write or readwrite benchmark.
// io_size         -- size in bytes of every request.
// io_threads      -- number of requests kept in flight.
// io_bytes        -- total number of bytes to issue.
// io_pattern      -- random, sequential or full-sequential offsets.
// read_proportion -- percentage of reads, passed to should_read().
//
// Visible flow: validate io_size, flush the image, print the benchmark
// header, compute one starting offset per "thread", then loop issuing
// requests (throttled through rbd_bencher::wait_for) while printing a
// rolling per-second progress line, and finally wait for all requests,
// flush for non-read benchmarks and print the totals.
// NOTE(review): many original lines (declarations of `size`, `i`, `off`,
// `ios`, `read_ops`, `write_ops`, the accumulators, error returns and
// closing braces) are not visible in this listing; the comments below
// describe only what is shown.
210 int do_bench(librbd::Image
& image
, io_type_t io_type
,
211 uint64_t io_size
, uint64_t io_threads
,
212 uint64_t io_bytes
, io_pattern_t io_pattern
,
213 uint64_t read_proportion
)
// Reject a request size larger than `size` (the image size, per the message).
217 if (io_size
> size
) {
218 std::cerr
<< "rbd: io-size " << byte_u_t(io_size
) << " "
219 << "larger than image size " << byte_u_t(size
) << std::endl
;
// Reject a request size that does not fit in 32 bits.
223 if (io_size
> std::numeric_limits
<uint32_t>::max()) {
224 std::cerr
<< "rbd: io-size should be less than 4G" << std::endl
;
// Flush before starting; -EROFS is tolerated only for a read benchmark.
228 int r
= image
.flush();
229 if (r
< 0 && (r
!= -EROFS
|| io_type
!= IO_TYPE_READ
)) {
230 std::cerr
<< "rbd: failed to flush: " << cpp_strerror(r
) << std::endl
;
234 rbd_bencher
b(&image
, io_type
, io_size
);
// Header line describing the benchmark parameters.
236 std::cout
<< "bench "
237 << " type " << (io_type
== IO_TYPE_READ
? "read" :
238 io_type
== IO_TYPE_WRITE
? "write" : "readwrite")
239 << (io_type
== IO_TYPE_RW
? " read:write=" +
240 to_string(read_proportion
) + ":" + to_string(100 - read_proportion
) : "")
241 << " io_size " << io_size
242 << " io_threads " << io_threads
243 << " bytes " << io_bytes
245 switch (io_pattern
) {
246 case IO_PATTERN_RAND
:
247 std::cout
<< "random";
250 std::cout
<< "sequential";
252 case IO_PATTERN_FULL_SEQ
:
253 std::cout
<< "full sequential";
259 std::cout
<< std::endl
;
// Seed rand() from the wall clock for offset selection and should_read().
261 srand(time(NULL
) % (unsigned long) -1);
263 coarse_mono_time start
= coarse_mono_clock::now();
264 chrono::duration
<double> last
= chrono::duration
<double>::zero();
// One current offset per in-flight slot ("thread").
267 vector
<uint64_t> thread_offset
;
// Bytes assigned to each thread in sequential mode, rounded down to a
// multiple of io_size.
// NOTE(review): this is 0 when size / io_size < io_threads, in which case
// all threads would start at offset 0 -- confirm intended.
269 uint64_t seq_chunk_length
= (size
/ io_size
/ io_threads
) * io_size
;;
271 // disturb all thread's offset
272 for (i
= 0; i
< io_threads
; i
++) {
273 uint64_t start_pos
= 0;
274 switch (io_pattern
) {
275 case IO_PATTERN_RAND
:
// Random io_size-aligned offset inside the image.
276 start_pos
= (rand() % (size
/ io_size
)) * io_size
;
// Start of this thread's own chunk (presumably the sequential case).
279 start_pos
= seq_chunk_length
* i
;
281 case IO_PATTERN_FULL_SEQ
:
// Threads start on consecutive io_size blocks.
282 start_pos
= i
* io_size
;
287 thread_offset
.push_back(start_pos
);
// Rolling-sum accumulators over the last WINDOW_SIZE samples, used for the
// per-second progress rates below.
290 const int WINDOW_SIZE
= 5;
291 typedef boost::accumulators::accumulator_set
<
292 double, boost::accumulators::stats
<
293 boost::accumulators::tag::rolling_sum
> > RollingSum
;
296 boost::accumulators::tag::rolling_window::window_size
= WINDOW_SIZE
);
298 boost::accumulators::tag::rolling_window::window_size
= WINDOW_SIZE
);
300 boost::accumulators::tag::rolling_window::window_size
= WINDOW_SIZE
);
// Requests / bytes issued since the last progress sample.
301 uint64_t cur_ios
= 0;
302 uint64_t cur_off
= 0;
// Advise librados of the access pattern: random for the random pattern,
// sequential otherwise.
305 if (io_pattern
== IO_PATTERN_RAND
) {
306 op_flags
= LIBRADOS_OP_FLAG_FADVISE_RANDOM
;
308 op_flags
= LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL
;
311 printf(" SEC OPS OPS/SEC BYTES/SEC\n");
// Main loop: keep issuing rounds of requests until io_bytes have been issued.
316 for (off
= 0; off
< io_bytes
; ) {
// Issue one request per thread slot in this round.
320 while (i
< io_threads
&& off
< io_bytes
) {
321 bool read_flag
= should_read(read_proportion
);
// Throttle: wait for a free slot; may be interrupted by SIGINT/SIGTERM.
323 r
= b
.wait_for(io_threads
- 1, true);
327 b
.start_io(io_threads
, thread_offset
[i
], io_size
, op_flags
, read_flag
);
346 // Set the thread_offsets of next I/O
347 for (i
= 0; i
< io_threads
; ++i
) {
348 switch (io_pattern
) {
349 case IO_PATTERN_RAND
:
350 thread_offset
[i
] = (rand() % (size
/ io_size
)) * io_size
;
// While still inside the per-thread chunks, advance by one request.
353 if (off
< (seq_chunk_length
* io_threads
)) {
354 thread_offset
[i
] += io_size
;
356 // thread_offset is adjusted to the chunks unassigned to threads.
357 thread_offset
[i
] = off
+ (i
* io_size
);
// Wrap back to the start of the thread's chunk when the next request
// would run past the end of the image.
359 if (thread_offset
[i
] + io_size
> size
) {
360 thread_offset
[i
] = seq_chunk_length
* i
;
363 case IO_PATTERN_FULL_SEQ
:
// Interleaved stride: each thread skips over the other threads' blocks
// and wraps to its initial block at the end of the image.
364 thread_offset
[i
] += (io_size
* io_threads
);
365 if (thread_offset
[i
] >= size
) {
366 thread_offset
[i
] = i
* io_size
;
// Progress reporting: sample whenever the whole-second part of the elapsed
// time changes.
374 coarse_mono_time now
= coarse_mono_clock::now();
375 chrono::duration
<double> elapsed
= now
- start
;
376 if (last
== chrono::duration
<double>::zero()) {
378 } else if ((int)elapsed
.count() != (int)last
.count()) {
379 time_acc((elapsed
- last
).count());
380 ios_acc(static_cast<double>(cur_ios
));
381 off_acc(static_cast<double>(cur_off
));
// Rates are rolling sums divided by the rolling elapsed time.
385 double time_sum
= boost::accumulators::rolling_sum(time_acc
);
387 std::cout
<< (int)elapsed
.count();
// NOTE(review): io_threads is subtracted from the op count here, presumably
// to exclude requests still in flight -- confirm.
389 std::cout
<< (int)(ios
- io_threads
);
391 std::cout
<< boost::accumulators::rolling_sum(ios_acc
) / time_sum
;
393 std::cout
<< byte_u_t(boost::accumulators::rolling_sum(off_acc
) / time_sum
) << "/s"
// Drain: wait for every outstanding request, ignoring termination requests.
398 b
.wait_for(0, false);
// A final flush is only attempted for benchmarks that may have written.
400 if (io_type
!= IO_TYPE_READ
) {
403 std::cerr
<< "rbd: failed to flush at the end: " << cpp_strerror(r
)
// Final summary over the whole run.
408 coarse_mono_time now
= coarse_mono_clock::now();
409 chrono::duration
<double> elapsed
= now
- start
;
411 std::cout
<< "elapsed: " << (int)elapsed
.count() << " "
412 << "ops: " << ios
<< " "
413 << "ops/sec: " << (double)ios
/ elapsed
.count() << " "
414 << "bytes/sec: " << byte_u_t((double)off
/ elapsed
.count()) << "/s"
// Readwrite runs additionally report separate read and write totals; byte
// rates are derived as op count * io_size.
417 if (io_type
== IO_TYPE_RW
) {
418 std::cout
<< "read_ops: " << read_ops
<< " "
419 << "read_ops/sec: " << (double)read_ops
/ elapsed
.count() << " "
420 << "read_bytes/sec: " << byte_u_t((double)read_ops
* io_size
/ elapsed
.count()) << "/s"
423 std::cout
<< "write_ops: " << write_ops
<< " "
424 << "write_ops/sec: " << (double)write_ops
/ elapsed
.count() << " "
425 << "write_bytes/sec: " << byte_u_t((double)write_ops
* io_size
/ elapsed
.count()) << "/s"
// Register the command-line arguments shared by `bench` and `bench-write`:
// the image spec plus --io-size, --io-threads, --io-total, --io-pattern and
// --rw-mix-read. The defaults quoted in the help strings are applied in
// bench_execute(), not here.
433 void add_bench_common_options(po::options_description
*positional
,
434 po::options_description
*options
) {
435 at::add_image_spec_options(positional
, options
, at::ARGUMENT_MODIFIER_NONE
);
// Size / IOPattern are tag types that select the custom validate() overloads.
437 options
->add_options()
438 ("io-size", po::value
<Size
>(), "IO size (in B/K/M/G/T) [default: 4K]")
439 ("io-threads", po::value
<uint32_t>(), "ios in flight [default: 16]")
440 ("io-total", po::value
<Size
>(), "total size for IO (in B/K/M/G/T) [default: 1G]")
441 ("io-pattern", po::value
<IOPattern
>(), "IO pattern (rand, seq, or full-seq) [default: seq]")
442 ("rw-mix-read", po::value
<uint64_t>(), "read proportion in readwrite (<= 100) [default: 50]");
// Argument registration for the deprecated `bench-write` command: only the
// common benchmark options (no --io-type).
445 void get_arguments_for_write(po::options_description
*positional
,
446 po::options_description
*options
) {
447 add_bench_common_options(positional
, options
);
// Argument registration for the `bench` command: the common benchmark
// options plus a required --io-type (parsed by the IOType validator).
450 void get_arguments_for_bench(po::options_description
*positional
,
451 po::options_description
*options
) {
452 add_bench_common_options(positional
, options
);
454 options
->add_options()
455 ("io-type", po::value
<IOType
>()->required(), "IO type (read, write, or readwrite(rw))");
// Shared implementation of `bench` and `bench-write`.
//
// vm            -- parsed command-line options.
// bench_io_type -- read, write or readwrite.
//
// Resolves the image spec, reads the benchmark options (the fallback values
// visible below are 4096 bytes io-size, 16 io-threads, 1 << 30 bytes
// io-total, sequential pattern, 50% reads for readwrite), opens the image,
// installs signal handlers, runs do_bench() and removes the handlers again.
// NOTE(review): the error-return statements and closing braces are not
// visible in this listing.
458 int bench_execute(const po::variables_map
&vm
, io_type_t bench_io_type
) {
459 size_t arg_index
= 0;
460 std::string pool_name
;
461 std::string namespace_name
;
462 std::string image_name
;
463 std::string snap_name
;
// A snapshot may only be named for a read benchmark.
464 utils::SnapshotPresence snap_presence
= utils::SNAPSHOT_PRESENCE_NONE
;
465 if (bench_io_type
== IO_TYPE_READ
)
466 snap_presence
= utils::SNAPSHOT_PRESENCE_PERMITTED
;
468 int r
= utils::get_pool_image_snapshot_names(
469 vm
, at::ARGUMENT_MODIFIER_NONE
, &arg_index
, &pool_name
, &namespace_name
,
470 &image_name
, &snap_name
, true, snap_presence
, utils::SPEC_VALIDATION_NONE
);
// --io-size: must be non-zero.
475 uint64_t bench_io_size
;
476 if (vm
.count("io-size")) {
477 bench_io_size
= vm
["io-size"].as
<uint64_t>();
479 bench_io_size
= 4096;
481 if (bench_io_size
== 0) {
482 std::cerr
<< "rbd: --io-size should be greater than zero." << std::endl
;
// --io-threads: must be non-zero.
486 uint32_t bench_io_threads
;
487 if (vm
.count("io-threads")) {
488 bench_io_threads
= vm
["io-threads"].as
<uint32_t>();
490 bench_io_threads
= 16;
492 if (bench_io_threads
== 0) {
493 std::cerr
<< "rbd: --io-threads should be greater than zero." << std::endl
;
// --io-total.
497 uint64_t bench_bytes
;
498 if (vm
.count("io-total")) {
499 bench_bytes
= vm
["io-total"].as
<uint64_t>();
501 bench_bytes
= 1 << 30;
// --io-pattern.
504 io_pattern_t bench_pattern
;
505 if (vm
.count("io-pattern")) {
506 bench_pattern
= vm
["io-pattern"].as
<io_pattern_t
>();
508 bench_pattern
= IO_PATTERN_SEQ
;
// Read percentage: fixed at 100 for read and 0 for write benchmarks;
// --rw-mix-read applies otherwise and must not exceed 100.
511 uint64_t bench_read_proportion
;
512 if (bench_io_type
== IO_TYPE_READ
) {
513 bench_read_proportion
= 100;
514 } else if (bench_io_type
== IO_TYPE_WRITE
) {
515 bench_read_proportion
= 0;
517 if (vm
.count("rw-mix-read")) {
518 bench_read_proportion
= vm
["rw-mix-read"].as
<uint64_t>();
520 bench_read_proportion
= 50;
523 if (bench_read_proportion
> 100) {
524 std::cerr
<< "rbd: --rw-mix-read should not be larger than 100." << std::endl
;
529 librados::Rados rados
;
530 librados::IoCtx io_ctx
;
532 r
= utils::init_and_open_image(pool_name
, namespace_name
, image_name
, "",
533 snap_name
, false, &rados
, &io_ctx
, &image
);
// Install handlers so SIGINT/SIGTERM reach handle_signal() during the run.
538 init_async_signal_handler();
539 register_async_signal_handler(SIGHUP
, sighup_handler
);
540 register_async_signal_handler_oneshot(SIGINT
, handle_signal
);
541 register_async_signal_handler_oneshot(SIGTERM
, handle_signal
);
543 r
= do_bench(image
, bench_io_type
, bench_io_size
, bench_io_threads
,
544 bench_bytes
, bench_pattern
, bench_read_proportion
);
// Remove the handlers again once the benchmark has returned.
546 unregister_async_signal_handler(SIGHUP
, sighup_handler
);
547 unregister_async_signal_handler(SIGINT
, handle_signal
);
548 unregister_async_signal_handler(SIGTERM
, handle_signal
);
549 shutdown_async_signal_handler();
552 std::cerr
<< "bench failed: " << cpp_strerror(r
) << std::endl
;
// Entry point of the deprecated `bench-write` command: prints a deprecation
// notice to stderr and runs a write benchmark via bench_execute().
// `ceph_global_init_args` is not used in the visible body.
558 int execute_for_write(const po::variables_map
&vm
,
559 const std::vector
<std::string
> &ceph_global_init_args
) {
560 std::cerr
<< "rbd: bench-write is deprecated, use rbd bench --io-type write ..." << std::endl
;
561 return bench_execute(vm
, IO_TYPE_WRITE
);
// Entry point of the `bench` command: reads the --io-type option and runs
// bench_execute() with it. A message is printed to stderr when --io-type is
// absent.
// NOTE(review): the error return for the missing-option case is not visible
// in this listing.
// `ceph_global_init_args` is not used in the visible body.
564 int execute_for_bench(const po::variables_map
&vm
,
565 const std::vector
<std::string
> &ceph_global_init_args
) {
566 io_type_t bench_io_type
;
567 if (vm
.count("io-type")) {
568 bench_io_type
= vm
["io-type"].as
<io_type_t
>();
570 std::cerr
<< "rbd: --io-type must be specified." << std::endl
;
574 return bench_execute(vm
, bench_io_type
);
// Shell registration of the deprecated `bench-write` command, wired to
// get_arguments_for_write / execute_for_write.
// NOTE(review): the meaning of the trailing `false` argument is defined by
// Shell::Action and is not visible here (action_bench omits it).
577 Shell::Action
action_write(
578 {"bench-write"}, {}, "Simple write benchmark. (Deprecated, please use `rbd bench --io-type write` instead.)",
579 "", &get_arguments_for_write
, &execute_for_write
, false);
// Shell registration of the `bench` command, wired to
// get_arguments_for_bench / execute_for_bench.
581 Shell::Action
action_bench(
582 {"bench"}, {}, "Simple benchmark.", "", &get_arguments_for_bench
, &execute_for_bench
);
585 } // namespace action