1 // -*- mode:C++; tab-width:8; c-basic-offset:8; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=8 smarttab
4 * Copyright (C) 1991, NeXT Computer, Inc. All Rights Reserverd.
7 * Author: Avadis Tevanian, Jr.
9 * File system exerciser.
11 * Rewritten 8/98 by Conrad Minshall.
13 * Small changes to work under Linux -- davej.
15 * Checks for mmap last-page zero fill.
18 #include <sys/types.h>
23 #if defined(__FreeBSD__)
30 #include <sys/ioctl.h>
32 #if defined(__linux__)
50 #include "include/compat.h"
51 #include "include/intarith.h"
52 #if defined(WITH_KRBD)
53 #include "include/krbd.h"
55 #include "include/rados/librados.h"
56 #include "include/rados/librados.hpp"
57 #include "include/rbd/librbd.h"
58 #include "include/rbd/librbd.hpp"
59 #include "common/Cond.h"
60 #include "common/SubProcess.h"
61 #include "common/safe_io.h"
62 #include "journal/Journaler.h"
63 #include "journal/ReplayEntry.h"
64 #include "journal/ReplayHandler.h"
65 #include "journal/Settings.h"
67 #include <boost/scope_exit.hpp>
69 #define NUMPRINTCOLUMNS 32 /* # columns of data to print on each line */
72 * A log entry is an operation and a bunch of arguments.
82 struct log_entry oplog
[LOGSIZE
]; /* the log */
83 int logptr
= 0; /* current position in log */
84 int logcount
= 0; /* total ops */
87 * The operation matrix is complex due to conditional execution of different
88 * features. Hence when we come to deciding what operation to run, we need to
89 * be careful in how we select the different operations. The active operations
90 * are mapped to numbers as follows:
101 * COMPAREANDWRITE: - 8
103 * When mapped read/writes are disabled, they are simply converted to normal
104 * reads and writes. When fallocate/fpunch calls are disabled, they are
105 * converted to OP_SKIPPED. Hence OP_SKIPPED needs to have a number higher than
106 * the operation selection matrix, as does the OP_CLOSEOPEN which is an
107 * operation modifier rather than an operation in itself.
109 * Because of the "lite" version, we also need to have different "maximum
110 * operation" defines to allow the ops to be selected correctly based on the
114 /* common operations */
118 #define OP_MAPWRITE 3
119 #define OP_MAX_LITE 4
121 /* !lite operations */
122 #define OP_TRUNCATE 4
123 #define OP_FALLOCATE 5
124 #define OP_PUNCH_HOLE 6
125 #define OP_WRITESAME 7
126 #define OP_COMPARE_AND_WRITE 8
127 /* rbd-specific operations */
129 #define OP_FLATTEN 10
130 #define OP_MAX_FULL 11
132 /* operation modifiers */
133 #define OP_CLOSEOPEN 100
134 #define OP_SKIPPED 101
137 #define PAGE_SIZE get_page_size()
139 #define PAGE_MASK (PAGE_SIZE - 1)
142 char *original_buf
; /* a pointer to the original data */
143 char *good_buf
; /* a pointer to the correct data */
144 char *temp_buf
; /* a pointer to the current data */
150 unsigned long testcalls
= 0; /* calls to function "test" */
152 const char* cluster_name
= "ceph"; /* --cluster optional */
153 const char* client_id
= "admin"; /* --id optional */
155 unsigned long simulatedopcount
= 0; /* -b flag */
156 int closeprob
= 0; /* -c flag */
157 int debug
= 0; /* -d flag */
158 unsigned long debugstart
= 0; /* -D flag */
159 int flush_enabled
= 0; /* -f flag */
160 int deep_copy
= 0; /* -g flag */
161 int holebdy
= 1; /* -h flag */
162 bool journal_replay
= false; /* -j flag */
163 int keep_on_success
= 0; /* -k flag */
164 int do_fsync
= 0; /* -y flag */
165 unsigned long maxfilelen
= 256 * 1024; /* -l flag */
166 int sizechecks
= 1; /* -n flag disables them */
167 int maxoplen
= 64 * 1024; /* -o flag */
168 int quiet
= 0; /* -q flag */
169 unsigned long progressinterval
= 0; /* -p flag */
170 int readbdy
= 1; /* -r flag */
171 int style
= 0; /* -s flag */
172 int prealloc
= 0; /* -x flag */
173 int truncbdy
= 1; /* -t flag */
174 int writebdy
= 1; /* -w flag */
175 long monitorstart
= -1; /* -m flag */
176 long monitorend
= -1; /* -m flag */
177 int lite
= 0; /* -L flag */
178 long numops
= -1; /* -N flag */
179 int randomoplen
= 1; /* -O flag disables it */
180 int seed
= 1; /* -S flag */
181 int mapped_writes
= 0; /* -W flag disables */
182 int fallocate_calls
= 0; /* -F flag disables */
183 int punch_hole_calls
= 1; /* -H flag disables */
184 int clone_calls
= 1; /* -C flag disables */
185 int randomize_striping
= 1; /* -U flag disables */
186 int randomize_parent_overlap
= 1;
187 int mapped_reads
= 0; /* -R flag disables it */
189 int o_direct
= 0; /* -Z flag */
197 FILE * fsxlogf
= NULL
;
202 vwarnc(int code
, const char *fmt
, va_list ap
) {
203 fprintf(stderr
, "fsx: ");
205 vfprintf(stderr
, fmt
, ap
);
206 fprintf(stderr
, ": ");
208 fprintf(stderr
, "%s\n", strerror(code
));
212 warn(const char * fmt
, ...) {
215 vwarnc(errno
, fmt
, ap
);
219 #define BUF_SIZE 1024
222 prt(const char *fmt
, ...)
225 char buffer
[BUF_SIZE
];
228 vsnprintf(buffer
, BUF_SIZE
, fmt
, args
);
230 fprintf(stdout
, "%s", buffer
);
232 fprintf(fsxlogf
, "%s", buffer
);
236 prterr(const char *prefix
)
238 prt("%s%s%s\n", prefix
, prefix
? ": " : "", strerror(errno
));
242 prterrcode(const char *prefix
, int code
)
244 prt("%s%s%s\n", prefix
, prefix
? ": " : "", strerror(-code
));
/*
 * Write "<msg>: <description>" to stderr.
 *
 * msg - text printed before the error description.
 * err - negated errno value; it is negated again before being handed to
 *       strerror().
 */
void
simple_err(const char *msg, int err)
{
	const char *description = strerror(-err);

	fprintf(stderr, "%s: %s\n", msg, description);
}
256 std::mt19937 random_generator
;
261 return random_generator();
264 int get_features(uint64_t* features
);
265 void replay_imagename(char *buf
, size_t len
, int clones
);
269 static const std::string
JOURNAL_CLIENT_ID("fsx");
271 struct ReplayHandler
: public journal::ReplayHandler
{
272 journal::Journaler
*journaler
;
273 journal::Journaler
*replay_journaler
;
276 ReplayHandler(journal::Journaler
*journaler
,
277 journal::Journaler
*replay_journaler
, Context
*on_finish
)
278 : journaler(journaler
), replay_journaler(replay_journaler
),
279 on_finish(on_finish
) {
282 void handle_entries_available() override
{
284 journal::ReplayEntry replay_entry
;
285 if (!journaler
->try_pop_front(&replay_entry
)) {
289 replay_journaler
->append(0, replay_entry
.get_data());
293 void handle_complete(int r
) override
{
294 on_finish
->complete(r
);
298 int get_image_id(librados::IoCtx
&io_ctx
, const char *image_name
,
299 std::string
*image_id
) {
302 int r
= rbd
.open(io_ctx
, image
, image_name
);
304 simple_err("failed to open image", r
);
308 rbd_image_info_t info
;
309 r
= image
.stat(info
, sizeof(info
));
311 simple_err("failed to stat image", r
);
315 *image_id
= std::string(&info
.block_name_prefix
[strlen(RBD_DATA_PREFIX
)]);
319 int register_journal(rados_ioctx_t ioctx
, const char *image_name
) {
320 librados::IoCtx io_ctx
;
321 librados::IoCtx::from_rados_ioctx_t(ioctx
, io_ctx
);
323 std::string image_id
;
324 int r
= get_image_id(io_ctx
, image_name
, &image_id
);
329 journal::Journaler
journaler(io_ctx
, image_id
, JOURNAL_CLIENT_ID
, {},
331 r
= journaler
.register_client(bufferlist());
333 simple_err("failed to register journal client", r
);
339 int unregister_journal(rados_ioctx_t ioctx
, const char *image_name
) {
340 librados::IoCtx io_ctx
;
341 librados::IoCtx::from_rados_ioctx_t(ioctx
, io_ctx
);
343 std::string image_id
;
344 int r
= get_image_id(io_ctx
, image_name
, &image_id
);
349 journal::Journaler
journaler(io_ctx
, image_id
, JOURNAL_CLIENT_ID
, {},
351 r
= journaler
.unregister_client();
353 simple_err("failed to unregister journal client", r
);
359 int create_replay_image(rados_ioctx_t ioctx
, int order
,
360 uint64_t stripe_unit
, int stripe_count
,
361 const char *replay_image_name
,
362 const char *last_replay_image_name
) {
363 librados::IoCtx io_ctx
;
364 librados::IoCtx::from_rados_ioctx_t(ioctx
, io_ctx
);
367 int r
= get_features(&features
);
373 if (last_replay_image_name
== nullptr) {
374 r
= rbd
.create2(io_ctx
, replay_image_name
, 0, features
, &order
);
376 r
= rbd
.clone2(io_ctx
, last_replay_image_name
, "snap",
377 io_ctx
, replay_image_name
, features
, &order
,
378 stripe_unit
, stripe_count
);
382 simple_err("failed to create replay image", r
);
389 int replay_journal(rados_ioctx_t ioctx
, const char *image_name
,
390 const char *replay_image_name
) {
391 librados::IoCtx io_ctx
;
392 librados::IoCtx::from_rados_ioctx_t(ioctx
, io_ctx
);
394 std::string image_id
;
395 int r
= get_image_id(io_ctx
, image_name
, &image_id
);
400 std::string replay_image_id
;
401 r
= get_image_id(io_ctx
, replay_image_name
, &replay_image_id
);
406 journal::Journaler
journaler(io_ctx
, image_id
, JOURNAL_CLIENT_ID
, {},
408 C_SaferCond init_ctx
;
409 journaler
.init(&init_ctx
);
410 BOOST_SCOPE_EXIT_ALL( (&journaler
) ) {
411 journaler
.shut_down();
416 simple_err("failed to initialize journal", r
);
420 journal::Journaler
replay_journaler(io_ctx
, replay_image_id
, "", {},
423 C_SaferCond replay_init_ctx
;
424 replay_journaler
.init(&replay_init_ctx
);
425 BOOST_SCOPE_EXIT_ALL( (&replay_journaler
) ) {
426 replay_journaler
.shut_down();
429 r
= replay_init_ctx
.wait();
431 simple_err("failed to initialize replay journal", r
);
435 replay_journaler
.start_append(0);
437 C_SaferCond replay_ctx
;
438 ReplayHandler
replay_handler(&journaler
, &replay_journaler
,
441 // copy journal events from source image to replay image
442 journaler
.start_replay(&replay_handler
);
443 r
= replay_ctx
.wait();
445 journaler
.stop_replay();
447 C_SaferCond stop_ctx
;
448 replay_journaler
.stop_append(&stop_ctx
);
449 int stop_r
= stop_ctx
.wait();
450 if (r
== 0 && stop_r
< 0) {
455 simple_err("failed to replay journal", r
);
461 r
= rbd
.open(io_ctx
, image
, replay_image_name
);
463 simple_err("failed to open replay image", r
);
467 // perform an IO op to initiate the journal replay
469 r
= static_cast<ssize_t
>(image
.write(0, 0, bl
));
471 simple_err("failed to write to replay image", r
);
477 int finalize_journal(rados_ioctx_t ioctx
, const char *imagename
, int clones
,
478 int order
, uint64_t stripe_unit
, int stripe_count
) {
479 char replayimagename
[1024];
480 replay_imagename(replayimagename
, sizeof(replayimagename
), clones
);
482 char lastreplayimagename
[1024];
484 replay_imagename(lastreplayimagename
,
485 sizeof(lastreplayimagename
), clones
- 1);
488 int ret
= create_replay_image(ioctx
, order
, stripe_unit
,
489 stripe_count
, replayimagename
,
490 clones
> 0 ? lastreplayimagename
:
496 ret
= replay_journal(ioctx
, imagename
, replayimagename
);
503 } // anonymous namespace
510 const char *name
; /* image name */
511 rbd_image_t image
; /* image handle */
512 const char *krbd_name
; /* image /dev/rbd<id> name */ /* reused for nbd test */
513 int krbd_fd
; /* image /dev/rbd<id> fd */ /* reused for nbd test */
516 #define RBD_CTX_INIT (struct rbd_ctx) { NULL, NULL, NULL, -1}
518 struct rbd_operations
{
519 int (*open
)(const char *name
, struct rbd_ctx
*ctx
);
520 int (*close
)(struct rbd_ctx
*ctx
);
521 ssize_t (*read
)(struct rbd_ctx
*ctx
, uint64_t off
, size_t len
, char *buf
);
522 ssize_t (*write
)(struct rbd_ctx
*ctx
, uint64_t off
, size_t len
, const char *buf
);
523 int (*flush
)(struct rbd_ctx
*ctx
);
524 int (*discard
)(struct rbd_ctx
*ctx
, uint64_t off
, uint64_t len
);
525 int (*get_size
)(struct rbd_ctx
*ctx
, uint64_t *size
);
526 int (*resize
)(struct rbd_ctx
*ctx
, uint64_t size
);
527 int (*clone
)(struct rbd_ctx
*ctx
, const char *src_snapname
,
528 const char *dst_imagename
, int *order
, int stripe_unit
,
530 int (*flatten
)(struct rbd_ctx
*ctx
);
531 ssize_t (*writesame
)(struct rbd_ctx
*ctx
, uint64_t off
, size_t len
,
532 const char *buf
, size_t data_len
);
533 ssize_t (*compare_and_write
)(struct rbd_ctx
*ctx
, uint64_t off
, size_t len
,
534 const char *cmp_buf
, const char *buf
);
537 char *pool
; /* name of the pool our test image is in */
538 char *iname
; /* name of our test image */
539 rados_t cluster
; /* handle for our test cluster */
540 rados_ioctx_t ioctx
; /* handle for our test pool */
541 #if defined(WITH_KRBD)
542 struct krbd_ctx
*krbd
; /* handle for libkrbd */
544 bool skip_partial_discard
; /* rbd_skip_partial_discard config value*/
546 int get_features(uint64_t* features
) {
548 int r
= rados_conf_get(cluster
, "rbd_default_features", buf
,
551 simple_err("Could not get rbd_default_features value", r
);
555 *features
= strtol(buf
, NULL
, 0);
558 *features
|= RBD_FEATURE_LAYERING
;
560 if (journal_replay
) {
561 *features
|= (RBD_FEATURE_EXCLUSIVE_LOCK
|
562 RBD_FEATURE_JOURNALING
);
568 * librbd/krbd rbd_operations handlers. Given the rest of fsx.c, no
569 * attempt to do error handling is made in these handlers.
573 __librbd_open(const char *name
, struct rbd_ctx
*ctx
)
578 ceph_assert(!ctx
->name
&& !ctx
->image
&&
579 !ctx
->krbd_name
&& ctx
->krbd_fd
< 0);
581 ret
= rbd_open(ioctx
, name
, &image
, NULL
);
583 prt("rbd_open(%s) failed\n", name
);
587 ctx
->name
= strdup(name
);
589 ctx
->krbd_name
= NULL
;
596 librbd_open(const char *name
, struct rbd_ctx
*ctx
)
598 return __librbd_open(name
, ctx
);
602 __librbd_close(struct rbd_ctx
*ctx
)
606 ceph_assert(ctx
->name
&& ctx
->image
);
608 ret
= rbd_close(ctx
->image
);
610 prt("rbd_close(%s) failed\n", ctx
->name
);
614 free((void *)ctx
->name
);
623 librbd_close(struct rbd_ctx
*ctx
)
625 return __librbd_close(ctx
);
629 librbd_verify_object_map(struct rbd_ctx
*ctx
)
633 n
= rbd_get_flags(ctx
->image
, &flags
);
635 prt("rbd_get_flags() failed\n");
639 if ((flags
& RBD_FLAG_OBJECT_MAP_INVALID
) != 0) {
640 prt("rbd_get_flags() indicates object map is invalid\n");
647 librbd_read(struct rbd_ctx
*ctx
, uint64_t off
, size_t len
, char *buf
)
651 n
= rbd_read(ctx
->image
, off
, len
, buf
);
653 prt("rbd_read(%llu, %zu) failed\n", off
, len
);
659 librbd_write(struct rbd_ctx
*ctx
, uint64_t off
, size_t len
, const char *buf
)
664 n
= rbd_write(ctx
->image
, off
, len
, buf
);
666 prt("rbd_write(%llu, %zu) failed\n", off
, len
);
670 ret
= librbd_verify_object_map(ctx
);
678 librbd_flush(struct rbd_ctx
*ctx
)
682 ret
= rbd_flush(ctx
->image
);
684 prt("rbd_flush failed\n");
688 return librbd_verify_object_map(ctx
);
692 librbd_discard(struct rbd_ctx
*ctx
, uint64_t off
, uint64_t len
)
696 ret
= rbd_discard(ctx
->image
, off
, len
);
698 prt("rbd_discard(%llu, %llu) failed\n", off
, len
);
702 return librbd_verify_object_map(ctx
);
706 librbd_writesame(struct rbd_ctx
*ctx
, uint64_t off
, size_t len
,
707 const char *buf
, size_t data_len
)
712 n
= rbd_writesame(ctx
->image
, off
, len
, buf
, data_len
, 0);
714 prt("rbd_writesame(%llu, %zu) failed\n", off
, len
);
718 ret
= librbd_verify_object_map(ctx
);
726 librbd_compare_and_write(struct rbd_ctx
*ctx
, uint64_t off
, size_t len
,
727 const char *cmp_buf
, const char *buf
)
731 uint64_t mismatch_off
= 0;
733 n
= rbd_compare_and_write(ctx
->image
, off
, len
, cmp_buf
, buf
, &mismatch_off
, 0);
737 prt("rbd_compare_and_write mismatch(%llu, %zu, %llu) failed\n",
738 off
, len
, mismatch_off
);
742 ret
= librbd_verify_object_map(ctx
);
751 librbd_get_size(struct rbd_ctx
*ctx
, uint64_t *size
)
755 ret
= rbd_get_size(ctx
->image
, size
);
757 prt("rbd_get_size failed\n");
765 __librbd_resize(struct rbd_ctx
*ctx
, uint64_t size
)
769 ret
= rbd_resize(ctx
->image
, size
);
771 prt("rbd_resize(%llu) failed\n", size
);
775 return librbd_verify_object_map(ctx
);
779 librbd_resize(struct rbd_ctx
*ctx
, uint64_t size
)
781 return __librbd_resize(ctx
, size
);
785 __librbd_deep_copy(struct rbd_ctx
*ctx
, const char *src_snapname
,
786 const char *dst_imagename
, uint64_t features
, int *order
,
787 int stripe_unit
, int stripe_count
) {
790 rbd_image_options_t opts
;
791 rbd_image_options_create(&opts
);
792 BOOST_SCOPE_EXIT_ALL( (&opts
) ) {
793 rbd_image_options_destroy(opts
);
795 ret
= rbd_image_options_set_uint64(opts
, RBD_IMAGE_OPTION_FEATURES
,
797 ceph_assert(ret
== 0);
798 ret
= rbd_image_options_set_uint64(opts
, RBD_IMAGE_OPTION_ORDER
,
800 ceph_assert(ret
== 0);
801 ret
= rbd_image_options_set_uint64(opts
, RBD_IMAGE_OPTION_STRIPE_UNIT
,
803 ceph_assert(ret
== 0);
804 ret
= rbd_image_options_set_uint64(opts
, RBD_IMAGE_OPTION_STRIPE_COUNT
,
806 ceph_assert(ret
== 0);
808 ret
= rbd_snap_set(ctx
->image
, src_snapname
);
810 prt("rbd_snap_set(%s@%s) failed\n", ctx
->name
, src_snapname
);
814 ret
= rbd_deep_copy(ctx
->image
, ioctx
, dst_imagename
, opts
);
816 prt("rbd_deep_copy(%s@%s -> %s) failed\n",
817 ctx
->name
, src_snapname
, dst_imagename
);
821 ret
= rbd_snap_set(ctx
->image
, "");
823 prt("rbd_snap_set(%s@) failed\n", ctx
->name
);
828 ret
= rbd_open(ioctx
, dst_imagename
, &image
, nullptr);
830 prt("rbd_open(%s) failed\n", dst_imagename
);
834 ret
= rbd_snap_unprotect(image
, src_snapname
);
836 prt("rbd_snap_unprotect(%s@%s) failed\n", dst_imagename
,
841 ret
= rbd_snap_remove(image
, src_snapname
);
843 prt("rbd_snap_remove(%s@%s) failed\n", dst_imagename
,
848 ret
= rbd_close(image
);
850 prt("rbd_close(%s) failed\n", dst_imagename
);
858 __librbd_clone(struct rbd_ctx
*ctx
, const char *src_snapname
,
859 const char *dst_imagename
, int *order
, int stripe_unit
,
864 ret
= rbd_snap_create(ctx
->image
, src_snapname
);
866 prt("rbd_snap_create(%s@%s) failed\n", ctx
->name
,
871 ret
= rbd_snap_protect(ctx
->image
, src_snapname
);
873 prt("rbd_snap_protect(%s@%s) failed\n", ctx
->name
,
879 ret
= get_features(&features
);
885 ret
= __librbd_deep_copy(ctx
, src_snapname
, dst_imagename
, features
,
886 order
, stripe_unit
, stripe_count
);
888 prt("deep_copy(%s@%s -> %s) failed\n", ctx
->name
,
889 src_snapname
, dst_imagename
);
893 ret
= rbd_clone2(ioctx
, ctx
->name
, src_snapname
, ioctx
,
894 dst_imagename
, features
, order
,
895 stripe_unit
, stripe_count
);
897 prt("rbd_clone2(%s@%s -> %s) failed\n", ctx
->name
,
898 src_snapname
, dst_imagename
);
907 librbd_clone(struct rbd_ctx
*ctx
, const char *src_snapname
,
908 const char *dst_imagename
, int *order
, int stripe_unit
,
911 return __librbd_clone(ctx
, src_snapname
, dst_imagename
, order
,
912 stripe_unit
, stripe_count
);
916 __librbd_flatten(struct rbd_ctx
*ctx
)
920 ret
= rbd_flatten(ctx
->image
);
922 prt("rbd_flatten failed\n");
926 return librbd_verify_object_map(ctx
);
930 librbd_flatten(struct rbd_ctx
*ctx
)
932 return __librbd_flatten(ctx
);
935 const struct rbd_operations librbd_operations
= {
947 librbd_compare_and_write
,
950 #if defined(WITH_KRBD)
952 krbd_open(const char *name
, struct rbd_ctx
*ctx
)
959 ret
= __librbd_open(name
, ctx
);
963 ret
= rados_conf_get(cluster
, "rbd_default_map_options", buf
,
966 simple_err("Could not get rbd_default_map_options value", ret
);
970 ret
= krbd_map(krbd
, pool
, "", name
, "", buf
, &devnode
);
972 prt("krbd_map(%s) failed\n", name
);
976 fd
= open(devnode
, O_RDWR
| o_direct
);
979 prt("open(%s) failed\n", devnode
);
983 ctx
->krbd_name
= devnode
;
990 krbd_close(struct rbd_ctx
*ctx
)
994 ceph_assert(ctx
->krbd_name
&& ctx
->krbd_fd
>= 0);
996 if (close(ctx
->krbd_fd
) < 0) {
998 prt("close(%s) failed\n", ctx
->krbd_name
);
1002 ret
= krbd_unmap(krbd
, ctx
->krbd_name
, "");
1004 prt("krbd_unmap(%s) failed\n", ctx
->krbd_name
);
1008 free((void *)ctx
->krbd_name
);
1010 ctx
->krbd_name
= NULL
;
1013 return __librbd_close(ctx
);
1017 #if defined(__linux__)
1019 krbd_read(struct rbd_ctx
*ctx
, uint64_t off
, size_t len
, char *buf
)
1023 n
= pread(ctx
->krbd_fd
, buf
, len
, off
);
1026 prt("pread(%llu, %zu) failed\n", off
, len
);
1034 krbd_write(struct rbd_ctx
*ctx
, uint64_t off
, size_t len
, const char *buf
)
1038 n
= pwrite(ctx
->krbd_fd
, buf
, len
, off
);
1041 prt("pwrite(%llu, %zu) failed\n", off
, len
);
1049 __krbd_flush(struct rbd_ctx
*ctx
, bool invalidate
)
1057 * BLKFLSBUF will sync the filesystem on top of the device (we
1058 * don't care about that here, since we write directly to it),
1059 * write out any dirty buffers and invalidate the buffer cache.
1060 * It won't do a hardware cache flush.
1062 * fsync() will write out any dirty buffers and do a hardware
1063 * cache flush (which we don't care about either, because for
1064 * krbd it's a noop). It won't try to empty the buffer cache
1065 * nor poke the filesystem before writing out.
1067 * Given that, for our purposes, fsync is a flush, while
1068 * BLKFLSBUF is a flush+invalidate.
1071 ret
= ioctl(ctx
->krbd_fd
, BLKFLSBUF
, NULL
);
1073 ret
= fsync(ctx
->krbd_fd
);
1076 prt("%s failed\n", invalidate
? "BLKFLSBUF" : "fsync");
1084 krbd_flush(struct rbd_ctx
*ctx
)
1086 return __krbd_flush(ctx
, false);
1090 krbd_discard(struct rbd_ctx
*ctx
, uint64_t off
, uint64_t len
)
1092 uint64_t range
[2] = { off
, len
};
1096 * BLKZEROOUT goes straight to disk and doesn't do anything
1097 * about dirty buffers. This means we need to flush so that
1102 * results in "data 0000 data" rather than "data data data" on
1103 * disk and invalidate so that
1108 * returns "data 0000 data" rather than "data data data" in
1109 * case 1..2M was cached.
1111 * Note: These cache coherency issues are supposed to be fixed
1112 * in recent kernels.
1114 ret
= __krbd_flush(ctx
, true);
1119 * off and len must be 512-byte aligned, otherwise BLKZEROOUT
1120 * will fail with -EINVAL. This means that -K (enable krbd
1121 * mode) requires -h 512 or similar.
1123 if (ioctl(ctx
->krbd_fd
, BLKZEROOUT
, &range
) < 0) {
1125 prt("BLKZEROOUT(%llu, %llu) failed\n", off
, len
);
1133 krbd_get_size(struct rbd_ctx
*ctx
, uint64_t *size
)
1137 if (ioctl(ctx
->krbd_fd
, BLKGETSIZE64
, &bytes
) < 0) {
1139 prt("BLKGETSIZE64 failed\n");
1149 krbd_resize(struct rbd_ctx
*ctx
, uint64_t size
)
1153 uint64_t effective_size
;
1155 ceph_assert(size
% truncbdy
== 0);
1158 * When krbd detects a size change, it calls revalidate_disk(),
1159 * which ends up calling invalidate_bdev(), which invalidates
1160 * clean pages and does nothing about dirty pages beyond the
1161 * new size. The preceding cache flush makes sure those pages
1162 * are invalidated, which is what we need on shrink so that
1169 * returns "0000 0000" rather than "data 0000".
1171 ret
= __krbd_flush(ctx
, false);
1175 ret
= __librbd_resize(ctx
, size
);
1180 ret
= krbd_get_size(ctx
, &effective_size
);
1184 if (effective_size
== size
)
1187 if (count
++ >= 15) {
1188 prt("BLKGETSIZE64 size error: expected 0x%llx, actual 0x%llx\n",
1189 (unsigned long long)size
,
1190 (unsigned long long)effective_size
);
1194 usleep(count
* 250 * 1000);
1201 krbd_clone(struct rbd_ctx
*ctx
, const char *src_snapname
,
1202 const char *dst_imagename
, int *order
, int stripe_unit
,
1207 ret
= __krbd_flush(ctx
, false);
1211 return __librbd_clone(ctx
, src_snapname
, dst_imagename
, order
,
1212 stripe_unit
, stripe_count
);
1216 krbd_flatten(struct rbd_ctx
*ctx
)
1220 ret
= __krbd_flush(ctx
, false);
1224 return __librbd_flatten(ctx
);
1228 #if defined(WITH_KRBD)
1229 const struct rbd_operations krbd_operations
= {
1244 #if defined(__linux__)
1246 nbd_open(const char *name
, struct rbd_ctx
*ctx
)
1253 SubProcess
process("rbd-nbd", SubProcess::KEEP
, SubProcess::PIPE
,
1255 process
.add_cmd_arg("map");
1256 process
.add_cmd_arg("--io-timeout=600");
1261 process
.add_cmd_arg(img
.c_str());
1263 r
= __librbd_open(name
, ctx
);
1267 r
= process
.spawn();
1269 prt("nbd_open failed to run rbd-nbd error: %s\n", process
.err().c_str());
1272 r
= safe_read(process
.get_stdout(), dev
, sizeof(dev
));
1274 prt("nbd_open failed to get nbd device path\n");
1277 for (int i
= 0; i
< r
; ++i
)
1278 if (dev
[i
] == 10 || dev
[i
] == 13)
1283 prt("rbd-nbd failed with error: %s", process
.err().c_str());
1287 devnode
= strdup(dev
);
1291 fd
= open(devnode
, O_RDWR
| o_direct
);
1294 prt("open(%s) failed\n", devnode
);
1298 ctx
->krbd_name
= devnode
;
1305 nbd_close(struct rbd_ctx
*ctx
)
1309 ceph_assert(ctx
->krbd_name
&& ctx
->krbd_fd
>= 0);
1311 if (close(ctx
->krbd_fd
) < 0) {
1313 prt("close(%s) failed\n", ctx
->krbd_name
);
1317 SubProcess
process("rbd-nbd");
1318 process
.add_cmd_arg("unmap");
1319 process
.add_cmd_arg(ctx
->krbd_name
);
1321 r
= process
.spawn();
1323 prt("nbd_close failed to run rbd-nbd error: %s\n", process
.err().c_str());
1328 prt("rbd-nbd failed with error: %d", process
.err().c_str());
1332 free((void *)ctx
->krbd_name
);
1334 ctx
->krbd_name
= NULL
;
1337 return __librbd_close(ctx
);
1341 nbd_clone(struct rbd_ctx
*ctx
, const char *src_snapname
,
1342 const char *dst_imagename
, int *order
, int stripe_unit
,
1347 ret
= __krbd_flush(ctx
, false);
1351 return __librbd_clone(ctx
, src_snapname
, dst_imagename
, order
,
1352 stripe_unit
, stripe_count
);
1355 const struct rbd_operations nbd_operations
= {
1370 #if defined(__FreeBSD__)
1372 ggate_open(const char *name
, struct rbd_ctx
*ctx
)
1379 SubProcess
process("rbd-ggate", SubProcess::KEEP
, SubProcess::PIPE
,
1381 process
.add_cmd_arg("map");
1386 process
.add_cmd_arg(img
.c_str());
1388 r
= __librbd_open(name
, ctx
);
1393 r
= process
.spawn();
1395 prt("ggate_open failed to run rbd-ggate: %s\n",
1396 process
.err().c_str());
1399 r
= safe_read(process
.get_stdout(), dev
, sizeof(dev
));
1401 prt("ggate_open failed to get ggate device path\n");
1404 for (int i
= 0; i
< r
; ++i
) {
1405 if (dev
[i
] == '\r' || dev
[i
] == '\n') {
1412 prt("rbd-ggate failed with error: %s", process
.err().c_str());
1416 devnode
= strdup(dev
);
1421 for (int i
= 0; i
< 100; i
++) {
1422 fd
= open(devnode
, O_RDWR
| o_direct
);
1423 if (fd
>= 0 || errno
!= ENOENT
) {
1430 prt("open(%s) failed\n", devnode
);
1434 ctx
->krbd_name
= devnode
;
1441 ggate_close(struct rbd_ctx
*ctx
)
1445 ceph_assert(ctx
->krbd_name
&& ctx
->krbd_fd
>= 0);
1447 if (close(ctx
->krbd_fd
) < 0) {
1449 prt("close(%s) failed\n", ctx
->krbd_name
);
1453 SubProcess
process("rbd-ggate");
1454 process
.add_cmd_arg("unmap");
1455 process
.add_cmd_arg(ctx
->krbd_name
);
1457 r
= process
.spawn();
1459 prt("ggate_close failed to run rbd-nbd: %s\n",
1460 process
.err().c_str());
1465 prt("rbd-ggate failed with error: %d", process
.err().c_str());
1469 free((void *)ctx
->krbd_name
);
1471 ctx
->krbd_name
= NULL
;
1474 return __librbd_close(ctx
);
1478 ggate_read(struct rbd_ctx
*ctx
, uint64_t off
, size_t len
, char *buf
)
1482 n
= pread(ctx
->krbd_fd
, buf
, len
, off
);
1485 prt("pread(%llu, %zu) failed\n", off
, len
);
1493 ggate_write(struct rbd_ctx
*ctx
, uint64_t off
, size_t len
, const char *buf
)
1497 n
= pwrite(ctx
->krbd_fd
, buf
, len
, off
);
1500 prt("pwrite(%llu, %zu) failed\n", off
, len
);
1508 __ggate_flush(struct rbd_ctx
*ctx
, bool invalidate
)
1517 ret
= ioctl(ctx
->krbd_fd
, DIOCGFLUSH
, NULL
);
1519 ret
= fsync(ctx
->krbd_fd
);
1523 prt("%s failed\n", invalidate
? "DIOCGFLUSH" : "fsync");
1531 ggate_flush(struct rbd_ctx
*ctx
)
1533 return __ggate_flush(ctx
, false);
1537 ggate_discard(struct rbd_ctx
*ctx
, uint64_t off
, uint64_t len
)
1539 off_t range
[2] = {static_cast<off_t
>(off
), static_cast<off_t
>(len
)};
1542 ret
= __ggate_flush(ctx
, true);
1547 if (ioctl(ctx
->krbd_fd
, DIOCGDELETE
, &range
) < 0) {
1549 prt("DIOCGDELETE(%llu, %llu) failed\n", off
, len
);
1557 ggate_get_size(struct rbd_ctx
*ctx
, uint64_t *size
)
1561 if (ioctl(ctx
->krbd_fd
, DIOCGMEDIASIZE
, &bytes
) < 0) {
1563 prt("DIOCGMEDIASIZE failed\n");
1573 ggate_resize(struct rbd_ctx
*ctx
, uint64_t size
)
1577 ceph_assert(size
% truncbdy
== 0);
1579 ret
= __ggate_flush(ctx
, false);
1584 return __librbd_resize(ctx
, size
);
1588 ggate_clone(struct rbd_ctx
*ctx
, const char *src_snapname
,
1589 const char *dst_imagename
, int *order
, int stripe_unit
,
1594 ret
= __ggate_flush(ctx
, false);
1599 return __librbd_clone(ctx
, src_snapname
, dst_imagename
, order
,
1600 stripe_unit
, stripe_count
);
1604 ggate_flatten(struct rbd_ctx
*ctx
)
1608 ret
= __ggate_flush(ctx
, false);
1613 return __librbd_flatten(ctx
);
1616 const struct rbd_operations ggate_operations
= {
1629 #endif // __FreeBSD__
1631 struct rbd_ctx ctx
= RBD_CTX_INIT
;
1632 const struct rbd_operations
*ops
= &librbd_operations
;
1634 static bool rbd_image_has_parent(struct rbd_ctx
*ctx
)
1637 rbd_linked_image_spec_t parent_image
;
1638 rbd_snap_spec_t parent_snap
;
1640 ret
= rbd_get_parent(ctx
->image
, &parent_image
, &parent_snap
);
1641 if (ret
< 0 && ret
!= -ENOENT
) {
1642 prterrcode("rbd_get_parent_info", ret
);
1645 rbd_linked_image_spec_cleanup(&parent_image
);
1646 rbd_snap_spec_cleanup(&parent_snap
);
1656 log4(int operation
, int arg0
, int arg1
, int arg2
)
1658 struct log_entry
*le
;
1660 le
= &oplog
[logptr
];
1661 le
->operation
= operation
;
1663 le
->operation
= ~ le
->operation
;
1669 if (logptr
>= LOGSIZE
)
1677 struct log_entry
*lp
;
1678 const char *falloc_type
[3] = {"PAST_EOF", "EXTENDING", "INTERIOR"};
1680 prt("LOG DUMP (%d total operations):\n", logcount
);
1681 if (logcount
< LOGSIZE
) {
1688 for ( ; count
> 0; count
--) {
1691 opnum
= i
+1 + (logcount
/LOGSIZE
)*LOGSIZE
;
1692 prt("%d(%3d mod 256): ", opnum
, opnum
%256);
1694 if ((closeopen
= lp
->operation
< 0))
1695 lp
->operation
= ~ lp
->operation
;
1697 switch (lp
->operation
) {
1699 prt("MAPREAD 0x%x thru 0x%x\t(0x%x bytes)",
1700 lp
->args
[0], lp
->args
[0] + lp
->args
[1] - 1,
1702 if (badoff
>= lp
->args
[0] && badoff
<
1703 lp
->args
[0] + lp
->args
[1])
1704 prt("\t***RRRR***");
1707 prt("MAPWRITE 0x%x thru 0x%x\t(0x%x bytes)",
1708 lp
->args
[0], lp
->args
[0] + lp
->args
[1] - 1,
1710 if (badoff
>= lp
->args
[0] && badoff
<
1711 lp
->args
[0] + lp
->args
[1])
1712 prt("\t******WWWW");
1715 prt("READ 0x%x thru 0x%x\t(0x%x bytes)",
1716 lp
->args
[0], lp
->args
[0] + lp
->args
[1] - 1,
1718 if (badoff
>= lp
->args
[0] &&
1719 badoff
< lp
->args
[0] + lp
->args
[1])
1720 prt("\t***RRRR***");
1723 prt("WRITE 0x%x thru 0x%x\t(0x%x bytes)",
1724 lp
->args
[0], lp
->args
[0] + lp
->args
[1] - 1,
1726 if (lp
->args
[0] > lp
->args
[2])
1728 else if (lp
->args
[0] + lp
->args
[1] > lp
->args
[2])
1730 if ((badoff
>= lp
->args
[0] || badoff
>=lp
->args
[2]) &&
1731 badoff
< lp
->args
[0] + lp
->args
[1])
1735 down
= lp
->args
[0] < lp
->args
[1];
1736 prt("TRUNCATE %s\tfrom 0x%x to 0x%x",
1737 down
? "DOWN" : "UP", lp
->args
[1], lp
->args
[0]);
1738 if (badoff
>= lp
->args
[!down
] &&
1739 badoff
< lp
->args
[!!down
])
1740 prt("\t******WWWW");
1743 /* 0: offset 1: length 2: where alloced */
1744 prt("FALLOC 0x%x thru 0x%x\t(0x%x bytes) %s",
1745 lp
->args
[0], lp
->args
[0] + lp
->args
[1],
1746 lp
->args
[1], falloc_type
[lp
->args
[2]]);
1747 if (badoff
>= lp
->args
[0] &&
1748 badoff
< lp
->args
[0] + lp
->args
[1])
1749 prt("\t******FFFF");
1752 prt("PUNCH 0x%x thru 0x%x\t(0x%x bytes)",
1753 lp
->args
[0], lp
->args
[0] + lp
->args
[1] - 1,
1755 if (badoff
>= lp
->args
[0] && badoff
<
1756 lp
->args
[0] + lp
->args
[1])
1757 prt("\t******PPPP");
1760 prt("WRITESAME 0x%x thru 0x%x\t(0x%x bytes) data_size 0x%x",
1761 lp
->args
[0], lp
->args
[0] + lp
->args
[1] - 1,
1762 lp
->args
[1], lp
->args
[2]);
1763 if (badoff
>= lp
->args
[0] &&
1764 badoff
< lp
->args
[0] + lp
->args
[1])
1765 prt("\t***WSWSWSWS");
1767 case OP_COMPARE_AND_WRITE
:
1768 prt("COMPARE_AND_WRITE 0x%x thru 0x%x\t(0x%x bytes)",
1769 lp
->args
[0], lp
->args
[0] + lp
->args
[1] - 1,
1771 if (lp
->args
[0] > lp
->args
[2])
1773 else if (lp
->args
[0] + lp
->args
[1] > lp
->args
[2])
1775 if ((badoff
>= lp
->args
[0] || badoff
>=lp
->args
[2]) &&
1776 badoff
< lp
->args
[0] + lp
->args
[1])
1786 prt("SKIPPED (no operation)");
1789 prt("BOGUS LOG ENTRY (operation code = %d)!",
1793 prt("\n\t\tCLOSE/OPEN");
1802 save_buffer(char *buffer
, off_t bufferlength
, int fd
)
1805 ssize_t byteswritten
;
1807 if (fd
<= 0 || bufferlength
== 0)
1810 if (bufferlength
> SSIZE_MAX
) {
1811 prt("fsx flaw: overflow in save_buffer\n");
1815 ret
= lseek(fd
, (off_t
)0, SEEK_SET
);
1816 if (ret
== (off_t
)-1)
1817 prterr("save_buffer: lseek 0");
1819 byteswritten
= write(fd
, buffer
, (size_t)bufferlength
);
1820 if (byteswritten
!= bufferlength
) {
1821 if (byteswritten
== -1)
1822 prterr("save_buffer write");
1824 warn("save_buffer: short write, 0x%x bytes instead of 0x%llx\n",
1825 (unsigned)byteswritten
,
1826 (unsigned long long)bufferlength
);
1832 report_failure(int status
)
1838 save_buffer(good_buf
, file_size
, fsxgoodfd
);
1839 prt("Correct content saved for comparison\n");
1840 prt("(maybe hexdump \"%s\" vs \"%s.fsxgood\")\n",
1845 sleep(3); // so the log can flush to disk. KLUDGEY!
/*
 * Read the two bytes at cp as one 16-bit value: the byte at cp supplies the
 * high-order 8 bits and the byte at cp + 1 the low-order 8 bits.  cp is cast
 * to a byte pointer inside the macro, so callers may pass any pointer type
 * (or an integer holding an address).
 */
#define short_at(cp) \
	((unsigned short)((((unsigned char *)(cp))[0] << 8) | \
			  ((unsigned char *)(cp))[1]))
1853 fsxcmp(char *good_buf
, char *temp_buf
, unsigned size
)
1855 if (!skip_partial_discard
) {
1856 return memcmp(good_buf
, temp_buf
, size
);
1859 for (unsigned i
= 0; i
< size
; i
++) {
1860 if (good_buf
[i
] != temp_buf
[i
] && good_buf
[i
] != 0) {
1861 return good_buf
[i
] - temp_buf
[i
];
1868 check_buffers(char *good_buf
, char *temp_buf
, unsigned offset
, unsigned size
)
1870 if (fsxcmp(good_buf
+ offset
, temp_buf
, size
) != 0) {
1874 prt("READ BAD DATA: offset = 0x%x, size = 0x%x, fname = %s\n",
1875 offset
, size
, iname
);
1876 prt("OFFSET\tGOOD\tBAD\tRANGE\n");
1878 unsigned char c
= good_buf
[offset
];
1879 unsigned char t
= temp_buf
[i
];
1882 unsigned bad
= short_at(&temp_buf
[i
]);
1883 prt("0x%5x\t0x%04x\t0x%04x", offset
,
1884 short_at(&good_buf
[offset
]), bad
);
1885 unsigned op
= temp_buf
[(offset
& 1) ? i
+1 : i
];
1886 prt("\t0x%5x\n", n
);
1888 prt("operation# (mod 256) for "
1889 "the bad data may be %u\n",
1890 ((unsigned)op
& 0xff));
1892 prt("operation# (mod 256) for "
1893 "the bad data unknown, check"
1894 " HOLE and EXTEND ops\n");
1903 report_failure(110);
1914 ret
= ops
->get_size(&ctx
, &size
);
1916 prterrcode("check_size: ops->get_size", ret
);
1918 if ((uint64_t)file_size
!= size
) {
1919 prt("Size error: expected 0x%llx stat 0x%llx\n",
1920 (unsigned long long)file_size
,
1921 (unsigned long long)size
);
1922 report_failure(120);
#define TRUNC_HACK_SIZE (200ULL * 512ULL) /* 200 * 512 bytes: 512-byte aligned for krbd */
1929 check_trunc_hack(void)
1934 ret
= ops
->resize(&ctx
, 0ULL);
1936 prterrcode("check_trunc_hack: ops->resize pre", ret
);
1938 ret
= ops
->resize(&ctx
, TRUNC_HACK_SIZE
);
1940 prterrcode("check_trunc_hack: ops->resize actual", ret
);
1942 ret
= ops
->get_size(&ctx
, &size
);
1944 prterrcode("check_trunc_hack: ops->get_size", ret
);
1946 if (size
!= TRUNC_HACK_SIZE
) {
1947 prt("no extend on truncate! not posix!\n");
1951 ret
= ops
->resize(&ctx
, 0ULL);
1953 prterrcode("check_trunc_hack: ops->resize post", ret
);
1962 char client_name
[256];
1964 sprintf(client_name
, "client.%s", client_id
);
1966 r
= rados_create2(&cluster
, cluster_name
, client_name
, 0);
1968 simple_err("Could not create cluster handle", r
);
1971 rados_conf_parse_env(cluster
, NULL
);
1972 r
= rados_conf_read_file(cluster
, NULL
);
1974 simple_err("Error reading ceph config file", r
);
1975 goto failed_shutdown
;
1977 r
= rados_connect(cluster
);
1979 simple_err("Error connecting to cluster", r
);
1980 goto failed_shutdown
;
1982 #if defined(WITH_KRBD)
1983 r
= krbd_create_from_context(rados_cct(cluster
), 0, &krbd
);
1985 simple_err("Could not create libkrbd handle", r
);
1986 goto failed_shutdown
;
1990 r
= rados_pool_create(cluster
, pool
);
1991 if (r
< 0 && r
!= -EEXIST
) {
1992 simple_err("Error creating pool", r
);
1995 r
= rados_ioctx_create(cluster
, pool
, &ioctx
);
1997 simple_err("Error creating ioctx", r
);
2000 rados_application_enable(ioctx
, "rbd", 1);
2002 if (clone_calls
|| journal_replay
) {
2004 r
= get_features(&features
);
2009 r
= rbd_create2(ioctx
, iname
, file_size
, features
, &order
);
2011 r
= rbd_create(ioctx
, iname
, file_size
, &order
);
2014 simple_err("Error creating image", r
);
2018 if (journal_replay
) {
2019 r
= register_journal(ioctx
, iname
);
2025 r
= rados_conf_get(cluster
, "rbd_skip_partial_discard", buf
,
2028 simple_err("Could not get rbd_skip_partial_discard value", r
);
2031 skip_partial_discard
= (strcmp(buf
, "true") == 0);
2036 rados_ioctx_destroy(ioctx
);
2038 #if defined(WITH_KRBD)
2042 rados_shutdown(cluster
);
2047 doflush(unsigned offset
, unsigned size
)
2054 ret
= ops
->flush(&ctx
);
2056 prterrcode("doflush: ops->flush", ret
);
2060 doread(unsigned offset
, unsigned size
)
2064 offset
-= offset
% readbdy
;
2066 size
-= size
% readbdy
;
2068 if (!quiet
&& testcalls
> simulatedopcount
&& !o_direct
)
2069 prt("skipping zero size read\n");
2070 log4(OP_SKIPPED
, OP_READ
, offset
, size
);
2073 if (size
+ offset
> file_size
) {
2074 if (!quiet
&& testcalls
> simulatedopcount
)
2075 prt("skipping seek/read past end of file\n");
2076 log4(OP_SKIPPED
, OP_READ
, offset
, size
);
2080 log4(OP_READ
, offset
, size
, 0);
2082 if (testcalls
<= simulatedopcount
)
2086 ((progressinterval
&& testcalls
% progressinterval
== 0) ||
2088 (monitorstart
== -1 ||
2089 (static_cast<long>(offset
+ size
) > monitorstart
&&
2090 (monitorend
== -1 ||
2091 static_cast<long>(offset
) <= monitorend
))))))
2092 prt("%lu read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls
,
2093 offset
, offset
+ size
- 1, size
);
2095 ret
= ops
->read(&ctx
, offset
, size
, temp_buf
);
2096 if (ret
!= (int)size
) {
2098 prterrcode("doread: ops->read", ret
);
2100 prt("short read: 0x%x bytes instead of 0x%x\n",
2102 report_failure(141);
2105 check_buffers(good_buf
, temp_buf
, offset
, size
);
2110 check_eofpage(char *s
, unsigned offset
, char *p
, int size
)
2112 unsigned long last_page
, should_be_zero
;
2114 if (offset
+ size
<= (file_size
& ~page_mask
))
2117 * we landed in the last page of the file
2118 * test to make sure the VM system provided 0's
2119 * beyond the true end of the file mapping
2120 * (as required by mmap def in 1996 posix 1003.1)
2122 last_page
= ((unsigned long)p
+ (offset
& page_mask
) + size
) & ~page_mask
;
2124 for (should_be_zero
= last_page
+ (file_size
& page_mask
);
2125 should_be_zero
< last_page
+ page_size
;
2127 if (*(char *)should_be_zero
) {
2128 prt("Mapped %s: non-zero data past EOF (0x%llx) page offset 0x%x is 0x%04x\n",
2129 s
, file_size
- 1, should_be_zero
& page_mask
,
2130 short_at(should_be_zero
));
2131 report_failure(205);
2137 gendata(char *original_buf
, char *good_buf
, unsigned offset
, unsigned size
)
2140 good_buf
[offset
] = testcalls
% 256;
2142 good_buf
[offset
] += original_buf
[offset
];
2149 dowrite(unsigned offset
, unsigned size
)
2154 offset
-= offset
% writebdy
;
2156 size
-= size
% writebdy
;
2158 if (!quiet
&& testcalls
> simulatedopcount
&& !o_direct
)
2159 prt("skipping zero size write\n");
2160 log4(OP_SKIPPED
, OP_WRITE
, offset
, size
);
2164 log4(OP_WRITE
, offset
, size
, file_size
);
2166 gendata(original_buf
, good_buf
, offset
, size
);
2167 if (file_size
< offset
+ size
) {
2168 newsize
= ceil(((double)offset
+ size
) / truncbdy
) * truncbdy
;
2169 if (file_size
< newsize
)
2170 memset(good_buf
+ file_size
, '\0', newsize
- file_size
);
2171 file_size
= newsize
;
2173 warn("Lite file size bug in fsx!");
2174 report_failure(149);
2176 ret
= ops
->resize(&ctx
, newsize
);
2178 prterrcode("dowrite: ops->resize", ret
);
2179 report_failure(150);
2183 if (testcalls
<= simulatedopcount
)
2187 ((progressinterval
&& testcalls
% progressinterval
== 0) ||
2189 (monitorstart
== -1 ||
2190 (static_cast<long>(offset
+ size
) > monitorstart
&&
2191 (monitorend
== -1 ||
2192 static_cast<long>(offset
) <= monitorend
))))))
2193 prt("%lu write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls
,
2194 offset
, offset
+ size
- 1, size
);
2196 ret
= ops
->write(&ctx
, offset
, size
, good_buf
+ offset
);
2197 if (ret
!= (ssize_t
)size
) {
2199 prterrcode("dowrite: ops->write", ret
);
2201 prt("short write: 0x%x bytes instead of 0x%x\n",
2203 report_failure(151);
2207 doflush(offset
, size
);
2212 dotruncate(unsigned size
)
2214 int oldsize
= file_size
;
2217 size
-= size
% truncbdy
;
2218 if (size
> biggest
) {
2220 if (!quiet
&& testcalls
> simulatedopcount
)
2221 prt("truncating to largest ever: 0x%x\n", size
);
2224 log4(OP_TRUNCATE
, size
, (unsigned)file_size
, 0);
2226 if (size
> file_size
)
2227 memset(good_buf
+ file_size
, '\0', size
- file_size
);
2228 else if (size
< file_size
)
2229 memset(good_buf
+ size
, '\0', file_size
- size
);
2232 if (testcalls
<= simulatedopcount
)
2235 if ((progressinterval
&& testcalls
% progressinterval
== 0) ||
2236 (debug
&& (monitorstart
== -1 || monitorend
== -1 ||
2237 (long)size
<= monitorend
)))
2238 prt("%lu trunc\tfrom 0x%x to 0x%x\n", testcalls
, oldsize
, size
);
2240 ret
= ops
->resize(&ctx
, size
);
2242 prterrcode("dotruncate: ops->resize", ret
);
2243 report_failure(160);
2248 do_punch_hole(unsigned offset
, unsigned length
)
2250 unsigned end_offset
;
2255 offset
-= offset
% holebdy
;
2256 length
-= length
% holebdy
;
2258 if (!quiet
&& testcalls
> simulatedopcount
)
2259 prt("skipping zero length punch hole\n");
2260 log4(OP_SKIPPED
, OP_PUNCH_HOLE
, offset
, length
);
2264 if (file_size
<= (loff_t
)offset
) {
2265 if (!quiet
&& testcalls
> simulatedopcount
)
2266 prt("skipping hole punch off the end of the file\n");
2267 log4(OP_SKIPPED
, OP_PUNCH_HOLE
, offset
, length
);
2271 end_offset
= offset
+ length
;
2273 log4(OP_PUNCH_HOLE
, offset
, length
, 0);
2275 if (testcalls
<= simulatedopcount
)
2278 if ((progressinterval
&& testcalls
% progressinterval
== 0) ||
2279 (debug
&& (monitorstart
== -1 || monitorend
== -1 ||
2280 (long)end_offset
<= monitorend
))) {
2281 prt("%lu punch\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls
,
2282 offset
, offset
+length
, length
);
2285 ret
= ops
->discard(&ctx
, (unsigned long long)offset
,
2286 (unsigned long long)length
);
2288 prterrcode("do_punch_hole: ops->discard", ret
);
2289 report_failure(161);
2292 max_offset
= offset
< file_size
? offset
: file_size
;
2293 max_len
= max_offset
+ length
<= file_size
? length
:
2294 file_size
- max_offset
;
2295 memset(good_buf
+ max_offset
, '\0', max_len
);
2298 unsigned get_data_size(unsigned size
)
2302 unsigned max
= sqrt((double)size
) + 1;
2304 unsigned curr
= good
;
2306 hint
= get_random() % max
;
2308 for (i
= 1; i
< max
&& curr
< hint
; i
++) {
2309 if (size
% i
== 0) {
2322 dowritesame(unsigned offset
, unsigned size
)
2330 offset
-= offset
% writebdy
;
2332 size
-= size
% writebdy
;
2334 if (!quiet
&& testcalls
> simulatedopcount
&& !o_direct
)
2335 prt("skipping zero size writesame\n");
2336 log4(OP_SKIPPED
, OP_WRITESAME
, offset
, size
);
2340 data_size
= get_data_size(size
);
2342 log4(OP_WRITESAME
, offset
, size
, data_size
);
2344 gendata(original_buf
, good_buf
, offset
, data_size
);
2345 if (file_size
< offset
+ size
) {
2346 newsize
= ceil(((double)offset
+ size
) / truncbdy
) * truncbdy
;
2347 if (file_size
< newsize
)
2348 memset(good_buf
+ file_size
, '\0', newsize
- file_size
);
2349 file_size
= newsize
;
2351 warn("Lite file size bug in fsx!");
2352 report_failure(162);
2354 ret
= ops
->resize(&ctx
, newsize
);
2356 prterrcode("dowritesame: ops->resize", ret
);
2357 report_failure(163);
2361 for (n
= size
/ data_size
, buf_off
= data_size
; n
> 1; n
--) {
2362 memcpy(good_buf
+ offset
+ buf_off
, good_buf
+ offset
, data_size
);
2363 buf_off
+= data_size
;
2366 if (testcalls
<= simulatedopcount
)
2370 ((progressinterval
&& testcalls
% progressinterval
== 0) ||
2372 (monitorstart
== -1 ||
2373 (static_cast<long>(offset
+ size
) > monitorstart
&&
2374 (monitorend
== -1 ||
2375 static_cast<long>(offset
) <= monitorend
))))))
2376 prt("%lu writesame\t0x%x thru\t0x%x\tdata_size\t0x%x(0x%x bytes)\n", testcalls
,
2377 offset
, offset
+ size
- 1, data_size
, size
);
2379 ret
= ops
->writesame(&ctx
, offset
, size
, good_buf
+ offset
, data_size
);
2380 if (ret
!= (ssize_t
)size
) {
2382 prterrcode("dowritesame: ops->writesame", ret
);
2384 prt("short writesame: 0x%x bytes instead of 0x%x\n",
2386 report_failure(164);
2390 doflush(offset
, size
);
2394 docompareandwrite(unsigned offset
, unsigned size
)
2398 if (skip_partial_discard
) {
2399 if (!quiet
&& testcalls
> simulatedopcount
)
2400 prt("compare and write disabled\n");
2401 log4(OP_SKIPPED
, OP_COMPARE_AND_WRITE
, offset
, size
);
2405 offset
-= offset
% writebdy
;
2407 size
-= size
% writebdy
;
2410 if (!quiet
&& testcalls
> simulatedopcount
&& !o_direct
)
2411 prt("skipping zero size read\n");
2412 log4(OP_SKIPPED
, OP_READ
, offset
, size
);
2416 if (size
+ offset
> file_size
) {
2417 if (!quiet
&& testcalls
> simulatedopcount
)
2418 prt("skipping seek/compare past end of file\n");
2419 log4(OP_SKIPPED
, OP_COMPARE_AND_WRITE
, offset
, size
);
2423 memcpy(temp_buf
+ offset
, good_buf
+ offset
, size
);
2424 gendata(original_buf
, good_buf
, offset
, size
);
2425 log4(OP_COMPARE_AND_WRITE
, offset
, size
, 0);
2427 if (testcalls
<= simulatedopcount
)
2431 ((progressinterval
&& testcalls
% progressinterval
== 0) ||
2433 (monitorstart
== -1 ||
2434 (static_cast<long>(offset
+ size
) > monitorstart
&&
2435 (monitorend
== -1 ||
2436 static_cast<long>(offset
) <= monitorend
))))))
2437 prt("%lu compareandwrite\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls
,
2438 offset
, offset
+ size
- 1, size
);
2440 ret
= ops
->compare_and_write(&ctx
, offset
, size
, temp_buf
+ offset
,
2442 if (ret
!= (ssize_t
)size
) {
2443 if (ret
== -EINVAL
) {
2444 memcpy(good_buf
+ offset
, temp_buf
+ offset
, size
);
2448 prterrcode("docompareandwrite: ops->compare_and_write", ret
);
2450 prt("short write: 0x%x bytes instead of 0x%x\n", ret
, size
);
2451 report_failure(151);
2456 doflush(offset
, size
);
2459 void clone_filename(char *buf
, size_t len
, int clones
)
2461 #if __GNUC__ && __GNUC__ >= 8
2462 #pragma GCC diagnostic push
2463 #pragma GCC diagnostic ignored "-Wformat-truncation"
2465 snprintf(buf
, len
, "%s/fsx-%s-parent%d",
2466 dirpath
, iname
, clones
);
2467 #if __GNUC__ && __GNUC__ >= 8
2468 #pragma GCC diagnostic pop
2472 void clone_imagename(char *buf
, size_t len
, int clones
)
2475 snprintf(buf
, len
, "%s-clone%d", iname
, clones
);
2477 strncpy(buf
, iname
, len
- 1);
2478 buf
[len
- 1] = '\0';
2482 void replay_imagename(char *buf
, size_t len
, int clones
)
2484 clone_imagename(buf
, len
, clones
);
2485 strncat(buf
, "-replay", len
- strlen(buf
));
2486 buf
[len
- 1] = '\0';
2489 void check_clone(int clonenum
, bool replay_image
);
2494 char filename
[1024];
2495 char imagename
[1024];
2496 char lastimagename
[1024];
2498 int order
= 0, stripe_unit
= 0, stripe_count
= 0;
2499 uint64_t newsize
= file_size
;
2501 log4(OP_CLONE
, 0, 0, 0);
2504 if (randomize_striping
) {
2505 order
= 18 + get_random() % 8;
2506 stripe_unit
= 1ull << (order
- 1 - (get_random() % 8));
2507 stripe_count
= 2 + get_random() % 14;
2510 prt("%lu clone\t%d order %d su %d sc %d\n", testcalls
, num_clones
,
2511 order
, stripe_unit
, stripe_count
);
2513 clone_imagename(imagename
, sizeof(imagename
), num_clones
);
2514 clone_imagename(lastimagename
, sizeof(lastimagename
),
2516 ceph_assert(strcmp(lastimagename
, ctx
.name
) == 0);
2518 ret
= ops
->clone(&ctx
, "snap", imagename
, &order
, stripe_unit
,
2521 prterrcode("do_clone: ops->clone", ret
);
2525 if (randomize_parent_overlap
&& rbd_image_has_parent(&ctx
)) {
2526 int rand
= get_random() % 16 + 1; // [1..16]
2531 ret
= rbd_get_overlap(ctx
.image
, &overlap
);
2533 prterrcode("do_clone: rbd_get_overlap", ret
);
2537 if (rand
< 10) { // 9/16
2538 newsize
= overlap
* ((double)rand
/ 10);
2539 newsize
-= newsize
% truncbdy
;
2544 ceph_assert(newsize
!= (uint64_t)file_size
);
2545 prt("truncating image %s from 0x%llx (overlap 0x%llx) to 0x%llx\n",
2546 ctx
.name
, file_size
, overlap
, newsize
);
2548 ret
= ops
->resize(&ctx
, newsize
);
2550 prterrcode("do_clone: ops->resize", ret
);
2553 } else if (rand
< 15) { // 2/16
2554 prt("flattening image %s\n", ctx
.name
);
2556 ret
= ops
->flatten(&ctx
);
2558 prterrcode("do_clone: ops->flatten", ret
);
2562 prt("leaving image %s intact\n", ctx
.name
);
2566 clone_filename(filename
, sizeof(filename
), num_clones
);
2567 if ((fd
= open(filename
, O_WRONLY
|O_CREAT
|O_TRUNC
, 0666)) < 0) {
2568 simple_err("do_clone: open", -errno
);
2571 save_buffer(good_buf
, newsize
, fd
);
2572 if ((ret
= close(fd
)) < 0) {
2573 simple_err("do_clone: close", -errno
);
2580 if ((ret
= ops
->close(&ctx
)) < 0) {
2581 prterrcode("do_clone: ops->close", ret
);
2585 if (journal_replay
) {
2586 ret
= finalize_journal(ioctx
, lastimagename
, num_clones
- 1,
2587 order
, stripe_unit
, stripe_count
);
2592 ret
= register_journal(ioctx
, imagename
);
2599 * Open freshly made clone.
2601 if ((ret
= ops
->open(imagename
, &ctx
)) < 0) {
2602 prterrcode("do_clone: ops->open", ret
);
2606 if (num_clones
> 1) {
2607 if (journal_replay
) {
2608 check_clone(num_clones
- 2, true);
2610 check_clone(num_clones
- 2, false);
2615 check_clone(int clonenum
, bool replay_image
)
2618 char imagename
[128];
2620 struct rbd_ctx cur_ctx
= RBD_CTX_INIT
;
2621 struct stat file_info
;
2622 char *good_buf
, *temp_buf
;
2625 replay_imagename(imagename
, sizeof(imagename
), clonenum
);
2627 clone_imagename(imagename
, sizeof(imagename
), clonenum
);
2630 if ((ret
= ops
->open(imagename
, &cur_ctx
)) < 0) {
2631 prterrcode("check_clone: ops->open", ret
);
2635 clone_filename(filename
, sizeof(filename
), clonenum
+ 1);
2636 if ((fd
= open(filename
, O_RDONLY
)) < 0) {
2637 simple_err("check_clone: open", -errno
);
2641 prt("checking clone #%d, image %s against file %s\n",
2642 clonenum
, imagename
, filename
);
2643 if ((ret
= fstat(fd
, &file_info
)) < 0) {
2644 simple_err("check_clone: fstat", -errno
);
2649 ret
= posix_memalign((void **)&good_buf
,
2650 std::max(writebdy
, (int)sizeof(void *)),
2653 prterrcode("check_clone: posix_memalign(good_buf)", -ret
);
2658 ret
= posix_memalign((void **)&temp_buf
,
2659 std::max(readbdy
, (int)sizeof(void *)),
2662 prterrcode("check_clone: posix_memalign(temp_buf)", -ret
);
2666 if ((ret
= pread(fd
, good_buf
, file_info
.st_size
, 0)) < 0) {
2667 simple_err("check_clone: pread", -errno
);
2670 if ((ret
= ops
->read(&cur_ctx
, 0, file_info
.st_size
, temp_buf
)) < 0) {
2671 prterrcode("check_clone: ops->read", ret
);
2675 if ((ret
= ops
->close(&cur_ctx
)) < 0) {
2676 prterrcode("check_clone: ops->close", ret
);
2679 check_buffers(good_buf
, temp_buf
, 0, file_info
.st_size
);
2681 if (!replay_image
) {
2694 ret
= ops
->write(&ctx
, 0, file_size
, good_buf
);
2695 if (ret
!= file_size
) {
2697 prterrcode("writefileimage: ops->write", ret
);
2699 prt("short write: 0x%x bytes instead of 0x%llx\n",
2700 ret
, (unsigned long long)file_size
);
2701 report_failure(172);
2705 ret
= ops
->resize(&ctx
, file_size
);
2707 prterrcode("writefileimage: ops->resize", ret
);
2708 report_failure(173);
2718 if (!rbd_image_has_parent(&ctx
)) {
2719 log4(OP_SKIPPED
, OP_FLATTEN
, 0, 0);
2722 log4(OP_FLATTEN
, 0, 0, 0);
2723 prt("%lu flatten\n", testcalls
);
2725 ret
= ops
->flatten(&ctx
);
2727 prterrcode("writefileimage: ops->flatten", ret
);
2738 if (testcalls
<= simulatedopcount
)
2741 name
= strdup(ctx
.name
);
2744 prt("%lu close/open\n", testcalls
);
2746 ret
= ops
->close(&ctx
);
2748 prterrcode("docloseopen: ops->close", ret
);
2749 report_failure(180);
2752 ret
= ops
->open(name
, &ctx
);
2754 prterrcode("docloseopen: ops->open", ret
);
2755 report_failure(181);
2761 #define TRIM_OFF_LEN(off, len, size) \
2767 if ((unsigned)(off) + (unsigned)(len) > (unsigned)(size)) \
2768 (len) = (size) - (off); \
2774 unsigned long offset
;
2775 unsigned long size
= maxoplen
;
2776 unsigned long rv
= get_random();
2779 if (simulatedopcount
> 0 && testcalls
== simulatedopcount
)
2785 closeopen
= (rv
>> 3) < (1u << 28) / (unsigned)closeprob
;
2787 if (debugstart
> 0 && testcalls
>= debugstart
)
2790 if (!quiet
&& testcalls
< simulatedopcount
&& testcalls
% 100000 == 0)
2791 prt("%lu...\n", testcalls
);
2793 offset
= get_random();
2795 size
= get_random() % (maxoplen
+ 1);
2797 /* calculate appropriate op to run */
2799 op
= rv
% OP_MAX_LITE
;
2801 op
= rv
% OP_MAX_FULL
;
2813 if (!fallocate_calls
) {
2814 log4(OP_SKIPPED
, OP_FALLOCATE
, offset
, size
);
2819 if (!punch_hole_calls
) {
2820 log4(OP_SKIPPED
, OP_PUNCH_HOLE
, offset
, size
);
2825 /* clone, 8% chance */
2826 if (!clone_calls
|| file_size
== 0 || get_random() % 100 >= 8) {
2827 log4(OP_SKIPPED
, OP_CLONE
, 0, 0);
2832 /* flatten four times as rarely as clone, 2% chance */
2833 if (get_random() % 100 >= 2) {
2834 log4(OP_SKIPPED
, OP_FLATTEN
, 0, 0);
2839 /* writesame not implemented */
2840 if (!ops
->writesame
) {
2841 log4(OP_SKIPPED
, OP_WRITESAME
, offset
, size
);
2845 case OP_COMPARE_AND_WRITE
:
2846 /* compare_and_write not implemented */
2847 if (!ops
->compare_and_write
) {
2848 log4(OP_SKIPPED
, OP_COMPARE_AND_WRITE
, offset
, size
);
2856 TRIM_OFF_LEN(offset
, size
, file_size
);
2857 doread(offset
, size
);
2861 TRIM_OFF_LEN(offset
, size
, maxfilelen
);
2862 dowrite(offset
, size
);
2866 TRIM_OFF_LEN(offset
, size
, file_size
);
2871 TRIM_OFF_LEN(offset
, size
, maxfilelen
);
2877 size
= get_random() % maxfilelen
;
2882 TRIM_OFF_LEN(offset
, size
, file_size
);
2883 do_punch_hole(offset
, size
);
2887 TRIM_OFF_LEN(offset
, size
, maxfilelen
);
2888 dowritesame(offset
, size
);
2890 case OP_COMPARE_AND_WRITE
:
2891 TRIM_OFF_LEN(offset
, size
, file_size
);
2892 docompareandwrite(offset
, size
);
2904 prterr("test: unknown operation");
2910 if (sizechecks
&& testcalls
> simulatedopcount
)
2921 prt("signal %d\n", sig
);
2922 prt("testcalls = %lu\n", testcalls
);
2930 fprintf(stdout
, "usage: %s",
2931 "fsx [-dfjknqxyACFHKLORUWZ] [-b opnum] [-c Prob] [-h holebdy] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] pname iname\n\
2932 -b opnum: beginning operation number (default 1)\n\
2933 -c P: 1 in P chance of file close+open at each op (default infinity)\n\
2934 -d: debug output for all operations\n\
2935 -f: flush and invalidate cache after I/O\n\
2936 -g: deep copy instead of clone\n\
2937 -h holebdy: 4096 would make discards page aligned (default 1)\n\
2938 -j: journal replay stress test\n\
2939 -k: keep data on success (default 0)\n\
2940 -l flen: the upper bound on file size (default 262144)\n\
2941 -m startop:endop: monitor (print debug output) specified byte range (default 0:infinity)\n\
2942 -n: no verifications of file size\n\
2943 -o oplen: the upper bound on operation size (default 65536)\n\
2944 -p progressinterval: debug output at specified operation interval\n\
2945 -q: quieter operation\n\
2946 -r readbdy: 4096 would make reads page aligned (default 1)\n\
2947 -s style: 1 gives smaller truncates (default 0)\n\
2948 -t truncbdy: 4096 would make truncates page aligned (default 1)\n\
2949 -w writebdy: 4096 would make writes page aligned (default 1)\n\
2950 -x: preallocate file space before starting, XFS only (default 0)\n\
2951 -y: synchronize changes to a file\n"
2953 " -C: do not use clone calls\n\
2954 -D startingop: debug output starting at specified operation\n"
2956 " -F: Do not use fallocate (preallocation) calls\n"
2958 #if defined(__FreeBSD__)
2959 " -G: enable rbd-ggate mode (use -L, -r and -w too)\n"
2961 " -H: do not use punch hole calls\n"
2962 #if defined(WITH_KRBD)
2963 " -K: enable krbd mode (use -t and -h too)\n"
2965 #if defined(__linux__)
2966 " -M: enable rbd-nbd mode (use -t and -h too)\n"
2968 " -L: fsxLite - no file creations & no file size changes\n\
2969 -N numops: total # operations to do (default infinity)\n\
2970 -O: use oplen (see -o flag) for every op (default random)\n\
2971 -P dirpath: save .fsxlog and .fsxgood files in dirpath (default ./)\n\
2972 -R: read() system calls only (mapped reads disabled)\n\
2973 -S seed: for random # generator (default 1) 0 gets timestamp\n\
2974 -U: disable randomized striping\n\
2975 -W: mapped write operations DISabled\n\
2976 -Z: O_DIRECT (use -R, -W, -r and -w too)\n\
2977 poolname: this is REQUIRED (no default)\n\
2978 imagename: this is REQUIRED (no default)\n");
2984 getnum(char *s
, char **e
)
2989 ret
= strtol(s
, e
, 0);
3020 if (!lite
&& fallocate_calls
) {
3021 if (fallocate(fd
, 0, 0, 1) && errno
== EOPNOTSUPP
) {
3023 warn("main: filesystem does not support fallocate, disabling\n");
3024 fallocate_calls
= 0;
3029 #else /* ! FALLOCATE */
3030 fallocate_calls
= 0;
3035 void remove_image(rados_ioctx_t ioctx
, char *imagename
, bool remove_snap
,
3041 if ((ret
= rbd_open(ioctx
, imagename
, &image
, NULL
)) < 0) {
3042 sprintf(errmsg
, "rbd_open %s", imagename
);
3043 prterrcode(errmsg
, ret
);
3044 report_failure(101);
3047 if ((ret
= rbd_snap_unprotect(image
, "snap")) < 0) {
3048 sprintf(errmsg
, "rbd_snap_unprotect %s@snap",
3050 prterrcode(errmsg
, ret
);
3051 report_failure(102);
3053 if ((ret
= rbd_snap_remove(image
, "snap")) < 0) {
3054 sprintf(errmsg
, "rbd_snap_remove %s@snap",
3056 prterrcode(errmsg
, ret
);
3057 report_failure(103);
3060 if ((ret
= rbd_close(image
)) < 0) {
3061 sprintf(errmsg
, "rbd_close %s", imagename
);
3062 prterrcode(errmsg
, ret
);
3063 report_failure(104);
3067 (ret
= unregister_journal(ioctx
, imagename
)) < 0) {
3068 report_failure(105);
3071 if ((ret
= rbd_remove(ioctx
, imagename
)) < 0) {
3072 sprintf(errmsg
, "rbd_remove %s", imagename
);
3073 prterrcode(errmsg
, ret
);
3074 report_failure(106);
3079 main(int argc
, char **argv
)
3082 LONG_OPT_CLUSTER
= 1000,
3086 int i
, style
, ch
, ret
;
3088 char goodfile
[1024];
3091 const char* optstring
= "b:c:dfgh:jkl:m:no:p:qr:s:t:w:xyCD:FGHKMLN:OP:RS:UWZ";
3092 const struct option longopts
[] = {
3093 {"cluster", 1, NULL
, LONG_OPT_CLUSTER
},
3094 {"id", 1, NULL
, LONG_OPT_ID
}};
3099 page_size
= PAGE_SIZE
;
3100 page_mask
= page_size
- 1;
3101 mmap_mask
= page_mask
;
3103 setvbuf(stdout
, (char *)0, _IOLBF
, 0); /* line buffered stdout */
3105 while ((ch
= getopt_long(argc
, argv
, optstring
, longopts
, NULL
)) != EOF
) {
3107 case LONG_OPT_CLUSTER
:
3108 cluster_name
= optarg
;
3114 simulatedopcount
= getnum(optarg
, &endp
);
3116 fprintf(stdout
, "Will begin at operation %lu\n",
3118 if (simulatedopcount
== 0)
3120 simulatedopcount
-= 1;
3123 closeprob
= getnum(optarg
, &endp
);
3126 "Chance of close/open is 1 in %d\n",
3141 holebdy
= getnum(optarg
, &endp
);
3146 journal_replay
= true;
3149 keep_on_success
= 1;
3153 int _num
= getnum(optarg
, &endp
);
3160 monitorstart
= getnum(optarg
, &endp
);
3161 if (monitorstart
< 0)
3163 if (!endp
|| *endp
++ != ':')
3165 monitorend
= getnum(endp
, &endp
);
3168 if (monitorend
== 0)
3169 monitorend
= -1; /* aka infinity */
3176 maxoplen
= getnum(optarg
, &endp
);
3181 progressinterval
= getnum(optarg
, &endp
);
3182 if (progressinterval
== 0)
3189 readbdy
= getnum(optarg
, &endp
);
3194 style
= getnum(optarg
, &endp
);
3195 if (style
< 0 || style
> 1)
3199 truncbdy
= getnum(optarg
, &endp
);
3204 writebdy
= getnum(optarg
, &endp
);
3218 debugstart
= getnum(optarg
, &endp
);
3223 fallocate_calls
= 0;
3225 #if defined(__FreeBSD__)
3227 prt("rbd-ggate mode enabled\n");
3228 ops
= &ggate_operations
;
3232 punch_hole_calls
= 0;
3234 #if defined(WITH_KRBD)
3236 prt("krbd mode enabled\n");
3237 ops
= &krbd_operations
;
3240 #if defined(__linux__)
3242 prt("rbd-nbd mode enabled\n");
3243 ops
= &nbd_operations
;
3250 numops
= getnum(optarg
, &endp
);
3258 strncpy(dirpath
, optarg
, sizeof(dirpath
)-1);
3259 dirpath
[sizeof(dirpath
)-1] = '\0';
3260 strncpy(goodfile
, dirpath
, sizeof(goodfile
)-1);
3261 goodfile
[sizeof(goodfile
)-1] = '\0';
3262 if (strlen(goodfile
) < sizeof(goodfile
)-2) {
3263 strcat(goodfile
, "/");
3265 prt("file name to long\n");
3268 strncpy(logfile
, dirpath
, sizeof(logfile
)-1);
3269 logfile
[sizeof(logfile
)-1] = '\0';
3270 if (strlen(logfile
) < sizeof(logfile
)-2) {
3271 strcat(logfile
, "/");
3273 prt("file path to long\n");
3280 fprintf(stdout
, "mapped reads DISABLED\n");
3283 seed
= getnum(optarg
, &endp
);
3285 seed
= std::random_device()() % 10000;
3287 fprintf(stdout
, "Seed set to %d\n", seed
);
3292 randomize_striping
= 0;
3297 fprintf(stdout
, "mapped writes DISABLED\n");
3301 o_direct
= O_DIRECT
;
3317 signal(SIGHUP
, cleanup
);
3318 signal(SIGINT
, cleanup
);
3319 signal(SIGPIPE
, cleanup
);
3320 signal(SIGALRM
, cleanup
);
3321 signal(SIGTERM
, cleanup
);
3322 signal(SIGXCPU
, cleanup
);
3323 signal(SIGXFSZ
, cleanup
);
3324 signal(SIGVTALRM
, cleanup
);
3325 signal(SIGUSR1
, cleanup
);
3326 signal(SIGUSR2
, cleanup
);
3329 random_generator
.seed(seed
);
3332 file_size
= maxfilelen
;
3335 ret
= create_image();
3337 prterrcode(iname
, ret
);
3340 ret
= ops
->open(iname
, &ctx
);
3342 simple_err("Error opening image", ret
);
3346 strcat(dirpath
, ".");
3347 strncat(goodfile
, iname
, 256);
3348 strcat (goodfile
, ".fsxgood");
3349 fsxgoodfd
= open(goodfile
, O_RDWR
|O_CREAT
|O_TRUNC
, 0666);
3350 if (fsxgoodfd
< 0) {
3354 strncat(logfile
, iname
, 256);
3355 strcat (logfile
, ".fsxlog");
3356 fsxlogf
= fopen(logfile
, "w");
3357 if (fsxlogf
== NULL
) {
3362 original_buf
= (char *) malloc(maxfilelen
);
3363 for (i
= 0; i
< (int)maxfilelen
; i
++)
3364 original_buf
[i
] = get_random() % 256;
3366 ret
= posix_memalign((void **)&good_buf
,
3367 std::max(writebdy
, (int)sizeof(void *)), maxfilelen
);
3370 prt("writebdy is not a suitable power of two\n");
3372 prterrcode("main: posix_memalign(good_buf)", -ret
);
3375 memset(good_buf
, '\0', maxfilelen
);
3377 ret
= posix_memalign((void **)&temp_buf
,
3378 std::max(readbdy
, (int)sizeof(void *)), maxfilelen
);
3381 prt("readbdy is not a suitable power of two\n");
3383 prterrcode("main: posix_memalign(temp_buf)", -ret
);
3386 memset(temp_buf
, '\0', maxfilelen
);
3388 if (lite
) { /* zero entire existing file */
3391 written
= ops
->write(&ctx
, 0, (size_t)maxfilelen
, good_buf
);
3392 if (written
!= (ssize_t
)maxfilelen
) {
3394 prterrcode(iname
, written
);
3395 warn("main: error on write");
3397 warn("main: short write, 0x%x bytes instead "
3408 while (numops
== -1 || numops
--)
3411 ret
= ops
->close(&ctx
);
3413 prterrcode("ops->close", ret
);
3417 if (journal_replay
) {
3418 char imagename
[1024];
3419 clone_imagename(imagename
, sizeof(imagename
), num_clones
);
3420 ret
= finalize_journal(ioctx
, imagename
, num_clones
, 0, 0, 0);
3422 report_failure(100);
3426 if (num_clones
> 0) {
3427 if (journal_replay
) {
3428 check_clone(num_clones
- 1, true);
3430 check_clone(num_clones
- 1, false);
3433 if (!keep_on_success
) {
3434 while (num_clones
>= 0) {
3435 static bool remove_snap
= false;
3437 if (journal_replay
) {
3438 char replayimagename
[1024];
3439 replay_imagename(replayimagename
,
3440 sizeof(replayimagename
),
3442 remove_image(ioctx
, replayimagename
,
3447 char clonename
[128];
3448 clone_imagename(clonename
, 128, num_clones
);
3449 remove_image(ioctx
, clonename
, remove_snap
,
3457 prt("All operations completed A-OK!\n");
3460 rados_ioctx_destroy(ioctx
);
3461 #if defined(WITH_KRBD)
3464 rados_shutdown(cluster
);