]> git.proxmox.com Git - ceph.git/blob - ceph/src/test/librbd/fsx.cc
Import ceph 15.2.8
[ceph.git] / ceph / src / test / librbd / fsx.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:8; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=8 smarttab
3 /*
4 * Copyright (C) 1991, NeXT Computer, Inc. All Rights Reserverd.
5 *
6 * File: fsx.cc
7 * Author: Avadis Tevanian, Jr.
8 *
9 * File system exerciser.
10 *
11 * Rewritten 8/98 by Conrad Minshall.
12 *
13 * Small changes to work under Linux -- davej.
14 *
15 * Checks for mmap last-page zero fill.
16 */
17
18 #include <sys/types.h>
19 #include <unistd.h>
20 #include <getopt.h>
21 #include <limits.h>
22 #include <strings.h>
23 #if defined(__FreeBSD__)
24 #include <sys/disk.h>
25 #endif
26 #include <sys/file.h>
27 #include <sys/stat.h>
28 #include <sys/mman.h>
29 #if defined(__linux__)
30 #include <linux/fs.h>
31 #endif
32 #include <sys/ioctl.h>
33 #ifdef HAVE_ERR_H
34 #include <err.h>
35 #endif
36 #include <signal.h>
37 #include <stddef.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <stdarg.h>
42 #include <assert.h>
43 #include <errno.h>
44 #include <math.h>
45 #include <fcntl.h>
46 #include <random>
47
48 #include "include/compat.h"
49 #include "include/intarith.h"
50 #if defined(WITH_KRBD)
51 #include "include/krbd.h"
52 #endif
53 #include "include/rados/librados.h"
54 #include "include/rados/librados.hpp"
55 #include "include/rbd/librbd.h"
56 #include "include/rbd/librbd.hpp"
57 #include "common/Cond.h"
58 #include "common/SubProcess.h"
59 #include "common/safe_io.h"
60 #include "journal/Journaler.h"
61 #include "journal/ReplayEntry.h"
62 #include "journal/ReplayHandler.h"
63 #include "journal/Settings.h"
64
65 #include <boost/scope_exit.hpp>
66
67 #define NUMPRINTCOLUMNS 32 /* # columns of data to print on each line */
68
69 /*
70 * A log entry is an operation and a bunch of arguments.
71 */
72
/* One logged operation: the operation number plus its integer arguments. */
struct log_entry {
	int	operation;
	int	args[3];
};

/* Number of entries held by the operation log. */
#define LOGSIZE	1000

struct log_entry	oplog[LOGSIZE];	/* the log */
int			logptr = 0;	/* current position in log */
int			logcount = 0;	/* total ops */
83
/*
 * The operation matrix is complex due to conditional execution of different
 * features. Hence when we come to deciding what operation to run, we need to
 * be careful in how we select the different operations. The active operations
 * are mapped to numbers as follows:
 *
 *			lite	!lite
 *	READ:		0	0
 *	WRITE:		1	1
 *	MAPREAD:	2	2
 *	MAPWRITE:	3	3
 *	TRUNCATE:	-	4
 *	FALLOCATE:	-	5
 *	PUNCH HOLE:	-	6
 *	WRITESAME:	-	7
 *	COMPAREANDWRITE: -	8
 *
 * When mapped read/writes are disabled, they are simply converted to normal
 * reads and writes. When fallocate/fpunch calls are disabled, they are
 * converted to OP_SKIPPED. Hence OP_SKIPPED needs to have a number higher than
 * the operation selection matrix, as does the OP_CLOSEOPEN which is an
 * operation modifier rather than an operation in itself.
 *
 * Because of the "lite" version, we also need to have different "maximum
 * operation" defines to allow the ops to be selected correctly based on the
 * mode being run.
 */
111
/* operations available in both lite and !lite mode */
#define OP_READ			0
#define OP_WRITE		1
#define OP_MAPREAD		2
#define OP_MAPWRITE		3
#define OP_MAX_LITE		4	/* one past the last lite operation */

/* operations only available in !lite mode */
#define OP_TRUNCATE		4
#define OP_FALLOCATE		5
#define OP_PUNCH_HOLE		6
#define OP_WRITESAME		7
#define OP_COMPARE_AND_WRITE	8
/* rbd-specific operations */
#define OP_CLONE		9
#define OP_FLATTEN		10
#define OP_MAX_FULL		11	/* one past the last !lite operation */

/* operation modifiers (numbered above the selection matrix) */
#define OP_CLOSEOPEN		100
#define OP_SKIPPED		101

/* Replace any system-provided PAGE_SIZE/PAGE_MASK with run-time values. */
#undef PAGE_SIZE
#define PAGE_SIZE	getpagesize()
#undef PAGE_MASK
#define PAGE_MASK	(PAGE_SIZE - 1)
138
139
/* Data buffers used by the exerciser. */
char	*original_buf;			/* a pointer to the original data */
char	*good_buf;			/* a pointer to the correct data */
char	*temp_buf;			/* a pointer to the current data */

char	dirpath[1024];

off_t		file_size = 0;
off_t		biggest = 0;
unsigned long	testcalls = 0;		/* calls to function "test" */

const char*	cluster_name = "ceph";	/* --cluster optional */
const char*	client_id = "admin";	/* --id optional */

/* Command-line tunables; the comment names the flag documented for each. */
unsigned long	simulatedopcount = 0;	/* -b flag */
int		closeprob = 0;		/* -c flag */
int		debug = 0;		/* -d flag */
unsigned long	debugstart = 0;		/* -D flag */
int		flush_enabled = 0;	/* -f flag */
int		deep_copy = 0;		/* -g flag */
int		holebdy = 1;		/* -h flag */
bool		journal_replay = false;	/* -j flag */
int		keep_on_success = 0;	/* -k flag */
int		do_fsync = 0;		/* -y flag */
unsigned long	maxfilelen = 256 * 1024;	/* -l flag */
int		sizechecks = 1;		/* -n flag disables them */
int		maxoplen = 64 * 1024;	/* -o flag */
int		quiet = 0;		/* -q flag */
unsigned long	progressinterval = 0;	/* -p flag */
int		readbdy = 1;		/* -r flag */
int		style = 0;		/* -s flag */
int		prealloc = 0;		/* -x flag */
int		truncbdy = 1;		/* -t flag */
int		writebdy = 1;		/* -w flag */
long		monitorstart = -1;	/* -m flag */
long		monitorend = -1;	/* -m flag */
int		lite = 0;		/* -L flag */
long		numops = -1;		/* -N flag */
int		randomoplen = 1;	/* -O flag disables it */
int		seed = 1;		/* -S flag */
int		mapped_writes = 0;	/* -W flag disables */
int		fallocate_calls = 0;	/* -F flag disables */
int		punch_hole_calls = 1;	/* -H flag disables */
int		clone_calls = 1;	/* -C flag disables */
int		randomize_striping = 1;	/* -U flag disables */
int		randomize_parent_overlap = 1;
int		mapped_reads = 0;	/* -R flag disables it */
int		fsxgoodfd = 0;
int		o_direct = 0;		/* -Z flag */

int		num_clones = 0;

int		page_size;
int		page_mask;
int		mmap_mask;

FILE *		fsxlogf = NULL;		/* when set, prt() also writes here */
int		badoff = -1;
int		closeopen = 0;
198
/*
 * Write "fsx: <formatted message>: <strerror(code)>" to stderr.  When fmt
 * is NULL the formatted message and its ": " separator are left out.
 */
void
vwarnc(int code, const char *fmt, va_list ap)
{
	fputs("fsx: ", stderr);
	if (fmt) {
		vfprintf(stderr, fmt, ap);
		fputs(": ", stderr);
	}
	fprintf(stderr, "%s\n", strerror(code));
}
208
209 void
210 warn(const char * fmt, ...) {
211 va_list ap;
212 va_start(ap, fmt);
213 vwarnc(errno, fmt, ap);
214 va_end(ap);
215 }
216
217 #define BUF_SIZE 1024
218
219 void
220 prt(const char *fmt, ...)
221 {
222 va_list args;
223 char buffer[BUF_SIZE];
224
225 va_start(args, fmt);
226 vsnprintf(buffer, BUF_SIZE, fmt, args);
227 va_end(args);
228 fprintf(stdout, "%s", buffer);
229 if (fsxlogf)
230 fprintf(fsxlogf, "%s", buffer);
231 }
232
233 void
234 prterr(const char *prefix)
235 {
236 prt("%s%s%s\n", prefix, prefix ? ": " : "", strerror(errno));
237 }
238
239 void
240 prterrcode(const char *prefix, int code)
241 {
242 prt("%s%s%s\n", prefix, prefix ? ": " : "", strerror(-code));
243 }
244
/*
 * Report "<msg>: <strerror(-err)>" on stderr; err is a negative errno-style
 * value.
 */
void
simple_err(const char *msg, int err)
{
	const char *reason = strerror(-err);

	fprintf(stderr, "%s: %s\n", msg, reason);
}
250
251 /*
252 * random
253 */
/* Shared pseudo-random engine; default-constructed here. */
std::mt19937 random_generator;

/* Return the next value produced by random_generator. */
uint_fast32_t
get_random(void)
{
	const uint_fast32_t value = random_generator();

	return value;
}
261
/* Declared ahead of the journal helpers below, which call them. */
int get_features(uint64_t *features);
void replay_imagename(char *buf, size_t len, int clones);
264
265 namespace {
266
/* Client id passed to the source-image Journaler instances in this file. */
static const std::string JOURNAL_CLIENT_ID = "fsx";
268
269 struct ReplayHandler : public journal::ReplayHandler {
270 journal::Journaler *journaler;
271 journal::Journaler *replay_journaler;
272 Context *on_finish;
273
274 ReplayHandler(journal::Journaler *journaler,
275 journal::Journaler *replay_journaler, Context *on_finish)
276 : journaler(journaler), replay_journaler(replay_journaler),
277 on_finish(on_finish) {
278 }
279
280 void handle_entries_available() override {
281 while (true) {
282 journal::ReplayEntry replay_entry;
283 if (!journaler->try_pop_front(&replay_entry)) {
284 return;
285 }
286
287 replay_journaler->append(0, replay_entry.get_data());
288 }
289 }
290
291 void handle_complete(int r) override {
292 on_finish->complete(r);
293 }
294 };
295
296 int get_image_id(librados::IoCtx &io_ctx, const char *image_name,
297 std::string *image_id) {
298 librbd::RBD rbd;
299 librbd::Image image;
300 int r = rbd.open(io_ctx, image, image_name);
301 if (r < 0) {
302 simple_err("failed to open image", r);
303 return r;
304 }
305
306 rbd_image_info_t info;
307 r = image.stat(info, sizeof(info));
308 if (r < 0) {
309 simple_err("failed to stat image", r);
310 return r;
311 }
312
313 *image_id = std::string(&info.block_name_prefix[strlen(RBD_DATA_PREFIX)]);
314 return 0;
315 }
316
317 int register_journal(rados_ioctx_t ioctx, const char *image_name) {
318 librados::IoCtx io_ctx;
319 librados::IoCtx::from_rados_ioctx_t(ioctx, io_ctx);
320
321 std::string image_id;
322 int r = get_image_id(io_ctx, image_name, &image_id);
323 if (r < 0) {
324 return r;
325 }
326
327 journal::Journaler journaler(io_ctx, image_id, JOURNAL_CLIENT_ID, {},
328 nullptr);
329 r = journaler.register_client(bufferlist());
330 if (r < 0) {
331 simple_err("failed to register journal client", r);
332 return r;
333 }
334 return 0;
335 }
336
337 int unregister_journal(rados_ioctx_t ioctx, const char *image_name) {
338 librados::IoCtx io_ctx;
339 librados::IoCtx::from_rados_ioctx_t(ioctx, io_ctx);
340
341 std::string image_id;
342 int r = get_image_id(io_ctx, image_name, &image_id);
343 if (r < 0) {
344 return r;
345 }
346
347 journal::Journaler journaler(io_ctx, image_id, JOURNAL_CLIENT_ID, {},
348 nullptr);
349 r = journaler.unregister_client();
350 if (r < 0) {
351 simple_err("failed to unregister journal client", r);
352 return r;
353 }
354 return 0;
355 }
356
357 int create_replay_image(rados_ioctx_t ioctx, int order,
358 uint64_t stripe_unit, int stripe_count,
359 const char *replay_image_name,
360 const char *last_replay_image_name) {
361 librados::IoCtx io_ctx;
362 librados::IoCtx::from_rados_ioctx_t(ioctx, io_ctx);
363
364 uint64_t features;
365 int r = get_features(&features);
366 if (r < 0) {
367 return r;
368 }
369
370 librbd::RBD rbd;
371 if (last_replay_image_name == nullptr) {
372 r = rbd.create2(io_ctx, replay_image_name, 0, features, &order);
373 } else {
374 r = rbd.clone2(io_ctx, last_replay_image_name, "snap",
375 io_ctx, replay_image_name, features, &order,
376 stripe_unit, stripe_count);
377 }
378
379 if (r < 0) {
380 simple_err("failed to create replay image", r);
381 return r;
382 }
383
384 return 0;
385 }
386
387 int replay_journal(rados_ioctx_t ioctx, const char *image_name,
388 const char *replay_image_name) {
389 librados::IoCtx io_ctx;
390 librados::IoCtx::from_rados_ioctx_t(ioctx, io_ctx);
391
392 std::string image_id;
393 int r = get_image_id(io_ctx, image_name, &image_id);
394 if (r < 0) {
395 return r;
396 }
397
398 std::string replay_image_id;
399 r = get_image_id(io_ctx, replay_image_name, &replay_image_id);
400 if (r < 0) {
401 return r;
402 }
403
404 journal::Journaler journaler(io_ctx, image_id, JOURNAL_CLIENT_ID, {},
405 nullptr);
406 C_SaferCond init_ctx;
407 journaler.init(&init_ctx);
408 BOOST_SCOPE_EXIT_ALL( (&journaler) ) {
409 journaler.shut_down();
410 };
411
412 r = init_ctx.wait();
413 if (r < 0) {
414 simple_err("failed to initialize journal", r);
415 return r;
416 }
417
418 journal::Journaler replay_journaler(io_ctx, replay_image_id, "", {},
419 nullptr);
420
421 C_SaferCond replay_init_ctx;
422 replay_journaler.init(&replay_init_ctx);
423 BOOST_SCOPE_EXIT_ALL( (&replay_journaler) ) {
424 replay_journaler.shut_down();
425 };
426
427 r = replay_init_ctx.wait();
428 if (r < 0) {
429 simple_err("failed to initialize replay journal", r);
430 return r;
431 }
432
433 replay_journaler.start_append(0);
434
435 C_SaferCond replay_ctx;
436 ReplayHandler replay_handler(&journaler, &replay_journaler,
437 &replay_ctx);
438
439 // copy journal events from source image to replay image
440 journaler.start_replay(&replay_handler);
441 r = replay_ctx.wait();
442
443 journaler.stop_replay();
444
445 C_SaferCond stop_ctx;
446 replay_journaler.stop_append(&stop_ctx);
447 int stop_r = stop_ctx.wait();
448 if (r == 0 && stop_r < 0) {
449 r = stop_r;
450 }
451
452 if (r < 0) {
453 simple_err("failed to replay journal", r);
454 return r;
455 }
456
457 librbd::RBD rbd;
458 librbd::Image image;
459 r = rbd.open(io_ctx, image, replay_image_name);
460 if (r < 0) {
461 simple_err("failed to open replay image", r);
462 return r;
463 }
464
465 // perform an IO op to initiate the journal replay
466 bufferlist bl;
467 r = static_cast<ssize_t>(image.write(0, 0, bl));
468 if (r < 0) {
469 simple_err("failed to write to replay image", r);
470 return r;
471 }
472 return 0;
473 }
474
475 int finalize_journal(rados_ioctx_t ioctx, const char *imagename, int clones,
476 int order, uint64_t stripe_unit, int stripe_count) {
477 char replayimagename[1024];
478 replay_imagename(replayimagename, sizeof(replayimagename), clones);
479
480 char lastreplayimagename[1024];
481 if (clones > 0) {
482 replay_imagename(lastreplayimagename,
483 sizeof(lastreplayimagename), clones - 1);
484 }
485
486 int ret = create_replay_image(ioctx, order, stripe_unit,
487 stripe_count, replayimagename,
488 clones > 0 ? lastreplayimagename :
489 nullptr);
490 if (ret < 0) {
491 exit(EXIT_FAILURE);
492 }
493
494 ret = replay_journal(ioctx, imagename, replayimagename);
495 if (ret < 0) {
496 exit(EXIT_FAILURE);
497 }
498 return 0;
499 }
500
501 } // anonymous namespace
502
503 /*
504 * rbd
505 */
506
507 struct rbd_ctx {
508 const char *name; /* image name */
509 rbd_image_t image; /* image handle */
510 const char *krbd_name; /* image /dev/rbd<id> name */ /* reused for nbd test */
511 int krbd_fd; /* image /dev/rbd<id> fd */ /* reused for nbd test */
512 };
513
514 #define RBD_CTX_INIT (struct rbd_ctx) { NULL, NULL, NULL, -1}
515
/*
 * Table of back-end entry points.  Each back end (librbd, krbd, nbd, ...)
 * provides one instance; a slot may be NULL when the back end does not
 * implement that operation.
 */
struct rbd_operations {
	/* image lifetime */
	int (*open)(const char *name, struct rbd_ctx *ctx);
	int (*close)(struct rbd_ctx *ctx);

	/* data path */
	ssize_t (*read)(struct rbd_ctx *ctx, uint64_t off, size_t len, char *buf);
	ssize_t (*write)(struct rbd_ctx *ctx, uint64_t off, size_t len, const char *buf);
	int (*flush)(struct rbd_ctx *ctx);
	int (*discard)(struct rbd_ctx *ctx, uint64_t off, uint64_t len);

	/* size management */
	int (*get_size)(struct rbd_ctx *ctx, uint64_t *size);
	int (*resize)(struct rbd_ctx *ctx, uint64_t size);

	/* rbd-specific operations */
	int (*clone)(struct rbd_ctx *ctx, const char *src_snapname,
		     const char *dst_imagename, int *order, int stripe_unit,
		     int stripe_count);
	int (*flatten)(struct rbd_ctx *ctx);

	/* extended write operations */
	ssize_t (*writesame)(struct rbd_ctx *ctx, uint64_t off, size_t len,
			     const char *buf, size_t data_len);
	ssize_t (*compare_and_write)(struct rbd_ctx *ctx, uint64_t off, size_t len,
				     const char *cmp_buf, const char *buf);
};
534
535 char *pool; /* name of the pool our test image is in */
536 char *iname; /* name of our test image */
537 rados_t cluster; /* handle for our test cluster */
538 rados_ioctx_t ioctx; /* handle for our test pool */
539 #if defined(WITH_KRBD)
540 struct krbd_ctx *krbd; /* handle for libkrbd */
541 #endif
542 bool skip_partial_discard; /* rbd_skip_partial_discard config value*/
543
544 int get_features(uint64_t* features) {
545 char buf[1024];
546 int r = rados_conf_get(cluster, "rbd_default_features", buf,
547 sizeof(buf));
548 if (r < 0) {
549 simple_err("Could not get rbd_default_features value", r);
550 return r;
551 }
552
553 *features = strtol(buf, NULL, 0);
554
555 if (clone_calls) {
556 *features |= RBD_FEATURE_LAYERING;
557 }
558 if (journal_replay) {
559 *features |= (RBD_FEATURE_EXCLUSIVE_LOCK |
560 RBD_FEATURE_JOURNALING);
561 }
562 return 0;
563 }
564
565 /*
566 * librbd/krbd rbd_operations handlers. Given the rest of fsx.c, no
567 * attempt to do error handling is made in these handlers.
568 */
569
570 int
571 __librbd_open(const char *name, struct rbd_ctx *ctx)
572 {
573 rbd_image_t image;
574 int ret;
575
576 ceph_assert(!ctx->name && !ctx->image &&
577 !ctx->krbd_name && ctx->krbd_fd < 0);
578
579 ret = rbd_open(ioctx, name, &image, NULL);
580 if (ret < 0) {
581 prt("rbd_open(%s) failed\n", name);
582 return ret;
583 }
584
585 ctx->name = strdup(name);
586 ctx->image = image;
587 ctx->krbd_name = NULL;
588 ctx->krbd_fd = -1;
589
590 return 0;
591 }
592
593 int
594 librbd_open(const char *name, struct rbd_ctx *ctx)
595 {
596 return __librbd_open(name, ctx);
597 }
598
599 int
600 __librbd_close(struct rbd_ctx *ctx)
601 {
602 int ret;
603
604 ceph_assert(ctx->name && ctx->image);
605
606 ret = rbd_close(ctx->image);
607 if (ret < 0) {
608 prt("rbd_close(%s) failed\n", ctx->name);
609 return ret;
610 }
611
612 free((void *)ctx->name);
613
614 ctx->name = NULL;
615 ctx->image = NULL;
616
617 return 0;
618 }
619
620 int
621 librbd_close(struct rbd_ctx *ctx)
622 {
623 return __librbd_close(ctx);
624 }
625
626 int
627 librbd_verify_object_map(struct rbd_ctx *ctx)
628 {
629 int n;
630 uint64_t flags;
631 n = rbd_get_flags(ctx->image, &flags);
632 if (n < 0) {
633 prt("rbd_get_flags() failed\n");
634 return n;
635 }
636
637 if ((flags & RBD_FLAG_OBJECT_MAP_INVALID) != 0) {
638 prt("rbd_get_flags() indicates object map is invalid\n");
639 return -EINVAL;
640 }
641 return 0;
642 }
643
644 ssize_t
645 librbd_read(struct rbd_ctx *ctx, uint64_t off, size_t len, char *buf)
646 {
647 ssize_t n;
648
649 n = rbd_read(ctx->image, off, len, buf);
650 if (n < 0)
651 prt("rbd_read(%llu, %zu) failed\n", off, len);
652
653 return n;
654 }
655
656 ssize_t
657 librbd_write(struct rbd_ctx *ctx, uint64_t off, size_t len, const char *buf)
658 {
659 ssize_t n;
660 int ret;
661
662 n = rbd_write(ctx->image, off, len, buf);
663 if (n < 0) {
664 prt("rbd_write(%llu, %zu) failed\n", off, len);
665 return n;
666 }
667
668 ret = librbd_verify_object_map(ctx);
669 if (ret < 0) {
670 return ret;
671 }
672 return n;
673 }
674
675 int
676 librbd_flush(struct rbd_ctx *ctx)
677 {
678 int ret;
679
680 ret = rbd_flush(ctx->image);
681 if (ret < 0) {
682 prt("rbd_flush failed\n");
683 return ret;
684 }
685
686 return librbd_verify_object_map(ctx);
687 }
688
689 int
690 librbd_discard(struct rbd_ctx *ctx, uint64_t off, uint64_t len)
691 {
692 int ret;
693
694 ret = rbd_discard(ctx->image, off, len);
695 if (ret < 0) {
696 prt("rbd_discard(%llu, %llu) failed\n", off, len);
697 return ret;
698 }
699
700 return librbd_verify_object_map(ctx);
701 }
702
703 ssize_t
704 librbd_writesame(struct rbd_ctx *ctx, uint64_t off, size_t len,
705 const char *buf, size_t data_len)
706 {
707 ssize_t n;
708 int ret;
709
710 n = rbd_writesame(ctx->image, off, len, buf, data_len, 0);
711 if (n < 0) {
712 prt("rbd_writesame(%llu, %zu) failed\n", off, len);
713 return n;
714 }
715
716 ret = librbd_verify_object_map(ctx);
717 if (ret < 0) {
718 return ret;
719 }
720 return n;
721 }
722
723 ssize_t
724 librbd_compare_and_write(struct rbd_ctx *ctx, uint64_t off, size_t len,
725 const char *cmp_buf, const char *buf)
726 {
727 ssize_t n;
728 int ret;
729 uint64_t mismatch_off = 0;
730
731 n = rbd_compare_and_write(ctx->image, off, len, cmp_buf, buf, &mismatch_off, 0);
732 if (n == -EINVAL) {
733 return n;
734 } else if (n < 0) {
735 prt("rbd_compare_and_write mismatch(%llu, %zu, %llu) failed\n",
736 off, len, mismatch_off);
737 return n;
738 }
739
740 ret = librbd_verify_object_map(ctx);
741 if (ret < 0) {
742 return ret;
743 }
744 return n;
745
746 }
747
748 int
749 librbd_get_size(struct rbd_ctx *ctx, uint64_t *size)
750 {
751 int ret;
752
753 ret = rbd_get_size(ctx->image, size);
754 if (ret < 0) {
755 prt("rbd_get_size failed\n");
756 return ret;
757 }
758
759 return 0;
760 }
761
762 int
763 __librbd_resize(struct rbd_ctx *ctx, uint64_t size)
764 {
765 int ret;
766
767 ret = rbd_resize(ctx->image, size);
768 if (ret < 0) {
769 prt("rbd_resize(%llu) failed\n", size);
770 return ret;
771 }
772
773 return librbd_verify_object_map(ctx);
774 }
775
776 int
777 librbd_resize(struct rbd_ctx *ctx, uint64_t size)
778 {
779 return __librbd_resize(ctx, size);
780 }
781
782 int
783 __librbd_deep_copy(struct rbd_ctx *ctx, const char *src_snapname,
784 const char *dst_imagename, uint64_t features, int *order,
785 int stripe_unit, int stripe_count) {
786 int ret;
787
788 rbd_image_options_t opts;
789 rbd_image_options_create(&opts);
790 BOOST_SCOPE_EXIT_ALL( (&opts) ) {
791 rbd_image_options_destroy(opts);
792 };
793 ret = rbd_image_options_set_uint64(opts, RBD_IMAGE_OPTION_FEATURES,
794 features);
795 ceph_assert(ret == 0);
796 ret = rbd_image_options_set_uint64(opts, RBD_IMAGE_OPTION_ORDER,
797 *order);
798 ceph_assert(ret == 0);
799 ret = rbd_image_options_set_uint64(opts, RBD_IMAGE_OPTION_STRIPE_UNIT,
800 stripe_unit);
801 ceph_assert(ret == 0);
802 ret = rbd_image_options_set_uint64(opts, RBD_IMAGE_OPTION_STRIPE_COUNT,
803 stripe_count);
804 ceph_assert(ret == 0);
805
806 ret = rbd_snap_set(ctx->image, src_snapname);
807 if (ret < 0) {
808 prt("rbd_snap_set(%s@%s) failed\n", ctx->name, src_snapname);
809 return ret;
810 }
811
812 ret = rbd_deep_copy(ctx->image, ioctx, dst_imagename, opts);
813 if (ret < 0) {
814 prt("rbd_deep_copy(%s@%s -> %s) failed\n",
815 ctx->name, src_snapname, dst_imagename);
816 return ret;
817 }
818
819 ret = rbd_snap_set(ctx->image, "");
820 if (ret < 0) {
821 prt("rbd_snap_set(%s@) failed\n", ctx->name);
822 return ret;
823 }
824
825 rbd_image_t image;
826 ret = rbd_open(ioctx, dst_imagename, &image, nullptr);
827 if (ret < 0) {
828 prt("rbd_open(%s) failed\n", dst_imagename);
829 return ret;
830 }
831
832 ret = rbd_snap_unprotect(image, src_snapname);
833 if (ret < 0) {
834 prt("rbd_snap_unprotect(%s@%s) failed\n", dst_imagename,
835 src_snapname);
836 return ret;
837 }
838
839 ret = rbd_snap_remove(image, src_snapname);
840 if (ret < 0) {
841 prt("rbd_snap_remove(%s@%s) failed\n", dst_imagename,
842 src_snapname);
843 return ret;
844 }
845
846 ret = rbd_close(image);
847 if (ret < 0) {
848 prt("rbd_close(%s) failed\n", dst_imagename);
849 return ret;
850 }
851
852 return 0;
853 }
854
855 int
856 __librbd_clone(struct rbd_ctx *ctx, const char *src_snapname,
857 const char *dst_imagename, int *order, int stripe_unit,
858 int stripe_count)
859 {
860 int ret;
861
862 ret = rbd_snap_create(ctx->image, src_snapname);
863 if (ret < 0) {
864 prt("rbd_snap_create(%s@%s) failed\n", ctx->name,
865 src_snapname);
866 return ret;
867 }
868
869 ret = rbd_snap_protect(ctx->image, src_snapname);
870 if (ret < 0) {
871 prt("rbd_snap_protect(%s@%s) failed\n", ctx->name,
872 src_snapname);
873 return ret;
874 }
875
876 uint64_t features;
877 ret = get_features(&features);
878 if (ret < 0) {
879 return ret;
880 }
881
882 if (deep_copy) {
883 ret = __librbd_deep_copy(ctx, src_snapname, dst_imagename, features,
884 order, stripe_unit, stripe_count);
885 if (ret < 0) {
886 prt("deep_copy(%s@%s -> %s) failed\n", ctx->name,
887 src_snapname, dst_imagename);
888 return ret;
889 }
890 } else {
891 ret = rbd_clone2(ioctx, ctx->name, src_snapname, ioctx,
892 dst_imagename, features, order,
893 stripe_unit, stripe_count);
894 if (ret < 0) {
895 prt("rbd_clone2(%s@%s -> %s) failed\n", ctx->name,
896 src_snapname, dst_imagename);
897 return ret;
898 }
899 }
900
901 return 0;
902 }
903
904 int
905 librbd_clone(struct rbd_ctx *ctx, const char *src_snapname,
906 const char *dst_imagename, int *order, int stripe_unit,
907 int stripe_count)
908 {
909 return __librbd_clone(ctx, src_snapname, dst_imagename, order,
910 stripe_unit, stripe_count);
911 }
912
913 int
914 __librbd_flatten(struct rbd_ctx *ctx)
915 {
916 int ret;
917
918 ret = rbd_flatten(ctx->image);
919 if (ret < 0) {
920 prt("rbd_flatten failed\n");
921 return ret;
922 }
923
924 return librbd_verify_object_map(ctx);
925 }
926
927 int
928 librbd_flatten(struct rbd_ctx *ctx)
929 {
930 return __librbd_flatten(ctx);
931 }
932
933 const struct rbd_operations librbd_operations = {
934 librbd_open,
935 librbd_close,
936 librbd_read,
937 librbd_write,
938 librbd_flush,
939 librbd_discard,
940 librbd_get_size,
941 librbd_resize,
942 librbd_clone,
943 librbd_flatten,
944 librbd_writesame,
945 librbd_compare_and_write,
946 };
947
948 #if defined(WITH_KRBD)
949 int
950 krbd_open(const char *name, struct rbd_ctx *ctx)
951 {
952 char buf[1024];
953 char *devnode;
954 int fd;
955 int ret;
956
957 ret = __librbd_open(name, ctx);
958 if (ret < 0)
959 return ret;
960
961 ret = rados_conf_get(cluster, "rbd_default_map_options", buf,
962 sizeof(buf));
963 if (ret < 0) {
964 simple_err("Could not get rbd_default_map_options value", ret);
965 return ret;
966 }
967
968 ret = krbd_map(krbd, pool, "", name, "", buf, &devnode);
969 if (ret < 0) {
970 prt("krbd_map(%s) failed\n", name);
971 return ret;
972 }
973
974 fd = open(devnode, O_RDWR | o_direct);
975 if (fd < 0) {
976 ret = -errno;
977 prt("open(%s) failed\n", devnode);
978 return ret;
979 }
980
981 ctx->krbd_name = devnode;
982 ctx->krbd_fd = fd;
983
984 return 0;
985 }
986
987 int
988 krbd_close(struct rbd_ctx *ctx)
989 {
990 int ret;
991
992 ceph_assert(ctx->krbd_name && ctx->krbd_fd >= 0);
993
994 if (close(ctx->krbd_fd) < 0) {
995 ret = -errno;
996 prt("close(%s) failed\n", ctx->krbd_name);
997 return ret;
998 }
999
1000 ret = krbd_unmap(krbd, ctx->krbd_name, "");
1001 if (ret < 0) {
1002 prt("krbd_unmap(%s) failed\n", ctx->krbd_name);
1003 return ret;
1004 }
1005
1006 free((void *)ctx->krbd_name);
1007
1008 ctx->krbd_name = NULL;
1009 ctx->krbd_fd = -1;
1010
1011 return __librbd_close(ctx);
1012 }
1013 #endif // WITH_KRBD
1014
1015 #if defined(__linux__)
1016 ssize_t
1017 krbd_read(struct rbd_ctx *ctx, uint64_t off, size_t len, char *buf)
1018 {
1019 ssize_t n;
1020
1021 n = pread(ctx->krbd_fd, buf, len, off);
1022 if (n < 0) {
1023 n = -errno;
1024 prt("pread(%llu, %zu) failed\n", off, len);
1025 return n;
1026 }
1027
1028 return n;
1029 }
1030
1031 ssize_t
1032 krbd_write(struct rbd_ctx *ctx, uint64_t off, size_t len, const char *buf)
1033 {
1034 ssize_t n;
1035
1036 n = pwrite(ctx->krbd_fd, buf, len, off);
1037 if (n < 0) {
1038 n = -errno;
1039 prt("pwrite(%llu, %zu) failed\n", off, len);
1040 return n;
1041 }
1042
1043 return n;
1044 }
1045
1046 int
1047 __krbd_flush(struct rbd_ctx *ctx, bool invalidate)
1048 {
1049 int ret;
1050
1051 if (o_direct)
1052 return 0;
1053
1054 /*
1055 * BLKFLSBUF will sync the filesystem on top of the device (we
1056 * don't care about that here, since we write directly to it),
1057 * write out any dirty buffers and invalidate the buffer cache.
1058 * It won't do a hardware cache flush.
1059 *
1060 * fsync() will write out any dirty buffers and do a hardware
1061 * cache flush (which we don't care about either, because for
1062 * krbd it's a noop). It won't try to empty the buffer cache
1063 * nor poke the filesystem before writing out.
1064 *
1065 * Given that, for our purposes, fsync is a flush, while
1066 * BLKFLSBUF is a flush+invalidate.
1067 */
1068 if (invalidate)
1069 ret = ioctl(ctx->krbd_fd, BLKFLSBUF, NULL);
1070 else
1071 ret = fsync(ctx->krbd_fd);
1072 if (ret < 0) {
1073 ret = -errno;
1074 prt("%s failed\n", invalidate ? "BLKFLSBUF" : "fsync");
1075 return ret;
1076 }
1077
1078 return 0;
1079 }
1080
1081 int
1082 krbd_flush(struct rbd_ctx *ctx)
1083 {
1084 return __krbd_flush(ctx, false);
1085 }
1086
1087 int
1088 krbd_discard(struct rbd_ctx *ctx, uint64_t off, uint64_t len)
1089 {
1090 uint64_t range[2] = { off, len };
1091 int ret;
1092
1093 /*
1094 * BLKZEROOUT goes straight to disk and doesn't do anything
1095 * about dirty buffers. This means we need to flush so that
1096 *
1097 * write 0..3M
1098 * discard 1..2M
1099 *
1100 * results in "data 0000 data" rather than "data data data" on
1101 * disk and invalidate so that
1102 *
1103 * discard 1..2M
1104 * read 0..3M
1105 *
1106 * returns "data 0000 data" rather than "data data data" in
1107 * case 1..2M was cached.
1108 *
1109 * Note: These cache coherency issues are supposed to be fixed
1110 * in recent kernels.
1111 */
1112 ret = __krbd_flush(ctx, true);
1113 if (ret < 0)
1114 return ret;
1115
1116 /*
1117 * off and len must be 512-byte aligned, otherwise BLKZEROOUT
1118 * will fail with -EINVAL. This means that -K (enable krbd
1119 * mode) requires -h 512 or similar.
1120 */
1121 if (ioctl(ctx->krbd_fd, BLKZEROOUT, &range) < 0) {
1122 ret = -errno;
1123 prt("BLKZEROOUT(%llu, %llu) failed\n", off, len);
1124 return ret;
1125 }
1126
1127 return 0;
1128 }
1129
1130 int
1131 krbd_get_size(struct rbd_ctx *ctx, uint64_t *size)
1132 {
1133 uint64_t bytes;
1134
1135 if (ioctl(ctx->krbd_fd, BLKGETSIZE64, &bytes) < 0) {
1136 int ret = -errno;
1137 prt("BLKGETSIZE64 failed\n");
1138 return ret;
1139 }
1140
1141 *size = bytes;
1142
1143 return 0;
1144 }
1145
1146 int
1147 krbd_resize(struct rbd_ctx *ctx, uint64_t size)
1148 {
1149 int ret;
1150
1151 ceph_assert(size % truncbdy == 0);
1152
1153 /*
1154 * When krbd detects a size change, it calls revalidate_disk(),
1155 * which ends up calling invalidate_bdev(), which invalidates
1156 * clean pages and does nothing about dirty pages beyond the
1157 * new size. The preceding cache flush makes sure those pages
1158 * are invalidated, which is what we need on shrink so that
1159 *
1160 * write 0..1M
1161 * resize 0
1162 * resize 2M
1163 * read 0..2M
1164 *
1165 * returns "0000 0000" rather than "data 0000".
1166 */
1167 ret = __krbd_flush(ctx, false);
1168 if (ret < 0)
1169 return ret;
1170
1171 return __librbd_resize(ctx, size);
1172 }
1173
1174 int
1175 krbd_clone(struct rbd_ctx *ctx, const char *src_snapname,
1176 const char *dst_imagename, int *order, int stripe_unit,
1177 int stripe_count)
1178 {
1179 int ret;
1180
1181 ret = __krbd_flush(ctx, false);
1182 if (ret < 0)
1183 return ret;
1184
1185 return __librbd_clone(ctx, src_snapname, dst_imagename, order,
1186 stripe_unit, stripe_count);
1187 }
1188
1189 int
1190 krbd_flatten(struct rbd_ctx *ctx)
1191 {
1192 int ret;
1193
1194 ret = __krbd_flush(ctx, false);
1195 if (ret < 0)
1196 return ret;
1197
1198 return __librbd_flatten(ctx);
1199 }
1200 #endif // __linux__
1201
1202 #if defined(WITH_KRBD)
1203 const struct rbd_operations krbd_operations = {
1204 krbd_open,
1205 krbd_close,
1206 krbd_read,
1207 krbd_write,
1208 krbd_flush,
1209 krbd_discard,
1210 krbd_get_size,
1211 krbd_resize,
1212 krbd_clone,
1213 krbd_flatten,
1214 NULL,
1215 };
1216 #endif // WITH_KRBD
1217
1218 #if defined(__linux__)
1219 int
1220 nbd_open(const char *name, struct rbd_ctx *ctx)
1221 {
1222 int r;
1223 int fd;
1224 char dev[4096];
1225 char *devnode;
1226
1227 SubProcess process("rbd-nbd", SubProcess::KEEP, SubProcess::PIPE,
1228 SubProcess::KEEP);
1229 process.add_cmd_arg("map");
1230 process.add_cmd_arg("--timeout=600");
1231 std::string img;
1232 img.append(pool);
1233 img.append("/");
1234 img.append(name);
1235 process.add_cmd_arg(img.c_str());
1236
1237 r = __librbd_open(name, ctx);
1238 if (r < 0)
1239 return r;
1240
1241 r = process.spawn();
1242 if (r < 0) {
1243 prt("nbd_open failed to run rbd-nbd error: %s\n", process.err().c_str());
1244 return r;
1245 }
1246 r = safe_read(process.get_stdout(), dev, sizeof(dev));
1247 if (r < 0) {
1248 prt("nbd_open failed to get nbd device path\n");
1249 return r;
1250 }
1251 for (int i = 0; i < r; ++i)
1252 if (dev[i] == 10 || dev[i] == 13)
1253 dev[i] = 0;
1254 dev[r] = 0;
1255 r = process.join();
1256 if (r) {
1257 prt("rbd-nbd failed with error: %s", process.err().c_str());
1258 return -EINVAL;
1259 }
1260
1261 devnode = strdup(dev);
1262 if (!devnode)
1263 return -ENOMEM;
1264
1265 fd = open(devnode, O_RDWR | o_direct);
1266 if (fd < 0) {
1267 r = -errno;
1268 prt("open(%s) failed\n", devnode);
1269 return r;
1270 }
1271
1272 ctx->krbd_name = devnode;
1273 ctx->krbd_fd = fd;
1274
1275 return 0;
1276 }
1277
1278 int
1279 nbd_close(struct rbd_ctx *ctx)
1280 {
1281 int r;
1282
1283 ceph_assert(ctx->krbd_name && ctx->krbd_fd >= 0);
1284
1285 if (close(ctx->krbd_fd) < 0) {
1286 r = -errno;
1287 prt("close(%s) failed\n", ctx->krbd_name);
1288 return r;
1289 }
1290
1291 SubProcess process("rbd-nbd");
1292 process.add_cmd_arg("unmap");
1293 process.add_cmd_arg(ctx->krbd_name);
1294
1295 r = process.spawn();
1296 if (r < 0) {
1297 prt("nbd_close failed to run rbd-nbd error: %s\n", process.err().c_str());
1298 return r;
1299 }
1300 r = process.join();
1301 if (r) {
1302 prt("rbd-nbd failed with error: %d", process.err().c_str());
1303 return -EINVAL;
1304 }
1305
1306 free((void *)ctx->krbd_name);
1307
1308 ctx->krbd_name = NULL;
1309 ctx->krbd_fd = -1;
1310
1311 return __librbd_close(ctx);
1312 }
1313
1314 int
1315 nbd_clone(struct rbd_ctx *ctx, const char *src_snapname,
1316 const char *dst_imagename, int *order, int stripe_unit,
1317 int stripe_count)
1318 {
1319 int ret;
1320
1321 ret = __krbd_flush(ctx, false);
1322 if (ret < 0)
1323 return ret;
1324
1325 return __librbd_clone(ctx, src_snapname, dst_imagename, order,
1326 stripe_unit, stripe_count);
1327 }
1328
1329 const struct rbd_operations nbd_operations = {
1330 nbd_open,
1331 nbd_close,
1332 krbd_read,
1333 krbd_write,
1334 krbd_flush,
1335 krbd_discard,
1336 krbd_get_size,
1337 krbd_resize,
1338 nbd_clone,
1339 krbd_flatten,
1340 NULL,
1341 };
1342 #endif // __linux__
1343
1344 #if defined(__FreeBSD__)
1345 int
1346 ggate_open(const char *name, struct rbd_ctx *ctx)
1347 {
1348 int r;
1349 int fd;
1350 char dev[4096];
1351 char *devnode;
1352
1353 SubProcess process("rbd-ggate", SubProcess::KEEP, SubProcess::PIPE,
1354 SubProcess::KEEP);
1355 process.add_cmd_arg("map");
1356 std::string img;
1357 img.append(pool);
1358 img.append("/");
1359 img.append(name);
1360 process.add_cmd_arg(img.c_str());
1361
1362 r = __librbd_open(name, ctx);
1363 if (r < 0) {
1364 return r;
1365 }
1366
1367 r = process.spawn();
1368 if (r < 0) {
1369 prt("ggate_open failed to run rbd-ggate: %s\n",
1370 process.err().c_str());
1371 return r;
1372 }
1373 r = safe_read(process.get_stdout(), dev, sizeof(dev));
1374 if (r < 0) {
1375 prt("ggate_open failed to get ggate device path\n");
1376 return r;
1377 }
1378 for (int i = 0; i < r; ++i) {
1379 if (dev[i] == '\r' || dev[i] == '\n') {
1380 dev[i] = 0;
1381 }
1382 }
1383 dev[r] = 0;
1384 r = process.join();
1385 if (r) {
1386 prt("rbd-ggate failed with error: %s", process.err().c_str());
1387 return -EINVAL;
1388 }
1389
1390 devnode = strdup(dev);
1391 if (!devnode) {
1392 return -ENOMEM;
1393 }
1394
1395 for (int i = 0; i < 100; i++) {
1396 fd = open(devnode, O_RDWR | o_direct);
1397 if (fd >= 0 || errno != ENOENT) {
1398 break;
1399 }
1400 usleep(100000);
1401 }
1402 if (fd < 0) {
1403 r = -errno;
1404 prt("open(%s) failed\n", devnode);
1405 return r;
1406 }
1407
1408 ctx->krbd_name = devnode;
1409 ctx->krbd_fd = fd;
1410
1411 return 0;
1412 }
1413
1414 int
1415 ggate_close(struct rbd_ctx *ctx)
1416 {
1417 int r;
1418
1419 ceph_assert(ctx->krbd_name && ctx->krbd_fd >= 0);
1420
1421 if (close(ctx->krbd_fd) < 0) {
1422 r = -errno;
1423 prt("close(%s) failed\n", ctx->krbd_name);
1424 return r;
1425 }
1426
1427 SubProcess process("rbd-ggate");
1428 process.add_cmd_arg("unmap");
1429 process.add_cmd_arg(ctx->krbd_name);
1430
1431 r = process.spawn();
1432 if (r < 0) {
1433 prt("ggate_close failed to run rbd-nbd: %s\n",
1434 process.err().c_str());
1435 return r;
1436 }
1437 r = process.join();
1438 if (r) {
1439 prt("rbd-ggate failed with error: %d", process.err().c_str());
1440 return -EINVAL;
1441 }
1442
1443 free((void *)ctx->krbd_name);
1444
1445 ctx->krbd_name = NULL;
1446 ctx->krbd_fd = -1;
1447
1448 return __librbd_close(ctx);
1449 }
1450
1451 ssize_t
1452 ggate_read(struct rbd_ctx *ctx, uint64_t off, size_t len, char *buf)
1453 {
1454 ssize_t n;
1455
1456 n = pread(ctx->krbd_fd, buf, len, off);
1457 if (n < 0) {
1458 n = -errno;
1459 prt("pread(%llu, %zu) failed\n", off, len);
1460 return n;
1461 }
1462
1463 return n;
1464 }
1465
1466 ssize_t
1467 ggate_write(struct rbd_ctx *ctx, uint64_t off, size_t len, const char *buf)
1468 {
1469 ssize_t n;
1470
1471 n = pwrite(ctx->krbd_fd, buf, len, off);
1472 if (n < 0) {
1473 n = -errno;
1474 prt("pwrite(%llu, %zu) failed\n", off, len);
1475 return n;
1476 }
1477
1478 return n;
1479 }
1480
1481 int
1482 __ggate_flush(struct rbd_ctx *ctx, bool invalidate)
1483 {
1484 int ret;
1485
1486 if (o_direct) {
1487 return 0;
1488 }
1489
1490 if (invalidate) {
1491 ret = ioctl(ctx->krbd_fd, DIOCGFLUSH, NULL);
1492 } else {
1493 ret = fsync(ctx->krbd_fd);
1494 }
1495 if (ret < 0) {
1496 ret = -errno;
1497 prt("%s failed\n", invalidate ? "DIOCGFLUSH" : "fsync");
1498 return ret;
1499 }
1500
1501 return 0;
1502 }
1503
1504 int
1505 ggate_flush(struct rbd_ctx *ctx)
1506 {
1507 return __ggate_flush(ctx, false);
1508 }
1509
1510 int
1511 ggate_discard(struct rbd_ctx *ctx, uint64_t off, uint64_t len)
1512 {
1513 off_t range[2] = {static_cast<off_t>(off), static_cast<off_t>(len)};
1514 int ret;
1515
1516 ret = __ggate_flush(ctx, true);
1517 if (ret < 0) {
1518 return ret;
1519 }
1520
1521 if (ioctl(ctx->krbd_fd, DIOCGDELETE, &range) < 0) {
1522 ret = -errno;
1523 prt("DIOCGDELETE(%llu, %llu) failed\n", off, len);
1524 return ret;
1525 }
1526
1527 return 0;
1528 }
1529
1530 int
1531 ggate_get_size(struct rbd_ctx *ctx, uint64_t *size)
1532 {
1533 off_t bytes;
1534
1535 if (ioctl(ctx->krbd_fd, DIOCGMEDIASIZE, &bytes) < 0) {
1536 int ret = -errno;
1537 prt("DIOCGMEDIASIZE failed\n");
1538 return ret;
1539 }
1540
1541 *size = bytes;
1542
1543 return 0;
1544 }
1545
1546 int
1547 ggate_resize(struct rbd_ctx *ctx, uint64_t size)
1548 {
1549 int ret;
1550
1551 ceph_assert(size % truncbdy == 0);
1552
1553 ret = __ggate_flush(ctx, false);
1554 if (ret < 0) {
1555 return ret;
1556 }
1557
1558 return __librbd_resize(ctx, size);
1559 }
1560
1561 int
1562 ggate_clone(struct rbd_ctx *ctx, const char *src_snapname,
1563 const char *dst_imagename, int *order, int stripe_unit,
1564 int stripe_count)
1565 {
1566 int ret;
1567
1568 ret = __ggate_flush(ctx, false);
1569 if (ret < 0) {
1570 return ret;
1571 }
1572
1573 return __librbd_clone(ctx, src_snapname, dst_imagename, order,
1574 stripe_unit, stripe_count);
1575 }
1576
1577 int
1578 ggate_flatten(struct rbd_ctx *ctx)
1579 {
1580 int ret;
1581
1582 ret = __ggate_flush(ctx, false);
1583 if (ret < 0) {
1584 return ret;
1585 }
1586
1587 return __librbd_flatten(ctx);
1588 }
1589
1590 const struct rbd_operations ggate_operations = {
1591 ggate_open,
1592 ggate_close,
1593 ggate_read,
1594 ggate_write,
1595 ggate_flush,
1596 ggate_discard,
1597 ggate_get_size,
1598 ggate_resize,
1599 ggate_clone,
1600 ggate_flatten,
1601 NULL,
1602 };
1603 #endif // __FreeBSD__
1604
1605 struct rbd_ctx ctx = RBD_CTX_INIT;
1606 const struct rbd_operations *ops = &librbd_operations;
1607
1608 static bool rbd_image_has_parent(struct rbd_ctx *ctx)
1609 {
1610 int ret;
1611 rbd_linked_image_spec_t parent_image;
1612 rbd_snap_spec_t parent_snap;
1613
1614 ret = rbd_get_parent(ctx->image, &parent_image, &parent_snap);
1615 if (ret < 0 && ret != -ENOENT) {
1616 prterrcode("rbd_get_parent_info", ret);
1617 exit(1);
1618 }
1619 rbd_linked_image_spec_cleanup(&parent_image);
1620 rbd_snap_spec_cleanup(&parent_snap);
1621
1622 return !ret;
1623 }
1624
1625 /*
1626 * fsx
1627 */
1628
1629 void
1630 log4(int operation, int arg0, int arg1, int arg2)
1631 {
1632 struct log_entry *le;
1633
1634 le = &oplog[logptr];
1635 le->operation = operation;
1636 if (closeopen)
1637 le->operation = ~ le->operation;
1638 le->args[0] = arg0;
1639 le->args[1] = arg1;
1640 le->args[2] = arg2;
1641 logptr++;
1642 logcount++;
1643 if (logptr >= LOGSIZE)
1644 logptr = 0;
1645 }
1646
1647 void
1648 logdump(void)
1649 {
1650 int i, count, down;
1651 struct log_entry *lp;
1652 const char *falloc_type[3] = {"PAST_EOF", "EXTENDING", "INTERIOR"};
1653
1654 prt("LOG DUMP (%d total operations):\n", logcount);
1655 if (logcount < LOGSIZE) {
1656 i = 0;
1657 count = logcount;
1658 } else {
1659 i = logptr;
1660 count = LOGSIZE;
1661 }
1662 for ( ; count > 0; count--) {
1663 int opnum;
1664
1665 opnum = i+1 + (logcount/LOGSIZE)*LOGSIZE;
1666 prt("%d(%3d mod 256): ", opnum, opnum%256);
1667 lp = &oplog[i];
1668 if ((closeopen = lp->operation < 0))
1669 lp->operation = ~ lp->operation;
1670
1671 switch (lp->operation) {
1672 case OP_MAPREAD:
1673 prt("MAPREAD 0x%x thru 0x%x\t(0x%x bytes)",
1674 lp->args[0], lp->args[0] + lp->args[1] - 1,
1675 lp->args[1]);
1676 if (badoff >= lp->args[0] && badoff <
1677 lp->args[0] + lp->args[1])
1678 prt("\t***RRRR***");
1679 break;
1680 case OP_MAPWRITE:
1681 prt("MAPWRITE 0x%x thru 0x%x\t(0x%x bytes)",
1682 lp->args[0], lp->args[0] + lp->args[1] - 1,
1683 lp->args[1]);
1684 if (badoff >= lp->args[0] && badoff <
1685 lp->args[0] + lp->args[1])
1686 prt("\t******WWWW");
1687 break;
1688 case OP_READ:
1689 prt("READ 0x%x thru 0x%x\t(0x%x bytes)",
1690 lp->args[0], lp->args[0] + lp->args[1] - 1,
1691 lp->args[1]);
1692 if (badoff >= lp->args[0] &&
1693 badoff < lp->args[0] + lp->args[1])
1694 prt("\t***RRRR***");
1695 break;
1696 case OP_WRITE:
1697 prt("WRITE 0x%x thru 0x%x\t(0x%x bytes)",
1698 lp->args[0], lp->args[0] + lp->args[1] - 1,
1699 lp->args[1]);
1700 if (lp->args[0] > lp->args[2])
1701 prt(" HOLE");
1702 else if (lp->args[0] + lp->args[1] > lp->args[2])
1703 prt(" EXTEND");
1704 if ((badoff >= lp->args[0] || badoff >=lp->args[2]) &&
1705 badoff < lp->args[0] + lp->args[1])
1706 prt("\t***WWWW");
1707 break;
1708 case OP_TRUNCATE:
1709 down = lp->args[0] < lp->args[1];
1710 prt("TRUNCATE %s\tfrom 0x%x to 0x%x",
1711 down ? "DOWN" : "UP", lp->args[1], lp->args[0]);
1712 if (badoff >= lp->args[!down] &&
1713 badoff < lp->args[!!down])
1714 prt("\t******WWWW");
1715 break;
1716 case OP_FALLOCATE:
1717 /* 0: offset 1: length 2: where alloced */
1718 prt("FALLOC 0x%x thru 0x%x\t(0x%x bytes) %s",
1719 lp->args[0], lp->args[0] + lp->args[1],
1720 lp->args[1], falloc_type[lp->args[2]]);
1721 if (badoff >= lp->args[0] &&
1722 badoff < lp->args[0] + lp->args[1])
1723 prt("\t******FFFF");
1724 break;
1725 case OP_PUNCH_HOLE:
1726 prt("PUNCH 0x%x thru 0x%x\t(0x%x bytes)",
1727 lp->args[0], lp->args[0] + lp->args[1] - 1,
1728 lp->args[1]);
1729 if (badoff >= lp->args[0] && badoff <
1730 lp->args[0] + lp->args[1])
1731 prt("\t******PPPP");
1732 break;
1733 case OP_WRITESAME:
1734 prt("WRITESAME 0x%x thru 0x%x\t(0x%x bytes) data_size 0x%x",
1735 lp->args[0], lp->args[0] + lp->args[1] - 1,
1736 lp->args[1], lp->args[2]);
1737 if (badoff >= lp->args[0] &&
1738 badoff < lp->args[0] + lp->args[1])
1739 prt("\t***WSWSWSWS");
1740 break;
1741 case OP_COMPARE_AND_WRITE:
1742 prt("COMPARE_AND_WRITE 0x%x thru 0x%x\t(0x%x bytes)",
1743 lp->args[0], lp->args[0] + lp->args[1] - 1,
1744 lp->args[1]);
1745 if (lp->args[0] > lp->args[2])
1746 prt(" HOLE");
1747 else if (lp->args[0] + lp->args[1] > lp->args[2])
1748 prt(" EXTEND");
1749 if ((badoff >= lp->args[0] || badoff >=lp->args[2]) &&
1750 badoff < lp->args[0] + lp->args[1])
1751 prt("\t***WWWW");
1752 break;
1753 case OP_CLONE:
1754 prt("CLONE");
1755 break;
1756 case OP_FLATTEN:
1757 prt("FLATTEN");
1758 break;
1759 case OP_SKIPPED:
1760 prt("SKIPPED (no operation)");
1761 break;
1762 default:
1763 prt("BOGUS LOG ENTRY (operation code = %d)!",
1764 lp->operation);
1765 }
1766 if (closeopen)
1767 prt("\n\t\tCLOSE/OPEN");
1768 prt("\n");
1769 i++;
1770 if (i == LOGSIZE)
1771 i = 0;
1772 }
1773 }
1774
1775 void
1776 save_buffer(char *buffer, off_t bufferlength, int fd)
1777 {
1778 off_t ret;
1779 ssize_t byteswritten;
1780
1781 if (fd <= 0 || bufferlength == 0)
1782 return;
1783
1784 if (bufferlength > SSIZE_MAX) {
1785 prt("fsx flaw: overflow in save_buffer\n");
1786 exit(67);
1787 }
1788
1789 ret = lseek(fd, (off_t)0, SEEK_SET);
1790 if (ret == (off_t)-1)
1791 prterr("save_buffer: lseek 0");
1792
1793 byteswritten = write(fd, buffer, (size_t)bufferlength);
1794 if (byteswritten != bufferlength) {
1795 if (byteswritten == -1)
1796 prterr("save_buffer write");
1797 else
1798 warn("save_buffer: short write, 0x%x bytes instead of 0x%llx\n",
1799 (unsigned)byteswritten,
1800 (unsigned long long)bufferlength);
1801 }
1802 }
1803
1804
1805 void
1806 report_failure(int status)
1807 {
1808 logdump();
1809
1810 if (fsxgoodfd) {
1811 if (good_buf) {
1812 save_buffer(good_buf, file_size, fsxgoodfd);
1813 prt("Correct content saved for comparison\n");
1814 prt("(maybe hexdump \"%s\" vs \"%s.fsxgood\")\n",
1815 iname, iname);
1816 }
1817 close(fsxgoodfd);
1818 }
1819 sleep(3); // so the log can flush to disk. KLUDGEY!
1820 exit(status);
1821 }
1822
1823 #define short_at(cp) ((unsigned short)((*((unsigned char *)(cp)) << 8) | \
1824 *(((unsigned char *)(cp)) + 1)))
1825
1826 int
1827 fsxcmp(char *good_buf, char *temp_buf, unsigned size)
1828 {
1829 if (!skip_partial_discard) {
1830 return memcmp(good_buf, temp_buf, size);
1831 }
1832
1833 for (unsigned i = 0; i < size; i++) {
1834 if (good_buf[i] != temp_buf[i] && good_buf[i] != 0) {
1835 return good_buf[i] - temp_buf[i];
1836 }
1837 }
1838 return 0;
1839 }
1840
1841 void
1842 check_buffers(char *good_buf, char *temp_buf, unsigned offset, unsigned size)
1843 {
1844 if (fsxcmp(good_buf + offset, temp_buf, size) != 0) {
1845 unsigned i = 0;
1846 unsigned n = 0;
1847
1848 prt("READ BAD DATA: offset = 0x%x, size = 0x%x, fname = %s\n",
1849 offset, size, iname);
1850 prt("OFFSET\tGOOD\tBAD\tRANGE\n");
1851 while (size > 0) {
1852 unsigned char c = good_buf[offset];
1853 unsigned char t = temp_buf[i];
1854 if (c != t) {
1855 if (n < 16) {
1856 unsigned bad = short_at(&temp_buf[i]);
1857 prt("0x%5x\t0x%04x\t0x%04x", offset,
1858 short_at(&good_buf[offset]), bad);
1859 unsigned op = temp_buf[(offset & 1) ? i+1 : i];
1860 prt("\t0x%5x\n", n);
1861 if (op)
1862 prt("operation# (mod 256) for "
1863 "the bad data may be %u\n",
1864 ((unsigned)op & 0xff));
1865 else
1866 prt("operation# (mod 256) for "
1867 "the bad data unknown, check"
1868 " HOLE and EXTEND ops\n");
1869 }
1870 n++;
1871 badoff = offset;
1872 }
1873 offset++;
1874 i++;
1875 size--;
1876 }
1877 report_failure(110);
1878 }
1879 }
1880
1881
1882 void
1883 check_size(void)
1884 {
1885 uint64_t size;
1886 int ret;
1887
1888 ret = ops->get_size(&ctx, &size);
1889 if (ret < 0)
1890 prterrcode("check_size: ops->get_size", ret);
1891
1892 if ((uint64_t)file_size != size) {
1893 prt("Size error: expected 0x%llx stat 0x%llx\n",
1894 (unsigned long long)file_size,
1895 (unsigned long long)size);
1896 report_failure(120);
1897 }
1898 }
1899
1900 #define TRUNC_HACK_SIZE (200ULL << 9) /* 512-byte aligned for krbd */
1901
1902 void
1903 check_trunc_hack(void)
1904 {
1905 uint64_t size;
1906 int ret;
1907
1908 ret = ops->resize(&ctx, 0ULL);
1909 if (ret < 0)
1910 prterrcode("check_trunc_hack: ops->resize pre", ret);
1911
1912 ret = ops->resize(&ctx, TRUNC_HACK_SIZE);
1913 if (ret < 0)
1914 prterrcode("check_trunc_hack: ops->resize actual", ret);
1915
1916 ret = ops->get_size(&ctx, &size);
1917 if (ret < 0)
1918 prterrcode("check_trunc_hack: ops->get_size", ret);
1919
1920 if (size != TRUNC_HACK_SIZE) {
1921 prt("no extend on truncate! not posix!\n");
1922 exit(130);
1923 }
1924
1925 ret = ops->resize(&ctx, 0ULL);
1926 if (ret < 0)
1927 prterrcode("check_trunc_hack: ops->resize post", ret);
1928 }
1929
1930 int
1931 create_image()
1932 {
1933 int r;
1934 int order = 0;
1935 char buf[32];
1936 char client_name[256];
1937
1938 sprintf(client_name, "client.%s", client_id);
1939
1940 r = rados_create2(&cluster, cluster_name, client_name, 0);
1941 if (r < 0) {
1942 simple_err("Could not create cluster handle", r);
1943 return r;
1944 }
1945 rados_conf_parse_env(cluster, NULL);
1946 r = rados_conf_read_file(cluster, NULL);
1947 if (r < 0) {
1948 simple_err("Error reading ceph config file", r);
1949 goto failed_shutdown;
1950 }
1951 r = rados_connect(cluster);
1952 if (r < 0) {
1953 simple_err("Error connecting to cluster", r);
1954 goto failed_shutdown;
1955 }
1956 #if defined(WITH_KRBD)
1957 r = krbd_create_from_context(rados_cct(cluster), 0, &krbd);
1958 if (r < 0) {
1959 simple_err("Could not create libkrbd handle", r);
1960 goto failed_shutdown;
1961 }
1962 #endif
1963
1964 r = rados_pool_create(cluster, pool);
1965 if (r < 0 && r != -EEXIST) {
1966 simple_err("Error creating pool", r);
1967 goto failed_krbd;
1968 }
1969 r = rados_ioctx_create(cluster, pool, &ioctx);
1970 if (r < 0) {
1971 simple_err("Error creating ioctx", r);
1972 goto failed_krbd;
1973 }
1974 rados_application_enable(ioctx, "rbd", 1);
1975
1976 if (clone_calls || journal_replay) {
1977 uint64_t features;
1978 r = get_features(&features);
1979 if (r < 0) {
1980 goto failed_open;
1981 }
1982
1983 r = rbd_create2(ioctx, iname, file_size, features, &order);
1984 } else {
1985 r = rbd_create(ioctx, iname, file_size, &order);
1986 }
1987 if (r < 0) {
1988 simple_err("Error creating image", r);
1989 goto failed_open;
1990 }
1991
1992 if (journal_replay) {
1993 r = register_journal(ioctx, iname);
1994 if (r < 0) {
1995 goto failed_open;
1996 }
1997 }
1998
1999 r = rados_conf_get(cluster, "rbd_skip_partial_discard", buf,
2000 sizeof(buf));
2001 if (r < 0) {
2002 simple_err("Could not get rbd_skip_partial_discard value", r);
2003 goto failed_open;
2004 }
2005 skip_partial_discard = (strcmp(buf, "true") == 0);
2006
2007 return 0;
2008
2009 failed_open:
2010 rados_ioctx_destroy(ioctx);
2011 failed_krbd:
2012 #if defined(WITH_KRBD)
2013 krbd_destroy(krbd);
2014 #endif
2015 failed_shutdown:
2016 rados_shutdown(cluster);
2017 return r;
2018 }
2019
2020 void
2021 doflush(unsigned offset, unsigned size)
2022 {
2023 int ret;
2024
2025 if (o_direct)
2026 return;
2027
2028 ret = ops->flush(&ctx);
2029 if (ret < 0)
2030 prterrcode("doflush: ops->flush", ret);
2031 }
2032
2033 void
2034 doread(unsigned offset, unsigned size)
2035 {
2036 int ret;
2037
2038 offset -= offset % readbdy;
2039 if (o_direct)
2040 size -= size % readbdy;
2041 if (size == 0) {
2042 if (!quiet && testcalls > simulatedopcount && !o_direct)
2043 prt("skipping zero size read\n");
2044 log4(OP_SKIPPED, OP_READ, offset, size);
2045 return;
2046 }
2047 if (size + offset > file_size) {
2048 if (!quiet && testcalls > simulatedopcount)
2049 prt("skipping seek/read past end of file\n");
2050 log4(OP_SKIPPED, OP_READ, offset, size);
2051 return;
2052 }
2053
2054 log4(OP_READ, offset, size, 0);
2055
2056 if (testcalls <= simulatedopcount)
2057 return;
2058
2059 if (!quiet &&
2060 ((progressinterval && testcalls % progressinterval == 0) ||
2061 (debug &&
2062 (monitorstart == -1 ||
2063 (static_cast<long>(offset + size) > monitorstart &&
2064 (monitorend == -1 ||
2065 static_cast<long>(offset) <= monitorend))))))
2066 prt("%lu read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
2067 offset, offset + size - 1, size);
2068
2069 ret = ops->read(&ctx, offset, size, temp_buf);
2070 if (ret != (int)size) {
2071 if (ret < 0)
2072 prterrcode("doread: ops->read", ret);
2073 else
2074 prt("short read: 0x%x bytes instead of 0x%x\n",
2075 ret, size);
2076 report_failure(141);
2077 }
2078
2079 check_buffers(good_buf, temp_buf, offset, size);
2080 }
2081
2082
2083 void
2084 check_eofpage(char *s, unsigned offset, char *p, int size)
2085 {
2086 unsigned long last_page, should_be_zero;
2087
2088 if (offset + size <= (file_size & ~page_mask))
2089 return;
2090 /*
2091 * we landed in the last page of the file
2092 * test to make sure the VM system provided 0's
2093 * beyond the true end of the file mapping
2094 * (as required by mmap def in 1996 posix 1003.1)
2095 */
2096 last_page = ((unsigned long)p + (offset & page_mask) + size) & ~page_mask;
2097
2098 for (should_be_zero = last_page + (file_size & page_mask);
2099 should_be_zero < last_page + page_size;
2100 should_be_zero++)
2101 if (*(char *)should_be_zero) {
2102 prt("Mapped %s: non-zero data past EOF (0x%llx) page offset 0x%x is 0x%04x\n",
2103 s, file_size - 1, should_be_zero & page_mask,
2104 short_at(should_be_zero));
2105 report_failure(205);
2106 }
2107 }
2108
2109
2110 void
2111 gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
2112 {
2113 while (size--) {
2114 good_buf[offset] = testcalls % 256;
2115 if (offset % 2)
2116 good_buf[offset] += original_buf[offset];
2117 offset++;
2118 }
2119 }
2120
2121
2122 void
2123 dowrite(unsigned offset, unsigned size)
2124 {
2125 ssize_t ret;
2126 off_t newsize;
2127
2128 offset -= offset % writebdy;
2129 if (o_direct)
2130 size -= size % writebdy;
2131 if (size == 0) {
2132 if (!quiet && testcalls > simulatedopcount && !o_direct)
2133 prt("skipping zero size write\n");
2134 log4(OP_SKIPPED, OP_WRITE, offset, size);
2135 return;
2136 }
2137
2138 log4(OP_WRITE, offset, size, file_size);
2139
2140 gendata(original_buf, good_buf, offset, size);
2141 if (file_size < offset + size) {
2142 newsize = ceil(((double)offset + size) / truncbdy) * truncbdy;
2143 if (file_size < newsize)
2144 memset(good_buf + file_size, '\0', newsize - file_size);
2145 file_size = newsize;
2146 if (lite) {
2147 warn("Lite file size bug in fsx!");
2148 report_failure(149);
2149 }
2150 ret = ops->resize(&ctx, newsize);
2151 if (ret < 0) {
2152 prterrcode("dowrite: ops->resize", ret);
2153 report_failure(150);
2154 }
2155 }
2156
2157 if (testcalls <= simulatedopcount)
2158 return;
2159
2160 if (!quiet &&
2161 ((progressinterval && testcalls % progressinterval == 0) ||
2162 (debug &&
2163 (monitorstart == -1 ||
2164 (static_cast<long>(offset + size) > monitorstart &&
2165 (monitorend == -1 ||
2166 static_cast<long>(offset) <= monitorend))))))
2167 prt("%lu write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
2168 offset, offset + size - 1, size);
2169
2170 ret = ops->write(&ctx, offset, size, good_buf + offset);
2171 if (ret != (ssize_t)size) {
2172 if (ret < 0)
2173 prterrcode("dowrite: ops->write", ret);
2174 else
2175 prt("short write: 0x%x bytes instead of 0x%x\n",
2176 ret, size);
2177 report_failure(151);
2178 }
2179
2180 if (flush_enabled)
2181 doflush(offset, size);
2182 }
2183
2184
2185 void
2186 dotruncate(unsigned size)
2187 {
2188 int oldsize = file_size;
2189 int ret;
2190
2191 size -= size % truncbdy;
2192 if (size > biggest) {
2193 biggest = size;
2194 if (!quiet && testcalls > simulatedopcount)
2195 prt("truncating to largest ever: 0x%x\n", size);
2196 }
2197
2198 log4(OP_TRUNCATE, size, (unsigned)file_size, 0);
2199
2200 if (size > file_size)
2201 memset(good_buf + file_size, '\0', size - file_size);
2202 else if (size < file_size)
2203 memset(good_buf + size, '\0', file_size - size);
2204 file_size = size;
2205
2206 if (testcalls <= simulatedopcount)
2207 return;
2208
2209 if ((progressinterval && testcalls % progressinterval == 0) ||
2210 (debug && (monitorstart == -1 || monitorend == -1 ||
2211 (long)size <= monitorend)))
2212 prt("%lu trunc\tfrom 0x%x to 0x%x\n", testcalls, oldsize, size);
2213
2214 ret = ops->resize(&ctx, size);
2215 if (ret < 0) {
2216 prterrcode("dotruncate: ops->resize", ret);
2217 report_failure(160);
2218 }
2219 }
2220
2221 void
2222 do_punch_hole(unsigned offset, unsigned length)
2223 {
2224 unsigned end_offset;
2225 int max_offset = 0;
2226 int max_len = 0;
2227 int ret;
2228
2229 offset -= offset % holebdy;
2230 length -= length % holebdy;
2231 if (length == 0) {
2232 if (!quiet && testcalls > simulatedopcount)
2233 prt("skipping zero length punch hole\n");
2234 log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, length);
2235 return;
2236 }
2237
2238 if (file_size <= (loff_t)offset) {
2239 if (!quiet && testcalls > simulatedopcount)
2240 prt("skipping hole punch off the end of the file\n");
2241 log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, length);
2242 return;
2243 }
2244
2245 end_offset = offset + length;
2246
2247 log4(OP_PUNCH_HOLE, offset, length, 0);
2248
2249 if (testcalls <= simulatedopcount)
2250 return;
2251
2252 if ((progressinterval && testcalls % progressinterval == 0) ||
2253 (debug && (monitorstart == -1 || monitorend == -1 ||
2254 (long)end_offset <= monitorend))) {
2255 prt("%lu punch\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
2256 offset, offset+length, length);
2257 }
2258
2259 ret = ops->discard(&ctx, (unsigned long long)offset,
2260 (unsigned long long)length);
2261 if (ret < 0) {
2262 prterrcode("do_punch_hole: ops->discard", ret);
2263 report_failure(161);
2264 }
2265
2266 max_offset = offset < file_size ? offset : file_size;
2267 max_len = max_offset + length <= file_size ? length :
2268 file_size - max_offset;
2269 memset(good_buf + max_offset, '\0', max_len);
2270 }
2271
2272 unsigned get_data_size(unsigned size)
2273 {
2274 unsigned i;
2275 unsigned hint;
2276 unsigned max = sqrt((double)size) + 1;
2277 unsigned good = 1;
2278 unsigned curr = good;
2279
2280 hint = get_random() % max;
2281
2282 for (i = 1; i < max && curr < hint; i++) {
2283 if (size % i == 0) {
2284 good = curr;
2285 curr = i;
2286 }
2287 }
2288
2289 if (curr == hint)
2290 good = curr;
2291
2292 return good;
2293 }
2294
2295 void
2296 dowritesame(unsigned offset, unsigned size)
2297 {
2298 ssize_t ret;
2299 off_t newsize;
2300 unsigned buf_off;
2301 unsigned data_size;
2302 int n;
2303
2304 offset -= offset % writebdy;
2305 if (o_direct)
2306 size -= size % writebdy;
2307 if (size == 0) {
2308 if (!quiet && testcalls > simulatedopcount && !o_direct)
2309 prt("skipping zero size writesame\n");
2310 log4(OP_SKIPPED, OP_WRITESAME, offset, size);
2311 return;
2312 }
2313
2314 data_size = get_data_size(size);
2315
2316 log4(OP_WRITESAME, offset, size, data_size);
2317
2318 gendata(original_buf, good_buf, offset, data_size);
2319 if (file_size < offset + size) {
2320 newsize = ceil(((double)offset + size) / truncbdy) * truncbdy;
2321 if (file_size < newsize)
2322 memset(good_buf + file_size, '\0', newsize - file_size);
2323 file_size = newsize;
2324 if (lite) {
2325 warn("Lite file size bug in fsx!");
2326 report_failure(162);
2327 }
2328 ret = ops->resize(&ctx, newsize);
2329 if (ret < 0) {
2330 prterrcode("dowritesame: ops->resize", ret);
2331 report_failure(163);
2332 }
2333 }
2334
2335 for (n = size / data_size, buf_off = data_size; n > 1; n--) {
2336 memcpy(good_buf + offset + buf_off, good_buf + offset, data_size);
2337 buf_off += data_size;
2338 }
2339
2340 if (testcalls <= simulatedopcount)
2341 return;
2342
2343 if (!quiet &&
2344 ((progressinterval && testcalls % progressinterval == 0) ||
2345 (debug &&
2346 (monitorstart == -1 ||
2347 (static_cast<long>(offset + size) > monitorstart &&
2348 (monitorend == -1 ||
2349 static_cast<long>(offset) <= monitorend))))))
2350 prt("%lu writesame\t0x%x thru\t0x%x\tdata_size\t0x%x(0x%x bytes)\n", testcalls,
2351 offset, offset + size - 1, data_size, size);
2352
2353 ret = ops->writesame(&ctx, offset, size, good_buf + offset, data_size);
2354 if (ret != (ssize_t)size) {
2355 if (ret < 0)
2356 prterrcode("dowritesame: ops->writesame", ret);
2357 else
2358 prt("short writesame: 0x%x bytes instead of 0x%x\n",
2359 ret, size);
2360 report_failure(164);
2361 }
2362
2363 if (flush_enabled)
2364 doflush(offset, size);
2365 }
2366
2367 void
2368 docompareandwrite(unsigned offset, unsigned size)
2369 {
2370 int ret;
2371
2372 if (skip_partial_discard) {
2373 if (!quiet && testcalls > simulatedopcount)
2374 prt("compare and write disabled\n");
2375 log4(OP_SKIPPED, OP_COMPARE_AND_WRITE, offset, size);
2376 return;
2377 }
2378
2379 offset -= offset % writebdy;
2380 if (o_direct)
2381 size -= size % writebdy;
2382
2383 if (size == 0) {
2384 if (!quiet && testcalls > simulatedopcount && !o_direct)
2385 prt("skipping zero size read\n");
2386 log4(OP_SKIPPED, OP_READ, offset, size);
2387 return;
2388 }
2389
2390 if (size + offset > file_size) {
2391 if (!quiet && testcalls > simulatedopcount)
2392 prt("skipping seek/compare past end of file\n");
2393 log4(OP_SKIPPED, OP_COMPARE_AND_WRITE, offset, size);
2394 return;
2395 }
2396
2397 memcpy(temp_buf + offset, good_buf + offset, size);
2398 gendata(original_buf, good_buf, offset, size);
2399 log4(OP_COMPARE_AND_WRITE, offset, size, 0);
2400
2401 if (testcalls <= simulatedopcount)
2402 return;
2403
2404 if (!quiet &&
2405 ((progressinterval && testcalls % progressinterval == 0) ||
2406 (debug &&
2407 (monitorstart == -1 ||
2408 (static_cast<long>(offset + size) > monitorstart &&
2409 (monitorend == -1 ||
2410 static_cast<long>(offset) <= monitorend))))))
2411 prt("%lu compareandwrite\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
2412 offset, offset + size - 1, size);
2413
2414 ret = ops->compare_and_write(&ctx, offset, size, temp_buf + offset,
2415 good_buf + offset);
2416 if (ret != (ssize_t)size) {
2417 if (ret == -EINVAL) {
2418 memcpy(good_buf + offset, temp_buf + offset, size);
2419 return;
2420 }
2421 if (ret < 0)
2422 prterrcode("docompareandwrite: ops->compare_and_write", ret);
2423 else
2424 prt("short write: 0x%x bytes instead of 0x%x\n", ret, size);
2425 report_failure(151);
2426 return;
2427 }
2428
2429 if (flush_enabled)
2430 doflush(offset, size);
2431 }
2432
2433 void clone_filename(char *buf, size_t len, int clones)
2434 {
2435 #if __GNUC__ && __GNUC__ >= 8
2436 #pragma GCC diagnostic push
2437 #pragma GCC diagnostic ignored "-Wformat-truncation"
2438 #endif
2439 snprintf(buf, len, "%s/fsx-%s-parent%d",
2440 dirpath, iname, clones);
2441 #if __GNUC__ && __GNUC__ >= 8
2442 #pragma GCC diagnostic pop
2443 #endif
2444 }
2445
2446 void clone_imagename(char *buf, size_t len, int clones)
2447 {
2448 if (clones > 0)
2449 snprintf(buf, len, "%s-clone%d", iname, clones);
2450 else
2451 strncpy(buf, iname, len);
2452 buf[len - 1] = '\0';
2453 }
2454
2455 void replay_imagename(char *buf, size_t len, int clones)
2456 {
2457 clone_imagename(buf, len, clones);
2458 strncat(buf, "-replay", len - strlen(buf));
2459 buf[len - 1] = '\0';
2460 }
2461
2462 void check_clone(int clonenum, bool replay_image);
2463
2464 void
2465 do_clone()
2466 {
2467 char filename[1024];
2468 char imagename[1024];
2469 char lastimagename[1024];
2470 int ret, fd;
2471 int order = 0, stripe_unit = 0, stripe_count = 0;
2472 uint64_t newsize = file_size;
2473
2474 log4(OP_CLONE, 0, 0, 0);
2475 ++num_clones;
2476
2477 if (randomize_striping) {
2478 order = 18 + get_random() % 8;
2479 stripe_unit = 1ull << (order - 1 - (get_random() % 8));
2480 stripe_count = 2 + get_random() % 14;
2481 }
2482
2483 prt("%lu clone\t%d order %d su %d sc %d\n", testcalls, num_clones,
2484 order, stripe_unit, stripe_count);
2485
2486 clone_imagename(imagename, sizeof(imagename), num_clones);
2487 clone_imagename(lastimagename, sizeof(lastimagename),
2488 num_clones - 1);
2489 ceph_assert(strcmp(lastimagename, ctx.name) == 0);
2490
2491 ret = ops->clone(&ctx, "snap", imagename, &order, stripe_unit,
2492 stripe_count);
2493 if (ret < 0) {
2494 prterrcode("do_clone: ops->clone", ret);
2495 exit(165);
2496 }
2497
2498 if (randomize_parent_overlap && rbd_image_has_parent(&ctx)) {
2499 int rand = get_random() % 16 + 1; // [1..16]
2500
2501 if (rand < 13) {
2502 uint64_t overlap;
2503
2504 ret = rbd_get_overlap(ctx.image, &overlap);
2505 if (ret < 0) {
2506 prterrcode("do_clone: rbd_get_overlap", ret);
2507 exit(1);
2508 }
2509
2510 if (rand < 10) { // 9/16
2511 newsize = overlap * ((double)rand / 10);
2512 newsize -= newsize % truncbdy;
2513 } else { // 3/16
2514 newsize = 0;
2515 }
2516
2517 ceph_assert(newsize != (uint64_t)file_size);
2518 prt("truncating image %s from 0x%llx (overlap 0x%llx) to 0x%llx\n",
2519 ctx.name, file_size, overlap, newsize);
2520
2521 ret = ops->resize(&ctx, newsize);
2522 if (ret < 0) {
2523 prterrcode("do_clone: ops->resize", ret);
2524 exit(1);
2525 }
2526 } else if (rand < 15) { // 2/16
2527 prt("flattening image %s\n", ctx.name);
2528
2529 ret = ops->flatten(&ctx);
2530 if (ret < 0) {
2531 prterrcode("do_clone: ops->flatten", ret);
2532 exit(1);
2533 }
2534 } else { // 2/16
2535 prt("leaving image %s intact\n", ctx.name);
2536 }
2537 }
2538
2539 clone_filename(filename, sizeof(filename), num_clones);
2540 if ((fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0666)) < 0) {
2541 simple_err("do_clone: open", -errno);
2542 exit(162);
2543 }
2544 save_buffer(good_buf, newsize, fd);
2545 if ((ret = close(fd)) < 0) {
2546 simple_err("do_clone: close", -errno);
2547 exit(163);
2548 }
2549
2550 /*
2551 * Close parent.
2552 */
2553 if ((ret = ops->close(&ctx)) < 0) {
2554 prterrcode("do_clone: ops->close", ret);
2555 exit(174);
2556 }
2557
2558 if (journal_replay) {
2559 ret = finalize_journal(ioctx, lastimagename, num_clones - 1,
2560 order, stripe_unit, stripe_count);
2561 if (ret < 0) {
2562 exit(EXIT_FAILURE);
2563 }
2564
2565 ret = register_journal(ioctx, imagename);
2566 if (ret < 0) {
2567 exit(EXIT_FAILURE);
2568 }
2569 }
2570
2571 /*
2572 * Open freshly made clone.
2573 */
2574 if ((ret = ops->open(imagename, &ctx)) < 0) {
2575 prterrcode("do_clone: ops->open", ret);
2576 exit(166);
2577 }
2578
2579 if (num_clones > 1) {
2580 if (journal_replay) {
2581 check_clone(num_clones - 2, true);
2582 }
2583 check_clone(num_clones - 2, false);
2584 }
2585 }
2586
2587 void
2588 check_clone(int clonenum, bool replay_image)
2589 {
2590 char filename[128];
2591 char imagename[128];
2592 int ret, fd;
2593 struct rbd_ctx cur_ctx = RBD_CTX_INIT;
2594 struct stat file_info;
2595 char *good_buf, *temp_buf;
2596
2597 if (replay_image) {
2598 replay_imagename(imagename, sizeof(imagename), clonenum);
2599 } else {
2600 clone_imagename(imagename, sizeof(imagename), clonenum);
2601 }
2602
2603 if ((ret = ops->open(imagename, &cur_ctx)) < 0) {
2604 prterrcode("check_clone: ops->open", ret);
2605 exit(167);
2606 }
2607
2608 clone_filename(filename, sizeof(filename), clonenum + 1);
2609 if ((fd = open(filename, O_RDONLY)) < 0) {
2610 simple_err("check_clone: open", -errno);
2611 exit(168);
2612 }
2613
2614 prt("checking clone #%d, image %s against file %s\n",
2615 clonenum, imagename, filename);
2616 if ((ret = fstat(fd, &file_info)) < 0) {
2617 simple_err("check_clone: fstat", -errno);
2618 exit(169);
2619 }
2620
2621 good_buf = NULL;
2622 ret = posix_memalign((void **)&good_buf,
2623 std::max(writebdy, (int)sizeof(void *)),
2624 file_info.st_size);
2625 if (ret > 0) {
2626 prterrcode("check_clone: posix_memalign(good_buf)", -ret);
2627 exit(96);
2628 }
2629
2630 temp_buf = NULL;
2631 ret = posix_memalign((void **)&temp_buf,
2632 std::max(readbdy, (int)sizeof(void *)),
2633 file_info.st_size);
2634 if (ret > 0) {
2635 prterrcode("check_clone: posix_memalign(temp_buf)", -ret);
2636 exit(97);
2637 }
2638
2639 if ((ret = pread(fd, good_buf, file_info.st_size, 0)) < 0) {
2640 simple_err("check_clone: pread", -errno);
2641 exit(170);
2642 }
2643 if ((ret = ops->read(&cur_ctx, 0, file_info.st_size, temp_buf)) < 0) {
2644 prterrcode("check_clone: ops->read", ret);
2645 exit(171);
2646 }
2647 close(fd);
2648 if ((ret = ops->close(&cur_ctx)) < 0) {
2649 prterrcode("check_clone: ops->close", ret);
2650 exit(174);
2651 }
2652 check_buffers(good_buf, temp_buf, 0, file_info.st_size);
2653
2654 if (!replay_image) {
2655 unlink(filename);
2656 }
2657
2658 free(good_buf);
2659 free(temp_buf);
2660 }
2661
2662 void
2663 writefileimage()
2664 {
2665 ssize_t ret;
2666
2667 ret = ops->write(&ctx, 0, file_size, good_buf);
2668 if (ret != file_size) {
2669 if (ret < 0)
2670 prterrcode("writefileimage: ops->write", ret);
2671 else
2672 prt("short write: 0x%x bytes instead of 0x%llx\n",
2673 ret, (unsigned long long)file_size);
2674 report_failure(172);
2675 }
2676
2677 if (!lite) {
2678 ret = ops->resize(&ctx, file_size);
2679 if (ret < 0) {
2680 prterrcode("writefileimage: ops->resize", ret);
2681 report_failure(173);
2682 }
2683 }
2684 }
2685
2686 void
2687 do_flatten()
2688 {
2689 int ret;
2690
2691 if (!rbd_image_has_parent(&ctx)) {
2692 log4(OP_SKIPPED, OP_FLATTEN, 0, 0);
2693 return;
2694 }
2695 log4(OP_FLATTEN, 0, 0, 0);
2696 prt("%lu flatten\n", testcalls);
2697
2698 ret = ops->flatten(&ctx);
2699 if (ret < 0) {
2700 prterrcode("writefileimage: ops->flatten", ret);
2701 exit(177);
2702 }
2703 }
2704
2705 void
2706 docloseopen(void)
2707 {
2708 char *name;
2709 int ret;
2710
2711 if (testcalls <= simulatedopcount)
2712 return;
2713
2714 name = strdup(ctx.name);
2715
2716 if (debug)
2717 prt("%lu close/open\n", testcalls);
2718
2719 ret = ops->close(&ctx);
2720 if (ret < 0) {
2721 prterrcode("docloseopen: ops->close", ret);
2722 report_failure(180);
2723 }
2724
2725 ret = ops->open(name, &ctx);
2726 if (ret < 0) {
2727 prterrcode("docloseopen: ops->open", ret);
2728 report_failure(181);
2729 }
2730
2731 free(name);
2732 }
2733
/*
 * Fit an (off, len) pair into an object of `size` bytes.
 *
 * off is reduced modulo size (or set to 0 when size is 0); len is then
 * shortened so that off + len does not exceed size.  off and len must be
 * modifiable lvalues; every argument is evaluated more than once, so pass
 * plain variables only.
 *
 * The range comparison is done in unsigned long long so that values of
 * 4 GiB and above are not truncated before being compared.
 */
#define TRIM_OFF_LEN(off, len, size)				\
do {								\
	if ((size) != 0)					\
		(off) %= (size);				\
	else							\
		(off) = 0;					\
	if ((unsigned long long)(off) +				\
	    (unsigned long long)(len) >				\
	    (unsigned long long)(size))				\
		(len) = (size) - (off);				\
} while (0)
2743
2744 void
2745 test(void)
2746 {
2747 unsigned long offset;
2748 unsigned long size = maxoplen;
2749 unsigned long rv = get_random();
2750 unsigned long op;
2751
2752 if (simulatedopcount > 0 && testcalls == simulatedopcount)
2753 writefileimage();
2754
2755 testcalls++;
2756
2757 if (closeprob)
2758 closeopen = (rv >> 3) < (1u << 28) / (unsigned)closeprob;
2759
2760 if (debugstart > 0 && testcalls >= debugstart)
2761 debug = 1;
2762
2763 if (!quiet && testcalls < simulatedopcount && testcalls % 100000 == 0)
2764 prt("%lu...\n", testcalls);
2765
2766 offset = get_random();
2767 if (randomoplen)
2768 size = get_random() % (maxoplen + 1);
2769
2770 /* calculate appropriate op to run */
2771 if (lite)
2772 op = rv % OP_MAX_LITE;
2773 else
2774 op = rv % OP_MAX_FULL;
2775
2776 switch (op) {
2777 case OP_MAPREAD:
2778 if (!mapped_reads)
2779 op = OP_READ;
2780 break;
2781 case OP_MAPWRITE:
2782 if (!mapped_writes)
2783 op = OP_WRITE;
2784 break;
2785 case OP_FALLOCATE:
2786 if (!fallocate_calls) {
2787 log4(OP_SKIPPED, OP_FALLOCATE, offset, size);
2788 goto out;
2789 }
2790 break;
2791 case OP_PUNCH_HOLE:
2792 if (!punch_hole_calls) {
2793 log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, size);
2794 goto out;
2795 }
2796 break;
2797 case OP_CLONE:
2798 /* clone, 8% chance */
2799 if (!clone_calls || file_size == 0 || get_random() % 100 >= 8) {
2800 log4(OP_SKIPPED, OP_CLONE, 0, 0);
2801 goto out;
2802 }
2803 break;
2804 case OP_FLATTEN:
2805 /* flatten four times as rarely as clone, 2% chance */
2806 if (get_random() % 100 >= 2) {
2807 log4(OP_SKIPPED, OP_FLATTEN, 0, 0);
2808 goto out;
2809 }
2810 break;
2811 case OP_WRITESAME:
2812 /* writesame not implemented */
2813 if (!ops->writesame) {
2814 log4(OP_SKIPPED, OP_WRITESAME, offset, size);
2815 goto out;
2816 }
2817 break;
2818 case OP_COMPARE_AND_WRITE:
2819 /* compare_and_write not implemented */
2820 if (!ops->compare_and_write) {
2821 log4(OP_SKIPPED, OP_COMPARE_AND_WRITE, offset, size);
2822 goto out;
2823 }
2824 break;
2825 }
2826
2827 switch (op) {
2828 case OP_READ:
2829 TRIM_OFF_LEN(offset, size, file_size);
2830 doread(offset, size);
2831 break;
2832
2833 case OP_WRITE:
2834 TRIM_OFF_LEN(offset, size, maxfilelen);
2835 dowrite(offset, size);
2836 break;
2837
2838 case OP_MAPREAD:
2839 TRIM_OFF_LEN(offset, size, file_size);
2840 exit(183);
2841 break;
2842
2843 case OP_MAPWRITE:
2844 TRIM_OFF_LEN(offset, size, maxfilelen);
2845 exit(182);
2846 break;
2847
2848 case OP_TRUNCATE:
2849 if (!style)
2850 size = get_random() % maxfilelen;
2851 dotruncate(size);
2852 break;
2853
2854 case OP_PUNCH_HOLE:
2855 TRIM_OFF_LEN(offset, size, file_size);
2856 do_punch_hole(offset, size);
2857 break;
2858
2859 case OP_WRITESAME:
2860 TRIM_OFF_LEN(offset, size, maxfilelen);
2861 dowritesame(offset, size);
2862 break;
2863 case OP_COMPARE_AND_WRITE:
2864 TRIM_OFF_LEN(offset, size, file_size);
2865 docompareandwrite(offset, size);
2866 break;
2867
2868 case OP_CLONE:
2869 do_clone();
2870 break;
2871
2872 case OP_FLATTEN:
2873 do_flatten();
2874 break;
2875
2876 default:
2877 prterr("test: unknown operation");
2878 report_failure(42);
2879 break;
2880 }
2881
2882 out:
2883 if (sizechecks && testcalls > simulatedopcount)
2884 check_size();
2885 if (closeopen)
2886 docloseopen();
2887 }
2888
2889
2890 void
2891 cleanup(int sig)
2892 {
2893 if (sig)
2894 prt("signal %d\n", sig);
2895 prt("testcalls = %lu\n", testcalls);
2896 exit(sig);
2897 }
2898
2899
/*
 * Print the command line synopsis and the option descriptions to stdout,
 * then terminate with exit code 89.
 *
 * The synopsis lists every single-letter flag that main()'s option string
 * accepts (including -g, -G and -M) and the --cluster / --id long options.
 * The text is built from adjacent string literals so that the
 * platform-specific entries can be included conditionally.
 */
void
usage(void)
{
	fprintf(stdout, "usage: %s",
"fsx [-dfgjknqxyACFGHKLMORUWZ] [-b opnum] [-c Prob] [-h holebdy] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] [--cluster name] [--id id] poolname imagename\n"
"\t-b opnum: beginning operation number (default 1)\n"
"\t-c P: 1 in P chance of file close+open at each op (default infinity)\n"
"\t-d: debug output for all operations\n"
"\t-f: flush and invalidate cache after I/O\n"
"\t-g: deep copy instead of clone\n"
"\t-h holebdy: 4096 would make discards page aligned (default 1)\n"
"\t-j: journal replay stress test\n"
"\t-k: keep data on success (default 0)\n"
"\t-l flen: the upper bound on file size (default 262144)\n"
"\t-m start:end: monitor (print debug output) specified byte range (default 0:infinity)\n"
"\t-n: no verifications of file size\n"
"\t-o oplen: the upper bound on operation size (default 65536)\n"
"\t-p progressinterval: debug output at specified operation interval\n"
"\t-q: quieter operation\n"
"\t-r readbdy: 4096 would make reads page aligned (default 1)\n"
"\t-s style: 1 gives smaller truncates (default 0)\n"
"\t-t truncbdy: 4096 would make truncates page aligned (default 1)\n"
"\t-w writebdy: 4096 would make writes page aligned (default 1)\n"
"\t-x: preallocate file space before starting, XFS only (default 0)\n"
"\t-y: synchronize changes to a file\n"

"\t-C: do not use clone calls\n"
"\t-D startingop: debug output starting at specified operation\n"
#ifdef FALLOCATE
"\t-F: Do not use fallocate (preallocation) calls\n"
#endif
#if defined(__FreeBSD__)
"\t-G: enable rbd-ggate mode (use -L, -r and -w too)\n"
#endif
"\t-H: do not use punch hole calls\n"
#if defined(WITH_KRBD)
"\t-K: enable krbd mode (use -t and -h too)\n"
#endif
#if defined(__linux__)
"\t-M: enable rbd-nbd mode (use -t and -h too)\n"
#endif
"\t-L: fsxLite - no file creations & no file size changes\n"
"\t-N numops: total # operations to do (default infinity)\n"
"\t-O: use oplen (see -o flag) for every op (default random)\n"
"\t-P dirpath: save .fsxlog and .fsxgood files in dirpath (default ./)\n"
"\t-R: read() system calls only (mapped reads disabled)\n"
"\t-S seed: for random # generator (default 1) 0 gets timestamp\n"
"\t-U: disable randomized striping\n"
"\t-W: mapped write operations DISabled\n"
"\t-Z: O_DIRECT (use -R, -W, -r and -w too)\n"
"\t--cluster name: cluster name\n"
"\t--id id: client id\n"
"\tpoolname: this is REQUIRED (no default)\n"
"\timagename: this is REQUIRED (no default)\n");
	exit(89);
}
2954
2955
/*
 * Parse an integer with an optional one-letter size suffix.
 *
 * s - text to parse; the base is auto-detected by strtol() (base 0).
 * e - receives a pointer to the first character that was not consumed,
 *     i.e. past the digits and past a recognised suffix.
 *
 * Recognised suffixes (either case): b = x512, k = x1024,
 * m = x1024*1024, w = x4.
 *
 * Returns the scaled value.  A result that does not fit in an int is
 * saturated to INT_MAX / INT_MIN and errno is set to ERANGE, instead of
 * being truncated or overflowing during the multiplication.
 */
int
getnum(char *s, char **e)
{
	long long val;
	long long scale = 1;

	*e = (char *) 0;
	val = strtol(s, e, 0);
	if (*e) {
		switch (**e) {
		case 'b':
		case 'B':
			scale = 512;
			break;
		case 'k':
		case 'K':
			scale = 1024;
			break;
		case 'm':
		case 'M':
			scale = 1024 * 1024;
			break;
		case 'w':
		case 'W':
			scale = 4;
			break;
		}
		if (scale != 1)
			*e = *e + 1;	/* step over the suffix letter */
	}

	/*
	 * Clamp before scaling so the product below cannot overflow
	 * long long, then clamp the scaled value to the return type.
	 */
	if (val > INT_MAX) {
		val = INT_MAX;
		errno = ERANGE;
	} else if (val < INT_MIN) {
		val = INT_MIN;
		errno = ERANGE;
	}
	val *= scale;
	if (val > INT_MAX) {
		val = INT_MAX;
		errno = ERANGE;
	} else if (val < INT_MIN) {
		val = INT_MIN;
		errno = ERANGE;
	}
	return ((int)val);
}
2988
2989 void
2990 test_fallocate()
2991 {
2992 #ifdef FALLOCATE
2993 if (!lite && fallocate_calls) {
2994 if (fallocate(fd, 0, 0, 1) && errno == EOPNOTSUPP) {
2995 if(!quiet)
2996 warn("main: filesystem does not support fallocate, disabling\n");
2997 fallocate_calls = 0;
2998 } else {
2999 ftruncate(fd, 0);
3000 }
3001 }
3002 #else /* ! FALLOCATE */
3003 fallocate_calls = 0;
3004 #endif
3005
3006 }
3007
3008 void remove_image(rados_ioctx_t ioctx, char *imagename, bool remove_snap,
3009 bool unregister) {
3010 rbd_image_t image;
3011 char errmsg[128];
3012 int ret;
3013
3014 if ((ret = rbd_open(ioctx, imagename, &image, NULL)) < 0) {
3015 sprintf(errmsg, "rbd_open %s", imagename);
3016 prterrcode(errmsg, ret);
3017 report_failure(101);
3018 }
3019 if (remove_snap) {
3020 if ((ret = rbd_snap_unprotect(image, "snap")) < 0) {
3021 sprintf(errmsg, "rbd_snap_unprotect %s@snap",
3022 imagename);
3023 prterrcode(errmsg, ret);
3024 report_failure(102);
3025 }
3026 if ((ret = rbd_snap_remove(image, "snap")) < 0) {
3027 sprintf(errmsg, "rbd_snap_remove %s@snap",
3028 imagename);
3029 prterrcode(errmsg, ret);
3030 report_failure(103);
3031 }
3032 }
3033 if ((ret = rbd_close(image)) < 0) {
3034 sprintf(errmsg, "rbd_close %s", imagename);
3035 prterrcode(errmsg, ret);
3036 report_failure(104);
3037 }
3038
3039 if (unregister &&
3040 (ret = unregister_journal(ioctx, imagename)) < 0) {
3041 report_failure(105);
3042 }
3043
3044 if ((ret = rbd_remove(ioctx, imagename)) < 0) {
3045 sprintf(errmsg, "rbd_remove %s", imagename);
3046 prterrcode(errmsg, ret);
3047 report_failure(106);
3048 }
3049 }
3050
3051 int
3052 main(int argc, char **argv)
3053 {
3054 enum {
3055 LONG_OPT_CLUSTER = 1000,
3056 LONG_OPT_ID = 1001
3057 };
3058
3059 int i, style, ch, ret;
3060 char *endp;
3061 char goodfile[1024];
3062 char logfile[1024];
3063
3064 const char* optstring = "b:c:dfgh:jkl:m:no:p:qr:s:t:w:xyCD:FGHKMLN:OP:RS:UWZ";
3065 const struct option longopts[] = {
3066 {"cluster", 1, NULL, LONG_OPT_CLUSTER},
3067 {"id", 1, NULL, LONG_OPT_ID}};
3068
3069 goodfile[0] = 0;
3070 logfile[0] = 0;
3071
3072 page_size = getpagesize();
3073 page_mask = page_size - 1;
3074 mmap_mask = page_mask;
3075
3076 setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
3077
3078 while ((ch = getopt_long(argc, argv, optstring, longopts, NULL)) != EOF) {
3079 switch (ch) {
3080 case LONG_OPT_CLUSTER:
3081 cluster_name = optarg;
3082 break;
3083 case LONG_OPT_ID:
3084 client_id = optarg;
3085 break;
3086 case 'b':
3087 simulatedopcount = getnum(optarg, &endp);
3088 if (!quiet)
3089 fprintf(stdout, "Will begin at operation %lu\n",
3090 simulatedopcount);
3091 if (simulatedopcount == 0)
3092 usage();
3093 simulatedopcount -= 1;
3094 break;
3095 case 'c':
3096 closeprob = getnum(optarg, &endp);
3097 if (!quiet)
3098 fprintf(stdout,
3099 "Chance of close/open is 1 in %d\n",
3100 closeprob);
3101 if (closeprob <= 0)
3102 usage();
3103 break;
3104 case 'd':
3105 debug = 1;
3106 break;
3107 case 'f':
3108 flush_enabled = 1;
3109 break;
3110 case 'g':
3111 deep_copy = 1;
3112 break;
3113 case 'h':
3114 holebdy = getnum(optarg, &endp);
3115 if (holebdy <= 0)
3116 usage();
3117 break;
3118 case 'j':
3119 journal_replay = true;
3120 break;
3121 case 'k':
3122 keep_on_success = 1;
3123 break;
3124 case 'l':
3125 {
3126 int _num = getnum(optarg, &endp);
3127 if (_num <= 0)
3128 usage();
3129 maxfilelen = _num;
3130 }
3131 break;
3132 case 'm':
3133 monitorstart = getnum(optarg, &endp);
3134 if (monitorstart < 0)
3135 usage();
3136 if (!endp || *endp++ != ':')
3137 usage();
3138 monitorend = getnum(endp, &endp);
3139 if (monitorend < 0)
3140 usage();
3141 if (monitorend == 0)
3142 monitorend = -1; /* aka infinity */
3143 debug = 1;
3144 break;
3145 case 'n':
3146 sizechecks = 0;
3147 break;
3148 case 'o':
3149 maxoplen = getnum(optarg, &endp);
3150 if (maxoplen <= 0)
3151 usage();
3152 break;
3153 case 'p':
3154 progressinterval = getnum(optarg, &endp);
3155 if (progressinterval == 0)
3156 usage();
3157 break;
3158 case 'q':
3159 quiet = 1;
3160 break;
3161 case 'r':
3162 readbdy = getnum(optarg, &endp);
3163 if (readbdy <= 0)
3164 usage();
3165 break;
3166 case 's':
3167 style = getnum(optarg, &endp);
3168 if (style < 0 || style > 1)
3169 usage();
3170 break;
3171 case 't':
3172 truncbdy = getnum(optarg, &endp);
3173 if (truncbdy <= 0)
3174 usage();
3175 break;
3176 case 'w':
3177 writebdy = getnum(optarg, &endp);
3178 if (writebdy <= 0)
3179 usage();
3180 break;
3181 case 'x':
3182 prealloc = 1;
3183 break;
3184 case 'y':
3185 do_fsync = 1;
3186 break;
3187 case 'C':
3188 clone_calls = 0;
3189 break;
3190 case 'D':
3191 debugstart = getnum(optarg, &endp);
3192 if (debugstart < 1)
3193 usage();
3194 break;
3195 case 'F':
3196 fallocate_calls = 0;
3197 break;
3198 #if defined(__FreeBSD__)
3199 case 'G':
3200 prt("rbd-ggate mode enabled\n");
3201 ops = &ggate_operations;
3202 break;
3203 #endif
3204 case 'H':
3205 punch_hole_calls = 0;
3206 break;
3207 #if defined(WITH_KRBD)
3208 case 'K':
3209 prt("krbd mode enabled\n");
3210 ops = &krbd_operations;
3211 break;
3212 #endif
3213 #if defined(__linux__)
3214 case 'M':
3215 prt("rbd-nbd mode enabled\n");
3216 ops = &nbd_operations;
3217 break;
3218 #endif
3219 case 'L':
3220 lite = 1;
3221 break;
3222 case 'N':
3223 numops = getnum(optarg, &endp);
3224 if (numops < 0)
3225 usage();
3226 break;
3227 case 'O':
3228 randomoplen = 0;
3229 break;
3230 case 'P':
3231 strncpy(dirpath, optarg, sizeof(dirpath)-1);
3232 dirpath[sizeof(dirpath)-1] = '\0';
3233 strncpy(goodfile, dirpath, sizeof(goodfile)-1);
3234 goodfile[sizeof(goodfile)-1] = '\0';
3235 if (strlen(goodfile) < sizeof(goodfile)-2) {
3236 strcat(goodfile, "/");
3237 } else {
3238 prt("file name to long\n");
3239 exit(1);
3240 }
3241 strncpy(logfile, dirpath, sizeof(logfile)-1);
3242 logfile[sizeof(logfile)-1] = '\0';
3243 if (strlen(logfile) < sizeof(logfile)-2) {
3244 strcat(logfile, "/");
3245 } else {
3246 prt("file path to long\n");
3247 exit(1);
3248 }
3249 break;
3250 case 'R':
3251 mapped_reads = 0;
3252 if (!quiet)
3253 fprintf(stdout, "mapped reads DISABLED\n");
3254 break;
3255 case 'S':
3256 seed = getnum(optarg, &endp);
3257 if (seed == 0)
3258 seed = std::random_device()() % 10000;
3259 if (!quiet)
3260 fprintf(stdout, "Seed set to %d\n", seed);
3261 if (seed < 0)
3262 usage();
3263 break;
3264 case 'U':
3265 randomize_striping = 0;
3266 break;
3267 case 'W':
3268 mapped_writes = 0;
3269 if (!quiet)
3270 fprintf(stdout, "mapped writes DISABLED\n");
3271 break;
3272 case 'Z':
3273 o_direct = O_DIRECT;
3274 break;
3275 default:
3276 usage();
3277 /* NOTREACHED */
3278 }
3279 }
3280 argc -= optind;
3281 argv += optind;
3282 if (argc != 2)
3283 usage();
3284 pool = argv[0];
3285 iname = argv[1];
3286
3287 signal(SIGHUP, cleanup);
3288 signal(SIGINT, cleanup);
3289 signal(SIGPIPE, cleanup);
3290 signal(SIGALRM, cleanup);
3291 signal(SIGTERM, cleanup);
3292 signal(SIGXCPU, cleanup);
3293 signal(SIGXFSZ, cleanup);
3294 signal(SIGVTALRM, cleanup);
3295 signal(SIGUSR1, cleanup);
3296 signal(SIGUSR2, cleanup);
3297
3298 random_generator.seed(seed);
3299
3300 if (lite) {
3301 file_size = maxfilelen;
3302 }
3303
3304 ret = create_image();
3305 if (ret < 0) {
3306 prterrcode(iname, ret);
3307 exit(90);
3308 }
3309 ret = ops->open(iname, &ctx);
3310 if (ret < 0) {
3311 simple_err("Error opening image", ret);
3312 exit(91);
3313 }
3314 if (!dirpath[0])
3315 strcat(dirpath, ".");
3316 strncat(goodfile, iname, 256);
3317 strcat (goodfile, ".fsxgood");
3318 fsxgoodfd = open(goodfile, O_RDWR|O_CREAT|O_TRUNC, 0666);
3319 if (fsxgoodfd < 0) {
3320 prterr(goodfile);
3321 exit(92);
3322 }
3323 strncat(logfile, iname, 256);
3324 strcat (logfile, ".fsxlog");
3325 fsxlogf = fopen(logfile, "w");
3326 if (fsxlogf == NULL) {
3327 prterr(logfile);
3328 exit(93);
3329 }
3330
3331 original_buf = (char *) malloc(maxfilelen);
3332 for (i = 0; i < (int)maxfilelen; i++)
3333 original_buf[i] = get_random() % 256;
3334
3335 ret = posix_memalign((void **)&good_buf,
3336 std::max(writebdy, (int)sizeof(void *)), maxfilelen);
3337 if (ret > 0) {
3338 if (ret == EINVAL)
3339 prt("writebdy is not a suitable power of two\n");
3340 else
3341 prterrcode("main: posix_memalign(good_buf)", -ret);
3342 exit(94);
3343 }
3344 memset(good_buf, '\0', maxfilelen);
3345
3346 ret = posix_memalign((void **)&temp_buf,
3347 std::max(readbdy, (int)sizeof(void *)), maxfilelen);
3348 if (ret > 0) {
3349 if (ret == EINVAL)
3350 prt("readbdy is not a suitable power of two\n");
3351 else
3352 prterrcode("main: posix_memalign(temp_buf)", -ret);
3353 exit(95);
3354 }
3355 memset(temp_buf, '\0', maxfilelen);
3356
3357 if (lite) { /* zero entire existing file */
3358 ssize_t written;
3359
3360 written = ops->write(&ctx, 0, (size_t)maxfilelen, good_buf);
3361 if (written != (ssize_t)maxfilelen) {
3362 if (written < 0) {
3363 prterrcode(iname, written);
3364 warn("main: error on write");
3365 } else
3366 warn("main: short write, 0x%x bytes instead "
3367 "of 0x%lx\n",
3368 (unsigned)written,
3369 maxfilelen);
3370 exit(98);
3371 }
3372 } else
3373 check_trunc_hack();
3374
3375 //test_fallocate();
3376
3377 while (numops == -1 || numops--)
3378 test();
3379
3380 ret = ops->close(&ctx);
3381 if (ret < 0) {
3382 prterrcode("ops->close", ret);
3383 report_failure(99);
3384 }
3385
3386 if (journal_replay) {
3387 char imagename[1024];
3388 clone_imagename(imagename, sizeof(imagename), num_clones);
3389 ret = finalize_journal(ioctx, imagename, num_clones, 0, 0, 0);
3390 if (ret < 0) {
3391 report_failure(100);
3392 }
3393 }
3394
3395 if (num_clones > 0) {
3396 if (journal_replay) {
3397 check_clone(num_clones - 1, true);
3398 }
3399 check_clone(num_clones - 1, false);
3400 }
3401
3402 if (!keep_on_success) {
3403 while (num_clones >= 0) {
3404 static bool remove_snap = false;
3405
3406 if (journal_replay) {
3407 char replayimagename[1024];
3408 replay_imagename(replayimagename,
3409 sizeof(replayimagename),
3410 num_clones);
3411 remove_image(ioctx, replayimagename,
3412 remove_snap,
3413 false);
3414 }
3415
3416 char clonename[128];
3417 clone_imagename(clonename, 128, num_clones);
3418 remove_image(ioctx, clonename, remove_snap,
3419 journal_replay);
3420
3421 remove_snap = true;
3422 num_clones--;
3423 }
3424 }
3425
3426 prt("All operations completed A-OK!\n");
3427 fclose(fsxlogf);
3428
3429 rados_ioctx_destroy(ioctx);
3430 #if defined(WITH_KRBD)
3431 krbd_destroy(krbd);
3432 #endif
3433 rados_shutdown(cluster);
3434
3435 free(original_buf);
3436 free(good_buf);
3437 free(temp_buf);
3438
3439 exit(0);
3440 return 0;
3441 }