]> git.proxmox.com Git - ceph.git/blob - ceph/src/tools/rados/rados.cc
7017a4479436eb60a7e52068da52f2f9e3fdb5a0
[ceph.git] / ceph / src / tools / rados / rados.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include "include/types.h"
16
17 #include "include/rados/librados.hpp"
18 #include "include/rados/rados_types.hpp"
19 #include "include/radosstriper/libradosstriper.hpp"
20 using namespace libradosstriper;
21
22 #include "common/config.h"
23 #include "common/ceph_argparse.h"
24 #include "global/global_init.h"
25 #include "common/Cond.h"
26 #include "common/debug.h"
27 #include "common/errno.h"
28 #include "common/Formatter.h"
29 #include "common/obj_bencher.h"
30 #include "common/TextTable.h"
31 #include "include/stringify.h"
32 #include "mds/inode_backtrace.h"
33 #include "auth/Crypto.h"
34 #include <iostream>
35 #include <fstream>
36
37 #include <stdlib.h>
38 #include <time.h>
39 #include <sstream>
40 #include <errno.h>
41 #include <dirent.h>
42 #include <stdexcept>
43 #include <climits>
44 #include <locale>
45 #include <memory>
46
47 #include "cls/lock/cls_lock_client.h"
48 #include "include/compat.h"
49 #include "include/util.h"
50 #include "common/hobject.h"
51
52 #include "PoolDump.h"
53 #include "RadosImport.h"
54
55 #include "osd/ECUtil.h"
56
57 using namespace librados;
58
// Stringification needs two macro layers: STR() lets its argument be
// macro-expanded first, then _STR() applies the # operator to the result.
#define _STR(x) #x
#define STR(x) _STR(x)
62
/**
 * Write the complete command-line help text of the rados tool to a stream.
 *
 * @param out stream that receives the help text (callers in this file
 *            pass cerr).
 */
void usage(std::ostream& out)
{
  out <<
"usage: rados [options] [commands]\n"
"POOL COMMANDS\n"
" lspools list pools\n"
" mkpool <pool-name> [123[ 4]] create pool <pool-name>\n"
" [with auid 123[and using crush rule 4]]\n"
" cppool <pool-name> <dest-pool> copy content of a pool\n"
" rmpool <pool-name> [<pool-name> --yes-i-really-really-mean-it]\n"
" remove pool <pool-name>\n"
" purge <pool-name> --yes-i-really-really-mean-it\n"
" remove all objects from pool <pool-name> without removing it\n"
" df show per-pool and total usage\n"
" ls list objects in pool\n\n"
" chown 123 change the pool owner to auid 123\n"
"\n"
"POOL SNAP COMMANDS\n"
" lssnap list snaps\n"
" mksnap <snap-name> create snap <snap-name>\n"
" rmsnap <snap-name> remove snap <snap-name>\n"
"\n"
"OBJECT COMMANDS\n"
" get <obj-name> [outfile] fetch object\n"
" put <obj-name> [infile] [--offset offset]\n"
" write object with start offset (default:0)\n"
" append <obj-name> [infile] append object\n"
" truncate <obj-name> length truncate object\n"
" create <obj-name> create object\n"
" rm <obj-name> ...[--force-full] [force no matter full or not]remove object(s)\n"
" cp <obj-name> [target-obj] copy object\n"
" listxattr <obj-name>\n"
" getxattr <obj-name> attr\n"
" setxattr <obj-name> attr val\n"
" rmxattr <obj-name> attr\n"
" stat <obj-name> stat the named object\n"
" mapext <obj-name>\n"
" rollback <obj-name> <snap-name> roll back object to snap <snap-name>\n"
"\n"
" listsnaps <obj-name> list the snapshots of this object\n"
" bench <seconds> write|seq|rand [-t concurrent_operations] [--no-cleanup] [--run-name run_name] [--no-hints]\n"
" default is 16 concurrent IOs and 4 MB ops\n"
" default is to clean up after write benchmark\n"
" default run-name is 'benchmark_last_metadata'\n"
" cleanup [--run-name run_name] [--prefix prefix]\n"
" clean up a previous benchmark operation\n"
" default run-name is 'benchmark_last_metadata'\n"
" load-gen [options] generate load on the cluster\n"
" listomapkeys <obj-name> list the keys in the object map\n"
" listomapvals <obj-name> list the keys and vals in the object map \n"
" getomapval <obj-name> <key> [file] show the value for the specified key\n"
" in the object's object map\n"
" setomapval <obj-name> <key> <val>\n"
" rmomapkey <obj-name> <key>\n"
" getomapheader <obj-name> [file]\n"
" setomapheader <obj-name> <val>\n"
" tmap-to-omap <obj-name> convert tmap keys/values to omap\n"
" watch <obj-name> add watcher on this object\n"
" notify <obj-name> <message> notify watcher of this object with message\n"
" listwatchers <obj-name> list the watchers of this object\n"
" set-alloc-hint <obj-name> <expected-object-size> <expected-write-size>\n"
" set allocation hint for an object\n"
"\n"
"IMPORT AND EXPORT\n"
" export [filename]\n"
" Serialize pool contents to a file or standard out.\n"
" import [--dry-run] [--no-overwrite] < filename | - >\n"
" Load pool contents from a file or standard in\n"
"\n"
"ADVISORY LOCKS\n"
" lock list <obj-name>\n"
" List all advisory locks on an object\n"
" lock get <obj-name> <lock-name>\n"
" Try to acquire a lock\n"
" lock break <obj-name> <lock-name> <locker-name>\n"
" Try to break a lock acquired by another client\n"
" lock info <obj-name> <lock-name>\n"
" Show lock information\n"
" options:\n"
" --lock-tag Lock tag, all locks operation should use\n"
" the same tag\n"
" --lock-cookie Locker cookie\n"
" --lock-description Description of lock\n"
" --lock-duration Lock duration (in seconds)\n"
" --lock-type Lock type (shared, exclusive)\n"
"\n"
"SCRUB AND REPAIR:\n"
" list-inconsistent-pg <pool> list inconsistent PGs in given pool\n"
" list-inconsistent-obj <pgid> list inconsistent objects in given pg\n"
" list-inconsistent-snapset <pgid> list inconsistent snapsets in the given pg\n"
"\n"
"CACHE POOLS: (for testing/development only)\n"
" cache-flush <obj-name> flush cache pool object (blocking)\n"
" cache-try-flush <obj-name> flush cache pool object (non-blocking)\n"
" cache-evict <obj-name> evict cache pool object\n"
" cache-flush-evict-all flush+evict all objects\n"
" cache-try-flush-evict-all try-flush+evict all objects\n"
"\n"
"GLOBAL OPTIONS:\n"
" --object_locator object_locator\n"
" set object_locator for operation\n"
" -p pool\n"
" --pool=pool\n"
" select given pool by name\n"
" --target-pool=pool\n"
" select target pool by name\n"
" -b op_size\n"
" set the block size for put/get ops and for write benchmarking\n"
" -o object_size\n"
" set the object size for put/get ops and for write benchmarking\n"
" --max-objects\n"
" set the max number of objects for write benchmarking\n"
" -s name\n"
" --snap name\n"
" select given snap name for (read) IO\n"
" -i infile\n"
" --create\n"
" create the pool or directory that was specified\n"
" -N namespace\n"
" --namespace=namespace\n"
" specify the namespace to use for the object\n"
" --all\n"
" Use with ls to list objects in all namespaces\n"
" Put in CEPH_ARGS environment variable to make this the default\n"
" --default\n"
" Use with ls to list objects in default namespace\n"
" Takes precedence over --all in case --all is in environment\n"
" --target-locator\n"
" Use with cp to specify the locator of the new object\n"
" --target-nspace\n"
" Use with cp to specify the namespace of the new object\n"
" --striper\n"
" Use radostriper interface rather than pure rados\n"
" Available for stat, get, put, truncate, rm, ls and \n"
" all xattr related operations\n"
"\n"
"BENCH OPTIONS:\n"
" -t N\n"
" --concurrent-ios=N\n"
" Set number of concurrent I/O operations\n"
" --show-time\n"
" prefix output with date/time\n"
" --no-verify\n"
" do not verify contents of read objects\n"
" --write-object\n"
" write contents to the objects\n"
" --write-omap\n"
" write contents to the omap\n"
" --write-xattr\n"
" write contents to the extended attributes\n"
"\n"
"LOAD GEN OPTIONS:\n"
" --num-objects total number of objects\n"
" --min-object-size min object size\n"
" --max-object-size max object size\n"
" --min-op-len min io size of operations\n"
" --max-op-len max io size of operations\n"
" --max-ops max number of operations\n"
" --max-backlog max backlog size\n"
" --read-percent percent of operations that are read\n"
" --target-throughput target throughput (in bytes)\n"
" --run-length total time (in seconds)\n"
"CACHE POOLS OPTIONS:\n"
" --with-clones include clones when doing flush or evict\n"
"OMAP OPTIONS:\n"
" --omap-key-file file read the omap key from a file\n";
}
230
// Default operation size in bytes: 4 MiB (4194304).
unsigned default_op_size = 4u << 20;
232
233 static void usage_exit()
234 {
235 usage(cerr);
236 exit(1);
237 }
238
239
240 template <typename I, typename T>
241 static int rados_sistrtoll(I &i, T *val) {
242 std::string err;
243 *val = strict_sistrtoll(i->second.c_str(), &err);
244 if (err != "") {
245 cerr << "Invalid value for " << i->first << ": " << err << std::endl;
246 return -EINVAL;
247 } else {
248 return 0;
249 }
250 }
251
252
253 static int dump_data(std::string const &filename, bufferlist const &data)
254 {
255 int fd;
256 if (filename == "-") {
257 fd = STDOUT_FILENO;
258 } else {
259 fd = TEMP_FAILURE_RETRY(::open(filename.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0644));
260 if (fd < 0) {
261 int err = errno;
262 cerr << "failed to open file: " << cpp_strerror(err) << std::endl;
263 return -err;
264 }
265 }
266
267 int r = data.write_fd(fd);
268
269 if (fd != 1) {
270 VOID_TEMP_FAILURE_RETRY(::close(fd));
271 }
272
273 return r;
274 }
275
276
277 static int do_get(IoCtx& io_ctx, RadosStriper& striper,
278 const char *objname, const char *outfile, unsigned op_size,
279 bool use_striper)
280 {
281 string oid(objname);
282
283 int fd;
284 if (strcmp(outfile, "-") == 0) {
285 fd = STDOUT_FILENO;
286 } else {
287 fd = TEMP_FAILURE_RETRY(::open(outfile, O_WRONLY|O_CREAT|O_TRUNC, 0644));
288 if (fd < 0) {
289 int err = errno;
290 cerr << "failed to open file: " << cpp_strerror(err) << std::endl;
291 return -err;
292 }
293 }
294
295 uint64_t offset = 0;
296 int ret;
297 while (true) {
298 bufferlist outdata;
299 if (use_striper) {
300 ret = striper.read(oid, &outdata, op_size, offset);
301 } else {
302 ret = io_ctx.read(oid, outdata, op_size, offset);
303 }
304 if (ret <= 0) {
305 goto out;
306 }
307 ret = outdata.write_fd(fd);
308 if (ret < 0) {
309 cerr << "error writing to file: " << cpp_strerror(ret) << std::endl;
310 goto out;
311 }
312 if (outdata.length() < op_size)
313 break;
314 offset += outdata.length();
315 }
316 ret = 0;
317
318 out:
319 if (fd != 1)
320 VOID_TEMP_FAILURE_RETRY(::close(fd));
321 return ret;
322 }
323
324 static int do_copy(IoCtx& io_ctx, const char *objname,
325 IoCtx& target_ctx, const char *target_obj)
326 {
327 __le32 src_fadvise_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL | LIBRADOS_OP_FLAG_FADVISE_NOCACHE;
328 __le32 dest_fadvise_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL | LIBRADOS_OP_FLAG_FADVISE_DONTNEED;
329 ObjectWriteOperation op;
330 op.copy_from2(objname, io_ctx, 0, src_fadvise_flags);
331 op.set_op_flags2(dest_fadvise_flags);
332
333 return target_ctx.operate(target_obj, &op);
334 }
335
336 static int do_copy_pool(Rados& rados, const char *src_pool, const char *target_pool)
337 {
338 IoCtx src_ctx, target_ctx;
339 int ret = rados.ioctx_create(src_pool, src_ctx);
340 if (ret < 0) {
341 cerr << "cannot open source pool: " << src_pool << std::endl;
342 return ret;
343 }
344 ret = rados.ioctx_create(target_pool, target_ctx);
345 if (ret < 0) {
346 cerr << "cannot open target pool: " << target_pool << std::endl;
347 return ret;
348 }
349 src_ctx.set_namespace(all_nspaces);
350 librados::NObjectIterator i = src_ctx.nobjects_begin();
351 librados::NObjectIterator i_end = src_ctx.nobjects_end();
352 for (; i != i_end; ++i) {
353 string nspace = i->get_nspace();
354 string oid = i->get_oid();
355 string locator = i->get_locator();
356
357 string target_name = (nspace.size() ? nspace + "/" : "") + oid;
358 string src_name = target_name;
359 if (locator.size())
360 src_name += "(@" + locator + ")";
361 cout << src_pool << ":" << src_name << " => "
362 << target_pool << ":" << target_name << std::endl;
363
364 src_ctx.locator_set_key(locator);
365 src_ctx.set_namespace(nspace);
366 target_ctx.set_namespace(nspace);
367 ret = do_copy(src_ctx, oid.c_str(), target_ctx, oid.c_str());
368 if (ret < 0) {
369 cerr << "error copying object: " << cpp_strerror(errno) << std::endl;
370 return ret;
371 }
372 }
373
374 return 0;
375 }
376
377 static int do_put(IoCtx& io_ctx, RadosStriper& striper,
378 const char *objname, const char *infile, int op_size,
379 uint64_t obj_offset, bool use_striper)
380 {
381 string oid(objname);
382 bool stdio = (strcmp(infile, "-") == 0);
383 int ret = 0;
384 int fd = STDIN_FILENO;
385 if (!stdio)
386 fd = open(infile, O_RDONLY);
387 if (fd < 0) {
388 cerr << "error reading input file " << infile << ": " << cpp_strerror(errno) << std::endl;
389 return 1;
390 }
391 int count = op_size;
392 uint64_t offset = obj_offset;
393 while (count != 0) {
394 bufferlist indata;
395 count = indata.read_fd(fd, op_size);
396 if (count < 0) {
397 ret = -errno;
398 cerr << "error reading input file " << infile << ": " << cpp_strerror(ret) << std::endl;
399 goto out;
400 }
401
402 if (count == 0) {
403 if (offset == obj_offset) { // in case we have to create an empty object & if obj_offset > 0 do a hole
404 if (use_striper) {
405 ret = striper.write_full(oid, indata); // indata is empty
406 } else {
407 ret = io_ctx.write_full(oid, indata); // indata is empty
408 }
409 if (ret < 0) {
410 goto out;
411 }
412 if (offset) {
413 if (use_striper) {
414 ret = striper.trunc(oid, offset); // before truncate, object must be existed.
415 } else {
416 ret = io_ctx.trunc(oid, offset); // before truncate, object must be existed.
417 }
418
419 if (ret < 0) {
420 goto out;
421 }
422 }
423 }
424 continue;
425 }
426 if (use_striper) {
427 if (offset == 0)
428 ret = striper.write_full(oid, indata);
429 else
430 ret = striper.write(oid, indata, count, offset);
431 } else {
432 if (offset == 0)
433 ret = io_ctx.write_full(oid, indata);
434 else
435 ret = io_ctx.write(oid, indata, count, offset);
436 }
437
438 if (ret < 0) {
439 goto out;
440 }
441 offset += count;
442 }
443 ret = 0;
444 out:
445 if (fd != STDOUT_FILENO)
446 VOID_TEMP_FAILURE_RETRY(close(fd));
447 return ret;
448 }
449
450 static int do_append(IoCtx& io_ctx, RadosStriper& striper,
451 const char *objname, const char *infile, int op_size,
452 bool use_striper)
453 {
454 string oid(objname);
455 bool stdio = (strcmp(infile, "-") == 0);
456 int ret = 0;
457 int fd = STDIN_FILENO;
458 if (!stdio)
459 fd = open(infile, O_RDONLY);
460 if (fd < 0) {
461 cerr << "error reading input file " << infile << ": " << cpp_strerror(errno) << std::endl;
462 return 1;
463 }
464 int count = op_size;
465 while (count != 0) {
466 bufferlist indata;
467 count = indata.read_fd(fd, op_size);
468 if (count < 0) {
469 ret = -errno;
470 cerr << "error reading input file " << infile << ": " << cpp_strerror(ret) << std::endl;
471 goto out;
472 }
473 if (use_striper) {
474 ret = striper.append(oid, indata, count);
475 } else {
476 ret = io_ctx.append(oid, indata, count);
477 }
478
479 if (ret < 0) {
480 goto out;
481 }
482 }
483 ret = 0;
484 out:
485 if (fd != STDOUT_FILENO)
486 VOID_TEMP_FAILURE_RETRY(close(fd));
487 return ret;
488 }
489
490 class RadosWatchCtx : public librados::WatchCtx2 {
491 IoCtx& ioctx;
492 string name;
493 public:
494 RadosWatchCtx(IoCtx& io, const char *imgname) : ioctx(io), name(imgname) {}
495 ~RadosWatchCtx() override {}
496 void handle_notify(uint64_t notify_id,
497 uint64_t cookie,
498 uint64_t notifier_id,
499 bufferlist& bl) override {
500 cout << "NOTIFY"
501 << " cookie " << cookie
502 << " notify_id " << notify_id
503 << " from " << notifier_id
504 << std::endl;
505 bl.hexdump(cout);
506 ioctx.notify_ack(name, notify_id, cookie, bl);
507 }
508 void handle_error(uint64_t cookie, int err) override {
509 cout << "ERROR"
510 << " cookie " << cookie
511 << " err " << cpp_strerror(err)
512 << std::endl;
513 }
514 };
515
516 static const char alphanum_table[]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
517
518 int gen_rand_alphanumeric(char *dest, int size) /* size should be the required string size + 1 */
519 {
520 int ret = get_random_bytes(dest, size);
521 if (ret < 0) {
522 cerr << "cannot get random bytes: " << cpp_strerror(ret) << std::endl;
523 return -1;
524 }
525
526 int i;
527 for (i=0; i<size - 1; i++) {
528 int pos = (unsigned)dest[i];
529 dest[i] = alphanum_table[pos & 63];
530 }
531 dest[i] = '\0';
532
533 return 0;
534 }
535
// Name and length of one object used by the load generator.
struct obj_info {
  std::string name;	// object id
  size_t len;		// object length in bytes
};
540
541 class LoadGen {
542 size_t total_sent;
543 size_t total_completed;
544
545 IoCtx io_ctx;
546 Rados *rados;
547
548 map<int, obj_info> objs;
549
550 utime_t start_time;
551
552 bool going_down;
553
554 public:
555 int read_percent;
556 int num_objs;
557 size_t min_obj_len;
558 uint64_t max_obj_len;
559 size_t min_op_len;
560 size_t max_op_len;
561 size_t max_ops;
562 size_t max_backlog;
563 size_t target_throughput;
564 int run_length;
565
566 enum {
567 OP_READ,
568 OP_WRITE,
569 };
570
571 struct LoadGenOp {
572 int id;
573 int type;
574 string oid;
575 size_t off;
576 size_t len;
577 bufferlist bl;
578 LoadGen *lg;
579 librados::AioCompletion *completion;
580
581 LoadGenOp() : id(0), type(0), off(0), len(0), lg(NULL), completion(NULL) {}
582 explicit LoadGenOp(LoadGen *_lg) : id(0), type(0), off(0), len(0), lg(_lg), completion(NULL) {}
583 };
584
585 int max_op;
586
587 map<int, LoadGenOp *> pending_ops;
588
589 void gen_op(LoadGenOp *op);
590 uint64_t gen_next_op();
591 void run_op(LoadGenOp *op);
592
593 uint64_t cur_sent_rate() {
594 return total_sent / time_passed();
595 }
596
597 uint64_t cur_completed_rate() {
598 return total_completed / time_passed();
599 }
600
601 uint64_t total_expected() {
602 return target_throughput * time_passed();
603 }
604
605 float time_passed() {
606 utime_t now = ceph_clock_now();
607 now -= start_time;
608 uint64_t ns = now.nsec();
609 float total = (float) ns / 1000000000.0;
610 total += now.sec();
611 return total;
612 }
613
614 Mutex lock;
615 Cond cond;
616
617 explicit LoadGen(Rados *_rados) : rados(_rados), going_down(false), lock("LoadGen") {
618 read_percent = 80;
619 min_obj_len = 1024;
620 max_obj_len = 5ull * 1024ull * 1024ull * 1024ull;
621 min_op_len = 1024;
622 target_throughput = 5 * 1024 * 1024; // B/sec
623 max_op_len = 2 * 1024 * 1024;
624 max_ops = 16;
625 max_backlog = target_throughput * 2;
626 run_length = 60;
627
628 total_sent = 0;
629 total_completed = 0;
630 num_objs = 200;
631 max_op = 0;
632 }
633 int bootstrap(const char *pool);
634 int run();
635 void cleanup();
636
637 void io_cb(completion_t c, LoadGenOp *op) {
638 Mutex::Locker l(lock);
639
640 total_completed += op->len;
641
642 double rate = (double)cur_completed_rate() / (1024 * 1024);
643 std::streamsize original_precision = cout.precision();
644 cout.precision(3);
645 cout << "op " << op->id << " completed, throughput=" << rate << "MB/sec" << std::endl;
646 cout.precision(original_precision);
647
648 map<int, LoadGenOp *>::iterator iter = pending_ops.find(op->id);
649 if (iter != pending_ops.end())
650 pending_ops.erase(iter);
651
652 if (!going_down)
653 op->completion->release();
654
655 delete op;
656
657 cond.Signal();
658 }
659 };
660
661 static void _load_gen_cb(completion_t c, void *param)
662 {
663 LoadGen::LoadGenOp *op = (LoadGen::LoadGenOp *)param;
664 op->lg->io_cb(c, op);
665 }
666
667 int LoadGen::bootstrap(const char *pool)
668 {
669 char buf[128];
670 int i;
671
672 if (!pool) {
673 cerr << "ERROR: pool name was not specified" << std::endl;
674 return -EINVAL;
675 }
676
677 int ret = rados->ioctx_create(pool, io_ctx);
678 if (ret < 0) {
679 cerr << "error opening pool " << pool << ": " << cpp_strerror(ret) << std::endl;
680 return ret;
681 }
682
683 int buf_len = 1;
684 bufferptr p = buffer::create(buf_len);
685 bufferlist bl;
686 memset(p.c_str(), 0, buf_len);
687 bl.push_back(p);
688
689 list<librados::AioCompletion *> completions;
690 for (i = 0; i < num_objs; i++) {
691 obj_info info;
692 gen_rand_alphanumeric(buf, 16);
693 info.name = "obj-";
694 info.name.append(buf);
695 info.len = get_random(min_obj_len, max_obj_len);
696
697 // throttle...
698 while (completions.size() > max_ops) {
699 AioCompletion *c = completions.front();
700 c->wait_for_complete();
701 ret = c->get_return_value();
702 c->release();
703 completions.pop_front();
704 if (ret < 0) {
705 cerr << "aio_write failed" << std::endl;
706 return ret;
707 }
708 }
709
710 librados::AioCompletion *c = rados->aio_create_completion(NULL, NULL, NULL);
711 completions.push_back(c);
712 // generate object
713 ret = io_ctx.aio_write(info.name, c, bl, buf_len, info.len - buf_len);
714 if (ret < 0) {
715 cerr << "couldn't write obj: " << info.name << " ret=" << ret << std::endl;
716 return ret;
717 }
718 objs[i] = info;
719 }
720
721 list<librados::AioCompletion *>::iterator iter;
722 for (iter = completions.begin(); iter != completions.end(); ++iter) {
723 AioCompletion *c = *iter;
724 c->wait_for_complete();
725 ret = c->get_return_value();
726 c->release();
727 if (ret < 0) { // yes, we leak.
728 cerr << "aio_write failed" << std::endl;
729 return ret;
730 }
731 }
732 return 0;
733 }
734
735 void LoadGen::run_op(LoadGenOp *op)
736 {
737 op->completion = rados->aio_create_completion(op, _load_gen_cb, NULL);
738
739 switch (op->type) {
740 case OP_READ:
741 io_ctx.aio_read(op->oid, op->completion, &op->bl, op->len, op->off);
742 break;
743 case OP_WRITE:
744 bufferptr p = buffer::create(op->len);
745 memset(p.c_str(), 0, op->len);
746 op->bl.push_back(p);
747
748 io_ctx.aio_write(op->oid, op->completion, op->bl, op->len, op->off);
749 break;
750 }
751
752 total_sent += op->len;
753 }
754
755 void LoadGen::gen_op(LoadGenOp *op)
756 {
757 int i = get_random(0, objs.size() - 1);
758 obj_info& info = objs[i];
759 op->oid = info.name;
760
761 size_t len = get_random(min_op_len, max_op_len);
762 if (len > info.len)
763 len = info.len;
764 size_t off = get_random(0, info.len);
765
766 if (off + len > info.len)
767 off = info.len - len;
768
769 op->off = off;
770 op->len = len;
771
772 i = get_random(1, 100);
773 if (i > read_percent)
774 op->type = OP_WRITE;
775 else
776 op->type = OP_READ;
777
778 cout << (op->type == OP_READ ? "READ" : "WRITE") << " : oid=" << op->oid << " off=" << op->off << " len=" << op->len << std::endl;
779 }
780
781 uint64_t LoadGen::gen_next_op()
782 {
783 lock.Lock();
784
785 LoadGenOp *op = new LoadGenOp(this);
786 gen_op(op);
787 op->id = max_op++;
788 pending_ops[op->id] = op;
789
790 lock.Unlock();
791
792 run_op(op);
793
794 return op->len;
795 }
796
797 int LoadGen::run()
798 {
799 start_time = ceph_clock_now();
800 utime_t end_time = start_time;
801 end_time += run_length;
802 utime_t stamp_time = start_time;
803 uint32_t total_sec = 0;
804
805 while (1) {
806 lock.Lock();
807 utime_t one_second(1, 0);
808 cond.WaitInterval(lock, one_second);
809 lock.Unlock();
810 utime_t now = ceph_clock_now();
811
812 if (now > end_time)
813 break;
814
815 uint64_t expected = total_expected();
816 lock.Lock();
817 uint64_t sent = total_sent;
818 uint64_t completed = total_completed;
819 lock.Unlock();
820
821 if (now - stamp_time >= utime_t(1, 0)) {
822 double rate = (double)cur_completed_rate() / (1024 * 1024);
823 ++total_sec;
824 std::streamsize original_precision = cout.precision();
825 cout.precision(3);
826 cout << setw(5) << total_sec << ": throughput=" << rate << "MB/sec" << " pending data=" << sent - completed << std::endl;
827 cout.precision(original_precision);
828 stamp_time = now;
829 }
830
831 while (sent < expected &&
832 sent - completed < max_backlog &&
833 pending_ops.size() < max_ops) {
834 sent += gen_next_op();
835 }
836 }
837
838 // get a reference to all pending requests
839 vector<librados::AioCompletion *> completions;
840 lock.Lock();
841 going_down = true;
842 map<int, LoadGenOp *>::iterator iter;
843 for (iter = pending_ops.begin(); iter != pending_ops.end(); ++iter) {
844 LoadGenOp *op = iter->second;
845 completions.push_back(op->completion);
846 }
847 lock.Unlock();
848
849 cout << "waiting for all operations to complete" << std::endl;
850
851 // now wait on all the pending requests
852 for (vector<librados::AioCompletion *>::iterator citer = completions.begin(); citer != completions.end(); ++citer) {
853 librados::AioCompletion *c = *citer;
854 c->wait_for_complete();
855 c->release();
856 }
857
858 return 0;
859 }
860
861 void LoadGen::cleanup()
862 {
863 cout << "cleaning up objects" << std::endl;
864 map<int, obj_info>::iterator iter;
865 for (iter = objs.begin(); iter != objs.end(); ++iter) {
866 obj_info& info = iter->second;
867 int ret = io_ctx.remove(info.name);
868 if (ret < 0)
869 cerr << "couldn't remove obj: " << info.name << " ret=" << ret << std::endl;
870 }
871 }
872
// Bit flags selecting where a benchmark write puts its payload.
// The values are 2, 4 and 8.
enum OpWriteDest {
  OP_WRITE_DEST_OBJ = 1 << 1,	// object data
  OP_WRITE_DEST_OMAP = 1 << 2,	// omap
  OP_WRITE_DEST_XATTR = 1 << 3,	// extended attribute
};
878
879 class RadosBencher : public ObjBencher {
880 librados::AioCompletion **completions;
881 librados::Rados& rados;
882 librados::IoCtx& io_ctx;
883 librados::NObjectIterator oi;
884 bool iterator_valid;
885 OpWriteDest write_destination;
886
887 protected:
888 int completions_init(int concurrentios) override {
889 completions = new librados::AioCompletion *[concurrentios];
890 return 0;
891 }
892 void completions_done() override {
893 delete[] completions;
894 completions = NULL;
895 }
896 int create_completion(int slot, void (*cb)(void *, void*), void *arg) override {
897 completions[slot] = rados.aio_create_completion((void *) arg, 0, cb);
898
899 if (!completions[slot])
900 return -EINVAL;
901
902 return 0;
903 }
904 void release_completion(int slot) override {
905 completions[slot]->release();
906 completions[slot] = 0;
907 }
908
909 int aio_read(const std::string& oid, int slot, bufferlist *pbl, size_t len,
910 size_t offset) override {
911 return io_ctx.aio_read(oid, completions[slot], pbl, len, 0);
912 }
913
914 int aio_write(const std::string& oid, int slot, bufferlist& bl, size_t len,
915 size_t offset) override {
916 librados::ObjectWriteOperation op;
917
918 if (write_destination & OP_WRITE_DEST_OBJ) {
919 if (data.hints)
920 op.set_alloc_hint2(data.object_size, data.op_size,
921 ALLOC_HINT_FLAG_SEQUENTIAL_WRITE |
922 ALLOC_HINT_FLAG_SEQUENTIAL_READ |
923 ALLOC_HINT_FLAG_APPEND_ONLY |
924 ALLOC_HINT_FLAG_IMMUTABLE);
925 op.write(offset, bl);
926 }
927
928 if (write_destination & OP_WRITE_DEST_OMAP) {
929 std::map<std::string, librados::bufferlist> omap;
930 omap[string("bench-omap-key-") + stringify(offset)] = bl;
931 op.omap_set(omap);
932 }
933
934 if (write_destination & OP_WRITE_DEST_XATTR) {
935 char key[80];
936 snprintf(key, sizeof(key), "bench-xattr-key-%d", (int)offset);
937 op.setxattr(key, bl);
938 }
939
940 return io_ctx.aio_operate(oid, completions[slot], &op);
941 }
942
943 int aio_remove(const std::string& oid, int slot) override {
944 return io_ctx.aio_remove(oid, completions[slot]);
945 }
946
947 int sync_read(const std::string& oid, bufferlist& bl, size_t len) override {
948 return io_ctx.read(oid, bl, len, 0);
949 }
950 int sync_write(const std::string& oid, bufferlist& bl, size_t len) override {
951 return io_ctx.write_full(oid, bl);
952 }
953
954 int sync_remove(const std::string& oid) override {
955 return io_ctx.remove(oid);
956 }
957
958 bool completion_is_done(int slot) override {
959 return completions[slot]->is_safe();
960 }
961
962 int completion_wait(int slot) override {
963 return completions[slot]->wait_for_safe_and_cb();
964 }
965 int completion_ret(int slot) override {
966 return completions[slot]->get_return_value();
967 }
968
969 bool get_objects(std::list<Object>* objects, int num) override {
970 int count = 0;
971
972 if (!iterator_valid) {
973 oi = io_ctx.nobjects_begin();
974 iterator_valid = true;
975 }
976
977 librados::NObjectIterator ei = io_ctx.nobjects_end();
978
979 if (oi == ei) {
980 iterator_valid = false;
981 return false;
982 }
983
984 objects->clear();
985 for ( ; oi != ei && count < num; ++oi) {
986 Object obj(oi->get_oid(), oi->get_nspace());
987 objects->push_back(obj);
988 ++count;
989 }
990
991 return true;
992 }
993
994 void set_namespace( const std::string& ns) override {
995 io_ctx.set_namespace(ns);
996 }
997
998 public:
999 RadosBencher(CephContext *cct_, librados::Rados& _r, librados::IoCtx& _i)
1000 : ObjBencher(cct_), completions(NULL), rados(_r), io_ctx(_i), iterator_valid(false), write_destination(OP_WRITE_DEST_OBJ) {}
1001 ~RadosBencher() override { }
1002
1003 void set_write_destination(OpWriteDest dest) {
1004 write_destination = dest;
1005 }
1006 };
1007
1008 static int do_lock_cmd(std::vector<const char*> &nargs,
1009 const std::map < std::string, std::string > &opts,
1010 IoCtx *ioctx,
1011 Formatter *formatter)
1012 {
1013 if (nargs.size() < 3)
1014 usage_exit();
1015
1016 string cmd(nargs[1]);
1017 string oid(nargs[2]);
1018
1019 string lock_tag;
1020 string lock_cookie;
1021 string lock_description;
1022 int lock_duration = 0;
1023 ClsLockType lock_type = LOCK_EXCLUSIVE;
1024
1025 map<string, string>::const_iterator i;
1026 i = opts.find("lock-tag");
1027 if (i != opts.end()) {
1028 lock_tag = i->second;
1029 }
1030 i = opts.find("lock-cookie");
1031 if (i != opts.end()) {
1032 lock_cookie = i->second;
1033 }
1034 i = opts.find("lock-description");
1035 if (i != opts.end()) {
1036 lock_description = i->second;
1037 }
1038 i = opts.find("lock-duration");
1039 if (i != opts.end()) {
1040 if (rados_sistrtoll(i, &lock_duration)) {
1041 return -EINVAL;
1042 }
1043 }
1044 i = opts.find("lock-type");
1045 if (i != opts.end()) {
1046 const string& type_str = i->second;
1047 if (type_str.compare("exclusive") == 0) {
1048 lock_type = LOCK_EXCLUSIVE;
1049 } else if (type_str.compare("shared") == 0) {
1050 lock_type = LOCK_SHARED;
1051 } else {
1052 cerr << "unknown lock type was specified, aborting" << std::endl;
1053 return -EINVAL;
1054 }
1055 }
1056
1057 if (cmd.compare("list") == 0) {
1058 list<string> locks;
1059 int ret = rados::cls::lock::list_locks(ioctx, oid, &locks);
1060 if (ret < 0) {
1061 cerr << "ERROR: rados_list_locks(): " << cpp_strerror(ret) << std::endl;
1062 return ret;
1063 }
1064
1065 formatter->open_object_section("object");
1066 formatter->dump_string("objname", oid);
1067 formatter->open_array_section("locks");
1068 list<string>::iterator iter;
1069 for (iter = locks.begin(); iter != locks.end(); ++iter) {
1070 formatter->open_object_section("lock");
1071 formatter->dump_string("name", *iter);
1072 formatter->close_section();
1073 }
1074 formatter->close_section();
1075 formatter->close_section();
1076 formatter->flush(cout);
1077 return 0;
1078 }
1079
1080 if (nargs.size() < 4)
1081 usage_exit();
1082
1083 string lock_name(nargs[3]);
1084
1085 if (cmd.compare("info") == 0) {
1086 map<rados::cls::lock::locker_id_t, rados::cls::lock::locker_info_t> lockers;
1087 ClsLockType type = LOCK_NONE;
1088 string tag;
1089 int ret = rados::cls::lock::get_lock_info(ioctx, oid, lock_name, &lockers, &type, &tag);
1090 if (ret < 0) {
1091 cerr << "ERROR: rados_lock_get_lock_info(): " << cpp_strerror(ret) << std::endl;
1092 return ret;
1093 }
1094
1095 formatter->open_object_section("lock");
1096 formatter->dump_string("name", lock_name);
1097 formatter->dump_string("type", cls_lock_type_str(type));
1098 formatter->dump_string("tag", tag);
1099 formatter->open_array_section("lockers");
1100 map<rados::cls::lock::locker_id_t, rados::cls::lock::locker_info_t>::iterator iter;
1101 for (iter = lockers.begin(); iter != lockers.end(); ++iter) {
1102 const rados::cls::lock::locker_id_t& id = iter->first;
1103 const rados::cls::lock::locker_info_t& info = iter->second;
1104 formatter->open_object_section("locker");
1105 formatter->dump_stream("name") << id.locker;
1106 formatter->dump_string("cookie", id.cookie);
1107 formatter->dump_string("description", info.description);
1108 formatter->dump_stream("expiration") << info.expiration;
1109 formatter->dump_stream("addr") << info.addr;
1110 formatter->close_section();
1111 }
1112 formatter->close_section();
1113 formatter->close_section();
1114 formatter->flush(cout);
1115
1116 return ret;
1117 } else if (cmd.compare("get") == 0) {
1118 rados::cls::lock::Lock l(lock_name);
1119 l.set_cookie(lock_cookie);
1120 l.set_tag(lock_tag);
1121 l.set_duration(utime_t(lock_duration, 0));
1122 l.set_description(lock_description);
1123 int ret;
1124 switch (lock_type) {
1125 case LOCK_SHARED:
1126 ret = l.lock_shared(ioctx, oid);
1127 break;
1128 default:
1129 ret = l.lock_exclusive(ioctx, oid);
1130 }
1131 if (ret < 0) {
1132 cerr << "ERROR: failed locking: " << cpp_strerror(ret) << std::endl;
1133 return ret;
1134 }
1135
1136 return ret;
1137 }
1138
1139 if (nargs.size() < 5)
1140 usage_exit();
1141
1142 if (cmd.compare("break") == 0) {
1143 string locker(nargs[4]);
1144 rados::cls::lock::Lock l(lock_name);
1145 l.set_cookie(lock_cookie);
1146 l.set_tag(lock_tag);
1147 entity_name_t name;
1148 if (!name.parse(locker)) {
1149 cerr << "ERROR: failed to parse locker name (" << locker << ")" << std::endl;
1150 return -EINVAL;
1151 }
1152 int ret = l.break_lock(ioctx, oid, name);
1153 if (ret < 0) {
1154 cerr << "ERROR: failed breaking lock: " << cpp_strerror(ret) << std::endl;
1155 return ret;
1156 }
1157 } else {
1158 usage_exit();
1159 }
1160
1161 return 0;
1162 }
1163
1164 static int do_cache_flush(IoCtx& io_ctx, string oid)
1165 {
1166 ObjectReadOperation op;
1167 op.cache_flush();
1168 librados::AioCompletion *completion =
1169 librados::Rados::aio_create_completion();
1170 io_ctx.aio_operate(oid.c_str(), completion, &op,
1171 librados::OPERATION_IGNORE_CACHE |
1172 librados::OPERATION_IGNORE_OVERLAY,
1173 NULL);
1174 completion->wait_for_safe();
1175 int r = completion->get_return_value();
1176 completion->release();
1177 return r;
1178 }
1179
1180 static int do_cache_try_flush(IoCtx& io_ctx, string oid)
1181 {
1182 ObjectReadOperation op;
1183 op.cache_try_flush();
1184 librados::AioCompletion *completion =
1185 librados::Rados::aio_create_completion();
1186 io_ctx.aio_operate(oid.c_str(), completion, &op,
1187 librados::OPERATION_IGNORE_CACHE |
1188 librados::OPERATION_IGNORE_OVERLAY |
1189 librados::OPERATION_SKIPRWLOCKS,
1190 NULL);
1191 completion->wait_for_safe();
1192 int r = completion->get_return_value();
1193 completion->release();
1194 return r;
1195 }
1196
1197 static int do_cache_evict(IoCtx& io_ctx, string oid)
1198 {
1199 ObjectReadOperation op;
1200 op.cache_evict();
1201 librados::AioCompletion *completion =
1202 librados::Rados::aio_create_completion();
1203 io_ctx.aio_operate(oid.c_str(), completion, &op,
1204 librados::OPERATION_IGNORE_CACHE |
1205 librados::OPERATION_IGNORE_OVERLAY |
1206 librados::OPERATION_SKIPRWLOCKS,
1207 NULL);
1208 completion->wait_for_safe();
1209 int r = completion->get_return_value();
1210 completion->release();
1211 return r;
1212 }
1213
1214 static int do_cache_flush_evict_all(IoCtx& io_ctx, bool blocking)
1215 {
1216 int errors = 0;
1217 io_ctx.set_namespace(all_nspaces);
1218 try {
1219 librados::NObjectIterator i = io_ctx.nobjects_begin();
1220 librados::NObjectIterator i_end = io_ctx.nobjects_end();
1221 for (; i != i_end; ++i) {
1222 int r;
1223 cout << i->get_nspace() << "\t" << i->get_oid() << "\t" << i->get_locator() << std::endl;
1224 if (i->get_locator().size()) {
1225 io_ctx.locator_set_key(i->get_locator());
1226 } else {
1227 io_ctx.locator_set_key(string());
1228 }
1229 io_ctx.set_namespace(i->get_nspace());
1230 snap_set_t ls;
1231 io_ctx.snap_set_read(LIBRADOS_SNAP_DIR);
1232 r = io_ctx.list_snaps(i->get_oid(), &ls);
1233 if (r < 0) {
1234 cerr << "error listing snap shots " << i->get_nspace() << "/" << i->get_oid() << ": "
1235 << cpp_strerror(r) << std::endl;
1236 ++errors;
1237 continue;
1238 }
1239 std::vector<clone_info_t>::iterator ci = ls.clones.begin();
1240 // no snapshots
1241 if (ci == ls.clones.end()) {
1242 io_ctx.snap_set_read(CEPH_NOSNAP);
1243 if (blocking)
1244 r = do_cache_flush(io_ctx, i->get_oid());
1245 else
1246 r = do_cache_try_flush(io_ctx, i->get_oid());
1247 if (r < 0) {
1248 cerr << "failed to flush " << i->get_nspace() << "/" << i->get_oid() << ": "
1249 << cpp_strerror(r) << std::endl;
1250 ++errors;
1251 continue;
1252 }
1253 r = do_cache_evict(io_ctx, i->get_oid());
1254 if (r < 0) {
1255 cerr << "failed to evict " << i->get_nspace() << "/" << i->get_oid() << ": "
1256 << cpp_strerror(r) << std::endl;
1257 ++errors;
1258 continue;
1259 }
1260 } else {
1261 // has snapshots
1262 for (std::vector<clone_info_t>::iterator ci = ls.clones.begin();
1263 ci != ls.clones.end(); ++ci) {
1264 io_ctx.snap_set_read(ci->cloneid);
1265 if (blocking)
1266 r = do_cache_flush(io_ctx, i->get_oid());
1267 else
1268 r = do_cache_try_flush(io_ctx, i->get_oid());
1269 if (r < 0) {
1270 cerr << "failed to flush " << i->get_nspace() << "/" << i->get_oid() << ": "
1271 << cpp_strerror(r) << std::endl;
1272 ++errors;
1273 break;
1274 }
1275 r = do_cache_evict(io_ctx, i->get_oid());
1276 if (r < 0) {
1277 cerr << "failed to evict " << i->get_nspace() << "/" << i->get_oid() << ": "
1278 << cpp_strerror(r) << std::endl;
1279 ++errors;
1280 break;
1281 }
1282 }
1283 }
1284 }
1285 }
1286 catch (const std::runtime_error& e) {
1287 cerr << e.what() << std::endl;
1288 return -1;
1289 }
1290 return errors ? -1 : 0;
1291 }
1292
1293 static int do_get_inconsistent_pg_cmd(const std::vector<const char*> &nargs,
1294 Rados& rados,
1295 Formatter& formatter)
1296 {
1297 if (nargs.size() < 2) {
1298 usage_exit();
1299 }
1300 int64_t pool_id = rados.pool_lookup(nargs[1]);
1301 if (pool_id < 0) {
1302 cerr << "pool \"" << nargs[1] << "\" not found" << std::endl;
1303 return (int)pool_id;
1304 }
1305 std::vector<PlacementGroup> pgs;
1306 int ret = rados.get_inconsistent_pgs(pool_id, &pgs);
1307 if (ret) {
1308 return ret;
1309 }
1310 formatter.open_array_section("pgs");
1311 for (auto& pg : pgs) {
1312 formatter.dump_stream("pg") << pg;
1313 }
1314 formatter.close_section();
1315 formatter.flush(cout);
1316 cout << std::endl;
1317 return 0;
1318 }
1319
1320 static void dump_errors(const err_t &err, Formatter &f, const char *name)
1321 {
1322 f.open_array_section(name);
1323 if (err.has_shard_missing())
1324 f.dump_string("error", "missing");
1325 if (err.has_stat_error())
1326 f.dump_string("error", "stat_error");
1327 if (err.has_read_error())
1328 f.dump_string("error", "read_error");
1329 if (err.has_data_digest_mismatch_info())
1330 f.dump_string("error", "data_digest_mismatch_info");
1331 if (err.has_omap_digest_mismatch_info())
1332 f.dump_string("error", "omap_digest_mismatch_info");
1333 if (err.has_size_mismatch_info())
1334 f.dump_string("error", "size_mismatch_info");
1335 if (err.has_ec_hash_error())
1336 f.dump_string("error", "ec_hash_error");
1337 if (err.has_ec_size_error())
1338 f.dump_string("error", "ec_size_error");
1339 if (err.has_info_missing())
1340 f.dump_string("error", "info_missing");
1341 if (err.has_info_corrupted())
1342 f.dump_string("error", "info_corrupted");
1343 if (err.has_obj_size_info_mismatch())
1344 f.dump_string("error", "obj_size_info_mismatch");
1345 if (err.has_snapset_missing())
1346 f.dump_string("error", "snapset_missing");
1347 if (err.has_snapset_corrupted())
1348 f.dump_string("error", "snapset_corrupted");
1349 if (err.has_hinfo_missing())
1350 f.dump_string("error", "hinfo_missing");
1351 if (err.has_hinfo_corrupted())
1352 f.dump_string("error", "hinfo_corrupted");
1353 f.close_section();
1354 }
1355
1356 static void dump_shard(const shard_info_t& shard,
1357 const inconsistent_obj_t& inc,
1358 Formatter &f)
1359 {
1360 dump_errors(shard, f, "errors");
1361
1362 if (shard.has_shard_missing())
1363 return;
1364
1365 if (!shard.has_stat_error())
1366 f.dump_unsigned("size", shard.size);
1367 if (shard.omap_digest_present) {
1368 f.dump_format("omap_digest", "0x%08x", shard.omap_digest);
1369 }
1370 if (shard.data_digest_present) {
1371 f.dump_format("data_digest", "0x%08x", shard.data_digest);
1372 }
1373
1374 if ((inc.union_shards.has_info_missing()
1375 || inc.union_shards.has_info_corrupted()
1376 || inc.has_object_info_inconsistency()
1377 || shard.has_obj_size_info_mismatch()) &&
1378 !shard.has_info_missing()) {
1379 map<std::string, ceph::bufferlist>::iterator k = (const_cast<shard_info_t&>(shard)).attrs.find(OI_ATTR);
1380 assert(k != shard.attrs.end()); // Can't be missing
1381 if (!shard.has_info_corrupted()) {
1382 object_info_t oi;
1383 bufferlist bl;
1384 bufferlist::iterator bliter = k->second.begin();
1385 ::decode(oi, bliter); // Can't be corrupted
1386 f.open_object_section("object_info");
1387 oi.dump(&f);
1388 f.close_section();
1389 } else {
1390 bool b64;
1391 f.dump_string("object_info", cleanbin(k->second, b64));
1392 }
1393 }
1394 if ((inc.union_shards.has_snapset_missing()
1395 || inc.union_shards.has_snapset_corrupted()
1396 || inc.has_snapset_inconsistency()) &&
1397 !shard.has_snapset_missing()) {
1398 map<std::string, ceph::bufferlist>::iterator k = (const_cast<shard_info_t&>(shard)).attrs.find(SS_ATTR);
1399 assert(k != shard.attrs.end()); // Can't be missing
1400 if (!shard.has_snapset_corrupted()) {
1401 SnapSet ss;
1402 bufferlist bl;
1403 bufferlist::iterator bliter = k->second.begin();
1404 decode(ss, bliter); // Can't be corrupted
1405 f.open_object_section("snapset");
1406 ss.dump(&f);
1407 f.close_section();
1408 } else {
1409 bool b64;
1410 f.dump_string("snapset", cleanbin(k->second, b64));
1411 }
1412 }
1413 if ((inc.union_shards.has_hinfo_missing()
1414 || inc.union_shards.has_hinfo_corrupted()
1415 || inc.has_hinfo_inconsistency()) &&
1416 !shard.has_hinfo_missing()) {
1417 map<std::string, ceph::bufferlist>::iterator k = (const_cast<shard_info_t&>(shard)).attrs.find(ECUtil::get_hinfo_key());
1418 assert(k != shard.attrs.end()); // Can't be missing
1419 if (!shard.has_hinfo_corrupted()) {
1420 ECUtil::HashInfo hi;
1421 bufferlist bl;
1422 bufferlist::iterator bliter = k->second.begin();
1423 decode(hi, bliter); // Can't be corrupted
1424 f.open_object_section("hashinfo");
1425 hi.dump(&f);
1426 f.close_section();
1427 } else {
1428 bool b64;
1429 f.dump_string("hashinfo", cleanbin(k->second, b64));
1430 }
1431 }
1432 if (inc.has_attr_name_mismatch() || inc.has_attr_value_mismatch()) {
1433 f.open_array_section("attrs");
1434 for (auto kv : shard.attrs) {
1435 // System attribute handled above
1436 if (kv.first == OI_ATTR || kv.first[0] != '_')
1437 continue;
1438 f.open_object_section("attr");
1439 // Skip leading underscore since only giving user attrs
1440 f.dump_string("name", kv.first.substr(1));
1441 bool b64;
1442 f.dump_string("value", cleanbin(kv.second, b64));
1443 f.dump_bool("Base64", b64);
1444 f.close_section();
1445 }
1446 f.close_section();
1447 }
1448 }
1449
1450 static void dump_obj_errors(const obj_err_t &err, Formatter &f)
1451 {
1452 f.open_array_section("errors");
1453 if (err.has_object_info_inconsistency())
1454 f.dump_string("error", "object_info_inconsistency");
1455 if (err.has_data_digest_mismatch())
1456 f.dump_string("error", "data_digest_mismatch");
1457 if (err.has_omap_digest_mismatch())
1458 f.dump_string("error", "omap_digest_mismatch");
1459 if (err.has_size_mismatch())
1460 f.dump_string("error", "size_mismatch");
1461 if (err.has_attr_value_mismatch())
1462 f.dump_string("error", "attr_value_mismatch");
1463 if (err.has_attr_name_mismatch())
1464 f.dump_string("error", "attr_name_mismatch");
1465 if (err.has_snapset_inconsistency())
1466 f.dump_string("error", "snapset_inconsistency");
1467 if (err.has_hinfo_inconsistency())
1468 f.dump_string("error", "hinfo_inconsistency");
1469 f.close_section();
1470 }
1471
1472 static void dump_object_id(const object_id_t& object,
1473 Formatter &f)
1474 {
1475 f.dump_string("name", object.name);
1476 f.dump_string("nspace", object.nspace);
1477 f.dump_string("locator", object.locator);
1478 switch (object.snap) {
1479 case CEPH_NOSNAP:
1480 f.dump_string("snap", "head");
1481 break;
1482 case CEPH_SNAPDIR:
1483 f.dump_string("snap", "snapdir");
1484 break;
1485 default:
1486 f.dump_unsigned("snap", object.snap);
1487 break;
1488 }
1489 }
1490
1491 static void dump_inconsistent(const inconsistent_obj_t& inc,
1492 Formatter &f)
1493 {
1494 f.open_object_section("object");
1495 dump_object_id(inc.object, f);
1496 f.dump_unsigned("version", inc.version);
1497 f.close_section();
1498
1499 dump_obj_errors(inc, f);
1500 dump_errors(inc.union_shards, f, "union_shard_errors");
1501 for (const auto& shard_info : inc.shards) {
1502 shard_info_t shard = const_cast<shard_info_t&>(shard_info.second);
1503 if (shard.selected_oi) {
1504 object_info_t oi;
1505 bufferlist bl;
1506 auto k = shard.attrs.find(OI_ATTR);
1507 assert(k != shard.attrs.end()); // Can't be missing
1508 bufferlist::iterator bliter = k->second.begin();
1509 ::decode(oi, bliter); // Can't be corrupted
1510 f.open_object_section("selected_object_info");
1511 oi.dump(&f);
1512 f.close_section();
1513 break;
1514 }
1515 }
1516 f.open_array_section("shards");
1517 for (const auto& shard_info : inc.shards) {
1518 f.open_object_section("shard");
1519 auto& osd_shard = shard_info.first;
1520 f.dump_int("osd", osd_shard.osd);
1521 f.dump_bool("primary", shard_info.second.primary);
1522 auto shard = osd_shard.shard;
1523 if (shard != shard_id_t::NO_SHARD)
1524 f.dump_unsigned("shard", shard);
1525 dump_shard(shard_info.second, inc, f);
1526 f.close_section();
1527 }
1528 f.close_section();
1529 }
1530
1531 static void dump_inconsistent(const inconsistent_snapset_t& inc,
1532 Formatter &f)
1533 {
1534 dump_object_id(inc.object, f);
1535
1536 if (inc.ss_bl.length()) {
1537 SnapSet ss;
1538 bufferlist bl = inc.ss_bl;
1539 bufferlist::iterator bliter = bl.begin();
1540 decode(ss, bliter); // Can't be corrupted
1541 f.open_object_section("snapset");
1542 ss.dump(&f);
1543 f.close_section();
1544 }
1545 f.open_array_section("errors");
1546 if (inc.snapset_missing())
1547 f.dump_string("error", "snapset_missing");
1548 if (inc.snapset_corrupted())
1549 f.dump_string("error", "snapset_corrupted");
1550 if (inc.info_missing())
1551 f.dump_string("error", "info_missing");
1552 if (inc.info_corrupted())
1553 f.dump_string("error", "info_corrupted");
1554 if (inc.snapset_error())
1555 f.dump_string("error", "snapset_error");
1556 if (inc.head_mismatch())
1557 f.dump_string("error", "head_mismatch");
1558 if (inc.headless())
1559 f.dump_string("error", "headless");
1560 if (inc.size_mismatch())
1561 f.dump_string("error", "size_mismatch");
1562 if (inc.extra_clones())
1563 f.dump_string("error", "extra_clones");
1564 if (inc.clone_missing())
1565 f.dump_string("error", "clone_missing");
1566 f.close_section();
1567
1568 if (inc.extra_clones()) {
1569 f.open_array_section("extra clones");
1570 for (auto snap : inc.clones) {
1571 f.dump_unsigned("snap", snap);
1572 }
1573 f.close_section();
1574 }
1575
1576 if (inc.clone_missing()) {
1577 f.open_array_section("missing");
1578 for (auto snap : inc.missing) {
1579 f.dump_unsigned("snap", snap);
1580 }
1581 f.close_section();
1582 }
1583 }
1584
1585 // dispatch the call by type
1586 static int do_get_inconsistent(Rados& rados,
1587 const PlacementGroup& pg,
1588 const librados::object_id_t &start,
1589 unsigned max_return,
1590 AioCompletion *c,
1591 std::vector<inconsistent_obj_t>* objs,
1592 uint32_t* interval)
1593 {
1594 return rados.get_inconsistent_objects(pg, start, max_return, c,
1595 objs, interval);
1596 }
1597
1598 static int do_get_inconsistent(Rados& rados,
1599 const PlacementGroup& pg,
1600 const librados::object_id_t &start,
1601 unsigned max_return,
1602 AioCompletion *c,
1603 std::vector<inconsistent_snapset_t>* snapsets,
1604 uint32_t* interval)
1605 {
1606 return rados.get_inconsistent_snapsets(pg, start, max_return, c,
1607 snapsets, interval);
1608 }
1609
1610 template <typename T>
1611 static int do_get_inconsistent_cmd(const std::vector<const char*> &nargs,
1612 Rados& rados,
1613 Formatter& formatter)
1614 {
1615 if (nargs.size() < 2) {
1616 usage_exit();
1617 }
1618 PlacementGroup pg;
1619 int ret = 0;
1620 ret = pg.parse(nargs[1]);
1621 if (!ret) {
1622 cerr << "bad pg: " << nargs[1] << std::endl;
1623 return ret;
1624 }
1625 uint32_t interval = 0, first_interval = 0;
1626 const unsigned max_item_num = 32;
1627 bool opened = false;
1628 for (librados::object_id_t start;;) {
1629 std::vector<T> items;
1630 auto completion = librados::Rados::aio_create_completion();
1631 ret = do_get_inconsistent(rados, pg, start, max_item_num, completion,
1632 &items, &interval);
1633 completion->wait_for_safe();
1634 ret = completion->get_return_value();
1635 completion->release();
1636 if (ret < 0) {
1637 if (ret == -EAGAIN)
1638 cerr << "interval#" << interval << " expired." << std::endl;
1639 else if (ret == -ENOENT)
1640 cerr << "No scrub information available for pg " << pg << std::endl;
1641 else
1642 cerr << "Unknown error " << cpp_strerror(ret) << std::endl;
1643 break;
1644 }
1645 // It must be the same interval every time. EAGAIN would
1646 // occur if interval changes.
1647 assert(start.name.empty() || first_interval == interval);
1648 if (start.name.empty()) {
1649 first_interval = interval;
1650 formatter.open_object_section("info");
1651 formatter.dump_int("epoch", interval);
1652 formatter.open_array_section("inconsistents");
1653 opened = true;
1654 }
1655 for (auto& inc : items) {
1656 formatter.open_object_section("inconsistent");
1657 dump_inconsistent(inc, formatter);
1658 formatter.close_section();
1659 }
1660 if (items.size() < max_item_num) {
1661 formatter.close_section();
1662 break;
1663 }
1664 if (!items.empty()) {
1665 start = items.back().object;
1666 }
1667 items.clear();
1668 }
1669 if (opened) {
1670 formatter.close_section();
1671 formatter.flush(cout);
1672 }
1673 return ret;
1674 }
1675
1676 /**********************************************
1677
1678 **********************************************/
1679 static int rados_tool_common(const std::map < std::string, std::string > &opts,
1680 std::vector<const char*> &nargs)
1681 {
1682 int ret;
1683 bool create_pool = false;
1684 const char *pool_name = NULL;
1685 const char *target_pool_name = NULL;
1686 string oloc, target_oloc, nspace, target_nspace;
1687 int concurrent_ios = 16;
1688 unsigned op_size = default_op_size;
1689 unsigned object_size = 0;
1690 unsigned max_objects = 0;
1691 uint64_t obj_offset = 0;
1692 bool block_size_specified = false;
1693 int bench_write_dest = 0;
1694 bool cleanup = true;
1695 bool hints = true; // for rados bench
1696 bool no_verify = false;
1697 bool use_striper = false;
1698 bool with_clones = false;
1699 const char *snapname = NULL;
1700 snap_t snapid = CEPH_NOSNAP;
1701 std::map<std::string, std::string>::const_iterator i;
1702
1703 uint64_t min_obj_len = 0;
1704 uint64_t max_obj_len = 0;
1705 uint64_t min_op_len = 0;
1706 uint64_t max_op_len = 0;
1707 uint64_t max_ops = 0;
1708 uint64_t max_backlog = 0;
1709 uint64_t target_throughput = 0;
1710 int64_t read_percent = -1;
1711 uint64_t num_objs = 0;
1712 int run_length = 0;
1713
1714 bool show_time = false;
1715 bool wildcard = false;
1716
1717 std::string run_name;
1718 std::string prefix;
1719 bool forcefull = false;
1720 Formatter *formatter = NULL;
1721 bool pretty_format = false;
1722 const char *output = NULL;
1723 bool omap_key_valid = false;
1724 std::string omap_key;
1725 std::string omap_key_pretty;
1726
1727 Rados rados;
1728 IoCtx io_ctx;
1729 RadosStriper striper;
1730
1731 i = opts.find("create");
1732 if (i != opts.end()) {
1733 create_pool = true;
1734 }
1735 i = opts.find("pool");
1736 if (i != opts.end()) {
1737 pool_name = i->second.c_str();
1738 }
1739 i = opts.find("target_pool");
1740 if (i != opts.end()) {
1741 target_pool_name = i->second.c_str();
1742 }
1743 i = opts.find("object_locator");
1744 if (i != opts.end()) {
1745 oloc = i->second;
1746 }
1747 i = opts.find("target_locator");
1748 if (i != opts.end()) {
1749 target_oloc = i->second;
1750 }
1751 i = opts.find("target_nspace");
1752 if (i != opts.end()) {
1753 target_nspace = i->second;
1754 }
1755 i = opts.find("concurrent-ios");
1756 if (i != opts.end()) {
1757 if (rados_sistrtoll(i, &concurrent_ios)) {
1758 return -EINVAL;
1759 }
1760 }
1761 i = opts.find("run-name");
1762 if (i != opts.end()) {
1763 run_name = i->second;
1764 }
1765
1766 i = opts.find("force-full");
1767 if (i != opts.end()) {
1768 forcefull = true;
1769 }
1770 i = opts.find("prefix");
1771 if (i != opts.end()) {
1772 prefix = i->second;
1773 }
1774 i = opts.find("block-size");
1775 if (i != opts.end()) {
1776 if (rados_sistrtoll(i, &op_size)) {
1777 return -EINVAL;
1778 }
1779 block_size_specified = true;
1780 }
1781 i = opts.find("object-size");
1782 if (i != opts.end()) {
1783 if (rados_sistrtoll(i, &object_size)) {
1784 return -EINVAL;
1785 }
1786 block_size_specified = true;
1787 }
1788 i = opts.find("max-objects");
1789 if (i != opts.end()) {
1790 if (rados_sistrtoll(i, &max_objects)) {
1791 return -EINVAL;
1792 }
1793 }
1794 i = opts.find("offset");
1795 if (i != opts.end()) {
1796 if (rados_sistrtoll(i, &obj_offset)) {
1797 return -EINVAL;
1798 }
1799 }
1800 i = opts.find("snap");
1801 if (i != opts.end()) {
1802 snapname = i->second.c_str();
1803 }
1804 i = opts.find("snapid");
1805 if (i != opts.end()) {
1806 if (rados_sistrtoll(i, &snapid)) {
1807 return -EINVAL;
1808 }
1809 }
1810 i = opts.find("min-object-size");
1811 if (i != opts.end()) {
1812 if (rados_sistrtoll(i, &min_obj_len)) {
1813 return -EINVAL;
1814 }
1815 }
1816 i = opts.find("max-object-size");
1817 if (i != opts.end()) {
1818 if (rados_sistrtoll(i, &max_obj_len)) {
1819 return -EINVAL;
1820 }
1821 }
1822 i = opts.find("min-op-len");
1823 if (i != opts.end()) {
1824 if (rados_sistrtoll(i, &min_op_len)) {
1825 return -EINVAL;
1826 }
1827 }
1828 i = opts.find("max-op-len");
1829 if (i != opts.end()) {
1830 if (rados_sistrtoll(i, &max_op_len)) {
1831 return -EINVAL;
1832 }
1833 }
1834 i = opts.find("max-ops");
1835 if (i != opts.end()) {
1836 if (rados_sistrtoll(i, &max_ops)) {
1837 return -EINVAL;
1838 }
1839 }
1840 i = opts.find("max-backlog");
1841 if (i != opts.end()) {
1842 if (rados_sistrtoll(i, &max_backlog)) {
1843 return -EINVAL;
1844 }
1845 }
1846 i = opts.find("target-throughput");
1847 if (i != opts.end()) {
1848 if (rados_sistrtoll(i, &target_throughput)) {
1849 return -EINVAL;
1850 }
1851 }
1852 i = opts.find("read-percent");
1853 if (i != opts.end()) {
1854 if (rados_sistrtoll(i, &read_percent)) {
1855 return -EINVAL;
1856 }
1857 }
1858 i = opts.find("num-objects");
1859 if (i != opts.end()) {
1860 if (rados_sistrtoll(i, &num_objs)) {
1861 return -EINVAL;
1862 }
1863 }
1864 i = opts.find("run-length");
1865 if (i != opts.end()) {
1866 if (rados_sistrtoll(i, &run_length)) {
1867 return -EINVAL;
1868 }
1869 }
1870 i = opts.find("show-time");
1871 if (i != opts.end()) {
1872 show_time = true;
1873 }
1874 i = opts.find("no-cleanup");
1875 if (i != opts.end()) {
1876 cleanup = false;
1877 }
1878 i = opts.find("no-hints");
1879 if (i != opts.end()) {
1880 hints = false;
1881 }
1882 i = opts.find("pretty-format");
1883 if (i != opts.end()) {
1884 pretty_format = true;
1885 }
1886 i = opts.find("format");
1887 if (i != opts.end()) {
1888 const char *format = i->second.c_str();
1889 formatter = Formatter::create(format);
1890 if (!formatter) {
1891 cerr << "unrecognized format: " << format << std::endl;
1892 return -EINVAL;
1893 }
1894 }
1895 i = opts.find("namespace");
1896 if (i != opts.end()) {
1897 nspace = i->second;
1898 }
1899 i = opts.find("no-verify");
1900 if (i != opts.end()) {
1901 no_verify = true;
1902 }
1903 i = opts.find("output");
1904 if (i != opts.end()) {
1905 output = i->second.c_str();
1906 }
1907 i = opts.find("write-dest-obj");
1908 if (i != opts.end()) {
1909 bench_write_dest |= static_cast<int>(OP_WRITE_DEST_OBJ);
1910 }
1911 i = opts.find("write-dest-omap");
1912 if (i != opts.end()) {
1913 bench_write_dest |= static_cast<int>(OP_WRITE_DEST_OMAP);
1914 }
1915 i = opts.find("write-dest-xattr");
1916 if (i != opts.end()) {
1917 bench_write_dest |= static_cast<int>(OP_WRITE_DEST_XATTR);
1918 }
1919 i = opts.find("with-clones");
1920 if (i != opts.end()) {
1921 with_clones = true;
1922 }
1923 i = opts.find("omap-key-file");
1924 if (i != opts.end()) {
1925 string err;
1926 bufferlist indata;
1927 ret = indata.read_file(i->second.c_str(), &err);
1928 if (ret < 0) {
1929 cerr << err << std::endl;
1930 return 1;
1931 }
1932
1933 omap_key_valid = true;
1934 omap_key = std::string(indata.c_str(), indata.length());
1935 omap_key_pretty = omap_key;
1936 if (std::find_if_not(omap_key.begin(), omap_key.end(),
1937 (int (*)(int))isprint) != omap_key.end()) {
1938 omap_key_pretty = "(binary key)";
1939 }
1940 }
1941
1942 // open rados
1943 ret = rados.init_with_context(g_ceph_context);
1944 if (ret < 0) {
1945 cerr << "couldn't initialize rados: " << cpp_strerror(ret) << std::endl;
1946 goto out;
1947 }
1948
1949 ret = rados.connect();
1950 if (ret) {
1951 cerr << "couldn't connect to cluster: " << cpp_strerror(ret) << std::endl;
1952 ret = -1;
1953 goto out;
1954 }
1955
1956 if (create_pool && !pool_name) {
1957 cerr << "--create-pool requested but pool_name was not specified!" << std::endl;
1958 usage_exit();
1959 }
1960
1961 if (create_pool) {
1962 ret = rados.pool_create(pool_name, 0, 0);
1963 if (ret < 0) {
1964 cerr << "error creating pool " << pool_name << ": "
1965 << cpp_strerror(ret) << std::endl;
1966 goto out;
1967 }
1968 }
1969
1970 // open io context.
1971 if (pool_name) {
1972 ret = rados.ioctx_create(pool_name, io_ctx);
1973 if (ret < 0) {
1974 cerr << "error opening pool " << pool_name << ": "
1975 << cpp_strerror(ret) << std::endl;
1976 goto out;
1977 }
1978
1979 // align op_size
1980 {
1981 bool requires;
1982 ret = io_ctx.pool_requires_alignment2(&requires);
1983 if (ret < 0) {
1984 cerr << "error checking pool alignment requirement"
1985 << cpp_strerror(ret) << std::endl;
1986 goto out;
1987 }
1988
1989 if (requires) {
1990 uint64_t align = 0;
1991 ret = io_ctx.pool_required_alignment2(&align);
1992 if (ret < 0) {
1993 cerr << "error getting pool alignment"
1994 << cpp_strerror(ret) << std::endl;
1995 goto out;
1996 }
1997
1998 const uint64_t prev_op_size = op_size;
1999 op_size = uint64_t((op_size + align - 1) / align) * align;
2000 // Warn: if user specified and it was rounded
2001 if (prev_op_size != default_op_size && prev_op_size != op_size)
2002 cerr << "INFO: op_size has been rounded to " << op_size << std::endl;
2003 }
2004 }
2005
2006 // create striper interface
2007 if (opts.find("striper") != opts.end()) {
2008 ret = RadosStriper::striper_create(io_ctx, &striper);
2009 if (0 != ret) {
2010 cerr << "error opening pool " << pool_name << " with striper interface: "
2011 << cpp_strerror(ret) << std::endl;
2012 goto out;
2013 }
2014 use_striper = true;
2015 }
2016 }
2017
2018 // snapname?
2019 if (snapname) {
2020 if (!pool_name) {
2021 cerr << "pool name must be specified with --snap" << std::endl;
2022 ret = -1;
2023 goto out;
2024 }
2025 ret = io_ctx.snap_lookup(snapname, &snapid);
2026 if (ret < 0) {
2027 cerr << "error looking up snap '" << snapname << "': " << cpp_strerror(ret) << std::endl;
2028 goto out;
2029 }
2030 }
2031 if (oloc.size()) {
2032 if (!pool_name) {
2033 cerr << "pool name must be specified with --object_locator" << std::endl;
2034 ret = -1;
2035 goto out;
2036 }
2037 io_ctx.locator_set_key(oloc);
2038 }
2039 // Use namespace from command line if specified
2040 if (opts.find("namespace") != opts.end()) {
2041 if (!pool_name) {
2042 cerr << "pool name must be specified with --namespace" << std::endl;
2043 ret = -1;
2044 goto out;
2045 }
2046 io_ctx.set_namespace(nspace);
2047 // Use wildcard if --all specified and --default NOT specified
2048 } else if (opts.find("all") != opts.end() && opts.find("default") == opts.end()) {
2049 // Only the ls should ever set namespace to special value
2050 wildcard = true;
2051 }
2052 if (snapid != CEPH_NOSNAP) {
2053 if (!pool_name) {
2054 cerr << "pool name must be specified with --snapid" << std::endl;
2055 ret = -1;
2056 goto out;
2057 }
2058 string name;
2059 ret = io_ctx.snap_get_name(snapid, &name);
2060 if (ret < 0) {
2061 cerr << "snapid " << snapid << " doesn't exist in pool "
2062 << io_ctx.get_pool_name() << std::endl;
2063 goto out;
2064 }
2065 io_ctx.snap_set_read(snapid);
2066 cout << "selected snap " << snapid << " '" << name << "'" << std::endl;
2067 }
2068
2069 assert(!nargs.empty());
2070
2071 // list pools?
2072 if (strcmp(nargs[0], "lspools") == 0) {
2073 list<string> vec;
2074 ret = rados.pool_list(vec);
2075 if (ret < 0) {
2076 cerr << "error listing pools: " << cpp_strerror(ret) << std::endl;
2077 goto out;
2078 }
2079 for (list<string>::iterator i = vec.begin(); i != vec.end(); ++i)
2080 cout << *i << std::endl;
2081 }
2082 else if (strcmp(nargs[0], "df") == 0) {
2083 // pools
2084 list<string> vec;
2085
2086 if (!pool_name) {
2087 ret = rados.pool_list(vec);
2088 if (ret < 0) {
2089 cerr << "error listing pools: " << cpp_strerror(ret) << std::endl;
2090 goto out;
2091 }
2092 } else {
2093 vec.push_back(pool_name);
2094 }
2095
2096 map<string,librados::pool_stat_t> stats;
2097 ret = rados.get_pool_stats(vec, stats);
2098 if (ret < 0) {
2099 cerr << "error fetching pool stats: " << cpp_strerror(ret) << std::endl;
2100 goto out;
2101 }
2102
2103 TextTable tab;
2104
2105 if (!formatter) {
2106 tab.define_column("POOL_NAME", TextTable::LEFT, TextTable::LEFT);
2107 tab.define_column("USED", TextTable::LEFT, TextTable::RIGHT);
2108 tab.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT);
2109 tab.define_column("CLONES", TextTable::LEFT, TextTable::RIGHT);
2110 tab.define_column("COPIES", TextTable::LEFT, TextTable::RIGHT);
2111 tab.define_column("MISSING_ON_PRIMARY", TextTable::LEFT, TextTable::RIGHT);
2112 tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT);
2113 tab.define_column("DEGRADED", TextTable::LEFT, TextTable::RIGHT);
2114 tab.define_column("RD_OPS", TextTable::LEFT, TextTable::RIGHT);
2115 tab.define_column("RD", TextTable::LEFT, TextTable::RIGHT);
2116 tab.define_column("WR_OPS", TextTable::LEFT, TextTable::RIGHT);
2117 tab.define_column("WR", TextTable::LEFT, TextTable::RIGHT);
2118 } else {
2119 formatter->open_object_section("stats");
2120 formatter->open_array_section("pools");
2121 }
2122 for (map<string,librados::pool_stat_t>::iterator i = stats.begin();
2123 i != stats.end();
2124 ++i) {
2125 const char *pool_name = i->first.c_str();
2126 librados::pool_stat_t& s = i->second;
2127 if (!formatter) {
2128 tab << pool_name
2129 << si_t(s.num_bytes)
2130 << s.num_objects
2131 << s.num_object_clones
2132 << s.num_object_copies
2133 << s.num_objects_missing_on_primary
2134 << s.num_objects_unfound
2135 << s.num_objects_degraded
2136 << s.num_rd
2137 << si_t(s.num_rd_kb << 10)
2138 << s.num_wr
2139 << si_t(s.num_wr_kb << 10)
2140 << TextTable::endrow;
2141 } else {
2142 formatter->open_object_section("pool");
2143 int64_t pool_id = rados.pool_lookup(pool_name);
2144 formatter->dump_string("name", pool_name);
2145 if (pool_id >= 0)
2146 formatter->dump_int("id", pool_id);
2147 else
2148 cerr << "ERROR: lookup_pg_pool_name for name=" << pool_name
2149 << " returned " << pool_id << std::endl;
2150 formatter->dump_int("size_bytes",s.num_bytes);
2151 formatter->dump_int("size_kb", s.num_kb);
2152 formatter->dump_int("num_objects", s.num_objects);
2153 formatter->dump_int("num_object_clones", s.num_object_clones);
2154 formatter->dump_int("num_object_copies", s.num_object_copies);
2155 formatter->dump_int("num_objects_missing_on_primary", s.num_objects_missing_on_primary);
2156 formatter->dump_int("num_objects_unfound", s.num_objects_unfound);
2157 formatter->dump_int("num_objects_degraded", s.num_objects_degraded);
2158 formatter->dump_int("read_ops", s.num_rd);
2159 formatter->dump_int("read_bytes", s.num_rd_kb * 1024ull);
2160 formatter->dump_int("write_ops", s.num_wr);
2161 formatter->dump_int("write_bytes", s.num_wr_kb * 1024ull);
2162 formatter->close_section();
2163 }
2164 }
2165
2166 if (!formatter) {
2167 cout << tab;
2168 }
2169
2170 // total
2171 cluster_stat_t tstats;
2172 ret = rados.cluster_stat(tstats);
2173 if (ret < 0) {
2174 cerr << "error getting total cluster usage: " << cpp_strerror(ret) << std::endl;
2175 goto out;
2176 }
2177 if (!formatter) {
2178 cout << std::endl;
2179 cout << "total_objects " << tstats.num_objects
2180 << std::endl;
2181 cout << "total_used " << si_t(tstats.kb_used << 10)
2182 << std::endl;
2183 cout << "total_avail " << si_t(tstats.kb_avail << 10)
2184 << std::endl;
2185 cout << "total_space " << si_t(tstats.kb << 10)
2186 << std::endl;
2187 } else {
2188 formatter->close_section();
2189 formatter->dump_int("total_objects", tstats.num_objects);
2190 formatter->dump_int("total_used", tstats.kb_used);
2191 formatter->dump_int("total_avail", tstats.kb_avail);
2192 formatter->dump_int("total_space", tstats.kb);
2193 formatter->close_section();
2194 formatter->flush(cout);
2195 }
2196 }
2197
2198 else if (strcmp(nargs[0], "ls") == 0) {
2199 if (!pool_name) {
2200 cerr << "pool name was not specified" << std::endl;
2201 ret = -1;
2202 goto out;
2203 }
2204
2205 if (wildcard)
2206 io_ctx.set_namespace(all_nspaces);
2207 bool use_stdout = (nargs.size() < 2) || (strcmp(nargs[1], "-") == 0);
2208 ostream *outstream;
2209 if(use_stdout)
2210 outstream = &cout;
2211 else
2212 outstream = new ofstream(nargs[1]);
2213
2214 {
2215 if (formatter)
2216 formatter->open_array_section("objects");
2217 try {
2218 librados::NObjectIterator i = io_ctx.nobjects_begin();
2219 librados::NObjectIterator i_end = io_ctx.nobjects_end();
2220 for (; i != i_end; ++i) {
2221 if (use_striper) {
2222 // in case of --striper option, we only list striped
2223 // objects, so we only display the first object of
2224 // each, without its suffix '.000...000'
2225 size_t l = i->get_oid().length();
2226 if (l <= 17 ||
2227 (0 != i->get_oid().compare(l-17, 17,".0000000000000000"))) continue;
2228 }
2229 if (!formatter) {
2230 // Only include namespace in output when wildcard specified
2231 if (wildcard)
2232 *outstream << i->get_nspace() << "\t";
2233 if (use_striper) {
2234 *outstream << i->get_oid().substr(0, i->get_oid().length()-17);
2235 } else {
2236 *outstream << i->get_oid();
2237 }
2238 if (i->get_locator().size())
2239 *outstream << "\t" << i->get_locator();
2240 *outstream << std::endl;
2241 } else {
2242 formatter->open_object_section("object");
2243 formatter->dump_string("namespace", i->get_nspace());
2244 if (use_striper) {
2245 formatter->dump_string("name", i->get_oid().substr(0, i->get_oid().length()-17));
2246 } else {
2247 formatter->dump_string("name", i->get_oid());
2248 }
2249 if (i->get_locator().size())
2250 formatter->dump_string("locator", i->get_locator());
2251 formatter->close_section(); //object
2252 }
2253 }
2254 }
2255 catch (const std::runtime_error& e) {
2256 cerr << e.what() << std::endl;
2257 ret = -1;
2258 goto out;
2259 }
2260 }
2261 if (formatter) {
2262 formatter->close_section(); //objects
2263 formatter->flush(*outstream);
2264 if (pretty_format)
2265 *outstream << std::endl;
2266 formatter->flush(*outstream);
2267 }
2268 if (!stdout)
2269 delete outstream;
2270 }
2271 else if (strcmp(nargs[0], "chown") == 0) {
2272 if (!pool_name || nargs.size() < 2)
2273 usage_exit();
2274
2275 char* endptr = NULL;
2276 uint64_t new_auid = strtol(nargs[1], &endptr, 10);
2277 if (*endptr) {
2278 cerr << "Invalid value for new-auid: '" << nargs[1] << "'" << std::endl;
2279 ret = -1;
2280 goto out;
2281 }
2282 ret = io_ctx.set_auid(new_auid);
2283 if (ret < 0) {
2284 cerr << "error changing auid on pool " << io_ctx.get_pool_name() << ':'
2285 << cpp_strerror(ret) << std::endl;
2286 } else cerr << "changed auid on pool " << io_ctx.get_pool_name()
2287 << " to " << new_auid << std::endl;
2288 }
2289 else if (strcmp(nargs[0], "mapext") == 0) {
2290 if (!pool_name || nargs.size() < 2)
2291 usage_exit();
2292 string oid(nargs[1]);
2293 std::map<uint64_t,uint64_t> m;
2294 ret = io_ctx.mapext(oid, 0, -1, m);
2295 if (ret < 0) {
2296 cerr << "mapext error on " << pool_name << "/" << oid << ": " << cpp_strerror(ret) << std::endl;
2297 goto out;
2298 }
2299 std::map<uint64_t,uint64_t>::iterator iter;
2300 for (iter = m.begin(); iter != m.end(); ++iter) {
2301 cout << hex << iter->first << "\t" << iter->second << dec << std::endl;
2302 }
2303 }
2304 else if (strcmp(nargs[0], "stat") == 0) {
2305 if (!pool_name || nargs.size() < 2)
2306 usage_exit();
2307 string oid(nargs[1]);
2308 uint64_t size;
2309 time_t mtime;
2310 if (use_striper) {
2311 ret = striper.stat(oid, &size, &mtime);
2312 } else {
2313 ret = io_ctx.stat(oid, &size, &mtime);
2314 }
2315 if (ret < 0) {
2316 cerr << " error stat-ing " << pool_name << "/" << oid << ": "
2317 << cpp_strerror(ret) << std::endl;
2318 goto out;
2319 } else {
2320 utime_t t(mtime, 0);
2321 cout << pool_name << "/" << oid
2322 << " mtime " << t << ", size " << size << std::endl;
2323 }
2324 }
2325 else if (strcmp(nargs[0], "get") == 0) {
2326 if (!pool_name || nargs.size() < 3)
2327 usage_exit();
2328 ret = do_get(io_ctx, striper, nargs[1], nargs[2], op_size, use_striper);
2329 if (ret < 0) {
2330 cerr << "error getting " << pool_name << "/" << nargs[1] << ": " << cpp_strerror(ret) << std::endl;
2331 goto out;
2332 }
2333 }
2334 else if (strcmp(nargs[0], "put") == 0) {
2335 if (!pool_name || nargs.size() < 3)
2336 usage_exit();
2337 ret = do_put(io_ctx, striper, nargs[1], nargs[2], op_size, obj_offset, use_striper);
2338 if (ret < 0) {
2339 cerr << "error putting " << pool_name << "/" << nargs[1] << ": " << cpp_strerror(ret) << std::endl;
2340 goto out;
2341 }
2342 }
2343 else if (strcmp(nargs[0], "append") == 0) {
2344 if (!pool_name || nargs.size() < 3)
2345 usage_exit();
2346 ret = do_append(io_ctx, striper, nargs[1], nargs[2], op_size, use_striper);
2347 if (ret < 0) {
2348 cerr << "error appending " << pool_name << "/" << nargs[1] << ": " << cpp_strerror(ret) << std::endl;
2349 goto out;
2350 }
2351 }
2352 else if (strcmp(nargs[0], "truncate") == 0) {
2353 if (!pool_name || nargs.size() < 3)
2354 usage_exit();
2355
2356 string oid(nargs[1]);
2357 char* endptr = NULL;
2358 long size = strtoll(nargs[2], &endptr, 10);
2359 if (*endptr) {
2360 cerr << "Invalid value for size: '" << nargs[2] << "'" << std::endl;
2361 ret = -EINVAL;
2362 goto out;
2363 }
2364 if (size < 0) {
2365 cerr << "error, cannot truncate to negative value" << std::endl;
2366 usage_exit();
2367 }
2368 if (use_striper) {
2369 ret = striper.trunc(oid, size);
2370 } else {
2371 ret = io_ctx.trunc(oid, size);
2372 }
2373 if (ret < 0) {
2374 cerr << "error truncating oid "
2375 << oid << " to " << size << ": "
2376 << cpp_strerror(ret) << std::endl;
2377 } else {
2378 ret = 0;
2379 }
2380 }
2381 else if (strcmp(nargs[0], "setxattr") == 0) {
2382 if (!pool_name || nargs.size() < 3 || nargs.size() > 4)
2383 usage_exit();
2384
2385 string oid(nargs[1]);
2386 string attr_name(nargs[2]);
2387 bufferlist bl;
2388 if (nargs.size() == 4) {
2389 string attr_val(nargs[3]);
2390 bl.append(attr_val.c_str(), attr_val.length());
2391 } else {
2392 do {
2393 ret = bl.read_fd(STDIN_FILENO, 1024); // from stdin
2394 if (ret < 0)
2395 goto out;
2396 } while (ret > 0);
2397 }
2398
2399 if (use_striper) {
2400 ret = striper.setxattr(oid, attr_name.c_str(), bl);
2401 } else {
2402 ret = io_ctx.setxattr(oid, attr_name.c_str(), bl);
2403 }
2404 if (ret < 0) {
2405 cerr << "error setting xattr " << pool_name << "/" << oid << "/" << attr_name << ": " << cpp_strerror(ret) << std::endl;
2406 goto out;
2407 }
2408 else
2409 ret = 0;
2410 }
2411 else if (strcmp(nargs[0], "getxattr") == 0) {
2412 if (!pool_name || nargs.size() < 3)
2413 usage_exit();
2414
2415 string oid(nargs[1]);
2416 string attr_name(nargs[2]);
2417
2418 bufferlist bl;
2419 if (use_striper) {
2420 ret = striper.getxattr(oid, attr_name.c_str(), bl);
2421 } else {
2422 ret = io_ctx.getxattr(oid, attr_name.c_str(), bl);
2423 }
2424 if (ret < 0) {
2425 cerr << "error getting xattr " << pool_name << "/" << oid << "/" << attr_name << ": " << cpp_strerror(ret) << std::endl;
2426 goto out;
2427 }
2428 else
2429 ret = 0;
2430 string s(bl.c_str(), bl.length());
2431 cout << s;
2432 } else if (strcmp(nargs[0], "rmxattr") == 0) {
2433 if (!pool_name || nargs.size() < 3)
2434 usage_exit();
2435
2436 string oid(nargs[1]);
2437 string attr_name(nargs[2]);
2438
2439 if (use_striper) {
2440 ret = striper.rmxattr(oid, attr_name.c_str());
2441 } else {
2442 ret = io_ctx.rmxattr(oid, attr_name.c_str());
2443 }
2444 if (ret < 0) {
2445 cerr << "error removing xattr " << pool_name << "/" << oid << "/" << attr_name << ": " << cpp_strerror(ret) << std::endl;
2446 goto out;
2447 }
2448 } else if (strcmp(nargs[0], "listxattr") == 0) {
2449 if (!pool_name || nargs.size() < 2)
2450 usage_exit();
2451
2452 string oid(nargs[1]);
2453 map<std::string, bufferlist> attrset;
2454 bufferlist bl;
2455 if (use_striper) {
2456 ret = striper.getxattrs(oid, attrset);
2457 } else {
2458 ret = io_ctx.getxattrs(oid, attrset);
2459 }
2460 if (ret < 0) {
2461 cerr << "error getting xattr set " << pool_name << "/" << oid << ": " << cpp_strerror(ret) << std::endl;
2462 goto out;
2463 }
2464
2465 for (map<std::string, bufferlist>::iterator iter = attrset.begin();
2466 iter != attrset.end(); ++iter) {
2467 cout << iter->first << std::endl;
2468 }
2469 } else if (strcmp(nargs[0], "getomapheader") == 0) {
2470 if (!pool_name || nargs.size() < 2)
2471 usage_exit();
2472
2473 string oid(nargs[1]);
2474 string outfile;
2475 if (nargs.size() >= 3) {
2476 outfile = nargs[2];
2477 }
2478
2479 bufferlist header;
2480 ret = io_ctx.omap_get_header(oid, &header);
2481 if (ret < 0) {
2482 cerr << "error getting omap header " << pool_name << "/" << oid
2483 << ": " << cpp_strerror(ret) << std::endl;
2484 goto out;
2485 } else {
2486 if (!outfile.empty()) {
2487 cerr << "Writing to " << outfile << std::endl;
2488 dump_data(outfile, header);
2489 } else {
2490 cout << "header (" << header.length() << " bytes) :\n";
2491 header.hexdump(cout);
2492 cout << std::endl;
2493 }
2494 ret = 0;
2495 }
2496 } else if (strcmp(nargs[0], "setomapheader") == 0) {
2497 if (!pool_name || nargs.size() < 3)
2498 usage_exit();
2499
2500 string oid(nargs[1]);
2501 string val(nargs[2]);
2502
2503 bufferlist bl;
2504 bl.append(val);
2505
2506 ret = io_ctx.omap_set_header(oid, bl);
2507 if (ret < 0) {
2508 cerr << "error setting omap value " << pool_name << "/" << oid
2509 << ": " << cpp_strerror(ret) << std::endl;
2510 goto out;
2511 } else {
2512 ret = 0;
2513 }
2514 } else if (strcmp(nargs[0], "setomapval") == 0) {
2515 uint32_t min_args = (omap_key_valid ? 2 : 3);
2516 if (!pool_name || nargs.size() < min_args || nargs.size() > min_args + 1) {
2517 usage_exit();
2518 }
2519
2520 string oid(nargs[1]);
2521 if (!omap_key_valid) {
2522 omap_key = nargs[2];
2523 omap_key_pretty = omap_key;
2524 }
2525
2526 bufferlist bl;
2527 if (nargs.size() > min_args) {
2528 string val(nargs[min_args]);
2529 bl.append(val);
2530 } else {
2531 do {
2532 ret = bl.read_fd(STDIN_FILENO, 1024); // from stdin
2533 if (ret < 0) {
2534 goto out;
2535 }
2536 } while (ret > 0);
2537 }
2538
2539 map<string, bufferlist> values;
2540 values[omap_key] = bl;
2541
2542 ret = io_ctx.omap_set(oid, values);
2543 if (ret < 0) {
2544 cerr << "error setting omap value " << pool_name << "/" << oid << "/"
2545 << omap_key_pretty << ": " << cpp_strerror(ret) << std::endl;
2546 goto out;
2547 } else {
2548 ret = 0;
2549 }
2550 } else if (strcmp(nargs[0], "getomapval") == 0) {
2551 uint32_t min_args = (omap_key_valid ? 2 : 3);
2552 if (!pool_name || nargs.size() < min_args || nargs.size() > min_args + 1) {
2553 usage_exit();
2554 }
2555
2556 string oid(nargs[1]);
2557 if (!omap_key_valid) {
2558 omap_key = nargs[2];
2559 omap_key_pretty = omap_key;
2560 }
2561
2562 set<string> keys;
2563 keys.insert(omap_key);
2564
2565 std::string outfile;
2566 if (nargs.size() > min_args) {
2567 outfile = nargs[min_args];
2568 }
2569
2570 map<string, bufferlist> values;
2571 ret = io_ctx.omap_get_vals_by_keys(oid, keys, &values);
2572 if (ret < 0) {
2573 cerr << "error getting omap value " << pool_name << "/" << oid << "/"
2574 << omap_key_pretty << ": " << cpp_strerror(ret) << std::endl;
2575 goto out;
2576 } else {
2577 ret = 0;
2578 }
2579
2580 if (values.size() && values.begin()->first == omap_key) {
2581 if (!outfile.empty()) {
2582 cerr << "Writing to " << outfile << std::endl;
2583 dump_data(outfile, values.begin()->second);
2584 } else {
2585 cout << "value (" << values.begin()->second.length() << " bytes) :\n";
2586 values.begin()->second.hexdump(cout);
2587 cout << std::endl;
2588 }
2589 ret = 0;
2590 } else {
2591 cout << "No such key: " << pool_name << "/" << oid << "/"
2592 << omap_key_pretty << std::endl;
2593 ret = -1;
2594 goto out;
2595 }
2596 } else if (strcmp(nargs[0], "rmomapkey") == 0) {
2597 uint32_t num_args = (omap_key_valid ? 2 : 3);
2598 if (!pool_name || nargs.size() != num_args) {
2599 usage_exit();
2600 }
2601
2602 string oid(nargs[1]);
2603 if (!omap_key_valid) {
2604 omap_key = nargs[2];
2605 omap_key_pretty = omap_key;
2606 }
2607 set<string> keys;
2608 keys.insert(omap_key);
2609
2610 ret = io_ctx.omap_rm_keys(oid, keys);
2611 if (ret < 0) {
2612 cerr << "error removing omap key " << pool_name << "/" << oid << "/"
2613 << omap_key_pretty << ": " << cpp_strerror(ret) << std::endl;
2614 goto out;
2615 } else {
2616 ret = 0;
2617 }
2618 } else if (strcmp(nargs[0], "listomapvals") == 0) {
2619 if (!pool_name || nargs.size() < 2)
2620 usage_exit();
2621
2622 string oid(nargs[1]);
2623 string last_read = "";
2624 int MAX_READ = 512;
2625 do {
2626 map<string, bufferlist> values;
2627 ret = io_ctx.omap_get_vals(oid, last_read, MAX_READ, &values);
2628 if (ret < 0) {
2629 cerr << "error getting omap keys " << pool_name << "/" << oid << ": "
2630 << cpp_strerror(ret) << std::endl;
2631 return 1;
2632 }
2633 ret = values.size();
2634 for (map<string, bufferlist>::const_iterator it = values.begin();
2635 it != values.end(); ++it) {
2636 last_read = it->first;
2637 // dump key in hex if it contains nonprintable characters
2638 if (std::count_if(it->first.begin(), it->first.end(),
2639 (int (*)(int))isprint) < (int)it->first.length()) {
2640 cout << "key (" << it->first.length() << " bytes):\n";
2641 bufferlist keybl;
2642 keybl.append(it->first);
2643 keybl.hexdump(cout);
2644 } else {
2645 cout << it->first;
2646 }
2647 cout << std::endl;
2648 cout << "value (" << it->second.length() << " bytes) :\n";
2649 it->second.hexdump(cout);
2650 cout << std::endl;
2651 }
2652 } while (ret == MAX_READ);
2653 ret = 0;
2654 }
2655 else if (strcmp(nargs[0], "cp") == 0) {
2656 if (!pool_name)
2657 usage_exit();
2658
2659 if (nargs.size() < 2 || nargs.size() > 3)
2660 usage_exit();
2661
2662 const char *target = target_pool_name;
2663 if (!target)
2664 target = pool_name;
2665
2666 const char *target_obj;
2667 if (nargs.size() < 3) {
2668 if (strcmp(target, pool_name) == 0) {
2669 cerr << "cannot copy object into itself" << std::endl;
2670 ret = -1;
2671 goto out;
2672 }
2673 target_obj = nargs[1];
2674 } else {
2675 target_obj = nargs[2];
2676 }
2677
2678 // open io context.
2679 IoCtx target_ctx;
2680 ret = rados.ioctx_create(target, target_ctx);
2681 if (ret < 0) {
2682 cerr << "error opening target pool " << target << ": "
2683 << cpp_strerror(ret) << std::endl;
2684 goto out;
2685 }
2686 if (target_oloc.size()) {
2687 target_ctx.locator_set_key(target_oloc);
2688 }
2689 if (target_nspace.size()) {
2690 target_ctx.set_namespace(target_nspace);
2691 }
2692
2693 ret = do_copy(io_ctx, nargs[1], target_ctx, target_obj);
2694 if (ret < 0) {
2695 cerr << "error copying " << pool_name << "/" << nargs[1] << " => " << target << "/" << target_obj << ": " << cpp_strerror(ret) << std::endl;
2696 goto out;
2697 }
2698 } else if (strcmp(nargs[0], "rm") == 0) {
2699 if (!pool_name || nargs.size() < 2)
2700 usage_exit();
2701 vector<const char *>::iterator iter = nargs.begin();
2702 ++iter;
2703 for (; iter != nargs.end(); ++iter) {
2704 const string & oid = *iter;
2705 if (use_striper) {
2706 if (forcefull) {
2707 ret = striper.remove(oid, CEPH_OSD_FLAG_FULL_FORCE);
2708 } else {
2709 ret = striper.remove(oid);
2710 }
2711 } else {
2712 if (forcefull) {
2713 ret = io_ctx.remove(oid, CEPH_OSD_FLAG_FULL_FORCE);
2714 } else {
2715 ret = io_ctx.remove(oid);
2716 }
2717 }
2718 if (ret < 0) {
2719 string name = (nspace.size() ? nspace + "/" : "" ) + oid;
2720 cerr << "error removing " << pool_name << ">" << name << ": " << cpp_strerror(ret) << std::endl;
2721 goto out;
2722 }
2723 }
2724 }
2725 else if (strcmp(nargs[0], "create") == 0) {
2726 if (!pool_name || nargs.size() < 2)
2727 usage_exit();
2728 string oid(nargs[1]);
2729 ret = io_ctx.create(oid, true);
2730 if (ret < 0) {
2731 cerr << "error creating " << pool_name << "/" << oid << ": " << cpp_strerror(ret) << std::endl;
2732 goto out;
2733 }
2734 }
2735
2736 else if (strcmp(nargs[0], "tmap") == 0) {
2737 if (nargs.size() < 3)
2738 usage_exit();
2739 if (strcmp(nargs[1], "dump") == 0) {
2740 bufferlist outdata;
2741 string oid(nargs[2]);
2742 ret = io_ctx.read(oid, outdata, 0, 0);
2743 if (ret < 0) {
2744 cerr << "error reading " << pool_name << "/" << oid << ": " << cpp_strerror(ret) << std::endl;
2745 goto out;
2746 }
2747 bufferlist::iterator p = outdata.begin();
2748 bufferlist header;
2749 map<string, bufferlist> kv;
2750 try {
2751 ::decode(header, p);
2752 ::decode(kv, p);
2753 }
2754 catch (buffer::error& e) {
2755 cerr << "error decoding tmap " << pool_name << "/" << oid << std::endl;
2756 ret = -EINVAL;
2757 goto out;
2758 }
2759 cout << "header (" << header.length() << " bytes):\n";
2760 header.hexdump(cout);
2761 cout << "\n";
2762 cout << kv.size() << " keys\n";
2763 for (map<string,bufferlist>::iterator q = kv.begin(); q != kv.end(); ++q) {
2764 cout << "key '" << q->first << "' (" << q->second.length() << " bytes):\n";
2765 q->second.hexdump(cout);
2766 cout << "\n";
2767 }
2768 }
2769 else if (strcmp(nargs[1], "set") == 0 ||
2770 strcmp(nargs[1], "create") == 0) {
2771 if (nargs.size() < 5)
2772 usage_exit();
2773 string oid(nargs[2]);
2774 string k(nargs[3]);
2775 string v(nargs[4]);
2776 bufferlist bl;
2777 char c = (strcmp(nargs[1], "set") == 0) ? CEPH_OSD_TMAP_SET : CEPH_OSD_TMAP_CREATE;
2778 ::encode(c, bl);
2779 ::encode(k, bl);
2780 ::encode(v, bl);
2781 ret = io_ctx.tmap_update(oid, bl);
2782 }
2783 }
2784
2785 else if (strcmp(nargs[0], "tmap-to-omap") == 0) {
2786 if (!pool_name || nargs.size() < 2)
2787 usage_exit();
2788 string oid(nargs[1]);
2789
2790 bufferlist bl;
2791 int r = io_ctx.tmap_get(oid, bl);
2792 if (r < 0) {
2793 ret = r;
2794 cerr << "error reading tmap " << pool_name << "/" << oid
2795 << ": " << cpp_strerror(ret) << std::endl;
2796 goto out;
2797 }
2798 bufferlist hdr;
2799 map<string, bufferlist> kv;
2800 bufferlist::iterator p = bl.begin();
2801 try {
2802 ::decode(hdr, p);
2803 ::decode(kv, p);
2804 }
2805 catch (buffer::error& e) {
2806 cerr << "error decoding tmap " << pool_name << "/" << oid << std::endl;
2807 ret = -EINVAL;
2808 goto out;
2809 }
2810 if (!p.end()) {
2811 cerr << "error decoding tmap (stray trailing data) in " << pool_name << "/" << oid << std::endl;
2812 ret = -EINVAL;
2813 goto out;
2814 }
2815 librados::ObjectWriteOperation wr;
2816 wr.omap_set_header(hdr);
2817 wr.omap_set(kv);
2818 wr.truncate(0); // delete the old tmap data
2819 r = io_ctx.operate(oid, &wr);
2820 if (r < 0) {
2821 ret = r;
2822 cerr << "error writing tmap data as omap on " << pool_name << "/" << oid
2823 << ": " << cpp_strerror(ret) << std::endl;
2824 goto out;
2825 }
2826 ret = 0;
2827 }
2828
2829 else if (strcmp(nargs[0], "mkpool") == 0) {
2830 int auid = 0;
2831 __u8 crush_rule = 0;
2832 if (nargs.size() < 2)
2833 usage_exit();
2834 if (nargs.size() > 2) {
2835 char* endptr = NULL;
2836 auid = strtol(nargs[2], &endptr, 10);
2837 if (*endptr) {
2838 cerr << "Invalid value for auid: '" << nargs[2] << "'" << std::endl;
2839 ret = -EINVAL;
2840 goto out;
2841 }
2842 cerr << "setting auid:" << auid << std::endl;
2843 if (nargs.size() > 3) {
2844 crush_rule = (__u8)strtol(nargs[3], &endptr, 10);
2845 if (*endptr) {
2846 cerr << "Invalid value for crush-rule: '" << nargs[3] << "'" << std::endl;
2847 ret = -EINVAL;
2848 goto out;
2849 }
2850 cerr << "using crush rule " << (int)crush_rule << std::endl;
2851 }
2852 }
2853 ret = rados.pool_create(nargs[1], auid, crush_rule);
2854 if (ret < 0) {
2855 cerr << "error creating pool " << nargs[1] << ": "
2856 << cpp_strerror(ret) << std::endl;
2857 goto out;
2858 }
2859 cout << "successfully created pool " << nargs[1] << std::endl;
2860 }
2861 else if (strcmp(nargs[0], "cppool") == 0) {
2862 bool force = nargs.size() == 4 && !strcmp(nargs[3], "--yes-i-really-mean-it");
2863 if (nargs.size() != 3 && !(nargs.size() == 4 && force))
2864 usage_exit();
2865 const char *src_pool = nargs[1];
2866 const char *target_pool = nargs[2];
2867
2868 if (strcmp(src_pool, target_pool) == 0) {
2869 cerr << "cannot copy pool into itself" << std::endl;
2870 ret = -1;
2871 goto out;
2872 }
2873
2874 cerr << "WARNING: pool copy does not preserve user_version, which some "
2875 << " apps may rely on." << std::endl;
2876
2877 if (rados.get_pool_is_selfmanaged_snaps_mode(src_pool)) {
2878 cerr << "WARNING: pool " << src_pool << " has selfmanaged snaps, which are not preserved\n"
2879 << " by the cppool operation. This will break any snapshot user."
2880 << std::endl;
2881 if (!force) {
2882 cerr << " If you insist on making a broken copy, you can pass\n"
2883 << " --yes-i-really-mean-it to proceed anyway."
2884 << std::endl;
2885 exit(1);
2886 }
2887 }
2888
2889 ret = do_copy_pool(rados, src_pool, target_pool);
2890 if (ret < 0) {
2891 cerr << "error copying pool " << src_pool << " => " << target_pool << ": "
2892 << cpp_strerror(ret) << std::endl;
2893 goto out;
2894 }
2895 cout << "successfully copied pool " << nargs[1] << std::endl;
2896 }
2897 else if (strcmp(nargs[0], "rmpool") == 0) {
2898 if (nargs.size() < 2)
2899 usage_exit();
2900 if (nargs.size() < 4 ||
2901 strcmp(nargs[1], nargs[2]) != 0 ||
2902 strcmp(nargs[3], "--yes-i-really-really-mean-it") != 0) {
2903 cerr << "WARNING:\n"
2904 << " This will PERMANENTLY DESTROY an entire pool of objects with no way back.\n"
2905 << " To confirm, pass the pool to remove twice, followed by\n"
2906 << " --yes-i-really-really-mean-it" << std::endl;
2907 ret = -1;
2908 goto out;
2909 }
2910 ret = rados.pool_delete(nargs[1]);
2911 if (ret >= 0) {
2912 cout << "successfully deleted pool " << nargs[1] << std::endl;
2913 } else { //error
2914 cerr << "pool " << nargs[1] << " could not be removed" << std::endl;
2915 cerr << "Check your monitor configuration - `mon allow pool delete` is set to false by default,"
2916 << " change it to true to allow deletion of pools" << std::endl;
2917 }
2918 }
2919 else if (strcmp(nargs[0], "purge") == 0) {
2920 if (nargs.size() < 2)
2921 usage_exit();
2922 if (nargs.size() < 3 ||
2923 strcmp(nargs[2], "--yes-i-really-really-mean-it") != 0) {
2924 cerr << "WARNING:\n"
2925 << " This will PERMANENTLY DESTROY all objects from a pool with no way back.\n"
2926 << " To confirm, follow pool with --yes-i-really-really-mean-it" << std::endl;
2927 ret = -1;
2928 goto out;
2929 }
2930 ret = rados.ioctx_create(nargs[1], io_ctx);
2931 if (ret < 0) {
2932 cerr << "error pool " << nargs[1] << ": "
2933 << cpp_strerror(ret) << std::endl;
2934 goto out;
2935 }
2936 io_ctx.set_namespace(all_nspaces);
2937 io_ctx.set_osdmap_full_try();
2938 RadosBencher bencher(g_ceph_context, rados, io_ctx);
2939 ret = bencher.clean_up_slow("", concurrent_ios);
2940 if (ret >= 0) {
2941 cout << "successfully purged pool " << nargs[1] << std::endl;
2942 } else { //error
2943 cerr << "pool " << nargs[1] << " could not be purged" << std::endl;
2944 cerr << "Check your monitor configuration - `mon allow pool delete` is set to false by default,"
2945 << " change it to true to allow deletion of pools" << std::endl;
2946 }
2947 }
2948 else if (strcmp(nargs[0], "lssnap") == 0) {
2949 if (!pool_name || nargs.size() != 1)
2950 usage_exit();
2951
2952 vector<snap_t> snaps;
2953 io_ctx.snap_list(&snaps);
2954 for (vector<snap_t>::iterator i = snaps.begin();
2955 i != snaps.end();
2956 ++i) {
2957 string s;
2958 time_t t;
2959 if (io_ctx.snap_get_name(*i, &s) < 0)
2960 continue;
2961 if (io_ctx.snap_get_stamp(*i, &t) < 0)
2962 continue;
2963 struct tm bdt;
2964 localtime_r(&t, &bdt);
2965 cout << *i << "\t" << s << "\t";
2966
2967 std::ios_base::fmtflags original_flags = cout.flags();
2968 cout.setf(std::ios::right);
2969 cout.fill('0');
2970 cout << std::setw(4) << (bdt.tm_year+1900)
2971 << '.' << std::setw(2) << (bdt.tm_mon+1)
2972 << '.' << std::setw(2) << bdt.tm_mday
2973 << ' '
2974 << std::setw(2) << bdt.tm_hour
2975 << ':' << std::setw(2) << bdt.tm_min
2976 << ':' << std::setw(2) << bdt.tm_sec
2977 << std::endl;
2978 cout.flags(original_flags);
2979 }
2980 cout << snaps.size() << " snaps" << std::endl;
2981 }
2982
2983 else if (strcmp(nargs[0], "mksnap") == 0) {
2984 if (!pool_name || nargs.size() < 2)
2985 usage_exit();
2986
2987 ret = io_ctx.snap_create(nargs[1]);
2988 if (ret < 0) {
2989 cerr << "error creating pool " << pool_name << " snapshot " << nargs[1]
2990 << ": " << cpp_strerror(ret) << std::endl;
2991 goto out;
2992 }
2993 cout << "created pool " << pool_name << " snap " << nargs[1] << std::endl;
2994 }
2995
2996 else if (strcmp(nargs[0], "rmsnap") == 0) {
2997 if (!pool_name || nargs.size() < 2)
2998 usage_exit();
2999
3000 ret = io_ctx.snap_remove(nargs[1]);
3001 if (ret < 0) {
3002 cerr << "error removing pool " << pool_name << " snapshot " << nargs[1]
3003 << ": " << cpp_strerror(ret) << std::endl;
3004 goto out;
3005 }
3006 cout << "removed pool " << pool_name << " snap " << nargs[1] << std::endl;
3007 }
3008
3009 else if (strcmp(nargs[0], "rollback") == 0) {
3010 if (!pool_name || nargs.size() < 3)
3011 usage_exit();
3012
3013 ret = io_ctx.snap_rollback(nargs[1], nargs[2]);
3014 if (ret < 0) {
3015 cerr << "error rolling back pool " << pool_name << " to snapshot " << nargs[1]
3016 << cpp_strerror(ret) << std::endl;
3017 goto out;
3018 }
3019 cout << "rolled back pool " << pool_name
3020 << " to snapshot " << nargs[2] << std::endl;
3021 }
3022 else if (strcmp(nargs[0], "bench") == 0) {
3023 if (!pool_name || nargs.size() < 3)
3024 usage_exit();
3025 char* endptr = NULL;
3026 int seconds = strtol(nargs[1], &endptr, 10);
3027 if (*endptr) {
3028 cerr << "Invalid value for seconds: '" << nargs[1] << "'" << std::endl;
3029 ret = -EINVAL;
3030 goto out;
3031 }
3032 int operation = 0;
3033 if (strcmp(nargs[2], "write") == 0)
3034 operation = OP_WRITE;
3035 else if (strcmp(nargs[2], "seq") == 0)
3036 operation = OP_SEQ_READ;
3037 else if (strcmp(nargs[2], "rand") == 0)
3038 operation = OP_RAND_READ;
3039 else
3040 usage_exit();
3041 if (operation != OP_WRITE) {
3042 if (block_size_specified) {
3043 cerr << "-b|--block_size option can be used only with 'write' bench test"
3044 << std::endl;
3045 ret = -EINVAL;
3046 goto out;
3047 }
3048 if (bench_write_dest != 0) {
3049 cerr << "--write-object, --write-omap and --write-xattr options can "
3050 "only be used with the 'write' bench test"
3051 << std::endl;
3052 ret = -EINVAL;
3053 goto out;
3054 }
3055 }
3056 else if (bench_write_dest == 0) {
3057 bench_write_dest = OP_WRITE_DEST_OBJ;
3058 }
3059
3060 if (!formatter && output) {
3061 cerr << "-o|--output option can only be used with '--format' option"
3062 << std::endl;
3063 ret = -EINVAL;
3064 goto out;
3065 }
3066 RadosBencher bencher(g_ceph_context, rados, io_ctx);
3067 bencher.set_show_time(show_time);
3068 bencher.set_write_destination(static_cast<OpWriteDest>(bench_write_dest));
3069
3070 ostream *outstream = NULL;
3071 if (formatter) {
3072 bencher.set_formatter(formatter);
3073 if (output)
3074 outstream = new ofstream(output);
3075 else
3076 outstream = &cout;
3077 bencher.set_outstream(*outstream);
3078 }
3079 if (!object_size)
3080 object_size = op_size;
3081 else if (object_size < op_size)
3082 op_size = object_size;
3083 cout << "hints = " << (int)hints << std::endl;
3084 ret = bencher.aio_bench(operation, seconds,
3085 concurrent_ios, op_size, object_size,
3086 max_objects, cleanup, hints, run_name, no_verify);
3087 if (ret != 0)
3088 cerr << "error during benchmark: " << cpp_strerror(ret) << std::endl;
3089 if (formatter && output)
3090 delete outstream;
3091 }
3092 else if (strcmp(nargs[0], "cleanup") == 0) {
3093 if (!pool_name)
3094 usage_exit();
3095 if (wildcard)
3096 io_ctx.set_namespace(all_nspaces);
3097 RadosBencher bencher(g_ceph_context, rados, io_ctx);
3098 ret = bencher.clean_up(prefix, concurrent_ios, run_name);
3099 if (ret != 0)
3100 cerr << "error during cleanup: " << cpp_strerror(ret) << std::endl;
3101 }
3102 else if (strcmp(nargs[0], "watch") == 0) {
3103 if (!pool_name || nargs.size() < 2)
3104 usage_exit();
3105 string oid(nargs[1]);
3106 RadosWatchCtx ctx(io_ctx, oid.c_str());
3107 uint64_t cookie;
3108 ret = io_ctx.watch2(oid, &cookie, &ctx);
3109 if (ret != 0)
3110 cerr << "error calling watch: " << cpp_strerror(ret) << std::endl;
3111 else {
3112 cout << "press enter to exit..." << std::endl;
3113 getchar();
3114 io_ctx.unwatch2(cookie);
3115 rados.watch_flush();
3116 }
3117 }
3118 else if (strcmp(nargs[0], "notify") == 0) {
3119 if (!pool_name || nargs.size() < 3)
3120 usage_exit();
3121 string oid(nargs[1]);
3122 string msg(nargs[2]);
3123 bufferlist bl, replybl;
3124 ::encode(msg, bl);
3125 ret = io_ctx.notify2(oid, bl, 10000, &replybl);
3126 if (ret != 0)
3127 cerr << "error calling notify: " << cpp_strerror(ret) << std::endl;
3128 if (replybl.length()) {
3129 map<pair<uint64_t,uint64_t>,bufferlist> rm;
3130 set<pair<uint64_t,uint64_t> > missed;
3131 bufferlist::iterator p = replybl.begin();
3132 ::decode(rm, p);
3133 ::decode(missed, p);
3134 for (map<pair<uint64_t,uint64_t>,bufferlist>::iterator p = rm.begin();
3135 p != rm.end();
3136 ++p) {
3137 cout << "reply client." << p->first.first
3138 << " cookie " << p->first.second
3139 << " : " << p->second.length() << " bytes" << std::endl;
3140 if (p->second.length())
3141 p->second.hexdump(cout);
3142 }
3143 for (multiset<pair<uint64_t,uint64_t> >::iterator p = missed.begin();
3144 p != missed.end(); ++p) {
3145 cout << "timeout client." << p->first
3146 << " cookie " << p->second << std::endl;
3147 }
3148 }
3149 } else if (strcmp(nargs[0], "set-alloc-hint") == 0) {
3150 if (!pool_name || nargs.size() < 4)
3151 usage_exit();
3152 string err;
3153 string oid(nargs[1]);
3154 uint64_t expected_object_size = strict_strtoll(nargs[2], 10, &err);
3155 if (!err.empty()) {
3156 cerr << "couldn't parse expected_object_size: " << err << std::endl;
3157 usage_exit();
3158 }
3159 uint64_t expected_write_size = strict_strtoll(nargs[3], 10, &err);
3160 if (!err.empty()) {
3161 cerr << "couldn't parse expected_write_size: " << err << std::endl;
3162 usage_exit();
3163 }
3164 ret = io_ctx.set_alloc_hint(oid, expected_object_size, expected_write_size);
3165 if (ret < 0) {
3166 cerr << "error setting alloc-hint " << pool_name << "/" << oid << ": "
3167 << cpp_strerror(ret) << std::endl;
3168 goto out;
3169 }
3170 } else if (strcmp(nargs[0], "load-gen") == 0) {
3171 if (!pool_name) {
3172 cerr << "error: must specify pool" << std::endl;
3173 usage_exit();
3174 }
3175 LoadGen lg(&rados);
3176 if (min_obj_len)
3177 lg.min_obj_len = min_obj_len;
3178 if (max_obj_len)
3179 lg.max_obj_len = max_obj_len;
3180 if (min_op_len)
3181 lg.min_op_len = min_op_len;
3182 if (max_op_len)
3183 lg.max_op_len = max_op_len;
3184 if (max_ops)
3185 lg.max_ops = max_ops;
3186 if (max_backlog)
3187 lg.max_backlog = max_backlog;
3188 if (target_throughput)
3189 lg.target_throughput = target_throughput << 20;
3190 if (read_percent >= 0)
3191 lg.read_percent = read_percent;
3192 if (num_objs)
3193 lg.num_objs = num_objs;
3194 if (run_length)
3195 lg.run_length = run_length;
3196
3197 cout << "run length " << run_length << " seconds" << std::endl;
3198 cout << "preparing " << lg.num_objs << " objects" << std::endl;
3199 ret = lg.bootstrap(pool_name);
3200 if (ret < 0) {
3201 cerr << "load-gen bootstrap failed" << std::endl;
3202 exit(1);
3203 }
3204 cout << "load-gen will run " << lg.run_length << " seconds" << std::endl;
3205 lg.run();
3206 lg.cleanup();
3207 } else if (strcmp(nargs[0], "listomapkeys") == 0) {
3208 if (!pool_name || nargs.size() < 2)
3209 usage_exit();
3210
3211 set<string> out_keys;
3212 ret = io_ctx.omap_get_keys(nargs[1], "", LONG_MAX, &out_keys);
3213 if (ret < 0) {
3214 cerr << "error getting omap key set " << pool_name << "/"
3215 << nargs[1] << ": " << cpp_strerror(ret) << std::endl;
3216 goto out;
3217 }
3218
3219 for (set<string>::iterator iter = out_keys.begin();
3220 iter != out_keys.end(); ++iter) {
3221 cout << *iter << std::endl;
3222 }
3223 } else if (strcmp(nargs[0], "lock") == 0) {
3224 if (!pool_name)
3225 usage_exit();
3226
3227 if (!formatter) {
3228 formatter = new JSONFormatter(pretty_format);
3229 }
3230 ret = do_lock_cmd(nargs, opts, &io_ctx, formatter);
3231 } else if (strcmp(nargs[0], "listwatchers") == 0) {
3232 if (!pool_name || nargs.size() < 2)
3233 usage_exit();
3234
3235 string oid(nargs[1]);
3236 std::list<obj_watch_t> lw;
3237
3238 ret = io_ctx.list_watchers(oid, &lw);
3239 if (ret < 0) {
3240 cerr << "error listing watchers " << pool_name << "/" << oid << ": " << cpp_strerror(ret) << std::endl;
3241 goto out;
3242 }
3243 else
3244 ret = 0;
3245
3246 for (std::list<obj_watch_t>::iterator i = lw.begin(); i != lw.end(); ++i) {
3247 cout << "watcher=" << i->addr << " client." << i->watcher_id << " cookie=" << i->cookie << std::endl;
3248 }
3249 } else if (strcmp(nargs[0], "listsnaps") == 0) {
3250 if (!pool_name || nargs.size() < 2)
3251 usage_exit();
3252
3253 string oid(nargs[1]);
3254 snap_set_t ls;
3255
3256 io_ctx.snap_set_read(LIBRADOS_SNAP_DIR);
3257 ret = io_ctx.list_snaps(oid, &ls);
3258 if (ret < 0) {
3259 cerr << "error listing snap shots " << pool_name << "/" << oid << ": " << cpp_strerror(ret) << std::endl;
3260 goto out;
3261 }
3262 else
3263 ret = 0;
3264
3265 map<snap_t,string> snamemap;
3266 if (formatter || pretty_format) {
3267 vector<snap_t> snaps;
3268 io_ctx.snap_list(&snaps);
3269 for (vector<snap_t>::iterator i = snaps.begin();
3270 i != snaps.end(); ++i) {
3271 string s;
3272 if (io_ctx.snap_get_name(*i, &s) < 0)
3273 continue;
3274 snamemap.insert(pair<snap_t,string>(*i, s));
3275 }
3276 }
3277
3278 if (formatter) {
3279 formatter->open_object_section("object");
3280 formatter->dump_string("name", oid);
3281 formatter->open_array_section("clones");
3282 } else {
3283 cout << oid << ":" << std::endl;
3284 cout << "cloneid snaps size overlap" << std::endl;
3285 }
3286
3287 for (std::vector<clone_info_t>::iterator ci = ls.clones.begin();
3288 ci != ls.clones.end(); ++ci) {
3289
3290 if (formatter) formatter->open_object_section("clone");
3291
3292 if (ci->cloneid == librados::SNAP_HEAD) {
3293 if (formatter)
3294 formatter->dump_string("id", "head");
3295 else
3296 cout << "head";
3297 } else {
3298 if (formatter)
3299 formatter->dump_unsigned("id", ci->cloneid);
3300 else
3301 cout << ci->cloneid;
3302 }
3303
3304 if (formatter)
3305 formatter->open_array_section("snapshots");
3306 else
3307 cout << "\t";
3308
3309 if (!formatter && ci->snaps.empty()) {
3310 cout << "-";
3311 }
3312 for (std::vector<snap_t>::const_iterator snapindex = ci->snaps.begin();
3313 snapindex != ci->snaps.end(); ++snapindex) {
3314
3315 map<snap_t,string>::iterator si;
3316
3317 if (formatter || pretty_format) si = snamemap.find(*snapindex);
3318
3319 if (formatter) {
3320 formatter->open_object_section("snapshot");
3321 formatter->dump_unsigned("id", *snapindex);
3322 if (si != snamemap.end())
3323 formatter->dump_string("name", si->second);
3324 formatter->close_section(); //snapshot
3325 } else {
3326 if (snapindex != ci->snaps.begin()) cout << ",";
3327 if (!pretty_format || (si == snamemap.end()))
3328 cout << *snapindex;
3329 else
3330 cout << si->second << "(" << *snapindex << ")";
3331 }
3332 }
3333
3334 if (formatter) {
3335 formatter->close_section(); //Snapshots
3336 formatter->dump_unsigned("size", ci->size);
3337 } else {
3338 cout << "\t" << ci->size;
3339 }
3340
3341 if (ci->cloneid != librados::SNAP_HEAD) {
3342 if (formatter)
3343 formatter->open_array_section("overlaps");
3344 else
3345 cout << "\t[";
3346
3347 for (std::vector< std::pair<uint64_t,uint64_t> >::iterator ovi = ci->overlap.begin();
3348 ovi != ci->overlap.end(); ++ovi) {
3349 if (formatter) {
3350 formatter->open_object_section("section");
3351 formatter->dump_unsigned("start", ovi->first);
3352 formatter->dump_unsigned("length", ovi->second);
3353 formatter->close_section(); //section
3354 } else {
3355 if (ovi != ci->overlap.begin()) cout << ",";
3356 cout << ovi->first << "~" << ovi->second;
3357 }
3358 }
3359 if (formatter)
3360 formatter->close_section(); //overlaps
3361 else
3362 cout << "]" << std::endl;
3363 }
3364 if (formatter) formatter->close_section(); //clone
3365 }
3366 if (formatter) {
3367 formatter->close_section(); //clones
3368 formatter->close_section(); //object
3369 formatter->flush(cout);
3370 } else {
3371 cout << std::endl;
3372 }
3373 } else if (strcmp(nargs[0], "list-inconsistent-pg") == 0) {
3374 if (!formatter) {
3375 formatter = new JSONFormatter(pretty_format);
3376 }
3377 ret = do_get_inconsistent_pg_cmd(nargs, rados, *formatter);
3378 } else if (strcmp(nargs[0], "list-inconsistent-obj") == 0) {
3379 if (!formatter) {
3380 formatter = new JSONFormatter(pretty_format);
3381 }
3382 ret = do_get_inconsistent_cmd<inconsistent_obj_t>(nargs, rados, *formatter);
3383 } else if (strcmp(nargs[0], "list-inconsistent-snapset") == 0) {
3384 if (!formatter) {
3385 formatter = new JSONFormatter(pretty_format);
3386 }
3387 ret = do_get_inconsistent_cmd<inconsistent_snapset_t>(nargs, rados, *formatter);
3388 } else if (strcmp(nargs[0], "cache-flush") == 0) {
3389 if (!pool_name || nargs.size() < 2)
3390 usage_exit();
3391 string oid(nargs[1]);
3392 if (with_clones) {
3393 snap_set_t ls;
3394 io_ctx.snap_set_read(LIBRADOS_SNAP_DIR);
3395 ret = io_ctx.list_snaps(oid, &ls);
3396 if (ret < 0) {
3397 cerr << "error listing snapshots " << pool_name << "/" << oid << ": "
3398 << cpp_strerror(ret) << std::endl;
3399 goto out;
3400 }
3401 for (std::vector<clone_info_t>::iterator ci = ls.clones.begin();
3402 ci != ls.clones.end(); ++ci) {
3403 if (snapid != CEPH_NOSNAP && ci->cloneid > snapid)
3404 break;
3405 io_ctx.snap_set_read(ci->cloneid);
3406 ret = do_cache_flush(io_ctx, oid);
3407 if (ret < 0) {
3408 cerr << "error from cache-flush " << oid << ": "
3409 << cpp_strerror(ret) << std::endl;
3410 goto out;
3411 }
3412 }
3413 } else {
3414 ret = do_cache_flush(io_ctx, oid);
3415 if (ret < 0) {
3416 cerr << "error from cache-flush " << oid << ": "
3417 << cpp_strerror(ret) << std::endl;
3418 goto out;
3419 }
3420 }
3421 } else if (strcmp(nargs[0], "cache-try-flush") == 0) {
3422 if (!pool_name || nargs.size() < 2)
3423 usage_exit();
3424 string oid(nargs[1]);
3425 if (with_clones) {
3426 snap_set_t ls;
3427 io_ctx.snap_set_read(LIBRADOS_SNAP_DIR);
3428 ret = io_ctx.list_snaps(oid, &ls);
3429 if (ret < 0) {
3430 cerr << "error listing snapshots " << pool_name << "/" << oid << ": "
3431 << cpp_strerror(ret) << std::endl;
3432 goto out;
3433 }
3434 for (std::vector<clone_info_t>::iterator ci = ls.clones.begin();
3435 ci != ls.clones.end(); ++ci) {
3436 if (snapid != CEPH_NOSNAP && ci->cloneid > snapid)
3437 break;
3438 io_ctx.snap_set_read(ci->cloneid);
3439 ret = do_cache_try_flush(io_ctx, oid);
3440 if (ret < 0) {
3441 cerr << "error from cache-flush " << oid << ": "
3442 << cpp_strerror(ret) << std::endl;
3443 goto out;
3444 }
3445 }
3446 } else {
3447 ret = do_cache_try_flush(io_ctx, oid);
3448 if (ret < 0) {
3449 cerr << "error from cache-flush " << oid << ": "
3450 << cpp_strerror(ret) << std::endl;
3451 goto out;
3452 }
3453 }
3454 } else if (strcmp(nargs[0], "cache-evict") == 0) {
3455 if (!pool_name || nargs.size() < 2)
3456 usage_exit();
3457 string oid(nargs[1]);
3458 if (with_clones) {
3459 snap_set_t ls;
3460 io_ctx.snap_set_read(LIBRADOS_SNAP_DIR);
3461 ret = io_ctx.list_snaps(oid, &ls);
3462 if (ret < 0) {
3463 cerr << "error listing snapshots " << pool_name << "/" << oid << ": "
3464 << cpp_strerror(ret) << std::endl;
3465 goto out;
3466 }
3467 for (std::vector<clone_info_t>::iterator ci = ls.clones.begin();
3468 ci != ls.clones.end(); ++ci) {
3469 if (snapid != CEPH_NOSNAP && ci->cloneid > snapid)
3470 break;
3471 io_ctx.snap_set_read(ci->cloneid);
3472 ret = do_cache_evict(io_ctx, oid);
3473 if (ret < 0) {
3474 cerr << "error from cache-flush " << oid << ": "
3475 << cpp_strerror(ret) << std::endl;
3476 goto out;
3477 }
3478 }
3479 } else {
3480 ret = do_cache_evict(io_ctx, oid);
3481 if (ret < 0) {
3482 cerr << "error from cache-flush " << oid << ": "
3483 << cpp_strerror(ret) << std::endl;
3484 goto out;
3485 }
3486 }
3487 } else if (strcmp(nargs[0], "cache-flush-evict-all") == 0) {
3488 if (!pool_name)
3489 usage_exit();
3490 ret = do_cache_flush_evict_all(io_ctx, true);
3491 if (ret < 0) {
3492 cerr << "error from cache-flush-evict-all: "
3493 << cpp_strerror(ret) << std::endl;
3494 goto out;
3495 }
3496 } else if (strcmp(nargs[0], "cache-try-flush-evict-all") == 0) {
3497 if (!pool_name)
3498 usage_exit();
3499 ret = do_cache_flush_evict_all(io_ctx, false);
3500 if (ret < 0) {
3501 cerr << "error from cache-try-flush-evict-all: "
3502 << cpp_strerror(ret) << std::endl;
3503 goto out;
3504 }
3505 } else if (strcmp(nargs[0], "set-redirect") == 0) {
3506 if (!pool_name)
3507 usage_exit();
3508
3509 const char *target = target_pool_name;
3510 if (!target)
3511 target = pool_name;
3512
3513 const char *target_obj;
3514 if (nargs.size() < 3) {
3515 if (strcmp(target, pool_name) == 0) {
3516 cerr << "cannot copy object into itself" << std::endl;
3517 ret = -1;
3518 goto out;
3519 }
3520 target_obj = nargs[1];
3521 } else {
3522 target_obj = nargs[2];
3523 }
3524
3525 IoCtx target_ctx;
3526 ret = rados.ioctx_create(target, target_ctx);
3527 if (target_oloc.size()) {
3528 target_ctx.locator_set_key(target_oloc);
3529 }
3530 if (target_nspace.size()) {
3531 target_ctx.set_namespace(target_nspace);
3532 }
3533
3534 ObjectWriteOperation op;
3535 op.set_redirect(target_obj, target_ctx, 0);
3536 ret = io_ctx.operate(nargs[1], &op);
3537 if (ret < 0) {
3538 cerr << "error set-redirect " << pool_name << "/" << nargs[1] << " => " << target << "/" << target_obj << ": " << cpp_strerror(ret) << std::endl;
3539 goto out;
3540 }
3541 } else if (strcmp(nargs[0], "export") == 0) {
3542 // export [filename]
3543 if (!pool_name || nargs.size() > 2) {
3544 usage_exit();
3545 }
3546
3547 int file_fd;
3548 if (nargs.size() < 2 || std::string(nargs[1]) == "-") {
3549 file_fd = STDOUT_FILENO;
3550 } else {
3551 file_fd = open(nargs[1], O_WRONLY|O_CREAT|O_TRUNC, 0666);
3552 if (file_fd < 0) {
3553 cerr << "Error opening '" << nargs[1] << "': "
3554 << cpp_strerror(file_fd) << std::endl;
3555 ret = file_fd;
3556 goto out;
3557 }
3558 }
3559
3560 ret = PoolDump(file_fd).dump(&io_ctx);
3561
3562 if (file_fd != STDIN_FILENO) {
3563 VOID_TEMP_FAILURE_RETRY(::close(file_fd));
3564 }
3565
3566 if (ret < 0) {
3567 cerr << "error from export: "
3568 << cpp_strerror(ret) << std::endl;
3569 goto out;
3570 }
3571 } else if (strcmp(nargs[0], "import") == 0) {
3572 // import [--no-overwrite] [--dry-run] <filename | - >
3573 if (!pool_name || nargs.size() > 4 || nargs.size() < 2) {
3574 usage_exit();
3575 }
3576
3577 // Last arg is the filename
3578 std::string const filename = nargs[nargs.size() - 1];
3579
3580 // All other args may be flags
3581 bool dry_run = false;
3582 bool no_overwrite = false;
3583 for (unsigned i = 1; i < nargs.size() - 1; ++i) {
3584 std::string arg(nargs[i]);
3585
3586 if (arg == std::string("--no-overwrite")) {
3587 no_overwrite = true;
3588 } else if (arg == std::string("--dry-run")) {
3589 dry_run = true;
3590 } else {
3591 std::cerr << "Invalid argument '" << arg << "'" << std::endl;
3592 ret = -EINVAL;
3593 goto out;
3594 }
3595 }
3596
3597 int file_fd;
3598 if (filename == "-") {
3599 file_fd = STDIN_FILENO;
3600 } else {
3601 file_fd = open(filename.c_str(), O_RDONLY);
3602 if (file_fd < 0) {
3603 cerr << "Error opening '" << filename << "': "
3604 << cpp_strerror(file_fd) << std::endl;
3605 ret = file_fd;
3606 goto out;
3607 }
3608 }
3609
3610 ret = RadosImport(file_fd, 0, dry_run).import(io_ctx, no_overwrite);
3611
3612 if (file_fd != STDIN_FILENO) {
3613 VOID_TEMP_FAILURE_RETRY(::close(file_fd));
3614 }
3615
3616 if (ret < 0) {
3617 cerr << "error from import: "
3618 << cpp_strerror(ret) << std::endl;
3619 goto out;
3620 }
3621 } else {
3622 cerr << "unrecognized command " << nargs[0] << "; -h or --help for usage" << std::endl;
3623 ret = -EINVAL;
3624 goto out;
3625 }
3626
3627 if (ret < 0)
3628 cerr << "error " << (-ret) << ": " << cpp_strerror(ret) << std::endl;
3629
3630 out:
3631 delete formatter;
3632 return (ret < 0) ? 1 : 0;
3633 }
3634
3635 int main(int argc, const char **argv)
3636 {
3637 vector<const char*> args;
3638 argv_to_vec(argc, argv, args);
3639 env_to_vec(args);
3640
3641 std::map < std::string, std::string > opts;
3642 std::string val;
3643
3644 // Necessary to support usage of -f for formatting,
3645 // since global_init will remove the -f using ceph
3646 // argparse procedures.
3647 for (auto j = args.begin(); j != args.end(); ++j) {
3648 if (strcmp(*j, "--") == 0) {
3649 break;
3650 } else if ((j+1) == args.end()) {
3651 // This can't be a formatting call (no format arg)
3652 break;
3653 } else if (strcmp(*j, "-f") == 0) {
3654 val = *(j+1);
3655 unique_ptr<Formatter> formatter(Formatter::create(val.c_str()));
3656
3657 if (formatter) {
3658 j = args.erase(j);
3659 opts["format"] = val;
3660
3661 j = args.erase(j);
3662 break;
3663 }
3664 }
3665 }
3666
3667 auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
3668 CODE_ENVIRONMENT_UTILITY, 0);
3669 common_init_finish(g_ceph_context);
3670
3671 std::vector<const char*>::iterator i;
3672 for (i = args.begin(); i != args.end(); ) {
3673 if (ceph_argparse_double_dash(args, i)) {
3674 break;
3675 } else if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
3676 usage(cout);
3677 exit(0);
3678 } else if (ceph_argparse_flag(args, i, "-f", "--force", (char*)NULL)) {
3679 opts["force"] = "true";
3680 } else if (ceph_argparse_flag(args, i, "--force-full", (char*)NULL)) {
3681 opts["force-full"] = "true";
3682 } else if (ceph_argparse_flag(args, i, "-d", "--delete-after", (char*)NULL)) {
3683 opts["delete-after"] = "true";
3684 } else if (ceph_argparse_flag(args, i, "-C", "--create", "--create-pool",
3685 (char*)NULL)) {
3686 opts["create"] = "true";
3687 } else if (ceph_argparse_flag(args, i, "--pretty-format", (char*)NULL)) {
3688 opts["pretty-format"] = "true";
3689 } else if (ceph_argparse_flag(args, i, "--show-time", (char*)NULL)) {
3690 opts["show-time"] = "true";
3691 } else if (ceph_argparse_flag(args, i, "--no-cleanup", (char*)NULL)) {
3692 opts["no-cleanup"] = "true";
3693 } else if (ceph_argparse_flag(args, i, "--no-hints", (char*)NULL)) {
3694 opts["no-hints"] = "true";
3695 } else if (ceph_argparse_flag(args, i, "--no-verify", (char*)NULL)) {
3696 opts["no-verify"] = "true";
3697 } else if (ceph_argparse_witharg(args, i, &val, "--run-name", (char*)NULL)) {
3698 opts["run-name"] = val;
3699 } else if (ceph_argparse_witharg(args, i, &val, "--prefix", (char*)NULL)) {
3700 opts["prefix"] = val;
3701 } else if (ceph_argparse_witharg(args, i, &val, "-p", "--pool", (char*)NULL)) {
3702 opts["pool"] = val;
3703 } else if (ceph_argparse_witharg(args, i, &val, "--target-pool", (char*)NULL)) {
3704 opts["target_pool"] = val;
3705 } else if (ceph_argparse_witharg(args, i, &val, "--object-locator" , (char *)NULL)) {
3706 opts["object_locator"] = val;
3707 } else if (ceph_argparse_witharg(args, i, &val, "--target-locator" , (char *)NULL)) {
3708 opts["target_locator"] = val;
3709 } else if (ceph_argparse_witharg(args, i, &val, "--target-nspace" , (char *)NULL)) {
3710 opts["target_nspace"] = val;
3711 } else if (ceph_argparse_flag(args, i, "--striper" , (char *)NULL)) {
3712 opts["striper"] = "true";
3713 } else if (ceph_argparse_witharg(args, i, &val, "-t", "--concurrent-ios", (char*)NULL)) {
3714 opts["concurrent-ios"] = val;
3715 } else if (ceph_argparse_witharg(args, i, &val, "--block-size", (char*)NULL)) {
3716 opts["block-size"] = val;
3717 } else if (ceph_argparse_witharg(args, i, &val, "-b", (char*)NULL)) {
3718 opts["block-size"] = val;
3719 } else if (ceph_argparse_witharg(args, i, &val, "--object-size", (char*)NULL)) {
3720 opts["object-size"] = val;
3721 } else if (ceph_argparse_witharg(args, i, &val, "--max-objects", (char*)NULL)) {
3722 opts["max-objects"] = val;
3723 } else if (ceph_argparse_witharg(args, i, &val, "--offset", (char*)NULL)) {
3724 opts["offset"] = val;
3725 } else if (ceph_argparse_witharg(args, i, &val, "-o", (char*)NULL)) {
3726 opts["object-size"] = val;
3727 } else if (ceph_argparse_witharg(args, i, &val, "-s", "--snap", (char*)NULL)) {
3728 opts["snap"] = val;
3729 } else if (ceph_argparse_witharg(args, i, &val, "-S", "--snapid", (char*)NULL)) {
3730 opts["snapid"] = val;
3731 } else if (ceph_argparse_witharg(args, i, &val, "--min-object-size", (char*)NULL)) {
3732 opts["min-object-size"] = val;
3733 } else if (ceph_argparse_witharg(args, i, &val, "--max-object-size", (char*)NULL)) {
3734 opts["max-object-size"] = val;
3735 } else if (ceph_argparse_witharg(args, i, &val, "--min-op-len", (char*)NULL)) {
3736 opts["min-op-len"] = val;
3737 } else if (ceph_argparse_witharg(args, i, &val, "--max-op-len", (char*)NULL)) {
3738 opts["max-op-len"] = val;
3739 } else if (ceph_argparse_witharg(args, i, &val, "--max-ops", (char*)NULL)) {
3740 opts["max-ops"] = val;
3741 } else if (ceph_argparse_witharg(args, i, &val, "--max-backlog", (char*)NULL)) {
3742 opts["max-backlog"] = val;
3743 } else if (ceph_argparse_witharg(args, i, &val, "--target-throughput", (char*)NULL)) {
3744 opts["target-throughput"] = val;
3745 } else if (ceph_argparse_witharg(args, i, &val, "--read-percent", (char*)NULL)) {
3746 opts["read-percent"] = val;
3747 } else if (ceph_argparse_witharg(args, i, &val, "--num-objects", (char*)NULL)) {
3748 opts["num-objects"] = val;
3749 } else if (ceph_argparse_witharg(args, i, &val, "--run-length", (char*)NULL)) {
3750 opts["run-length"] = val;
3751 } else if (ceph_argparse_witharg(args, i, &val, "--workers", (char*)NULL)) {
3752 opts["workers"] = val;
3753 } else if (ceph_argparse_witharg(args, i, &val, "--format", (char*)NULL)) {
3754 opts["format"] = val;
3755 } else if (ceph_argparse_witharg(args, i, &val, "--lock-tag", (char*)NULL)) {
3756 opts["lock-tag"] = val;
3757 } else if (ceph_argparse_witharg(args, i, &val, "--lock-cookie", (char*)NULL)) {
3758 opts["lock-cookie"] = val;
3759 } else if (ceph_argparse_witharg(args, i, &val, "--lock-description", (char*)NULL)) {
3760 opts["lock-description"] = val;
3761 } else if (ceph_argparse_witharg(args, i, &val, "--lock-duration", (char*)NULL)) {
3762 opts["lock-duration"] = val;
3763 } else if (ceph_argparse_witharg(args, i, &val, "--lock-type", (char*)NULL)) {
3764 opts["lock-type"] = val;
3765 } else if (ceph_argparse_witharg(args, i, &val, "-N", "--namespace", (char*)NULL)) {
3766 opts["namespace"] = val;
3767 } else if (ceph_argparse_flag(args, i, "--all", (char*)NULL)) {
3768 opts["all"] = "true";
3769 } else if (ceph_argparse_flag(args, i, "--default", (char*)NULL)) {
3770 opts["default"] = "true";
3771 } else if (ceph_argparse_witharg(args, i, &val, "-o", "--output", (char*)NULL)) {
3772 opts["output"] = val;
3773 } else if (ceph_argparse_flag(args, i, "--write-omap", (char*)NULL)) {
3774 opts["write-dest-omap"] = "true";
3775 } else if (ceph_argparse_flag(args, i, "--write-object", (char*)NULL)) {
3776 opts["write-dest-obj"] = "true";
3777 } else if (ceph_argparse_flag(args, i, "--write-xattr", (char*)NULL)) {
3778 opts["write-dest-xattr"] = "true";
3779 } else if (ceph_argparse_flag(args, i, "--with-clones", (char*)NULL)) {
3780 opts["with-clones"] = "true";
3781 } else if (ceph_argparse_witharg(args, i, &val, "--omap-key-file", (char*)NULL)) {
3782 opts["omap-key-file"] = val;
3783 } else {
3784 if (val[0] == '-')
3785 usage_exit();
3786 ++i;
3787 }
3788 }
3789
3790 if (args.empty()) {
3791 cerr << "rados: you must give an action. Try --help" << std::endl;
3792 return 1;
3793 }
3794
3795 return rados_tool_common(opts, args);
3796 }