]> git.proxmox.com Git - ceph.git/blob - ceph/src/tools/rados/rados.cc
import 12.2.13 release
[ceph.git] / ceph / src / tools / rados / rados.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include "include/types.h"
16
17 #include "include/rados/librados.hpp"
18 #include "include/rados/rados_types.hpp"
19 #include "include/radosstriper/libradosstriper.hpp"
20 using namespace libradosstriper;
21
22 #include "common/config.h"
23 #include "common/ceph_argparse.h"
24 #include "global/global_init.h"
25 #include "common/Cond.h"
26 #include "common/debug.h"
27 #include "common/errno.h"
28 #include "common/Formatter.h"
29 #include "common/obj_bencher.h"
30 #include "common/TextTable.h"
31 #include "include/stringify.h"
32 #include "mds/inode_backtrace.h"
33 #include "auth/Crypto.h"
34 #include <iostream>
35 #include <fstream>
36
37 #include <stdlib.h>
38 #include <time.h>
39 #include <sstream>
40 #include <errno.h>
41 #include <dirent.h>
42 #include <stdexcept>
43 #include <climits>
44 #include <locale>
45 #include <memory>
46
47 #include "cls/lock/cls_lock_client.h"
48 #include "include/compat.h"
49 #include "include/util.h"
50 #include "common/hobject.h"
51
52 #include "PoolDump.h"
53 #include "RadosImport.h"
54
55 #include "osd/ECUtil.h"
56
57 using namespace librados;
58
// Stringification helpers.  _STR() turns its argument into a string literal
// exactly as written; STR() goes through one extra level of macro expansion
// so that a macro argument is expanded before being stringified.
#define _STR(x) #x
#define STR(x) _STR(x)
62
// Write the complete command-line help text for the rados tool to `out`.
// The text is a single concatenated string literal; nothing else is printed
// and the stream is not flushed explicitly.
void usage(std::ostream& out)
{
  out <<
"usage: rados [options] [commands]\n"
"POOL COMMANDS\n"
" lspools list pools\n"
" mkpool <pool-name> [123[ 4]] create pool <pool-name>\n"
" [with auid 123[and using crush rule 4]]\n"
" cppool <pool-name> <dest-pool> copy content of a pool\n"
" rmpool <pool-name> [<pool-name> --yes-i-really-really-mean-it]\n"
" remove pool <pool-name>\n"
" purge <pool-name> --yes-i-really-really-mean-it\n"
" remove all objects from pool <pool-name> without removing it\n"
" df show per-pool and total usage\n"
" ls list objects in pool\n\n"
" chown 123 change the pool owner to auid 123\n"
"\n"
"POOL SNAP COMMANDS\n"
" lssnap list snaps\n"
" mksnap <snap-name> create snap <snap-name>\n"
" rmsnap <snap-name> remove snap <snap-name>\n"
"\n"
"OBJECT COMMANDS\n"
" get <obj-name> [outfile] fetch object\n"
" put <obj-name> [infile] [--offset offset]\n"
" write object with start offset (default:0)\n"
" append <obj-name> [infile] append object\n"
" truncate <obj-name> length truncate object\n"
" create <obj-name> create object\n"
" rm <obj-name> ...[--force-full] [force no matter full or not]remove object(s)\n"
" cp <obj-name> [target-obj] copy object\n"
" listxattr <obj-name>\n"
" getxattr <obj-name> attr\n"
" setxattr <obj-name> attr val\n"
" rmxattr <obj-name> attr\n"
" stat <obj-name> stat the named object\n"
" mapext <obj-name>\n"
" rollback <obj-name> <snap-name> roll back object to snap <snap-name>\n"
"\n"
" listsnaps <obj-name> list the snapshots of this object\n"
" bench <seconds> write|seq|rand [-t concurrent_operations] [--no-cleanup] [--run-name run_name] [--no-hints]\n"
" default is 16 concurrent IOs and 4 MB ops\n"
" default is to clean up after write benchmark\n"
" default run-name is 'benchmark_last_metadata'\n"
" cleanup [--run-name run_name] [--prefix prefix]\n"
" clean up a previous benchmark operation\n"
" default run-name is 'benchmark_last_metadata'\n"
" load-gen [options] generate load on the cluster\n"
" listomapkeys <obj-name> list the keys in the object map\n"
" listomapvals <obj-name> list the keys and vals in the object map \n"
" getomapval <obj-name> <key> [file] show the value for the specified key\n"
" in the object's object map\n"
" setomapval <obj-name> <key> <val>\n"
" rmomapkey <obj-name> <key>\n"
" getomapheader <obj-name> [file]\n"
" setomapheader <obj-name> <val>\n"
" tmap-to-omap <obj-name> convert tmap keys/values to omap\n"
" watch <obj-name> add watcher on this object\n"
" notify <obj-name> <message> notify watcher of this object with message\n"
" listwatchers <obj-name> list the watchers of this object\n"
" set-alloc-hint <obj-name> <expected-object-size> <expected-write-size>\n"
" set allocation hint for an object\n"
"\n"
"IMPORT AND EXPORT\n"
" export [filename]\n"
" Serialize pool contents to a file or standard out.\n"
" import [--dry-run] [--no-overwrite] < filename | - >\n"
" Load pool contents from a file or standard in\n"
"\n"
"ADVISORY LOCKS\n"
" lock list <obj-name>\n"
" List all advisory locks on an object\n"
" lock get <obj-name> <lock-name>\n"
" Try to acquire a lock\n"
" lock break <obj-name> <lock-name> <locker-name>\n"
" Try to break a lock acquired by another client\n"
" lock info <obj-name> <lock-name>\n"
" Show lock information\n"
" options:\n"
" --lock-tag Lock tag, all locks operation should use\n"
" the same tag\n"
" --lock-cookie Locker cookie\n"
" --lock-description Description of lock\n"
" --lock-duration Lock duration (in seconds)\n"
" --lock-type Lock type (shared, exclusive)\n"
"\n"
"SCRUB AND REPAIR:\n"
" list-inconsistent-pg <pool> list inconsistent PGs in given pool\n"
" list-inconsistent-obj <pgid> list inconsistent objects in given PG\n"
" list-inconsistent-snapset <pgid> list inconsistent snapsets in the given PG\n"
"\n"
"CACHE POOLS: (for testing/development only)\n"
" cache-flush <obj-name> flush cache pool object (blocking)\n"
" cache-try-flush <obj-name> flush cache pool object (non-blocking)\n"
" cache-evict <obj-name> evict cache pool object\n"
" cache-flush-evict-all flush+evict all objects\n"
" cache-try-flush-evict-all try-flush+evict all objects\n"
"\n"
"GLOBAL OPTIONS:\n"
" --object_locator object_locator\n"
" set object_locator for operation\n"
" -p pool\n"
" --pool=pool\n"
" select given pool by name\n"
" --target-pool=pool\n"
" select target pool by name\n"
" --pgid PG id\n"
" select given PG id\n"
" -b op_size\n"
" set the block size for put/get ops and for write benchmarking\n"
" -o object_size\n"
" set the object size for put/get ops and for write benchmarking\n"
" --max-objects\n"
" set the max number of objects for write benchmarking\n"
" -s name\n"
" --snap name\n"
" select given snap name for (read) IO\n"
" -i infile\n"
" --create\n"
" create the pool or directory that was specified\n"
" -N namespace\n"
" --namespace=namespace\n"
" specify the namespace to use for the object\n"
" --all\n"
" Use with ls to list objects in all namespaces\n"
" Put in CEPH_ARGS environment variable to make this the default\n"
" --default\n"
" Use with ls to list objects in default namespace\n"
" Takes precedence over --all in case --all is in environment\n"
" --target-locator\n"
" Use with cp to specify the locator of the new object\n"
" --target-nspace\n"
" Use with cp to specify the namespace of the new object\n"
" --striper\n"
" Use radosstriper interface rather than pure rados\n"
" Available for stat, get, put, truncate, rm, ls and \n"
" all xattr related operations\n"
"\n"
"BENCH OPTIONS:\n"
" -t N\n"
" --concurrent-ios=N\n"
" Set number of concurrent I/O operations\n"
" --show-time\n"
" prefix output with date/time\n"
" --no-verify\n"
" do not verify contents of read objects\n"
" --write-object\n"
" write contents to the objects\n"
" --write-omap\n"
" write contents to the omap\n"
" --write-xattr\n"
" write contents to the extended attributes\n"
"\n"
"LOAD GEN OPTIONS:\n"
" --num-objects total number of objects\n"
" --min-object-size min object size\n"
" --max-object-size max object size\n"
" --min-op-len min io size of operations\n"
" --max-op-len max io size of operations\n"
" --max-ops max number of operations\n"
" --max-backlog max backlog size\n"
" --read-percent percent of operations that are read\n"
" --target-throughput target throughput (in bytes)\n"
" --run-length total time (in seconds)\n"
"CACHE POOLS OPTIONS:\n"
" --with-clones include clones when doing flush or evict\n"
"OMAP OPTIONS:\n"
" --omap-key-file file read the omap key from a file\n";
}
232
// Default operation size in bytes: 4 MiB (the same value as 1 << 22).
unsigned default_op_size = 4u << 20;
234
235 static void usage_exit()
236 {
237 usage(cerr);
238 exit(1);
239 }
240
241
242 template <typename I, typename T>
243 static int rados_sistrtoll(I &i, T *val) {
244 std::string err;
245 *val = strict_iecstrtoll(i->second.c_str(), &err);
246 if (err != "") {
247 cerr << "Invalid value for " << i->first << ": " << err << std::endl;
248 return -EINVAL;
249 } else {
250 return 0;
251 }
252 }
253
254
255 static int dump_data(std::string const &filename, bufferlist const &data)
256 {
257 int fd;
258 if (filename == "-") {
259 fd = STDOUT_FILENO;
260 } else {
261 fd = TEMP_FAILURE_RETRY(::open(filename.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0644));
262 if (fd < 0) {
263 int err = errno;
264 cerr << "failed to open file: " << cpp_strerror(err) << std::endl;
265 return -err;
266 }
267 }
268
269 int r = data.write_fd(fd);
270
271 if (fd != 1) {
272 VOID_TEMP_FAILURE_RETRY(::close(fd));
273 }
274
275 return r;
276 }
277
278
279 static int do_get(IoCtx& io_ctx, RadosStriper& striper,
280 const char *objname, const char *outfile, unsigned op_size,
281 bool use_striper)
282 {
283 string oid(objname);
284
285 int fd;
286 if (strcmp(outfile, "-") == 0) {
287 fd = STDOUT_FILENO;
288 } else {
289 fd = TEMP_FAILURE_RETRY(::open(outfile, O_WRONLY|O_CREAT|O_TRUNC, 0644));
290 if (fd < 0) {
291 int err = errno;
292 cerr << "failed to open file: " << cpp_strerror(err) << std::endl;
293 return -err;
294 }
295 }
296
297 uint64_t offset = 0;
298 int ret;
299 while (true) {
300 bufferlist outdata;
301 if (use_striper) {
302 ret = striper.read(oid, &outdata, op_size, offset);
303 } else {
304 ret = io_ctx.read(oid, outdata, op_size, offset);
305 }
306 if (ret <= 0) {
307 goto out;
308 }
309 ret = outdata.write_fd(fd);
310 if (ret < 0) {
311 cerr << "error writing to file: " << cpp_strerror(ret) << std::endl;
312 goto out;
313 }
314 if (outdata.length() < op_size)
315 break;
316 offset += outdata.length();
317 }
318 ret = 0;
319
320 out:
321 if (fd != 1)
322 VOID_TEMP_FAILURE_RETRY(::close(fd));
323 return ret;
324 }
325
326 static int do_copy(IoCtx& io_ctx, const char *objname,
327 IoCtx& target_ctx, const char *target_obj)
328 {
329 __le32 src_fadvise_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL | LIBRADOS_OP_FLAG_FADVISE_NOCACHE;
330 __le32 dest_fadvise_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL | LIBRADOS_OP_FLAG_FADVISE_DONTNEED;
331 ObjectWriteOperation op;
332 op.copy_from2(objname, io_ctx, 0, src_fadvise_flags);
333 op.set_op_flags2(dest_fadvise_flags);
334
335 return target_ctx.operate(target_obj, &op);
336 }
337
338 static int do_copy_pool(Rados& rados, const char *src_pool, const char *target_pool)
339 {
340 IoCtx src_ctx, target_ctx;
341 int ret = rados.ioctx_create(src_pool, src_ctx);
342 if (ret < 0) {
343 cerr << "cannot open source pool: " << src_pool << std::endl;
344 return ret;
345 }
346 ret = rados.ioctx_create(target_pool, target_ctx);
347 if (ret < 0) {
348 cerr << "cannot open target pool: " << target_pool << std::endl;
349 return ret;
350 }
351 src_ctx.set_namespace(all_nspaces);
352 librados::NObjectIterator i = src_ctx.nobjects_begin();
353 librados::NObjectIterator i_end = src_ctx.nobjects_end();
354 for (; i != i_end; ++i) {
355 string nspace = i->get_nspace();
356 string oid = i->get_oid();
357 string locator = i->get_locator();
358
359 string target_name = (nspace.size() ? nspace + "/" : "") + oid;
360 string src_name = target_name;
361 if (locator.size())
362 src_name += "(@" + locator + ")";
363 cout << src_pool << ":" << src_name << " => "
364 << target_pool << ":" << target_name << std::endl;
365
366 src_ctx.locator_set_key(locator);
367 src_ctx.set_namespace(nspace);
368 target_ctx.set_namespace(nspace);
369 ret = do_copy(src_ctx, oid.c_str(), target_ctx, oid.c_str());
370 if (ret < 0) {
371 cerr << "error copying object: " << cpp_strerror(errno) << std::endl;
372 return ret;
373 }
374 }
375
376 return 0;
377 }
378
379 static int do_put(IoCtx& io_ctx, RadosStriper& striper,
380 const char *objname, const char *infile, int op_size,
381 uint64_t obj_offset, bool use_striper)
382 {
383 string oid(objname);
384 bool stdio = (strcmp(infile, "-") == 0);
385 int ret = 0;
386 int fd = STDIN_FILENO;
387 if (!stdio)
388 fd = open(infile, O_RDONLY);
389 if (fd < 0) {
390 cerr << "error reading input file " << infile << ": " << cpp_strerror(errno) << std::endl;
391 return 1;
392 }
393 int count = op_size;
394 uint64_t offset = obj_offset;
395 while (count != 0) {
396 bufferlist indata;
397 count = indata.read_fd(fd, op_size);
398 if (count < 0) {
399 ret = -errno;
400 cerr << "error reading input file " << infile << ": " << cpp_strerror(ret) << std::endl;
401 goto out;
402 }
403
404 if (count == 0) {
405 if (offset == obj_offset) { // in case we have to create an empty object & if obj_offset > 0 do a hole
406 if (use_striper) {
407 ret = striper.write_full(oid, indata); // indata is empty
408 } else {
409 ret = io_ctx.write_full(oid, indata); // indata is empty
410 }
411 if (ret < 0) {
412 goto out;
413 }
414 if (offset) {
415 if (use_striper) {
416 ret = striper.trunc(oid, offset); // before truncate, object must be existed.
417 } else {
418 ret = io_ctx.trunc(oid, offset); // before truncate, object must be existed.
419 }
420
421 if (ret < 0) {
422 goto out;
423 }
424 }
425 }
426 continue;
427 }
428 if (use_striper) {
429 if (offset == 0)
430 ret = striper.write_full(oid, indata);
431 else
432 ret = striper.write(oid, indata, count, offset);
433 } else {
434 if (offset == 0)
435 ret = io_ctx.write_full(oid, indata);
436 else
437 ret = io_ctx.write(oid, indata, count, offset);
438 }
439
440 if (ret < 0) {
441 goto out;
442 }
443 offset += count;
444 }
445 ret = 0;
446 out:
447 if (fd != STDOUT_FILENO)
448 VOID_TEMP_FAILURE_RETRY(close(fd));
449 return ret;
450 }
451
452 static int do_append(IoCtx& io_ctx, RadosStriper& striper,
453 const char *objname, const char *infile, int op_size,
454 bool use_striper)
455 {
456 string oid(objname);
457 bool stdio = (strcmp(infile, "-") == 0);
458 int ret = 0;
459 int fd = STDIN_FILENO;
460 if (!stdio)
461 fd = open(infile, O_RDONLY);
462 if (fd < 0) {
463 cerr << "error reading input file " << infile << ": " << cpp_strerror(errno) << std::endl;
464 return 1;
465 }
466 int count = op_size;
467 while (count != 0) {
468 bufferlist indata;
469 count = indata.read_fd(fd, op_size);
470 if (count < 0) {
471 ret = -errno;
472 cerr << "error reading input file " << infile << ": " << cpp_strerror(ret) << std::endl;
473 goto out;
474 }
475 if (use_striper) {
476 ret = striper.append(oid, indata, count);
477 } else {
478 ret = io_ctx.append(oid, indata, count);
479 }
480
481 if (ret < 0) {
482 goto out;
483 }
484 }
485 ret = 0;
486 out:
487 if (fd != STDOUT_FILENO)
488 VOID_TEMP_FAILURE_RETRY(close(fd));
489 return ret;
490 }
491
492 class RadosWatchCtx : public librados::WatchCtx2 {
493 IoCtx& ioctx;
494 string name;
495 public:
496 RadosWatchCtx(IoCtx& io, const char *imgname) : ioctx(io), name(imgname) {}
497 ~RadosWatchCtx() override {}
498 void handle_notify(uint64_t notify_id,
499 uint64_t cookie,
500 uint64_t notifier_id,
501 bufferlist& bl) override {
502 cout << "NOTIFY"
503 << " cookie " << cookie
504 << " notify_id " << notify_id
505 << " from " << notifier_id
506 << std::endl;
507 bl.hexdump(cout);
508 ioctx.notify_ack(name, notify_id, cookie, bl);
509 }
510 void handle_error(uint64_t cookie, int err) override {
511 cout << "ERROR"
512 << " cookie " << cookie
513 << " err " << cpp_strerror(err)
514 << std::endl;
515 }
516 };
517
518 static const char alphanum_table[]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
519
520 int gen_rand_alphanumeric(char *dest, int size) /* size should be the required string size + 1 */
521 {
522 int ret = get_random_bytes(dest, size);
523 if (ret < 0) {
524 cerr << "cannot get random bytes: " << cpp_strerror(ret) << std::endl;
525 return -1;
526 }
527
528 int i;
529 for (i=0; i<size - 1; i++) {
530 int pos = (unsigned)dest[i];
531 dest[i] = alphanum_table[pos & 63];
532 }
533 dest[i] = '\0';
534
535 return 0;
536 }
537
// Name and length of one object; LoadGen keeps one entry per object it
// creates.
struct obj_info {
  std::string name;  // object name
  size_t len;        // object length
};
542
543 class LoadGen {
544 size_t total_sent;
545 size_t total_completed;
546
547 IoCtx io_ctx;
548 Rados *rados;
549
550 map<int, obj_info> objs;
551
552 utime_t start_time;
553
554 bool going_down;
555
556 public:
557 int read_percent;
558 int num_objs;
559 size_t min_obj_len;
560 uint64_t max_obj_len;
561 size_t min_op_len;
562 size_t max_op_len;
563 size_t max_ops;
564 size_t max_backlog;
565 size_t target_throughput;
566 int run_length;
567
568 enum {
569 OP_READ,
570 OP_WRITE,
571 };
572
573 struct LoadGenOp {
574 int id;
575 int type;
576 string oid;
577 size_t off;
578 size_t len;
579 bufferlist bl;
580 LoadGen *lg;
581 librados::AioCompletion *completion;
582
583 LoadGenOp() : id(0), type(0), off(0), len(0), lg(NULL), completion(NULL) {}
584 explicit LoadGenOp(LoadGen *_lg) : id(0), type(0), off(0), len(0), lg(_lg), completion(NULL) {}
585 };
586
587 int max_op;
588
589 map<int, LoadGenOp *> pending_ops;
590
591 void gen_op(LoadGenOp *op);
592 uint64_t gen_next_op();
593 void run_op(LoadGenOp *op);
594
595 uint64_t cur_sent_rate() {
596 return total_sent / time_passed();
597 }
598
599 uint64_t cur_completed_rate() {
600 return total_completed / time_passed();
601 }
602
603 uint64_t total_expected() {
604 return target_throughput * time_passed();
605 }
606
607 float time_passed() {
608 utime_t now = ceph_clock_now();
609 now -= start_time;
610 uint64_t ns = now.nsec();
611 float total = (float) ns / 1000000000.0;
612 total += now.sec();
613 return total;
614 }
615
616 Mutex lock;
617 Cond cond;
618
619 explicit LoadGen(Rados *_rados) : rados(_rados), going_down(false), lock("LoadGen") {
620 read_percent = 80;
621 min_obj_len = 1024;
622 max_obj_len = 5ull * 1024ull * 1024ull * 1024ull;
623 min_op_len = 1024;
624 target_throughput = 5 * 1024 * 1024; // B/sec
625 max_op_len = 2 * 1024 * 1024;
626 max_ops = 16;
627 max_backlog = target_throughput * 2;
628 run_length = 60;
629
630 total_sent = 0;
631 total_completed = 0;
632 num_objs = 200;
633 max_op = 0;
634 }
635 int bootstrap(const char *pool);
636 int run();
637 void cleanup();
638
639 void io_cb(completion_t c, LoadGenOp *op) {
640 Mutex::Locker l(lock);
641
642 total_completed += op->len;
643
644 double rate = (double)cur_completed_rate() / (1024 * 1024);
645 std::streamsize original_precision = cout.precision();
646 cout.precision(3);
647 cout << "op " << op->id << " completed, throughput=" << rate << "MB/sec" << std::endl;
648 cout.precision(original_precision);
649
650 map<int, LoadGenOp *>::iterator iter = pending_ops.find(op->id);
651 if (iter != pending_ops.end())
652 pending_ops.erase(iter);
653
654 if (!going_down)
655 op->completion->release();
656
657 delete op;
658
659 cond.Signal();
660 }
661 };
662
663 static void _load_gen_cb(completion_t c, void *param)
664 {
665 LoadGen::LoadGenOp *op = (LoadGen::LoadGenOp *)param;
666 op->lg->io_cb(c, op);
667 }
668
669 int LoadGen::bootstrap(const char *pool)
670 {
671 char buf[128];
672 int i;
673
674 if (!pool) {
675 cerr << "ERROR: pool name was not specified" << std::endl;
676 return -EINVAL;
677 }
678
679 int ret = rados->ioctx_create(pool, io_ctx);
680 if (ret < 0) {
681 cerr << "error opening pool " << pool << ": " << cpp_strerror(ret) << std::endl;
682 return ret;
683 }
684
685 int buf_len = 1;
686 bufferptr p = buffer::create(buf_len);
687 bufferlist bl;
688 memset(p.c_str(), 0, buf_len);
689 bl.push_back(p);
690
691 list<librados::AioCompletion *> completions;
692 for (i = 0; i < num_objs; i++) {
693 obj_info info;
694 gen_rand_alphanumeric(buf, 16);
695 info.name = "obj-";
696 info.name.append(buf);
697 info.len = get_random(min_obj_len, max_obj_len);
698
699 // throttle...
700 while (completions.size() > max_ops) {
701 AioCompletion *c = completions.front();
702 c->wait_for_complete();
703 ret = c->get_return_value();
704 c->release();
705 completions.pop_front();
706 if (ret < 0) {
707 cerr << "aio_write failed" << std::endl;
708 return ret;
709 }
710 }
711
712 librados::AioCompletion *c = rados->aio_create_completion(NULL, NULL, NULL);
713 completions.push_back(c);
714 // generate object
715 ret = io_ctx.aio_write(info.name, c, bl, buf_len, info.len - buf_len);
716 if (ret < 0) {
717 cerr << "couldn't write obj: " << info.name << " ret=" << ret << std::endl;
718 return ret;
719 }
720 objs[i] = info;
721 }
722
723 list<librados::AioCompletion *>::iterator iter;
724 for (iter = completions.begin(); iter != completions.end(); ++iter) {
725 AioCompletion *c = *iter;
726 c->wait_for_complete();
727 ret = c->get_return_value();
728 c->release();
729 if (ret < 0) { // yes, we leak.
730 cerr << "aio_write failed" << std::endl;
731 return ret;
732 }
733 }
734 return 0;
735 }
736
737 void LoadGen::run_op(LoadGenOp *op)
738 {
739 op->completion = rados->aio_create_completion(op, _load_gen_cb, NULL);
740
741 switch (op->type) {
742 case OP_READ:
743 io_ctx.aio_read(op->oid, op->completion, &op->bl, op->len, op->off);
744 break;
745 case OP_WRITE:
746 bufferptr p = buffer::create(op->len);
747 memset(p.c_str(), 0, op->len);
748 op->bl.push_back(p);
749
750 io_ctx.aio_write(op->oid, op->completion, op->bl, op->len, op->off);
751 break;
752 }
753
754 total_sent += op->len;
755 }
756
757 void LoadGen::gen_op(LoadGenOp *op)
758 {
759 int i = get_random(0, objs.size() - 1);
760 obj_info& info = objs[i];
761 op->oid = info.name;
762
763 size_t len = get_random(min_op_len, max_op_len);
764 if (len > info.len)
765 len = info.len;
766 size_t off = get_random(0, info.len);
767
768 if (off + len > info.len)
769 off = info.len - len;
770
771 op->off = off;
772 op->len = len;
773
774 i = get_random(1, 100);
775 if (i > read_percent)
776 op->type = OP_WRITE;
777 else
778 op->type = OP_READ;
779
780 cout << (op->type == OP_READ ? "READ" : "WRITE") << " : oid=" << op->oid << " off=" << op->off << " len=" << op->len << std::endl;
781 }
782
783 uint64_t LoadGen::gen_next_op()
784 {
785 lock.Lock();
786
787 LoadGenOp *op = new LoadGenOp(this);
788 gen_op(op);
789 op->id = max_op++;
790 pending_ops[op->id] = op;
791
792 lock.Unlock();
793
794 run_op(op);
795
796 return op->len;
797 }
798
799 int LoadGen::run()
800 {
801 start_time = ceph_clock_now();
802 utime_t end_time = start_time;
803 end_time += run_length;
804 utime_t stamp_time = start_time;
805 uint32_t total_sec = 0;
806
807 while (1) {
808 lock.Lock();
809 utime_t one_second(1, 0);
810 cond.WaitInterval(lock, one_second);
811 lock.Unlock();
812 utime_t now = ceph_clock_now();
813
814 if (now > end_time)
815 break;
816
817 uint64_t expected = total_expected();
818 lock.Lock();
819 uint64_t sent = total_sent;
820 uint64_t completed = total_completed;
821 lock.Unlock();
822
823 if (now - stamp_time >= utime_t(1, 0)) {
824 double rate = (double)cur_completed_rate() / (1024 * 1024);
825 ++total_sec;
826 std::streamsize original_precision = cout.precision();
827 cout.precision(3);
828 cout << setw(5) << total_sec << ": throughput=" << rate << "MB/sec" << " pending data=" << sent - completed << std::endl;
829 cout.precision(original_precision);
830 stamp_time = now;
831 }
832
833 while (sent < expected &&
834 sent - completed < max_backlog &&
835 pending_ops.size() < max_ops) {
836 sent += gen_next_op();
837 }
838 }
839
840 // get a reference to all pending requests
841 vector<librados::AioCompletion *> completions;
842 lock.Lock();
843 going_down = true;
844 map<int, LoadGenOp *>::iterator iter;
845 for (iter = pending_ops.begin(); iter != pending_ops.end(); ++iter) {
846 LoadGenOp *op = iter->second;
847 completions.push_back(op->completion);
848 }
849 lock.Unlock();
850
851 cout << "waiting for all operations to complete" << std::endl;
852
853 // now wait on all the pending requests
854 for (vector<librados::AioCompletion *>::iterator citer = completions.begin(); citer != completions.end(); ++citer) {
855 librados::AioCompletion *c = *citer;
856 c->wait_for_complete();
857 c->release();
858 }
859
860 return 0;
861 }
862
863 void LoadGen::cleanup()
864 {
865 cout << "cleaning up objects" << std::endl;
866 map<int, obj_info>::iterator iter;
867 for (iter = objs.begin(); iter != objs.end(); ++iter) {
868 obj_info& info = iter->second;
869 int ret = io_ctx.remove(info.name);
870 if (ret < 0)
871 cerr << "couldn't remove obj: " << info.name << " ret=" << ret << std::endl;
872 }
873 }
874
// Bit flags selecting where benchmark writes go; they can be OR-ed
// together.  Values are 2, 4 and 8.
enum OpWriteDest {
  OP_WRITE_DEST_OBJ   = 1 << 1,
  OP_WRITE_DEST_OMAP  = 1 << 2,
  OP_WRITE_DEST_XATTR = 1 << 3,
};
880
881 class RadosBencher : public ObjBencher {
882 librados::AioCompletion **completions;
883 librados::Rados& rados;
884 librados::IoCtx& io_ctx;
885 librados::NObjectIterator oi;
886 bool iterator_valid;
887 OpWriteDest write_destination;
888
889 protected:
890 int completions_init(int concurrentios) override {
891 completions = new librados::AioCompletion *[concurrentios];
892 return 0;
893 }
894 void completions_done() override {
895 delete[] completions;
896 completions = NULL;
897 }
898 int create_completion(int slot, void (*cb)(void *, void*), void *arg) override {
899 completions[slot] = rados.aio_create_completion((void *) arg, 0, cb);
900
901 if (!completions[slot])
902 return -EINVAL;
903
904 return 0;
905 }
906 void release_completion(int slot) override {
907 completions[slot]->release();
908 completions[slot] = 0;
909 }
910
911 int aio_read(const std::string& oid, int slot, bufferlist *pbl, size_t len,
912 size_t offset) override {
913 return io_ctx.aio_read(oid, completions[slot], pbl, len, 0);
914 }
915
916 int aio_write(const std::string& oid, int slot, bufferlist& bl, size_t len,
917 size_t offset) override {
918 librados::ObjectWriteOperation op;
919
920 if (write_destination & OP_WRITE_DEST_OBJ) {
921 if (data.hints)
922 op.set_alloc_hint2(data.object_size, data.op_size,
923 ALLOC_HINT_FLAG_SEQUENTIAL_WRITE |
924 ALLOC_HINT_FLAG_SEQUENTIAL_READ |
925 ALLOC_HINT_FLAG_APPEND_ONLY |
926 ALLOC_HINT_FLAG_IMMUTABLE);
927 op.write(offset, bl);
928 }
929
930 if (write_destination & OP_WRITE_DEST_OMAP) {
931 std::map<std::string, librados::bufferlist> omap;
932 omap[string("bench-omap-key-") + stringify(offset)] = bl;
933 op.omap_set(omap);
934 }
935
936 if (write_destination & OP_WRITE_DEST_XATTR) {
937 char key[80];
938 snprintf(key, sizeof(key), "bench-xattr-key-%d", (int)offset);
939 op.setxattr(key, bl);
940 }
941
942 return io_ctx.aio_operate(oid, completions[slot], &op);
943 }
944
945 int aio_remove(const std::string& oid, int slot) override {
946 return io_ctx.aio_remove(oid, completions[slot]);
947 }
948
949 int sync_read(const std::string& oid, bufferlist& bl, size_t len) override {
950 return io_ctx.read(oid, bl, len, 0);
951 }
952 int sync_write(const std::string& oid, bufferlist& bl, size_t len) override {
953 return io_ctx.write_full(oid, bl);
954 }
955
956 int sync_remove(const std::string& oid) override {
957 return io_ctx.remove(oid);
958 }
959
960 bool completion_is_done(int slot) override {
961 return completions[slot]->is_safe();
962 }
963
964 int completion_wait(int slot) override {
965 return completions[slot]->wait_for_safe_and_cb();
966 }
967 int completion_ret(int slot) override {
968 return completions[slot]->get_return_value();
969 }
970
971 bool get_objects(std::list<Object>* objects, int num) override {
972 int count = 0;
973
974 if (!iterator_valid) {
975 oi = io_ctx.nobjects_begin();
976 iterator_valid = true;
977 }
978
979 librados::NObjectIterator ei = io_ctx.nobjects_end();
980
981 if (oi == ei) {
982 iterator_valid = false;
983 return false;
984 }
985
986 objects->clear();
987 for ( ; oi != ei && count < num; ++oi) {
988 Object obj(oi->get_oid(), oi->get_nspace());
989 objects->push_back(obj);
990 ++count;
991 }
992
993 return true;
994 }
995
996 void set_namespace( const std::string& ns) override {
997 io_ctx.set_namespace(ns);
998 }
999
1000 public:
1001 RadosBencher(CephContext *cct_, librados::Rados& _r, librados::IoCtx& _i)
1002 : ObjBencher(cct_), completions(NULL), rados(_r), io_ctx(_i), iterator_valid(false), write_destination(OP_WRITE_DEST_OBJ) {}
1003 ~RadosBencher() override { }
1004
1005 void set_write_destination(OpWriteDest dest) {
1006 write_destination = dest;
1007 }
1008 };
1009
1010 static int do_lock_cmd(std::vector<const char*> &nargs,
1011 const std::map < std::string, std::string > &opts,
1012 IoCtx *ioctx,
1013 Formatter *formatter)
1014 {
1015 if (nargs.size() < 3)
1016 usage_exit();
1017
1018 string cmd(nargs[1]);
1019 string oid(nargs[2]);
1020
1021 string lock_tag;
1022 string lock_cookie;
1023 string lock_description;
1024 int lock_duration = 0;
1025 ClsLockType lock_type = LOCK_EXCLUSIVE;
1026
1027 map<string, string>::const_iterator i;
1028 i = opts.find("lock-tag");
1029 if (i != opts.end()) {
1030 lock_tag = i->second;
1031 }
1032 i = opts.find("lock-cookie");
1033 if (i != opts.end()) {
1034 lock_cookie = i->second;
1035 }
1036 i = opts.find("lock-description");
1037 if (i != opts.end()) {
1038 lock_description = i->second;
1039 }
1040 i = opts.find("lock-duration");
1041 if (i != opts.end()) {
1042 if (rados_sistrtoll(i, &lock_duration)) {
1043 return -EINVAL;
1044 }
1045 }
1046 i = opts.find("lock-type");
1047 if (i != opts.end()) {
1048 const string& type_str = i->second;
1049 if (type_str.compare("exclusive") == 0) {
1050 lock_type = LOCK_EXCLUSIVE;
1051 } else if (type_str.compare("shared") == 0) {
1052 lock_type = LOCK_SHARED;
1053 } else {
1054 cerr << "unknown lock type was specified, aborting" << std::endl;
1055 return -EINVAL;
1056 }
1057 }
1058
1059 if (cmd.compare("list") == 0) {
1060 list<string> locks;
1061 int ret = rados::cls::lock::list_locks(ioctx, oid, &locks);
1062 if (ret < 0) {
1063 cerr << "ERROR: rados_list_locks(): " << cpp_strerror(ret) << std::endl;
1064 return ret;
1065 }
1066
1067 formatter->open_object_section("object");
1068 formatter->dump_string("objname", oid);
1069 formatter->open_array_section("locks");
1070 list<string>::iterator iter;
1071 for (iter = locks.begin(); iter != locks.end(); ++iter) {
1072 formatter->open_object_section("lock");
1073 formatter->dump_string("name", *iter);
1074 formatter->close_section();
1075 }
1076 formatter->close_section();
1077 formatter->close_section();
1078 formatter->flush(cout);
1079 return 0;
1080 }
1081
1082 if (nargs.size() < 4)
1083 usage_exit();
1084
1085 string lock_name(nargs[3]);
1086
1087 if (cmd.compare("info") == 0) {
1088 map<rados::cls::lock::locker_id_t, rados::cls::lock::locker_info_t> lockers;
1089 ClsLockType type = LOCK_NONE;
1090 string tag;
1091 int ret = rados::cls::lock::get_lock_info(ioctx, oid, lock_name, &lockers, &type, &tag);
1092 if (ret < 0) {
1093 cerr << "ERROR: rados_lock_get_lock_info(): " << cpp_strerror(ret) << std::endl;
1094 return ret;
1095 }
1096
1097 formatter->open_object_section("lock");
1098 formatter->dump_string("name", lock_name);
1099 formatter->dump_string("type", cls_lock_type_str(type));
1100 formatter->dump_string("tag", tag);
1101 formatter->open_array_section("lockers");
1102 map<rados::cls::lock::locker_id_t, rados::cls::lock::locker_info_t>::iterator iter;
1103 for (iter = lockers.begin(); iter != lockers.end(); ++iter) {
1104 const rados::cls::lock::locker_id_t& id = iter->first;
1105 const rados::cls::lock::locker_info_t& info = iter->second;
1106 formatter->open_object_section("locker");
1107 formatter->dump_stream("name") << id.locker;
1108 formatter->dump_string("cookie", id.cookie);
1109 formatter->dump_string("description", info.description);
1110 formatter->dump_stream("expiration") << info.expiration;
1111 formatter->dump_stream("addr") << info.addr;
1112 formatter->close_section();
1113 }
1114 formatter->close_section();
1115 formatter->close_section();
1116 formatter->flush(cout);
1117
1118 return ret;
1119 } else if (cmd.compare("get") == 0) {
1120 rados::cls::lock::Lock l(lock_name);
1121 l.set_cookie(lock_cookie);
1122 l.set_tag(lock_tag);
1123 l.set_duration(utime_t(lock_duration, 0));
1124 l.set_description(lock_description);
1125 int ret;
1126 switch (lock_type) {
1127 case LOCK_SHARED:
1128 ret = l.lock_shared(ioctx, oid);
1129 break;
1130 default:
1131 ret = l.lock_exclusive(ioctx, oid);
1132 }
1133 if (ret < 0) {
1134 cerr << "ERROR: failed locking: " << cpp_strerror(ret) << std::endl;
1135 return ret;
1136 }
1137
1138 return ret;
1139 }
1140
1141 if (nargs.size() < 5)
1142 usage_exit();
1143
1144 if (cmd.compare("break") == 0) {
1145 string locker(nargs[4]);
1146 rados::cls::lock::Lock l(lock_name);
1147 l.set_cookie(lock_cookie);
1148 l.set_tag(lock_tag);
1149 entity_name_t name;
1150 if (!name.parse(locker)) {
1151 cerr << "ERROR: failed to parse locker name (" << locker << ")" << std::endl;
1152 return -EINVAL;
1153 }
1154 int ret = l.break_lock(ioctx, oid, name);
1155 if (ret < 0) {
1156 cerr << "ERROR: failed breaking lock: " << cpp_strerror(ret) << std::endl;
1157 return ret;
1158 }
1159 } else {
1160 usage_exit();
1161 }
1162
1163 return 0;
1164 }
1165
1166 static int do_cache_flush(IoCtx& io_ctx, string oid)
1167 {
1168 ObjectReadOperation op;
1169 op.cache_flush();
1170 librados::AioCompletion *completion =
1171 librados::Rados::aio_create_completion();
1172 io_ctx.aio_operate(oid.c_str(), completion, &op,
1173 librados::OPERATION_IGNORE_CACHE |
1174 librados::OPERATION_IGNORE_OVERLAY,
1175 NULL);
1176 completion->wait_for_safe();
1177 int r = completion->get_return_value();
1178 completion->release();
1179 return r;
1180 }
1181
1182 static int do_cache_try_flush(IoCtx& io_ctx, string oid)
1183 {
1184 ObjectReadOperation op;
1185 op.cache_try_flush();
1186 librados::AioCompletion *completion =
1187 librados::Rados::aio_create_completion();
1188 io_ctx.aio_operate(oid.c_str(), completion, &op,
1189 librados::OPERATION_IGNORE_CACHE |
1190 librados::OPERATION_IGNORE_OVERLAY |
1191 librados::OPERATION_SKIPRWLOCKS,
1192 NULL);
1193 completion->wait_for_safe();
1194 int r = completion->get_return_value();
1195 completion->release();
1196 return r;
1197 }
1198
1199 static int do_cache_evict(IoCtx& io_ctx, string oid)
1200 {
1201 ObjectReadOperation op;
1202 op.cache_evict();
1203 librados::AioCompletion *completion =
1204 librados::Rados::aio_create_completion();
1205 io_ctx.aio_operate(oid.c_str(), completion, &op,
1206 librados::OPERATION_IGNORE_CACHE |
1207 librados::OPERATION_IGNORE_OVERLAY |
1208 librados::OPERATION_SKIPRWLOCKS,
1209 NULL);
1210 completion->wait_for_safe();
1211 int r = completion->get_return_value();
1212 completion->release();
1213 return r;
1214 }
1215
1216 static int do_cache_flush_evict_all(IoCtx& io_ctx, bool blocking)
1217 {
1218 int errors = 0;
1219 io_ctx.set_namespace(all_nspaces);
1220 try {
1221 librados::NObjectIterator i = io_ctx.nobjects_begin();
1222 librados::NObjectIterator i_end = io_ctx.nobjects_end();
1223 for (; i != i_end; ++i) {
1224 int r;
1225 cout << i->get_nspace() << "\t" << i->get_oid() << "\t" << i->get_locator() << std::endl;
1226 if (i->get_locator().size()) {
1227 io_ctx.locator_set_key(i->get_locator());
1228 } else {
1229 io_ctx.locator_set_key(string());
1230 }
1231 io_ctx.set_namespace(i->get_nspace());
1232 snap_set_t ls;
1233 io_ctx.snap_set_read(LIBRADOS_SNAP_DIR);
1234 r = io_ctx.list_snaps(i->get_oid(), &ls);
1235 if (r < 0) {
1236 cerr << "error listing snap shots " << i->get_nspace() << "/" << i->get_oid() << ": "
1237 << cpp_strerror(r) << std::endl;
1238 ++errors;
1239 continue;
1240 }
1241 std::vector<clone_info_t>::iterator ci = ls.clones.begin();
1242 // no snapshots
1243 if (ci == ls.clones.end()) {
1244 io_ctx.snap_set_read(CEPH_NOSNAP);
1245 if (blocking)
1246 r = do_cache_flush(io_ctx, i->get_oid());
1247 else
1248 r = do_cache_try_flush(io_ctx, i->get_oid());
1249 if (r < 0) {
1250 cerr << "failed to flush " << i->get_nspace() << "/" << i->get_oid() << ": "
1251 << cpp_strerror(r) << std::endl;
1252 ++errors;
1253 continue;
1254 }
1255 r = do_cache_evict(io_ctx, i->get_oid());
1256 if (r < 0) {
1257 cerr << "failed to evict " << i->get_nspace() << "/" << i->get_oid() << ": "
1258 << cpp_strerror(r) << std::endl;
1259 ++errors;
1260 continue;
1261 }
1262 } else {
1263 // has snapshots
1264 for (std::vector<clone_info_t>::iterator ci = ls.clones.begin();
1265 ci != ls.clones.end(); ++ci) {
1266 io_ctx.snap_set_read(ci->cloneid);
1267 if (blocking)
1268 r = do_cache_flush(io_ctx, i->get_oid());
1269 else
1270 r = do_cache_try_flush(io_ctx, i->get_oid());
1271 if (r < 0) {
1272 cerr << "failed to flush " << i->get_nspace() << "/" << i->get_oid() << ": "
1273 << cpp_strerror(r) << std::endl;
1274 ++errors;
1275 break;
1276 }
1277 r = do_cache_evict(io_ctx, i->get_oid());
1278 if (r < 0) {
1279 cerr << "failed to evict " << i->get_nspace() << "/" << i->get_oid() << ": "
1280 << cpp_strerror(r) << std::endl;
1281 ++errors;
1282 break;
1283 }
1284 }
1285 }
1286 }
1287 }
1288 catch (const std::exception& e) {
1289 cerr << e.what() << std::endl;
1290 return -1;
1291 }
1292 return errors ? -1 : 0;
1293 }
1294
1295 static int do_get_inconsistent_pg_cmd(const std::vector<const char*> &nargs,
1296 Rados& rados,
1297 Formatter& formatter)
1298 {
1299 if (nargs.size() < 2) {
1300 usage_exit();
1301 }
1302 int64_t pool_id = rados.pool_lookup(nargs[1]);
1303 if (pool_id < 0) {
1304 cerr << "pool \"" << nargs[1] << "\" not found" << std::endl;
1305 return (int)pool_id;
1306 }
1307 std::vector<PlacementGroup> pgs;
1308 int ret = rados.get_inconsistent_pgs(pool_id, &pgs);
1309 if (ret) {
1310 return ret;
1311 }
1312 formatter.open_array_section("pgs");
1313 for (auto& pg : pgs) {
1314 formatter.dump_stream("pg") << pg;
1315 }
1316 formatter.close_section();
1317 formatter.flush(cout);
1318 cout << std::endl;
1319 return 0;
1320 }
1321
1322 static void dump_errors(const err_t &err, Formatter &f, const char *name)
1323 {
1324 f.open_array_section(name);
1325 if (err.has_shard_missing())
1326 f.dump_string("error", "missing");
1327 if (err.has_stat_error())
1328 f.dump_string("error", "stat_error");
1329 if (err.has_read_error())
1330 f.dump_string("error", "read_error");
1331 if (err.has_data_digest_mismatch_info())
1332 f.dump_string("error", "data_digest_mismatch_info");
1333 if (err.has_omap_digest_mismatch_info())
1334 f.dump_string("error", "omap_digest_mismatch_info");
1335 if (err.has_size_mismatch_info())
1336 f.dump_string("error", "size_mismatch_info");
1337 if (err.has_ec_hash_error())
1338 f.dump_string("error", "ec_hash_error");
1339 if (err.has_ec_size_error())
1340 f.dump_string("error", "ec_size_error");
1341 if (err.has_info_missing())
1342 f.dump_string("error", "info_missing");
1343 if (err.has_info_corrupted())
1344 f.dump_string("error", "info_corrupted");
1345 if (err.has_obj_size_info_mismatch())
1346 f.dump_string("error", "obj_size_info_mismatch");
1347 if (err.has_snapset_missing())
1348 f.dump_string("error", "snapset_missing");
1349 if (err.has_snapset_corrupted())
1350 f.dump_string("error", "snapset_corrupted");
1351 if (err.has_hinfo_missing())
1352 f.dump_string("error", "hinfo_missing");
1353 if (err.has_hinfo_corrupted())
1354 f.dump_string("error", "hinfo_corrupted");
1355 f.close_section();
1356 }
1357
1358 static void dump_shard(const shard_info_t& shard,
1359 const inconsistent_obj_t& inc,
1360 Formatter &f)
1361 {
1362 dump_errors(shard, f, "errors");
1363
1364 if (shard.has_shard_missing())
1365 return;
1366
1367 if (!shard.has_stat_error())
1368 f.dump_unsigned("size", shard.size);
1369 if (shard.omap_digest_present) {
1370 f.dump_format("omap_digest", "0x%08x", shard.omap_digest);
1371 }
1372 if (shard.data_digest_present) {
1373 f.dump_format("data_digest", "0x%08x", shard.data_digest);
1374 }
1375
1376 if ((inc.union_shards.has_info_missing()
1377 || inc.union_shards.has_info_corrupted()
1378 || inc.has_object_info_inconsistency()
1379 || shard.has_obj_size_info_mismatch()) &&
1380 !shard.has_info_missing()) {
1381 map<std::string, ceph::bufferlist>::iterator k = (const_cast<shard_info_t&>(shard)).attrs.find(OI_ATTR);
1382 assert(k != shard.attrs.end()); // Can't be missing
1383 if (!shard.has_info_corrupted()) {
1384 object_info_t oi;
1385 bufferlist bl;
1386 bufferlist::iterator bliter = k->second.begin();
1387 ::decode(oi, bliter); // Can't be corrupted
1388 f.open_object_section("object_info");
1389 oi.dump(&f);
1390 f.close_section();
1391 } else {
1392 bool b64;
1393 f.dump_string("object_info", cleanbin(k->second, b64));
1394 }
1395 }
1396 if ((inc.union_shards.has_snapset_missing()
1397 || inc.union_shards.has_snapset_corrupted()
1398 || inc.has_snapset_inconsistency()) &&
1399 !shard.has_snapset_missing()) {
1400 map<std::string, ceph::bufferlist>::iterator k = (const_cast<shard_info_t&>(shard)).attrs.find(SS_ATTR);
1401 assert(k != shard.attrs.end()); // Can't be missing
1402 if (!shard.has_snapset_corrupted()) {
1403 SnapSet ss;
1404 bufferlist bl;
1405 bufferlist::iterator bliter = k->second.begin();
1406 decode(ss, bliter); // Can't be corrupted
1407 f.open_object_section("snapset");
1408 ss.dump(&f);
1409 f.close_section();
1410 } else {
1411 bool b64;
1412 f.dump_string("snapset", cleanbin(k->second, b64));
1413 }
1414 }
1415 if ((inc.union_shards.has_hinfo_missing()
1416 || inc.union_shards.has_hinfo_corrupted()
1417 || inc.has_hinfo_inconsistency()) &&
1418 !shard.has_hinfo_missing()) {
1419 map<std::string, ceph::bufferlist>::iterator k = (const_cast<shard_info_t&>(shard)).attrs.find(ECUtil::get_hinfo_key());
1420 assert(k != shard.attrs.end()); // Can't be missing
1421 if (!shard.has_hinfo_corrupted()) {
1422 ECUtil::HashInfo hi;
1423 bufferlist bl;
1424 bufferlist::iterator bliter = k->second.begin();
1425 decode(hi, bliter); // Can't be corrupted
1426 f.open_object_section("hashinfo");
1427 hi.dump(&f);
1428 f.close_section();
1429 } else {
1430 bool b64;
1431 f.dump_string("hashinfo", cleanbin(k->second, b64));
1432 }
1433 }
1434 if (inc.has_attr_name_mismatch() || inc.has_attr_value_mismatch()) {
1435 f.open_array_section("attrs");
1436 for (auto kv : shard.attrs) {
1437 // System attribute handled above
1438 if (kv.first == OI_ATTR || kv.first[0] != '_')
1439 continue;
1440 f.open_object_section("attr");
1441 // Skip leading underscore since only giving user attrs
1442 f.dump_string("name", kv.first.substr(1));
1443 bool b64;
1444 f.dump_string("value", cleanbin(kv.second, b64));
1445 f.dump_bool("Base64", b64);
1446 f.close_section();
1447 }
1448 f.close_section();
1449 }
1450 }
1451
1452 static void dump_obj_errors(const obj_err_t &err, Formatter &f)
1453 {
1454 f.open_array_section("errors");
1455 if (err.has_object_info_inconsistency())
1456 f.dump_string("error", "object_info_inconsistency");
1457 if (err.has_data_digest_mismatch())
1458 f.dump_string("error", "data_digest_mismatch");
1459 if (err.has_omap_digest_mismatch())
1460 f.dump_string("error", "omap_digest_mismatch");
1461 if (err.has_size_mismatch())
1462 f.dump_string("error", "size_mismatch");
1463 if (err.has_attr_value_mismatch())
1464 f.dump_string("error", "attr_value_mismatch");
1465 if (err.has_attr_name_mismatch())
1466 f.dump_string("error", "attr_name_mismatch");
1467 if (err.has_snapset_inconsistency())
1468 f.dump_string("error", "snapset_inconsistency");
1469 if (err.has_hinfo_inconsistency())
1470 f.dump_string("error", "hinfo_inconsistency");
1471 if (err.has_size_too_large())
1472 f.dump_string("error", "size_too_large");
1473 f.close_section();
1474 }
1475
1476 static void dump_object_id(const object_id_t& object,
1477 Formatter &f)
1478 {
1479 f.dump_string("name", object.name);
1480 f.dump_string("nspace", object.nspace);
1481 f.dump_string("locator", object.locator);
1482 switch (object.snap) {
1483 case CEPH_NOSNAP:
1484 f.dump_string("snap", "head");
1485 break;
1486 case CEPH_SNAPDIR:
1487 f.dump_string("snap", "snapdir");
1488 break;
1489 default:
1490 f.dump_unsigned("snap", object.snap);
1491 break;
1492 }
1493 }
1494
1495 static void dump_inconsistent(const inconsistent_obj_t& inc,
1496 Formatter &f)
1497 {
1498 f.open_object_section("object");
1499 dump_object_id(inc.object, f);
1500 f.dump_unsigned("version", inc.version);
1501 f.close_section();
1502
1503 dump_obj_errors(inc, f);
1504 dump_errors(inc.union_shards, f, "union_shard_errors");
1505 for (const auto& shard_info : inc.shards) {
1506 shard_info_t shard = const_cast<shard_info_t&>(shard_info.second);
1507 if (shard.selected_oi) {
1508 object_info_t oi;
1509 bufferlist bl;
1510 auto k = shard.attrs.find(OI_ATTR);
1511 assert(k != shard.attrs.end()); // Can't be missing
1512 bufferlist::iterator bliter = k->second.begin();
1513 ::decode(oi, bliter); // Can't be corrupted
1514 f.open_object_section("selected_object_info");
1515 oi.dump(&f);
1516 f.close_section();
1517 break;
1518 }
1519 }
1520 f.open_array_section("shards");
1521 for (const auto& shard_info : inc.shards) {
1522 f.open_object_section("shard");
1523 auto& osd_shard = shard_info.first;
1524 f.dump_int("osd", osd_shard.osd);
1525 f.dump_bool("primary", shard_info.second.primary);
1526 auto shard = osd_shard.shard;
1527 if (shard != shard_id_t::NO_SHARD)
1528 f.dump_unsigned("shard", shard);
1529 dump_shard(shard_info.second, inc, f);
1530 f.close_section();
1531 }
1532 f.close_section();
1533 }
1534
1535 static void dump_inconsistent(const inconsistent_snapset_t& inc,
1536 Formatter &f)
1537 {
1538 dump_object_id(inc.object, f);
1539
1540 if (inc.ss_bl.length()) {
1541 SnapSet ss;
1542 bufferlist bl = inc.ss_bl;
1543 bufferlist::iterator bliter = bl.begin();
1544 decode(ss, bliter); // Can't be corrupted
1545 f.open_object_section("snapset");
1546 ss.dump(&f);
1547 f.close_section();
1548 }
1549 f.open_array_section("errors");
1550 if (inc.snapset_missing())
1551 f.dump_string("error", "snapset_missing");
1552 if (inc.snapset_corrupted())
1553 f.dump_string("error", "snapset_corrupted");
1554 if (inc.info_missing())
1555 f.dump_string("error", "info_missing");
1556 if (inc.info_corrupted())
1557 f.dump_string("error", "info_corrupted");
1558 if (inc.snapset_error())
1559 f.dump_string("error", "snapset_error");
1560 if (inc.head_mismatch())
1561 f.dump_string("error", "head_mismatch");
1562 if (inc.headless())
1563 f.dump_string("error", "headless");
1564 if (inc.size_mismatch())
1565 f.dump_string("error", "size_mismatch");
1566 if (inc.extra_clones())
1567 f.dump_string("error", "extra_clones");
1568 if (inc.clone_missing())
1569 f.dump_string("error", "clone_missing");
1570 f.close_section();
1571
1572 if (inc.extra_clones()) {
1573 f.open_array_section("extra clones");
1574 for (auto snap : inc.clones) {
1575 f.dump_unsigned("snap", snap);
1576 }
1577 f.close_section();
1578 }
1579
1580 if (inc.clone_missing()) {
1581 f.open_array_section("missing");
1582 for (auto snap : inc.missing) {
1583 f.dump_unsigned("snap", snap);
1584 }
1585 f.close_section();
1586 }
1587 }
1588
1589 // dispatch the call by type
1590 static int do_get_inconsistent(Rados& rados,
1591 const PlacementGroup& pg,
1592 const librados::object_id_t &start,
1593 unsigned max_return,
1594 AioCompletion *c,
1595 std::vector<inconsistent_obj_t>* objs,
1596 uint32_t* interval)
1597 {
1598 return rados.get_inconsistent_objects(pg, start, max_return, c,
1599 objs, interval);
1600 }
1601
1602 static int do_get_inconsistent(Rados& rados,
1603 const PlacementGroup& pg,
1604 const librados::object_id_t &start,
1605 unsigned max_return,
1606 AioCompletion *c,
1607 std::vector<inconsistent_snapset_t>* snapsets,
1608 uint32_t* interval)
1609 {
1610 return rados.get_inconsistent_snapsets(pg, start, max_return, c,
1611 snapsets, interval);
1612 }
1613
1614 template <typename T>
1615 static int do_get_inconsistent_cmd(const std::vector<const char*> &nargs,
1616 Rados& rados,
1617 Formatter& formatter)
1618 {
1619 if (nargs.size() < 2) {
1620 usage_exit();
1621 }
1622 PlacementGroup pg;
1623 int ret = 0;
1624 ret = pg.parse(nargs[1]);
1625 if (!ret) {
1626 cerr << "bad pg: " << nargs[1] << std::endl;
1627 return ret;
1628 }
1629 uint32_t interval = 0, first_interval = 0;
1630 const unsigned max_item_num = 32;
1631 bool opened = false;
1632 for (librados::object_id_t start;;) {
1633 std::vector<T> items;
1634 auto completion = librados::Rados::aio_create_completion();
1635 ret = do_get_inconsistent(rados, pg, start, max_item_num, completion,
1636 &items, &interval);
1637 completion->wait_for_safe();
1638 ret = completion->get_return_value();
1639 completion->release();
1640 if (ret < 0) {
1641 if (ret == -EAGAIN)
1642 cerr << "interval#" << interval << " expired." << std::endl;
1643 else if (ret == -ENOENT)
1644 cerr << "No scrub information available for pg " << pg << std::endl;
1645 else
1646 cerr << "Unknown error " << cpp_strerror(ret) << std::endl;
1647 break;
1648 }
1649 // It must be the same interval every time. EAGAIN would
1650 // occur if interval changes.
1651 assert(start.name.empty() || first_interval == interval);
1652 if (start.name.empty()) {
1653 first_interval = interval;
1654 formatter.open_object_section("info");
1655 formatter.dump_int("epoch", interval);
1656 formatter.open_array_section("inconsistents");
1657 opened = true;
1658 }
1659 for (auto& inc : items) {
1660 formatter.open_object_section("inconsistent");
1661 dump_inconsistent(inc, formatter);
1662 formatter.close_section();
1663 }
1664 if (items.size() < max_item_num) {
1665 formatter.close_section();
1666 break;
1667 }
1668 if (!items.empty()) {
1669 start = items.back().object;
1670 }
1671 items.clear();
1672 }
1673 if (opened) {
1674 formatter.close_section();
1675 formatter.flush(cout);
1676 }
1677 return ret;
1678 }
1679
1680 /**********************************************
1681
1682 **********************************************/
1683 static int rados_tool_common(const std::map < std::string, std::string > &opts,
1684 std::vector<const char*> &nargs)
1685 {
1686 int ret;
1687 bool create_pool = false;
1688 const char *pool_name = NULL;
1689 const char *target_pool_name = NULL;
1690 string oloc, target_oloc, nspace, target_nspace;
1691 int concurrent_ios = 16;
1692 unsigned op_size = default_op_size;
1693 unsigned object_size = 0;
1694 unsigned max_objects = 0;
1695 uint64_t obj_offset = 0;
1696 bool block_size_specified = false;
1697 int bench_write_dest = 0;
1698 bool cleanup = true;
1699 bool hints = true; // for rados bench
1700 bool no_verify = false;
1701 bool use_striper = false;
1702 bool with_clones = false;
1703 const char *snapname = NULL;
1704 snap_t snapid = CEPH_NOSNAP;
1705 std::map<std::string, std::string>::const_iterator i;
1706
1707 uint64_t min_obj_len = 0;
1708 uint64_t max_obj_len = 0;
1709 uint64_t min_op_len = 0;
1710 uint64_t max_op_len = 0;
1711 uint64_t max_ops = 0;
1712 uint64_t max_backlog = 0;
1713 uint64_t target_throughput = 0;
1714 int64_t read_percent = -1;
1715 uint64_t num_objs = 0;
1716 int run_length = 0;
1717
1718 bool show_time = false;
1719 bool wildcard = false;
1720
1721 std::string run_name;
1722 std::string prefix;
1723 bool forcefull = false;
1724 Formatter *formatter = NULL;
1725 bool pretty_format = false;
1726 const char *output = NULL;
1727 bool omap_key_valid = false;
1728 std::string omap_key;
1729 std::string omap_key_pretty;
1730
1731 Rados rados;
1732 IoCtx io_ctx;
1733 RadosStriper striper;
1734
1735 i = opts.find("create");
1736 if (i != opts.end()) {
1737 create_pool = true;
1738 }
1739 i = opts.find("pool");
1740 if (i != opts.end()) {
1741 pool_name = i->second.c_str();
1742 }
1743 i = opts.find("target_pool");
1744 if (i != opts.end()) {
1745 target_pool_name = i->second.c_str();
1746 }
1747 i = opts.find("object_locator");
1748 if (i != opts.end()) {
1749 oloc = i->second;
1750 }
1751 i = opts.find("target_locator");
1752 if (i != opts.end()) {
1753 target_oloc = i->second;
1754 }
1755 i = opts.find("target_nspace");
1756 if (i != opts.end()) {
1757 target_nspace = i->second;
1758 }
1759 i = opts.find("concurrent-ios");
1760 if (i != opts.end()) {
1761 if (rados_sistrtoll(i, &concurrent_ios)) {
1762 return -EINVAL;
1763 }
1764 }
1765 i = opts.find("run-name");
1766 if (i != opts.end()) {
1767 run_name = i->second;
1768 }
1769
1770 i = opts.find("force-full");
1771 if (i != opts.end()) {
1772 forcefull = true;
1773 }
1774 i = opts.find("prefix");
1775 if (i != opts.end()) {
1776 prefix = i->second;
1777 }
1778 i = opts.find("block-size");
1779 if (i != opts.end()) {
1780 if (rados_sistrtoll(i, &op_size)) {
1781 return -EINVAL;
1782 }
1783 block_size_specified = true;
1784 }
1785 i = opts.find("object-size");
1786 if (i != opts.end()) {
1787 if (rados_sistrtoll(i, &object_size)) {
1788 return -EINVAL;
1789 }
1790 block_size_specified = true;
1791 }
1792 i = opts.find("max-objects");
1793 if (i != opts.end()) {
1794 if (rados_sistrtoll(i, &max_objects)) {
1795 return -EINVAL;
1796 }
1797 }
1798 i = opts.find("offset");
1799 if (i != opts.end()) {
1800 if (rados_sistrtoll(i, &obj_offset)) {
1801 return -EINVAL;
1802 }
1803 }
1804 i = opts.find("snap");
1805 if (i != opts.end()) {
1806 snapname = i->second.c_str();
1807 }
1808 i = opts.find("snapid");
1809 if (i != opts.end()) {
1810 if (rados_sistrtoll(i, &snapid)) {
1811 return -EINVAL;
1812 }
1813 }
1814 i = opts.find("min-object-size");
1815 if (i != opts.end()) {
1816 if (rados_sistrtoll(i, &min_obj_len)) {
1817 return -EINVAL;
1818 }
1819 }
1820 i = opts.find("max-object-size");
1821 if (i != opts.end()) {
1822 if (rados_sistrtoll(i, &max_obj_len)) {
1823 return -EINVAL;
1824 }
1825 }
1826 i = opts.find("min-op-len");
1827 if (i != opts.end()) {
1828 if (rados_sistrtoll(i, &min_op_len)) {
1829 return -EINVAL;
1830 }
1831 }
1832 i = opts.find("max-op-len");
1833 if (i != opts.end()) {
1834 if (rados_sistrtoll(i, &max_op_len)) {
1835 return -EINVAL;
1836 }
1837 }
1838 i = opts.find("max-ops");
1839 if (i != opts.end()) {
1840 if (rados_sistrtoll(i, &max_ops)) {
1841 return -EINVAL;
1842 }
1843 }
1844 i = opts.find("max-backlog");
1845 if (i != opts.end()) {
1846 if (rados_sistrtoll(i, &max_backlog)) {
1847 return -EINVAL;
1848 }
1849 }
1850 i = opts.find("target-throughput");
1851 if (i != opts.end()) {
1852 if (rados_sistrtoll(i, &target_throughput)) {
1853 return -EINVAL;
1854 }
1855 }
1856 i = opts.find("read-percent");
1857 if (i != opts.end()) {
1858 if (rados_sistrtoll(i, &read_percent)) {
1859 return -EINVAL;
1860 }
1861 }
1862 i = opts.find("num-objects");
1863 if (i != opts.end()) {
1864 if (rados_sistrtoll(i, &num_objs)) {
1865 return -EINVAL;
1866 }
1867 }
1868 i = opts.find("run-length");
1869 if (i != opts.end()) {
1870 if (rados_sistrtoll(i, &run_length)) {
1871 return -EINVAL;
1872 }
1873 }
1874 i = opts.find("show-time");
1875 if (i != opts.end()) {
1876 show_time = true;
1877 }
1878 i = opts.find("no-cleanup");
1879 if (i != opts.end()) {
1880 cleanup = false;
1881 }
1882 i = opts.find("no-hints");
1883 if (i != opts.end()) {
1884 hints = false;
1885 }
1886 i = opts.find("pretty-format");
1887 if (i != opts.end()) {
1888 pretty_format = true;
1889 }
1890 i = opts.find("format");
1891 if (i != opts.end()) {
1892 const char *format = i->second.c_str();
1893 formatter = Formatter::create(format);
1894 if (!formatter) {
1895 cerr << "unrecognized format: " << format << std::endl;
1896 return -EINVAL;
1897 }
1898 }
1899 i = opts.find("namespace");
1900 if (i != opts.end()) {
1901 nspace = i->second;
1902 }
1903 i = opts.find("no-verify");
1904 if (i != opts.end()) {
1905 no_verify = true;
1906 }
1907 i = opts.find("output");
1908 if (i != opts.end()) {
1909 output = i->second.c_str();
1910 }
1911 i = opts.find("write-dest-obj");
1912 if (i != opts.end()) {
1913 bench_write_dest |= static_cast<int>(OP_WRITE_DEST_OBJ);
1914 }
1915 i = opts.find("write-dest-omap");
1916 if (i != opts.end()) {
1917 bench_write_dest |= static_cast<int>(OP_WRITE_DEST_OMAP);
1918 }
1919 i = opts.find("write-dest-xattr");
1920 if (i != opts.end()) {
1921 bench_write_dest |= static_cast<int>(OP_WRITE_DEST_XATTR);
1922 }
1923 i = opts.find("with-clones");
1924 if (i != opts.end()) {
1925 with_clones = true;
1926 }
1927 i = opts.find("omap-key-file");
1928 if (i != opts.end()) {
1929 string err;
1930 bufferlist indata;
1931 ret = indata.read_file(i->second.c_str(), &err);
1932 if (ret < 0) {
1933 cerr << err << std::endl;
1934 return 1;
1935 }
1936
1937 omap_key_valid = true;
1938 omap_key = std::string(indata.c_str(), indata.length());
1939 omap_key_pretty = omap_key;
1940 if (std::find_if_not(omap_key.begin(), omap_key.end(),
1941 (int (*)(int))isprint) != omap_key.end()) {
1942 omap_key_pretty = "(binary key)";
1943 }
1944 }
1945
1946 // open rados
1947 ret = rados.init_with_context(g_ceph_context);
1948 if (ret < 0) {
1949 cerr << "couldn't initialize rados: " << cpp_strerror(ret) << std::endl;
1950 return 1;
1951 }
1952
1953 ret = rados.connect();
1954 if (ret) {
1955 cerr << "couldn't connect to cluster: " << cpp_strerror(ret) << std::endl;
1956 return 1;
1957 }
1958
1959 if (create_pool && !pool_name) {
1960 cerr << "--create-pool requested but pool_name was not specified!" << std::endl;
1961 usage(cerr);
1962 return 1;
1963 }
1964
1965 if (create_pool) {
1966 ret = rados.pool_create(pool_name, 0, 0);
1967 if (ret < 0) {
1968 cerr << "error creating pool " << pool_name << ": "
1969 << cpp_strerror(ret) << std::endl;
1970 return 1;
1971 }
1972 }
1973
1974 i = opts.find("pgid");
1975 boost::optional<pg_t> pgid(i != opts.end(), pg_t());
1976 if (pgid && (!pgid->parse(i->second.c_str()) || (pool_name && rados.pool_lookup(pool_name) != pgid->pool()))) {
1977 cerr << "invalid pgid" << std::endl;
1978 return 1;
1979 }
1980
1981 // open io context.
1982 if (pool_name || pgid) {
1983 ret = pool_name ? rados.ioctx_create(pool_name, io_ctx) : rados.ioctx_create2(pgid->pool(), io_ctx);
1984 if (ret < 0) {
1985 cerr << "error opening pool "
1986 << (pool_name ? pool_name : std::string("with id ") + std::to_string(pgid->pool())) << ": "
1987 << cpp_strerror(ret) << std::endl;
1988 return 1;
1989 }
1990
1991 // align op_size
1992 {
1993 bool requires;
1994 ret = io_ctx.pool_requires_alignment2(&requires);
1995 if (ret < 0) {
1996 cerr << "error checking pool alignment requirement"
1997 << cpp_strerror(ret) << std::endl;
1998 return 1;
1999 }
2000
2001 if (requires) {
2002 uint64_t align = 0;
2003 ret = io_ctx.pool_required_alignment2(&align);
2004 if (ret < 0) {
2005 cerr << "error getting pool alignment"
2006 << cpp_strerror(ret) << std::endl;
2007 return 1;
2008 }
2009
2010 const uint64_t prev_op_size = op_size;
2011 op_size = uint64_t((op_size + align - 1) / align) * align;
2012 // Warn: if user specified and it was rounded
2013 if (prev_op_size != default_op_size && prev_op_size != op_size)
2014 cerr << "INFO: op_size has been rounded to " << op_size << std::endl;
2015 }
2016 }
2017
2018 // create striper interface
2019 if (opts.find("striper") != opts.end()) {
2020 ret = RadosStriper::striper_create(io_ctx, &striper);
2021 if (0 != ret) {
2022 cerr << "error opening pool " << pool_name << " with striper interface: "
2023 << cpp_strerror(ret) << std::endl;
2024 return 1;
2025 }
2026 use_striper = true;
2027 }
2028 }
2029
2030 // snapname?
2031 if (snapname) {
2032 if (!pool_name) {
2033 cerr << "pool name must be specified with --snap" << std::endl;
2034 return 1;
2035 }
2036 ret = io_ctx.snap_lookup(snapname, &snapid);
2037 if (ret < 0) {
2038 cerr << "error looking up snap '" << snapname << "': " << cpp_strerror(ret) << std::endl;
2039 return 1;
2040 }
2041 }
2042 if (oloc.size()) {
2043 if (!pool_name) {
2044 cerr << "pool name must be specified with --object_locator" << std::endl;
2045 return 1;
2046 }
2047 io_ctx.locator_set_key(oloc);
2048 }
2049 // Use namespace from command line if specified
2050 if (opts.find("namespace") != opts.end()) {
2051 if (!pool_name) {
2052 cerr << "pool name must be specified with --namespace" << std::endl;
2053 return 1;
2054 }
2055 io_ctx.set_namespace(nspace);
2056 // Use wildcard if --all specified and --default NOT specified
2057 } else if (opts.find("all") != opts.end() && opts.find("default") == opts.end()) {
2058 // Only the ls should ever set namespace to special value
2059 wildcard = true;
2060 }
2061 if (snapid != CEPH_NOSNAP) {
2062 if (!pool_name) {
2063 cerr << "pool name must be specified with --snapid" << std::endl;
2064 return 1;
2065 }
2066 string name;
2067 ret = io_ctx.snap_get_name(snapid, &name);
2068 if (ret < 0) {
2069 cerr << "snapid " << snapid << " doesn't exist in pool "
2070 << io_ctx.get_pool_name() << std::endl;
2071 return 1;
2072 }
2073 io_ctx.snap_set_read(snapid);
2074 cout << "selected snap " << snapid << " '" << name << "'" << std::endl;
2075 }
2076
2077 assert(!nargs.empty());
2078
2079 // list pools?
2080 if (strcmp(nargs[0], "lspools") == 0) {
2081 list<string> vec;
2082 ret = rados.pool_list(vec);
2083 if (ret < 0) {
2084 cerr << "error listing pools: " << cpp_strerror(ret) << std::endl;
2085 return 1;
2086 }
2087 for (list<string>::iterator i = vec.begin(); i != vec.end(); ++i)
2088 cout << *i << std::endl;
2089 }
2090 else if (strcmp(nargs[0], "df") == 0) {
2091 // pools
2092 list<string> vec;
2093
2094 if (!pool_name) {
2095 ret = rados.pool_list(vec);
2096 if (ret < 0) {
2097 cerr << "error listing pools: " << cpp_strerror(ret) << std::endl;
2098 return 1;
2099 }
2100 } else {
2101 vec.push_back(pool_name);
2102 }
2103
2104 map<string,librados::pool_stat_t> stats;
2105 ret = rados.get_pool_stats(vec, stats);
2106 if (ret < 0) {
2107 cerr << "error fetching pool stats: " << cpp_strerror(ret) << std::endl;
2108 return 1;
2109 }
2110
2111 TextTable tab;
2112
2113 if (!formatter) {
2114 tab.define_column("POOL_NAME", TextTable::LEFT, TextTable::LEFT);
2115 tab.define_column("USED", TextTable::LEFT, TextTable::RIGHT);
2116 tab.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT);
2117 tab.define_column("CLONES", TextTable::LEFT, TextTable::RIGHT);
2118 tab.define_column("COPIES", TextTable::LEFT, TextTable::RIGHT);
2119 tab.define_column("MISSING_ON_PRIMARY", TextTable::LEFT, TextTable::RIGHT);
2120 tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT);
2121 tab.define_column("DEGRADED", TextTable::LEFT, TextTable::RIGHT);
2122 tab.define_column("RD_OPS", TextTable::LEFT, TextTable::RIGHT);
2123 tab.define_column("RD", TextTable::LEFT, TextTable::RIGHT);
2124 tab.define_column("WR_OPS", TextTable::LEFT, TextTable::RIGHT);
2125 tab.define_column("WR", TextTable::LEFT, TextTable::RIGHT);
2126 } else {
2127 formatter->open_object_section("stats");
2128 formatter->open_array_section("pools");
2129 }
2130 for (map<string,librados::pool_stat_t>::iterator i = stats.begin();
2131 i != stats.end();
2132 ++i) {
2133 const char *pool_name = i->first.c_str();
2134 librados::pool_stat_t& s = i->second;
2135 if (!formatter) {
2136 tab << pool_name
2137 << byte_u_t(s.num_bytes)
2138 << s.num_objects
2139 << s.num_object_clones
2140 << s.num_object_copies
2141 << s.num_objects_missing_on_primary
2142 << s.num_objects_unfound
2143 << s.num_objects_degraded
2144 << s.num_rd
2145 << byte_u_t(s.num_rd_kb << 10)
2146 << s.num_wr
2147 << byte_u_t(s.num_wr_kb << 10)
2148 << TextTable::endrow;
2149 } else {
2150 formatter->open_object_section("pool");
2151 int64_t pool_id = rados.pool_lookup(pool_name);
2152 formatter->dump_string("name", pool_name);
2153 if (pool_id >= 0)
2154 formatter->dump_int("id", pool_id);
2155 else
2156 cerr << "ERROR: lookup_pg_pool_name for name=" << pool_name
2157 << " returned " << pool_id << std::endl;
2158 formatter->dump_int("size_bytes",s.num_bytes);
2159 formatter->dump_int("size_kb", s.num_kb);
2160 formatter->dump_int("num_objects", s.num_objects);
2161 formatter->dump_int("num_object_clones", s.num_object_clones);
2162 formatter->dump_int("num_object_copies", s.num_object_copies);
2163 formatter->dump_int("num_objects_missing_on_primary", s.num_objects_missing_on_primary);
2164 formatter->dump_int("num_objects_unfound", s.num_objects_unfound);
2165 formatter->dump_int("num_objects_degraded", s.num_objects_degraded);
2166 formatter->dump_int("read_ops", s.num_rd);
2167 formatter->dump_int("read_bytes", s.num_rd_kb * 1024ull);
2168 formatter->dump_int("write_ops", s.num_wr);
2169 formatter->dump_int("write_bytes", s.num_wr_kb * 1024ull);
2170 formatter->close_section();
2171 }
2172 }
2173
2174 if (!formatter) {
2175 cout << tab;
2176 }
2177
2178 // total
2179 cluster_stat_t tstats;
2180 ret = rados.cluster_stat(tstats);
2181 if (ret < 0) {
2182 cerr << "error getting total cluster usage: " << cpp_strerror(ret) << std::endl;
2183 return 1;
2184 }
2185 if (!formatter) {
2186 cout << std::endl;
2187 cout << "total_objects " << tstats.num_objects
2188 << std::endl;
2189 cout << "total_used " << byte_u_t(tstats.kb_used << 10)
2190 << std::endl;
2191 cout << "total_avail " << byte_u_t(tstats.kb_avail << 10)
2192 << std::endl;
2193 cout << "total_space " << byte_u_t(tstats.kb << 10)
2194 << std::endl;
2195 } else {
2196 formatter->close_section();
2197 formatter->dump_int("total_objects", tstats.num_objects);
2198 formatter->dump_int("total_used", tstats.kb_used);
2199 formatter->dump_int("total_avail", tstats.kb_avail);
2200 formatter->dump_int("total_space", tstats.kb);
2201 formatter->close_section();
2202 formatter->flush(cout);
2203 }
2204 }
2205
2206 else if (strcmp(nargs[0], "ls") == 0) {
2207 if (!pool_name && !pgid) {
2208 cerr << "either pool name or pg id needs to be specified" << std::endl;
2209 return 1;
2210 }
2211
2212 if (wildcard)
2213 io_ctx.set_namespace(all_nspaces);
2214 bool use_stdout = (nargs.size() < 2) || (strcmp(nargs[1], "-") == 0);
2215 ostream *outstream;
2216 if(use_stdout)
2217 outstream = &cout;
2218 else
2219 outstream = new ofstream(nargs[1]);
2220
2221 {
2222 if (formatter)
2223 formatter->open_array_section("objects");
2224 try {
2225 librados::NObjectIterator i = pgid ? io_ctx.nobjects_begin(pgid->ps()) : io_ctx.nobjects_begin();
2226 librados::NObjectIterator i_end = io_ctx.nobjects_end();
2227 for (; i != i_end; ++i) {
2228 if (use_striper) {
2229 // in case of --striper option, we only list striped
2230 // objects, so we only display the first object of
2231 // each, without its suffix '.000...000'
2232 size_t l = i->get_oid().length();
2233 if (l <= 17 ||
2234 (0 != i->get_oid().compare(l-17, 17,".0000000000000000"))) continue;
2235 }
2236 if (pgid) {
2237 uint32_t ps;
2238 if (io_ctx.get_object_pg_hash_position2(i->get_oid(), &ps) || pgid->ps() != ps)
2239 break;
2240 }
2241 if (!formatter) {
2242 // Only include namespace in output when wildcard specified
2243 if (wildcard)
2244 *outstream << i->get_nspace() << "\t";
2245 if (use_striper) {
2246 *outstream << i->get_oid().substr(0, i->get_oid().length()-17);
2247 } else {
2248 *outstream << i->get_oid();
2249 }
2250 if (i->get_locator().size())
2251 *outstream << "\t" << i->get_locator();
2252 *outstream << std::endl;
2253 } else {
2254 formatter->open_object_section("object");
2255 formatter->dump_string("namespace", i->get_nspace());
2256 if (use_striper) {
2257 formatter->dump_string("name", i->get_oid().substr(0, i->get_oid().length()-17));
2258 } else {
2259 formatter->dump_string("name", i->get_oid());
2260 }
2261 if (i->get_locator().size())
2262 formatter->dump_string("locator", i->get_locator());
2263 formatter->close_section(); //object
2264 }
2265 }
2266 }
2267 catch (const std::exception& e) {
2268 cerr << e.what() << std::endl;
2269 return 1;
2270 }
2271 }
2272 if (formatter) {
2273 formatter->close_section(); //objects
2274 formatter->flush(*outstream);
2275 if (pretty_format)
2276 *outstream << std::endl;
2277 formatter->flush(*outstream);
2278 }
2279 if (!stdout)
2280 delete outstream;
2281 }
2282 else if (strcmp(nargs[0], "chown") == 0) {
2283 if (!pool_name || nargs.size() < 2)
2284 usage_exit();
2285
2286 char* endptr = NULL;
2287 uint64_t new_auid = strtol(nargs[1], &endptr, 10);
2288 if (*endptr) {
2289 cerr << "Invalid value for new-auid: '" << nargs[1] << "'" << std::endl;
2290 ret = -1;
2291 goto out;
2292 }
2293 ret = io_ctx.set_auid(new_auid);
2294 if (ret < 0) {
2295 cerr << "error changing auid on pool " << io_ctx.get_pool_name() << ':'
2296 << cpp_strerror(ret) << std::endl;
2297 } else cerr << "changed auid on pool " << io_ctx.get_pool_name()
2298 << " to " << new_auid << std::endl;
2299 }
2300 else if (strcmp(nargs[0], "mapext") == 0) {
2301 if (!pool_name || nargs.size() < 2) {
2302 usage(cerr);
2303 return 1;
2304 }
2305 string oid(nargs[1]);
2306 std::map<uint64_t,uint64_t> m;
2307 ret = io_ctx.mapext(oid, 0, -1, m);
2308 if (ret < 0) {
2309 cerr << "mapext error on " << pool_name << "/" << oid << ": " << cpp_strerror(ret) << std::endl;
2310 return 1;
2311 }
2312 std::map<uint64_t,uint64_t>::iterator iter;
2313 for (iter = m.begin(); iter != m.end(); ++iter) {
2314 cout << hex << iter->first << "\t" << iter->second << dec << std::endl;
2315 }
2316 }
2317 else if (strcmp(nargs[0], "stat") == 0) {
2318 if (!pool_name || nargs.size() < 2) {
2319 usage(cerr);
2320 return 1;
2321 }
2322 string oid(nargs[1]);
2323 uint64_t size;
2324 time_t mtime;
2325 if (use_striper) {
2326 ret = striper.stat(oid, &size, &mtime);
2327 } else {
2328 ret = io_ctx.stat(oid, &size, &mtime);
2329 }
2330 if (ret < 0) {
2331 cerr << " error stat-ing " << pool_name << "/" << oid << ": "
2332 << cpp_strerror(ret) << std::endl;
2333 return 1;
2334 } else {
2335 utime_t t(mtime, 0);
2336 cout << pool_name << "/" << oid
2337 << " mtime " << t << ", size " << size << std::endl;
2338 }
2339 }
2340 else if (strcmp(nargs[0], "get") == 0) {
2341 if (!pool_name || nargs.size() < 3) {
2342 usage(cerr);
2343 return 1;
2344 }
2345 ret = do_get(io_ctx, striper, nargs[1], nargs[2], op_size, use_striper);
2346 if (ret < 0) {
2347 cerr << "error getting " << pool_name << "/" << nargs[1] << ": " << cpp_strerror(ret) << std::endl;
2348 return 1;
2349 }
2350 }
2351 else if (strcmp(nargs[0], "put") == 0) {
2352 if (!pool_name || nargs.size() < 3) {
2353 usage(cerr);
2354 return 1;
2355 }
2356 ret = do_put(io_ctx, striper, nargs[1], nargs[2], op_size, obj_offset, use_striper);
2357 if (ret < 0) {
2358 cerr << "error putting " << pool_name << "/" << nargs[1] << ": " << cpp_strerror(ret) << std::endl;
2359 return 1;
2360 }
2361 }
2362 else if (strcmp(nargs[0], "append") == 0) {
2363 if (!pool_name || nargs.size() < 3) {
2364 usage(cerr);
2365 return 1;
2366 }
2367 ret = do_append(io_ctx, striper, nargs[1], nargs[2], op_size, use_striper);
2368 if (ret < 0) {
2369 cerr << "error appending " << pool_name << "/" << nargs[1] << ": " << cpp_strerror(ret) << std::endl;
2370 return 1;
2371 }
2372 }
2373 else if (strcmp(nargs[0], "truncate") == 0) {
2374 if (!pool_name || nargs.size() < 3) {
2375 usage(cerr);
2376 return 1;
2377 }
2378
2379 string oid(nargs[1]);
2380 char* endptr = NULL;
2381 long size = strtoll(nargs[2], &endptr, 10);
2382 if (*endptr) {
2383 cerr << "Invalid value for size: '" << nargs[2] << "'" << std::endl;
2384 ret = -EINVAL;
2385 return 1;
2386 }
2387 if (size < 0) {
2388 cerr << "error, cannot truncate to negative value" << std::endl;
2389 usage(cerr);
2390 return 1;
2391 }
2392 if (use_striper) {
2393 ret = striper.trunc(oid, size);
2394 } else {
2395 ret = io_ctx.trunc(oid, size);
2396 }
2397 if (ret < 0) {
2398 cerr << "error truncating oid "
2399 << oid << " to " << size << ": "
2400 << cpp_strerror(ret) << std::endl;
2401 } else {
2402 ret = 0;
2403 }
2404 }
2405 else if (strcmp(nargs[0], "setxattr") == 0) {
2406 if (!pool_name || nargs.size() < 3 || nargs.size() > 4) {
2407 usage(cerr);
2408 return 1;
2409 }
2410
2411 string oid(nargs[1]);
2412 string attr_name(nargs[2]);
2413 bufferlist bl;
2414 if (nargs.size() == 4) {
2415 string attr_val(nargs[3]);
2416 bl.append(attr_val.c_str(), attr_val.length());
2417 } else {
2418 do {
2419 ret = bl.read_fd(STDIN_FILENO, 1024); // from stdin
2420 if (ret < 0)
2421 return 1;
2422 } while (ret > 0);
2423 }
2424
2425 if (use_striper) {
2426 ret = striper.setxattr(oid, attr_name.c_str(), bl);
2427 } else {
2428 ret = io_ctx.setxattr(oid, attr_name.c_str(), bl);
2429 }
2430 if (ret < 0) {
2431 cerr << "error setting xattr " << pool_name << "/" << oid << "/" << attr_name << ": " << cpp_strerror(ret) << std::endl;
2432 return 1;
2433 }
2434 else
2435 ret = 0;
2436 }
else if (strcmp(nargs[0], "getxattr") == 0) {
  // "getxattr <obj-name> <attr>": write the raw attribute value to stdout.
  const bool args_ok = pool_name && nargs.size() >= 3;
  if (!args_ok) {
    usage(cerr);
    return 1;
  }

  string oid(nargs[1]);
  string attr_name(nargs[2]);

  bufferlist bl;
  ret = use_striper ? striper.getxattr(oid, attr_name.c_str(), bl)
                    : io_ctx.getxattr(oid, attr_name.c_str(), bl);
  if (ret < 0) {
    cerr << "error getting xattr " << pool_name << "/" << oid << "/" << attr_name << ": " << cpp_strerror(ret) << std::endl;
    return 1;
  }
  ret = 0;
  string s(bl.c_str(), bl.length());
  cout << s;
} else if (strcmp(nargs[0], "rmxattr") == 0) {
  // "rmxattr <obj-name> <attr>": remove one attribute.
  const bool args_ok = pool_name && nargs.size() >= 3;
  if (!args_ok) {
    usage(cerr);
    return 1;
  }

  string oid(nargs[1]);
  string attr_name(nargs[2]);

  ret = use_striper ? striper.rmxattr(oid, attr_name.c_str())
                    : io_ctx.rmxattr(oid, attr_name.c_str());
  if (ret < 0) {
    cerr << "error removing xattr " << pool_name << "/" << oid << "/" << attr_name << ": " << cpp_strerror(ret) << std::endl;
    return 1;
  }
} else if (strcmp(nargs[0], "listxattr") == 0) {
  // "listxattr <obj-name>": print the attribute names, one per line.
  const bool args_ok = pool_name && nargs.size() >= 2;
  if (!args_ok) {
    usage(cerr);
    return 1;
  }

  string oid(nargs[1]);
  map<std::string, bufferlist> attrset;
  bufferlist bl;
  ret = use_striper ? striper.getxattrs(oid, attrset)
                    : io_ctx.getxattrs(oid, attrset);
  if (ret < 0) {
    cerr << "error getting xattr set " << pool_name << "/" << oid << ": " << cpp_strerror(ret) << std::endl;
    return 1;
  }

  map<std::string, bufferlist>::iterator attr = attrset.begin();
  while (attr != attrset.end()) {
    cout << attr->first << std::endl;
    ++attr;
  }
} else if (strcmp(nargs[0], "getomapheader") == 0) {
  // "getomapheader <obj-name> [file]": hexdump the omap header to stdout,
  // or hand it to dump_data() when a third argument is present.
  const bool args_ok = pool_name && nargs.size() >= 2;
  if (!args_ok) {
    usage(cerr);
    return 1;
  }

  string oid(nargs[1]);
  string outfile;
  if (nargs.size() >= 3)
    outfile = nargs[2];

  bufferlist header;
  ret = io_ctx.omap_get_header(oid, &header);
  if (ret < 0) {
    cerr << "error getting omap header " << pool_name << "/" << oid
         << ": " << cpp_strerror(ret) << std::endl;
    return 1;
  }
  if (outfile.empty()) {
    cout << "header (" << header.length() << " bytes) :\n";
    header.hexdump(cout);
    cout << std::endl;
  } else {
    cerr << "Writing to " << outfile << std::endl;
    dump_data(outfile, header);
  }
  ret = 0;
} else if (strcmp(nargs[0], "setomapheader") == 0) {
  // "setomapheader <obj-name> <val>": store <val> as the omap header.
  const bool args_ok = pool_name && nargs.size() >= 3;
  if (!args_ok) {
    usage(cerr);
    return 1;
  }

  string oid(nargs[1]);
  string val(nargs[2]);

  bufferlist bl;
  bl.append(val);

  ret = io_ctx.omap_set_header(oid, bl);
  if (ret < 0) {
    cerr << "error setting omap value " << pool_name << "/" << oid
         << ": " << cpp_strerror(ret) << std::endl;
    return 1;
  }
  ret = 0;
} else if (strcmp(nargs[0], "setomapval") == 0) {
  // "setomapval <obj-name> [key] [val]": the key argument is only expected
  // when omap_key_valid is false; a missing value is read from stdin.
  uint32_t min_args = (omap_key_valid ? 2 : 3);
  const size_t arg_count = nargs.size();
  if (!pool_name || arg_count < min_args || arg_count > min_args + 1) {
    usage(cerr);
    return 1;
  }

  string oid(nargs[1]);
  if (!omap_key_valid) {
    omap_key = nargs[2];
    omap_key_pretty = omap_key;
  }

  bufferlist bl;
  if (arg_count <= min_args) {
    for (;;) {
      ret = bl.read_fd(STDIN_FILENO, 1024); // from stdin
      if (ret < 0)
        return 1;
      if (ret == 0)
        break;
    }
  } else {
    string val(nargs[min_args]);
    bl.append(val);
  }

  map<string, bufferlist> values;
  values[omap_key] = bl;

  ret = io_ctx.omap_set(oid, values);
  if (ret < 0) {
    cerr << "error setting omap value " << pool_name << "/" << oid << "/"
         << omap_key_pretty << ": " << cpp_strerror(ret) << std::endl;
    return 1;
  }
  ret = 0;
} else if (strcmp(nargs[0], "getomapval") == 0) {
  // "getomapval <obj-name> [key] [file]": fetch a single omap value and
  // either hexdump it or pass it to dump_data().
  uint32_t min_args = (omap_key_valid ? 2 : 3);
  const size_t arg_count = nargs.size();
  if (!pool_name || arg_count < min_args || arg_count > min_args + 1) {
    usage(cerr);
    return 1;
  }

  string oid(nargs[1]);
  if (!omap_key_valid) {
    omap_key = nargs[2];
    omap_key_pretty = omap_key;
  }

  set<string> keys;
  keys.insert(omap_key);

  std::string outfile;
  if (arg_count > min_args)
    outfile = nargs[min_args];

  map<string, bufferlist> values;
  ret = io_ctx.omap_get_vals_by_keys(oid, keys, &values);
  if (ret < 0) {
    cerr << "error getting omap value " << pool_name << "/" << oid << "/"
         << omap_key_pretty << ": " << cpp_strerror(ret) << std::endl;
    return 1;
  }
  ret = 0;

  // only the first returned entry is examined
  const bool found = values.size() && values.begin()->first == omap_key;
  if (!found) {
    cout << "No such key: " << pool_name << "/" << oid << "/"
         << omap_key_pretty << std::endl;
    return 1;
  }
  if (outfile.empty()) {
    cout << "value (" << values.begin()->second.length() << " bytes) :\n";
    values.begin()->second.hexdump(cout);
    cout << std::endl;
  } else {
    cerr << "Writing to " << outfile << std::endl;
    dump_data(outfile, values.begin()->second);
  }
  ret = 0;
} else if (strcmp(nargs[0], "rmomapkey") == 0) {
  // "rmomapkey <obj-name> [key]": the key argument is only expected when
  // omap_key_valid is false.
  uint32_t num_args = (omap_key_valid ? 2 : 3);
  if (!pool_name || nargs.size() != num_args) {
    usage(cerr);
    return 1;
  }

  string oid(nargs[1]);
  if (!omap_key_valid) {
    omap_key = nargs[2];
    omap_key_pretty = omap_key;
  }
  set<string> keys;
  keys.insert(omap_key);

  ret = io_ctx.omap_rm_keys(oid, keys);
  if (ret < 0) {
    cerr << "error removing omap key " << pool_name << "/" << oid << "/"
         << omap_key_pretty << ": " << cpp_strerror(ret) << std::endl;
    return 1;
  }
  ret = 0;
} else if (strcmp(nargs[0], "listomapvals") == 0) {
  // "listomapvals <obj-name>": dump every omap key/value pair, fetching
  // them in batches and resuming after the last key seen.
  const bool args_ok = pool_name && nargs.size() >= 2;
  if (!args_ok) {
    usage(cerr);
    return 1;
  }

  string oid(nargs[1]);
  string last_read = "";
  int MAX_READ = 512;
  bool maybe_more = true;
  while (maybe_more) {
    map<string, bufferlist> values;
    ret = io_ctx.omap_get_vals(oid, last_read, MAX_READ, &values);
    if (ret < 0) {
      cerr << "error getting omap keys " << pool_name << "/" << oid << ": "
           << cpp_strerror(ret) << std::endl;
      return 1;
    }
    ret = values.size();
    map<string, bufferlist>::const_iterator it = values.begin();
    for (; it != values.end(); ++it) {
      last_read = it->first;
      // dump key in hex if it contains nonprintable characters
      const bool all_printable =
        !(std::count_if(it->first.begin(), it->first.end(),
                        (int (*)(int))isprint) < (int)it->first.length());
      if (all_printable) {
        cout << it->first;
      } else {
        cout << "key (" << it->first.length() << " bytes):\n";
        bufferlist keybl;
        keybl.append(it->first);
        keybl.hexdump(cout);
      }
      cout << std::endl;
      cout << "value (" << it->second.length() << " bytes) :\n";
      it->second.hexdump(cout);
      cout << std::endl;
    }
    // a full batch means there may be further entries to fetch
    maybe_more = (ret == MAX_READ);
  }
  ret = 0;
}
else if (strcmp(nargs[0], "cp") == 0) {
  // "cp <src-obj> [dst-obj]": copy one object, optionally into the pool
  // named by target_pool_name.  Without a destination name the source
  // name is reused, which is refused when the target pool name equals the
  // source pool name.
  if (!pool_name || nargs.size() < 2 || nargs.size() > 3) {
    usage(cerr);
    return 1;
  }

  const char *target = target_pool_name ? target_pool_name : pool_name;

  const bool reuse_src_name = nargs.size() < 3;
  if (reuse_src_name && strcmp(target, pool_name) == 0) {
    cerr << "cannot copy object into itself" << std::endl;
    return 1;
  }
  const char *target_obj = reuse_src_name ? nargs[1] : nargs[2];

  // open io context.
  IoCtx target_ctx;
  ret = rados.ioctx_create(target, target_ctx);
  if (ret < 0) {
    cerr << "error opening target pool " << target << ": "
         << cpp_strerror(ret) << std::endl;
    return 1;
  }
  if (target_oloc.size())
    target_ctx.locator_set_key(target_oloc);
  if (target_nspace.size())
    target_ctx.set_namespace(target_nspace);

  ret = do_copy(io_ctx, nargs[1], target_ctx, target_obj);
  if (ret < 0) {
    cerr << "error copying " << pool_name << "/" << nargs[1] << " => " << target << "/" << target_obj << ": " << cpp_strerror(ret) << std::endl;
    return 1;
  }
} else if (strcmp(nargs[0], "rm") == 0) {
  // "rm <obj-name> ...": remove every named object, stopping at the first
  // failure.  With forcefull set, the FULL_FORCE/FULL_TRY flags are passed.
  const bool args_ok = pool_name && nargs.size() >= 2;
  if (!args_ok) {
    usage(cerr);
    return 1;
  }
  for (size_t idx = 1; idx < nargs.size(); ++idx) {
    const string oid(nargs[idx]);
    if (use_striper && forcefull) {
      ret = striper.remove(oid, (CEPH_OSD_FLAG_FULL_FORCE | CEPH_OSD_FLAG_FULL_TRY));
    } else if (use_striper) {
      ret = striper.remove(oid);
    } else if (forcefull) {
      ret = io_ctx.remove(oid, (CEPH_OSD_FLAG_FULL_FORCE | CEPH_OSD_FLAG_FULL_TRY));
    } else {
      ret = io_ctx.remove(oid);
    }
    if (ret < 0) {
      string name = (nspace.size() ? nspace + "/" : "" ) + oid;
      cerr << "error removing " << pool_name << ">" << name << ": " << cpp_strerror(ret) << std::endl;
      return 1;
    }
  }
}
2770 else if (strcmp(nargs[0], "create") == 0) {
2771 if (!pool_name || nargs.size() < 2) {
2772 usage(cerr);
2773 return 1;
2774 }
2775 string oid(nargs[1]);
2776 ret = io_ctx.create(oid, true);
2777 if (ret < 0) {
2778 cerr << "error creating " << pool_name << "/" << oid << ": " << cpp_strerror(ret) << std::endl;
2779 return 1;
2780 }
2781 }
2782
2783 else if (strcmp(nargs[0], "tmap") == 0) {
2784 if (nargs.size() < 3)
2785 usage_exit();
2786 if (strcmp(nargs[1], "dump") == 0) {
2787 bufferlist outdata;
2788 string oid(nargs[2]);
2789 ret = io_ctx.read(oid, outdata, 0, 0);
2790 if (ret < 0) {
2791 cerr << "error reading " << pool_name << "/" << oid << ": " << cpp_strerror(ret) << std::endl;
2792 goto out;
2793 }
2794 bufferlist::iterator p = outdata.begin();
2795 bufferlist header;
2796 map<string, bufferlist> kv;
2797 try {
2798 ::decode(header, p);
2799 ::decode(kv, p);
2800 }
2801 catch (buffer::error& e) {
2802 cerr << "error decoding tmap " << pool_name << "/" << oid << std::endl;
2803 ret = -EINVAL;
2804 goto out;
2805 }
2806 cout << "header (" << header.length() << " bytes):\n";
2807 header.hexdump(cout);
2808 cout << "\n";
2809 cout << kv.size() << " keys\n";
2810 for (map<string,bufferlist>::iterator q = kv.begin(); q != kv.end(); ++q) {
2811 cout << "key '" << q->first << "' (" << q->second.length() << " bytes):\n";
2812 q->second.hexdump(cout);
2813 cout << "\n";
2814 }
2815 }
2816 else if (strcmp(nargs[1], "set") == 0 ||
2817 strcmp(nargs[1], "create") == 0) {
2818 if (nargs.size() < 5)
2819 usage_exit();
2820 string oid(nargs[2]);
2821 string k(nargs[3]);
2822 string v(nargs[4]);
2823 bufferlist bl;
2824 char c = (strcmp(nargs[1], "set") == 0) ? CEPH_OSD_TMAP_SET : CEPH_OSD_TMAP_CREATE;
2825 ::encode(c, bl);
2826 ::encode(k, bl);
2827 ::encode(v, bl);
2828 ret = io_ctx.tmap_update(oid, bl);
2829 }
2830 }
2831
2832 else if (strcmp(nargs[0], "tmap-to-omap") == 0) {
2833 if (!pool_name || nargs.size() < 2)
2834 usage_exit();
2835 string oid(nargs[1]);
2836
2837 bufferlist bl;
2838 int r = io_ctx.tmap_get(oid, bl);
2839 if (r < 0) {
2840 ret = r;
2841 cerr << "error reading tmap " << pool_name << "/" << oid
2842 << ": " << cpp_strerror(ret) << std::endl;
2843 goto out;
2844 }
2845 bufferlist hdr;
2846 map<string, bufferlist> kv;
2847 bufferlist::iterator p = bl.begin();
2848 try {
2849 ::decode(hdr, p);
2850 ::decode(kv, p);
2851 }
2852 catch (buffer::error& e) {
2853 cerr << "error decoding tmap " << pool_name << "/" << oid << std::endl;
2854 ret = -EINVAL;
2855 goto out;
2856 }
2857 if (!p.end()) {
2858 cerr << "error decoding tmap (stray trailing data) in " << pool_name << "/" << oid << std::endl;
2859 ret = -EINVAL;
2860 goto out;
2861 }
2862 librados::ObjectWriteOperation wr;
2863 wr.omap_set_header(hdr);
2864 wr.omap_set(kv);
2865 wr.truncate(0); // delete the old tmap data
2866 r = io_ctx.operate(oid, &wr);
2867 if (r < 0) {
2868 ret = r;
2869 cerr << "error writing tmap data as omap on " << pool_name << "/" << oid
2870 << ": " << cpp_strerror(ret) << std::endl;
2871 goto out;
2872 }
2873 ret = 0;
2874 }
2875
2876 else if (strcmp(nargs[0], "mkpool") == 0) {
2877 int auid = 0;
2878 __u8 crush_rule = 0;
2879 if (nargs.size() < 2)
2880 usage_exit();
2881 if (nargs.size() > 2) {
2882 char* endptr = NULL;
2883 auid = strtol(nargs[2], &endptr, 10);
2884 if (*endptr) {
2885 cerr << "Invalid value for auid: '" << nargs[2] << "'" << std::endl;
2886 ret = -EINVAL;
2887 goto out;
2888 }
2889 cerr << "setting auid:" << auid << std::endl;
2890 if (nargs.size() > 3) {
2891 crush_rule = (__u8)strtol(nargs[3], &endptr, 10);
2892 if (*endptr) {
2893 cerr << "Invalid value for crush-rule: '" << nargs[3] << "'" << std::endl;
2894 ret = -EINVAL;
2895 goto out;
2896 }
2897 cerr << "using crush rule " << (int)crush_rule << std::endl;
2898 }
2899 }
2900 ret = rados.pool_create(nargs[1], auid, crush_rule);
2901 if (ret < 0) {
2902 cerr << "error creating pool " << nargs[1] << ": "
2903 << cpp_strerror(ret) << std::endl;
2904 goto out;
2905 }
2906 cout << "successfully created pool " << nargs[1] << std::endl;
2907 }
2908 else if (strcmp(nargs[0], "cppool") == 0) {
2909 bool force = nargs.size() == 4 && !strcmp(nargs[3], "--yes-i-really-mean-it");
2910 if (nargs.size() != 3 && !(nargs.size() == 4 && force)) {
2911 usage(cerr);
2912 return 1;
2913 }
2914 const char *src_pool = nargs[1];
2915 const char *target_pool = nargs[2];
2916
2917 if (strcmp(src_pool, target_pool) == 0) {
2918 cerr << "cannot copy pool into itself" << std::endl;
2919 return 1;
2920 }
2921
2922 cerr << "WARNING: pool copy does not preserve user_version, which some "
2923 << " apps may rely on." << std::endl;
2924
2925 if (rados.get_pool_is_selfmanaged_snaps_mode(src_pool)) {
2926 cerr << "WARNING: pool " << src_pool << " has selfmanaged snaps, which are not preserved\n"
2927 << " by the cppool operation. This will break any snapshot user."
2928 << std::endl;
2929 if (!force) {
2930 cerr << " If you insist on making a broken copy, you can pass\n"
2931 << " --yes-i-really-mean-it to proceed anyway."
2932 << std::endl;
2933 exit(1);
2934 }
2935 }
2936
2937 ret = do_copy_pool(rados, src_pool, target_pool);
2938 if (ret < 0) {
2939 cerr << "error copying pool " << src_pool << " => " << target_pool << ": "
2940 << cpp_strerror(ret) << std::endl;
2941 return 1;
2942 }
2943 cout << "successfully copied pool " << nargs[1] << std::endl;
2944 }
2945 else if (strcmp(nargs[0], "rmpool") == 0) {
2946 if (nargs.size() < 2)
2947 usage_exit();
2948 if (nargs.size() < 4 ||
2949 strcmp(nargs[1], nargs[2]) != 0 ||
2950 strcmp(nargs[3], "--yes-i-really-really-mean-it") != 0) {
2951 cerr << "WARNING:\n"
2952 << " This will PERMANENTLY DESTROY an entire pool of objects with no way back.\n"
2953 << " To confirm, pass the pool to remove twice, followed by\n"
2954 << " --yes-i-really-really-mean-it" << std::endl;
2955 ret = -1;
2956 goto out;
2957 }
2958 ret = rados.pool_delete(nargs[1]);
2959 if (ret >= 0) {
2960 cout << "successfully deleted pool " << nargs[1] << std::endl;
2961 } else { //error
2962 cerr << "pool " << nargs[1] << " could not be removed" << std::endl;
2963 cerr << "Check your monitor configuration - `mon allow pool delete` is set to false by default,"
2964 << " change it to true to allow deletion of pools" << std::endl;
2965 }
2966 }
2967 else if (strcmp(nargs[0], "purge") == 0) {
2968 if (nargs.size() < 2) {
2969 usage(cerr);
2970 return 1;
2971 }
2972 if (nargs.size() < 3 ||
2973 strcmp(nargs[2], "--yes-i-really-really-mean-it") != 0) {
2974 cerr << "WARNING:\n"
2975 << " This will PERMANENTLY DESTROY all objects from a pool with no way back.\n"
2976 << " To confirm, follow pool with --yes-i-really-really-mean-it" << std::endl;
2977 return 1;
2978 }
2979 ret = rados.ioctx_create(nargs[1], io_ctx);
2980 if (ret < 0) {
2981 cerr << "error pool " << nargs[1] << ": "
2982 << cpp_strerror(ret) << std::endl;
2983 return 1;
2984 }
2985 io_ctx.set_namespace(all_nspaces);
2986 io_ctx.set_osdmap_full_try();
2987 RadosBencher bencher(g_ceph_context, rados, io_ctx);
2988 ret = bencher.clean_up_slow("", concurrent_ios);
2989 if (ret >= 0) {
2990 cout << "successfully purged pool " << nargs[1] << std::endl;
2991 } else { //error
2992 cerr << "pool " << nargs[1] << " could not be purged" << std::endl;
2993 cerr << "Check your monitor configuration - `mon allow pool delete` is set to false by default,"
2994 << " change it to true to allow deletion of pools" << std::endl;
2995 }
2996 }
2997 else if (strcmp(nargs[0], "lssnap") == 0) {
2998 if (!pool_name || nargs.size() != 1) {
2999 usage(cerr);
3000 return 1;
3001 }
3002
3003 vector<snap_t> snaps;
3004 io_ctx.snap_list(&snaps);
3005 for (vector<snap_t>::iterator i = snaps.begin();
3006 i != snaps.end();
3007 ++i) {
3008 string s;
3009 time_t t;
3010 if (io_ctx.snap_get_name(*i, &s) < 0)
3011 continue;
3012 if (io_ctx.snap_get_stamp(*i, &t) < 0)
3013 continue;
3014 struct tm bdt;
3015 localtime_r(&t, &bdt);
3016 cout << *i << "\t" << s << "\t";
3017
3018 std::ios_base::fmtflags original_flags = cout.flags();
3019 cout.setf(std::ios::right);
3020 cout.fill('0');
3021 cout << std::setw(4) << (bdt.tm_year+1900)
3022 << '.' << std::setw(2) << (bdt.tm_mon+1)
3023 << '.' << std::setw(2) << bdt.tm_mday
3024 << ' '
3025 << std::setw(2) << bdt.tm_hour
3026 << ':' << std::setw(2) << bdt.tm_min
3027 << ':' << std::setw(2) << bdt.tm_sec
3028 << std::endl;
3029 cout.flags(original_flags);
3030 }
3031 cout << snaps.size() << " snaps" << std::endl;
3032 }
3033
3034 else if (strcmp(nargs[0], "mksnap") == 0) {
3035 if (!pool_name || nargs.size() < 2) {
3036 usage(cerr);
3037 return 1;
3038 }
3039
3040 ret = io_ctx.snap_create(nargs[1]);
3041 if (ret < 0) {
3042 cerr << "error creating pool " << pool_name << " snapshot " << nargs[1]
3043 << ": " << cpp_strerror(ret) << std::endl;
3044 return 1;
3045 }
3046 cout << "created pool " << pool_name << " snap " << nargs[1] << std::endl;
3047 }
3048
3049 else if (strcmp(nargs[0], "rmsnap") == 0) {
3050 if (!pool_name || nargs.size() < 2) {
3051 usage(cerr);
3052 return 1;
3053 }
3054
3055 ret = io_ctx.snap_remove(nargs[1]);
3056 if (ret < 0) {
3057 cerr << "error removing pool " << pool_name << " snapshot " << nargs[1]
3058 << ": " << cpp_strerror(ret) << std::endl;
3059 return 1;
3060 }
3061 cout << "removed pool " << pool_name << " snap " << nargs[1] << std::endl;
3062 }
3063
3064 else if (strcmp(nargs[0], "rollback") == 0) {
3065 if (!pool_name || nargs.size() < 3) {
3066 usage(cerr);
3067 return 1;
3068 }
3069
3070 ret = io_ctx.snap_rollback(nargs[1], nargs[2]);
3071 if (ret < 0) {
3072 cerr << "error rolling back pool " << pool_name << " to snapshot " << nargs[1]
3073 << cpp_strerror(ret) << std::endl;
3074 return 1;
3075 }
3076 cout << "rolled back pool " << pool_name
3077 << " to snapshot " << nargs[2] << std::endl;
3078 }
3079 else if (strcmp(nargs[0], "bench") == 0) {
3080 if (!pool_name || nargs.size() < 3) {
3081 usage(cerr);
3082 return 1;
3083 }
3084 char* endptr = NULL;
3085 int seconds = strtol(nargs[1], &endptr, 10);
3086 if (*endptr) {
3087 cerr << "Invalid value for seconds: '" << nargs[1] << "'" << std::endl;
3088 return 1;
3089 }
3090 int operation = 0;
3091 if (strcmp(nargs[2], "write") == 0)
3092 operation = OP_WRITE;
3093 else if (strcmp(nargs[2], "seq") == 0)
3094 operation = OP_SEQ_READ;
3095 else if (strcmp(nargs[2], "rand") == 0)
3096 operation = OP_RAND_READ;
3097 else {
3098 usage(cerr);
3099 return 1;
3100 }
3101 if (operation != OP_WRITE) {
3102 if (block_size_specified) {
3103 cerr << "-b|--block_size option can be used only with 'write' bench test"
3104 << std::endl;
3105 return 1;
3106 }
3107 if (bench_write_dest != 0) {
3108 cerr << "--write-object, --write-omap and --write-xattr options can "
3109 "only be used with the 'write' bench test"
3110 << std::endl;
3111 return 1;
3112 }
3113 }
3114 else if (bench_write_dest == 0) {
3115 bench_write_dest = OP_WRITE_DEST_OBJ;
3116 }
3117
3118 if (!formatter && output) {
3119 cerr << "-o|--output option can only be used with '--format' option"
3120 << std::endl;
3121 return 1;
3122 }
3123 RadosBencher bencher(g_ceph_context, rados, io_ctx);
3124 bencher.set_show_time(show_time);
3125 bencher.set_write_destination(static_cast<OpWriteDest>(bench_write_dest));
3126
3127 ostream *outstream = NULL;
3128 if (formatter) {
3129 bencher.set_formatter(formatter);
3130 if (output)
3131 outstream = new ofstream(output);
3132 else
3133 outstream = &cout;
3134 bencher.set_outstream(*outstream);
3135 }
3136 if (!object_size)
3137 object_size = op_size;
3138 else if (object_size < op_size)
3139 op_size = object_size;
3140 cout << "hints = " << (int)hints << std::endl;
3141 ret = bencher.aio_bench(operation, seconds,
3142 concurrent_ios, op_size, object_size,
3143 max_objects, cleanup, hints, run_name, no_verify);
3144 if (ret != 0)
3145 cerr << "error during benchmark: " << cpp_strerror(ret) << std::endl;
3146 if (formatter && output)
3147 delete outstream;
3148 }
3149 else if (strcmp(nargs[0], "cleanup") == 0) {
3150 if (!pool_name) {
3151 usage(cerr);
3152 return 1;
3153 }
3154 if (wildcard)
3155 io_ctx.set_namespace(all_nspaces);
3156 RadosBencher bencher(g_ceph_context, rados, io_ctx);
3157 ret = bencher.clean_up(prefix, concurrent_ios, run_name);
3158 if (ret != 0)
3159 cerr << "error during cleanup: " << cpp_strerror(ret) << std::endl;
3160 }
3161 else if (strcmp(nargs[0], "watch") == 0) {
3162 if (!pool_name || nargs.size() < 2) {
3163 usage(cerr);
3164 return 1;
3165 }
3166 string oid(nargs[1]);
3167 RadosWatchCtx ctx(io_ctx, oid.c_str());
3168 uint64_t cookie;
3169 ret = io_ctx.watch2(oid, &cookie, &ctx);
3170 if (ret != 0)
3171 cerr << "error calling watch: " << cpp_strerror(ret) << std::endl;
3172 else {
3173 cout << "press enter to exit..." << std::endl;
3174 getchar();
3175 io_ctx.unwatch2(cookie);
3176 rados.watch_flush();
3177 }
3178 }
3179 else if (strcmp(nargs[0], "notify") == 0) {
3180 if (!pool_name || nargs.size() < 3) {
3181 usage(cerr);
3182 return 1;
3183 }
3184 string oid(nargs[1]);
3185 string msg(nargs[2]);
3186 bufferlist bl, replybl;
3187 ::encode(msg, bl);
3188 ret = io_ctx.notify2(oid, bl, 10000, &replybl);
3189 if (ret != 0)
3190 cerr << "error calling notify: " << cpp_strerror(ret) << std::endl;
3191 if (replybl.length()) {
3192 map<pair<uint64_t,uint64_t>,bufferlist> rm;
3193 set<pair<uint64_t,uint64_t> > missed;
3194 bufferlist::iterator p = replybl.begin();
3195 ::decode(rm, p);
3196 ::decode(missed, p);
3197 for (map<pair<uint64_t,uint64_t>,bufferlist>::iterator p = rm.begin();
3198 p != rm.end();
3199 ++p) {
3200 cout << "reply client." << p->first.first
3201 << " cookie " << p->first.second
3202 << " : " << p->second.length() << " bytes" << std::endl;
3203 if (p->second.length())
3204 p->second.hexdump(cout);
3205 }
3206 for (multiset<pair<uint64_t,uint64_t> >::iterator p = missed.begin();
3207 p != missed.end(); ++p) {
3208 cout << "timeout client." << p->first
3209 << " cookie " << p->second << std::endl;
3210 }
3211 }
3212 } else if (strcmp(nargs[0], "set-alloc-hint") == 0) {
3213 if (!pool_name || nargs.size() < 4) {
3214 usage(cerr);
3215 return 1;
3216 }
3217 string err;
3218 string oid(nargs[1]);
3219 uint64_t expected_object_size = strict_strtoll(nargs[2], 10, &err);
3220 if (!err.empty()) {
3221 cerr << "couldn't parse expected_object_size: " << err << std::endl;
3222 usage(cerr);
3223 return 1;
3224 }
3225 uint64_t expected_write_size = strict_strtoll(nargs[3], 10, &err);
3226 if (!err.empty()) {
3227 cerr << "couldn't parse expected_write_size: " << err << std::endl;
3228 usage(cerr);
3229 return 1;
3230 }
3231 ret = io_ctx.set_alloc_hint(oid, expected_object_size, expected_write_size);
3232 if (ret < 0) {
3233 cerr << "error setting alloc-hint " << pool_name << "/" << oid << ": "
3234 << cpp_strerror(ret) << std::endl;
3235 return 1;
3236 }
3237 } else if (strcmp(nargs[0], "load-gen") == 0) {
3238 if (!pool_name) {
3239 cerr << "error: must specify pool" << std::endl;
3240 usage(cerr);
3241 return 1;
3242 }
3243 LoadGen lg(&rados);
3244 if (min_obj_len)
3245 lg.min_obj_len = min_obj_len;
3246 if (max_obj_len)
3247 lg.max_obj_len = max_obj_len;
3248 if (min_op_len)
3249 lg.min_op_len = min_op_len;
3250 if (max_op_len)
3251 lg.max_op_len = max_op_len;
3252 if (max_ops)
3253 lg.max_ops = max_ops;
3254 if (max_backlog)
3255 lg.max_backlog = max_backlog;
3256 if (target_throughput)
3257 lg.target_throughput = target_throughput << 20;
3258 if (read_percent >= 0)
3259 lg.read_percent = read_percent;
3260 if (num_objs)
3261 lg.num_objs = num_objs;
3262 if (run_length)
3263 lg.run_length = run_length;
3264
3265 cout << "run length " << run_length << " seconds" << std::endl;
3266 cout << "preparing " << lg.num_objs << " objects" << std::endl;
3267 ret = lg.bootstrap(pool_name);
3268 if (ret < 0) {
3269 cerr << "load-gen bootstrap failed" << std::endl;
3270 return 1;
3271 }
3272 cout << "load-gen will run " << lg.run_length << " seconds" << std::endl;
3273 lg.run();
3274 lg.cleanup();
3275 } else if (strcmp(nargs[0], "listomapkeys") == 0) {
3276 if (!pool_name || nargs.size() < 2) {
3277 usage(cerr);
3278 return 1;
3279 }
3280
3281 set<string> out_keys;
3282 ret = io_ctx.omap_get_keys(nargs[1], "", LONG_MAX, &out_keys);
3283 if (ret < 0) {
3284 cerr << "error getting omap key set " << pool_name << "/"
3285 << nargs[1] << ": " << cpp_strerror(ret) << std::endl;
3286 return 1;
3287 }
3288
3289 for (set<string>::iterator iter = out_keys.begin();
3290 iter != out_keys.end(); ++iter) {
3291 cout << *iter << std::endl;
3292 }
3293 } else if (strcmp(nargs[0], "lock") == 0) {
3294 if (!pool_name) {
3295 usage(cerr);
3296 return 1;
3297 }
3298
3299 if (!formatter) {
3300 formatter = new JSONFormatter(pretty_format);
3301 }
3302 ret = do_lock_cmd(nargs, opts, &io_ctx, formatter);
3303 } else if (strcmp(nargs[0], "listwatchers") == 0) {
3304 if (!pool_name || nargs.size() < 2) {
3305 usage(cerr);
3306 return 1;
3307 }
3308
3309 string oid(nargs[1]);
3310 std::list<obj_watch_t> lw;
3311
3312 ret = io_ctx.list_watchers(oid, &lw);
3313 if (ret < 0) {
3314 cerr << "error listing watchers " << pool_name << "/" << oid << ": " << cpp_strerror(ret) << std::endl;
3315 return 1;
3316 }
3317 else
3318 ret = 0;
3319
3320 for (std::list<obj_watch_t>::iterator i = lw.begin(); i != lw.end(); ++i) {
3321 cout << "watcher=" << i->addr << " client." << i->watcher_id << " cookie=" << i->cookie << std::endl;
3322 }
3323 } else if (strcmp(nargs[0], "listsnaps") == 0) {
3324 if (!pool_name || nargs.size() < 2) {
3325 usage(cerr);
3326 return 1;
3327 }
3328
3329 string oid(nargs[1]);
3330 snap_set_t ls;
3331
3332 io_ctx.snap_set_read(LIBRADOS_SNAP_DIR);
3333 ret = io_ctx.list_snaps(oid, &ls);
3334 if (ret < 0) {
3335 cerr << "error listing snap shots " << pool_name << "/" << oid << ": " << cpp_strerror(ret) << std::endl;
3336 return 1;
3337 }
3338 else
3339 ret = 0;
3340
3341 map<snap_t,string> snamemap;
3342 if (formatter || pretty_format) {
3343 vector<snap_t> snaps;
3344 io_ctx.snap_list(&snaps);
3345 for (vector<snap_t>::iterator i = snaps.begin();
3346 i != snaps.end(); ++i) {
3347 string s;
3348 if (io_ctx.snap_get_name(*i, &s) < 0)
3349 continue;
3350 snamemap.insert(pair<snap_t,string>(*i, s));
3351 }
3352 }
3353
3354 if (formatter) {
3355 formatter->open_object_section("object");
3356 formatter->dump_string("name", oid);
3357 formatter->open_array_section("clones");
3358 } else {
3359 cout << oid << ":" << std::endl;
3360 cout << "cloneid snaps size overlap" << std::endl;
3361 }
3362
3363 for (std::vector<clone_info_t>::iterator ci = ls.clones.begin();
3364 ci != ls.clones.end(); ++ci) {
3365
3366 if (formatter) formatter->open_object_section("clone");
3367
3368 if (ci->cloneid == librados::SNAP_HEAD) {
3369 if (formatter)
3370 formatter->dump_string("id", "head");
3371 else
3372 cout << "head";
3373 } else {
3374 if (formatter)
3375 formatter->dump_unsigned("id", ci->cloneid);
3376 else
3377 cout << ci->cloneid;
3378 }
3379
3380 if (formatter)
3381 formatter->open_array_section("snapshots");
3382 else
3383 cout << "\t";
3384
3385 if (!formatter && ci->snaps.empty()) {
3386 cout << "-";
3387 }
3388 for (std::vector<snap_t>::const_iterator snapindex = ci->snaps.begin();
3389 snapindex != ci->snaps.end(); ++snapindex) {
3390
3391 map<snap_t,string>::iterator si;
3392
3393 if (formatter || pretty_format) si = snamemap.find(*snapindex);
3394
3395 if (formatter) {
3396 formatter->open_object_section("snapshot");
3397 formatter->dump_unsigned("id", *snapindex);
3398 if (si != snamemap.end())
3399 formatter->dump_string("name", si->second);
3400 formatter->close_section(); //snapshot
3401 } else {
3402 if (snapindex != ci->snaps.begin()) cout << ",";
3403 if (!pretty_format || (si == snamemap.end()))
3404 cout << *snapindex;
3405 else
3406 cout << si->second << "(" << *snapindex << ")";
3407 }
3408 }
3409
3410 if (formatter) {
3411 formatter->close_section(); //Snapshots
3412 formatter->dump_unsigned("size", ci->size);
3413 } else {
3414 cout << "\t" << ci->size;
3415 }
3416
3417 if (ci->cloneid != librados::SNAP_HEAD) {
3418 if (formatter)
3419 formatter->open_array_section("overlaps");
3420 else
3421 cout << "\t[";
3422
3423 for (std::vector< std::pair<uint64_t,uint64_t> >::iterator ovi = ci->overlap.begin();
3424 ovi != ci->overlap.end(); ++ovi) {
3425 if (formatter) {
3426 formatter->open_object_section("section");
3427 formatter->dump_unsigned("start", ovi->first);
3428 formatter->dump_unsigned("length", ovi->second);
3429 formatter->close_section(); //section
3430 } else {
3431 if (ovi != ci->overlap.begin()) cout << ",";
3432 cout << ovi->first << "~" << ovi->second;
3433 }
3434 }
3435 if (formatter)
3436 formatter->close_section(); //overlaps
3437 else
3438 cout << "]" << std::endl;
3439 }
3440 if (formatter) formatter->close_section(); //clone
3441 }
3442 if (formatter) {
3443 formatter->close_section(); //clones
3444 formatter->close_section(); //object
3445 formatter->flush(cout);
3446 } else {
3447 cout << std::endl;
3448 }
3449 } else if (strcmp(nargs[0], "list-inconsistent-pg") == 0) {
3450 if (!formatter) {
3451 formatter = new JSONFormatter(pretty_format);
3452 }
3453 ret = do_get_inconsistent_pg_cmd(nargs, rados, *formatter);
3454 } else if (strcmp(nargs[0], "list-inconsistent-obj") == 0) {
3455 if (!formatter) {
3456 formatter = new JSONFormatter(pretty_format);
3457 }
3458 ret = do_get_inconsistent_cmd<inconsistent_obj_t>(nargs, rados, *formatter);
3459 } else if (strcmp(nargs[0], "list-inconsistent-snapset") == 0) {
3460 if (!formatter) {
3461 formatter = new JSONFormatter(pretty_format);
3462 }
3463 ret = do_get_inconsistent_cmd<inconsistent_snapset_t>(nargs, rados, *formatter);
3464 } else if (strcmp(nargs[0], "cache-flush") == 0) {
3465 if (!pool_name || nargs.size() < 2) {
3466 usage(cerr);
3467 return 1;
3468 }
3469 string oid(nargs[1]);
3470 if (with_clones) {
3471 snap_set_t ls;
3472 io_ctx.snap_set_read(LIBRADOS_SNAP_DIR);
3473 ret = io_ctx.list_snaps(oid, &ls);
3474 if (ret < 0) {
3475 cerr << "error listing snapshots " << pool_name << "/" << oid << ": "
3476 << cpp_strerror(ret) << std::endl;
3477 return 1;
3478 }
3479 for (std::vector<clone_info_t>::iterator ci = ls.clones.begin();
3480 ci != ls.clones.end(); ++ci) {
3481 if (snapid != CEPH_NOSNAP && ci->cloneid > snapid)
3482 break;
3483 io_ctx.snap_set_read(ci->cloneid);
3484 ret = do_cache_flush(io_ctx, oid);
3485 if (ret < 0) {
3486 cerr << "error from cache-flush " << oid << ": "
3487 << cpp_strerror(ret) << std::endl;
3488 return 1;
3489 }
3490 }
3491 } else {
3492 ret = do_cache_flush(io_ctx, oid);
3493 if (ret < 0) {
3494 cerr << "error from cache-flush " << oid << ": "
3495 << cpp_strerror(ret) << std::endl;
3496 return 1;
3497 }
3498 }
3499 } else if (strcmp(nargs[0], "cache-try-flush") == 0) {
3500 if (!pool_name || nargs.size() < 2) {
3501 usage(cerr);
3502 return 1;
3503 }
3504 string oid(nargs[1]);
3505 if (with_clones) {
3506 snap_set_t ls;
3507 io_ctx.snap_set_read(LIBRADOS_SNAP_DIR);
3508 ret = io_ctx.list_snaps(oid, &ls);
3509 if (ret < 0) {
3510 cerr << "error listing snapshots " << pool_name << "/" << oid << ": "
3511 << cpp_strerror(ret) << std::endl;
3512 return 1;
3513 }
3514 for (std::vector<clone_info_t>::iterator ci = ls.clones.begin();
3515 ci != ls.clones.end(); ++ci) {
3516 if (snapid != CEPH_NOSNAP && ci->cloneid > snapid)
3517 break;
3518 io_ctx.snap_set_read(ci->cloneid);
3519 ret = do_cache_try_flush(io_ctx, oid);
3520 if (ret < 0) {
3521 cerr << "error from cache-flush " << oid << ": "
3522 << cpp_strerror(ret) << std::endl;
3523 return 1;
3524 }
3525 }
3526 } else {
3527 ret = do_cache_try_flush(io_ctx, oid);
3528 if (ret < 0) {
3529 cerr << "error from cache-flush " << oid << ": "
3530 << cpp_strerror(ret) << std::endl;
3531 return 1;
3532 }
3533 }
3534 } else if (strcmp(nargs[0], "cache-evict") == 0) {
3535 if (!pool_name || nargs.size() < 2) {
3536 usage(cerr);
3537 return 1;
3538 }
3539 string oid(nargs[1]);
3540 if (with_clones) {
3541 snap_set_t ls;
3542 io_ctx.snap_set_read(LIBRADOS_SNAP_DIR);
3543 ret = io_ctx.list_snaps(oid, &ls);
3544 if (ret < 0) {
3545 cerr << "error listing snapshots " << pool_name << "/" << oid << ": "
3546 << cpp_strerror(ret) << std::endl;
3547 return 1;
3548 }
3549 for (std::vector<clone_info_t>::iterator ci = ls.clones.begin();
3550 ci != ls.clones.end(); ++ci) {
3551 if (snapid != CEPH_NOSNAP && ci->cloneid > snapid)
3552 break;
3553 io_ctx.snap_set_read(ci->cloneid);
3554 ret = do_cache_evict(io_ctx, oid);
3555 if (ret < 0) {
3556 cerr << "error from cache-flush " << oid << ": "
3557 << cpp_strerror(ret) << std::endl;
3558 return 1;
3559 }
3560 }
3561 } else {
3562 ret = do_cache_evict(io_ctx, oid);
3563 if (ret < 0) {
3564 cerr << "error from cache-flush " << oid << ": "
3565 << cpp_strerror(ret) << std::endl;
3566 return 1;
3567 }
3568 }
3569 } else if (strcmp(nargs[0], "cache-flush-evict-all") == 0) {
3570 if (!pool_name) {
3571 usage(cerr);
3572 return 1;
3573 }
3574 ret = do_cache_flush_evict_all(io_ctx, true);
3575 if (ret < 0) {
3576 cerr << "error from cache-flush-evict-all: "
3577 << cpp_strerror(ret) << std::endl;
3578 return 1;
3579 }
3580 } else if (strcmp(nargs[0], "cache-try-flush-evict-all") == 0) {
3581 if (!pool_name) {
3582 usage(cerr);
3583 return 1;
3584 }
3585 ret = do_cache_flush_evict_all(io_ctx, false);
3586 if (ret < 0) {
3587 cerr << "error from cache-try-flush-evict-all: "
3588 << cpp_strerror(ret) << std::endl;
3589 return 1;
3590 }
3591 } else if (strcmp(nargs[0], "set-redirect") == 0) {
3592 if (!pool_name) {
3593 usage(cerr);
3594 return 1;
3595 }
3596
3597 const char *target = target_pool_name;
3598 if (!target)
3599 target = pool_name;
3600
3601 const char *target_obj;
3602 if (nargs.size() < 3) {
3603 if (strcmp(target, pool_name) == 0) {
3604 cerr << "cannot copy object into itself" << std::endl;
3605 return 1;
3606 }
3607 target_obj = nargs[1];
3608 } else {
3609 target_obj = nargs[2];
3610 }
3611
3612 IoCtx target_ctx;
3613 ret = rados.ioctx_create(target, target_ctx);
3614 if (target_oloc.size()) {
3615 target_ctx.locator_set_key(target_oloc);
3616 }
3617 if (target_nspace.size()) {
3618 target_ctx.set_namespace(target_nspace);
3619 }
3620
3621 ObjectWriteOperation op;
3622 op.set_redirect(target_obj, target_ctx, 0);
3623 ret = io_ctx.operate(nargs[1], &op);
3624 if (ret < 0) {
3625 cerr << "error set-redirect " << pool_name << "/" << nargs[1] << " => " << target << "/" << target_obj << ": " << cpp_strerror(ret) << std::endl;
3626 return 1;
3627 }
3628 } else if (strcmp(nargs[0], "export") == 0) {
3629 // export [filename]
3630 if (!pool_name || nargs.size() > 2) {
3631 usage(cerr);
3632 return 1;
3633 }
3634
3635 int file_fd;
3636 if (nargs.size() < 2 || std::string(nargs[1]) == "-") {
3637 file_fd = STDOUT_FILENO;
3638 } else {
3639 file_fd = open(nargs[1], O_WRONLY|O_CREAT|O_TRUNC, 0666);
3640 if (file_fd < 0) {
3641 cerr << "Error opening '" << nargs[1] << "': "
3642 << cpp_strerror(file_fd) << std::endl;
3643 return 1;
3644 }
3645 }
3646
3647 ret = PoolDump(file_fd).dump(&io_ctx);
3648
3649 if (file_fd != STDIN_FILENO) {
3650 VOID_TEMP_FAILURE_RETRY(::close(file_fd));
3651 }
3652
3653 if (ret < 0) {
3654 cerr << "error from export: "
3655 << cpp_strerror(ret) << std::endl;
3656 return 1;
3657 }
3658 } else if (strcmp(nargs[0], "import") == 0) {
3659 // import [--no-overwrite] [--dry-run] <filename | - >
3660 if (!pool_name || nargs.size() > 4 || nargs.size() < 2) {
3661 usage(cerr);
3662 return 1;
3663 }
3664
3665 // Last arg is the filename
3666 std::string const filename = nargs[nargs.size() - 1];
3667
3668 // All other args may be flags
3669 bool dry_run = false;
3670 bool no_overwrite = false;
3671 for (unsigned i = 1; i < nargs.size() - 1; ++i) {
3672 std::string arg(nargs[i]);
3673
3674 if (arg == std::string("--no-overwrite")) {
3675 no_overwrite = true;
3676 } else if (arg == std::string("--dry-run")) {
3677 dry_run = true;
3678 } else {
3679 std::cerr << "Invalid argument '" << arg << "'" << std::endl;
3680 return 1;
3681 }
3682 }
3683
3684 int file_fd;
3685 if (filename == "-") {
3686 file_fd = STDIN_FILENO;
3687 } else {
3688 file_fd = open(filename.c_str(), O_RDONLY);
3689 if (file_fd < 0) {
3690 cerr << "Error opening '" << filename << "': "
3691 << cpp_strerror(file_fd) << std::endl;
3692 return 1;
3693 }
3694 }
3695
3696 ret = RadosImport(file_fd, 0, dry_run).import(io_ctx, no_overwrite);
3697
3698 if (file_fd != STDIN_FILENO) {
3699 VOID_TEMP_FAILURE_RETRY(::close(file_fd));
3700 }
3701
3702 if (ret < 0) {
3703 cerr << "error from import: "
3704 << cpp_strerror(ret) << std::endl;
3705 return 1;
3706 }
3707 } else {
3708 cerr << "unrecognized command " << nargs[0] << "; -h or --help for usage" << std::endl;
3709 ret = -EINVAL;
3710 }
3711
3712 if (ret < 0)
3713 cerr << "error " << (-ret) << ": " << cpp_strerror(ret) << std::endl;
3714
3715 out:
3716 delete formatter;
3717 return (ret < 0) ? 1 : 0;
3718 }
3719
3720 int main(int argc, const char **argv)
3721 {
3722 vector<const char*> args;
3723 argv_to_vec(argc, argv, args);
3724 env_to_vec(args);
3725
3726 std::map < std::string, std::string > opts;
3727 std::string val;
3728
3729 // Necessary to support usage of -f for formatting,
3730 // since global_init will remove the -f using ceph
3731 // argparse procedures.
3732 for (auto j = args.begin(); j != args.end(); ++j) {
3733 if (strcmp(*j, "--") == 0) {
3734 break;
3735 } else if ((j+1) == args.end()) {
3736 // This can't be a formatting call (no format arg)
3737 break;
3738 } else if (strcmp(*j, "-f") == 0) {
3739 val = *(j+1);
3740 unique_ptr<Formatter> formatter(Formatter::create(val.c_str()));
3741
3742 if (formatter) {
3743 j = args.erase(j);
3744 opts["format"] = val;
3745
3746 j = args.erase(j);
3747 break;
3748 }
3749 }
3750 }
3751
3752 auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
3753 CODE_ENVIRONMENT_UTILITY, 0);
3754 common_init_finish(g_ceph_context);
3755
3756 std::vector<const char*>::iterator i;
3757 for (i = args.begin(); i != args.end(); ) {
3758 if (ceph_argparse_double_dash(args, i)) {
3759 break;
3760 } else if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
3761 usage(cout);
3762 exit(0);
3763 } else if (ceph_argparse_flag(args, i, "-f", "--force", (char*)NULL)) {
3764 opts["force"] = "true";
3765 } else if (ceph_argparse_flag(args, i, "--force-full", (char*)NULL)) {
3766 opts["force-full"] = "true";
3767 } else if (ceph_argparse_flag(args, i, "-d", "--delete-after", (char*)NULL)) {
3768 opts["delete-after"] = "true";
3769 } else if (ceph_argparse_flag(args, i, "-C", "--create", "--create-pool",
3770 (char*)NULL)) {
3771 opts["create"] = "true";
3772 } else if (ceph_argparse_flag(args, i, "--pretty-format", (char*)NULL)) {
3773 opts["pretty-format"] = "true";
3774 } else if (ceph_argparse_flag(args, i, "--show-time", (char*)NULL)) {
3775 opts["show-time"] = "true";
3776 } else if (ceph_argparse_flag(args, i, "--no-cleanup", (char*)NULL)) {
3777 opts["no-cleanup"] = "true";
3778 } else if (ceph_argparse_flag(args, i, "--no-hints", (char*)NULL)) {
3779 opts["no-hints"] = "true";
3780 } else if (ceph_argparse_flag(args, i, "--no-verify", (char*)NULL)) {
3781 opts["no-verify"] = "true";
3782 } else if (ceph_argparse_witharg(args, i, &val, "--run-name", (char*)NULL)) {
3783 opts["run-name"] = val;
3784 } else if (ceph_argparse_witharg(args, i, &val, "--prefix", (char*)NULL)) {
3785 opts["prefix"] = val;
3786 } else if (ceph_argparse_witharg(args, i, &val, "-p", "--pool", (char*)NULL)) {
3787 opts["pool"] = val;
3788 } else if (ceph_argparse_witharg(args, i, &val, "--target-pool", (char*)NULL)) {
3789 opts["target_pool"] = val;
3790 } else if (ceph_argparse_witharg(args, i, &val, "--object-locator" , (char *)NULL)) {
3791 opts["object_locator"] = val;
3792 } else if (ceph_argparse_witharg(args, i, &val, "--target-locator" , (char *)NULL)) {
3793 opts["target_locator"] = val;
3794 } else if (ceph_argparse_witharg(args, i, &val, "--target-nspace" , (char *)NULL)) {
3795 opts["target_nspace"] = val;
3796 } else if (ceph_argparse_flag(args, i, "--striper" , (char *)NULL)) {
3797 opts["striper"] = "true";
3798 } else if (ceph_argparse_witharg(args, i, &val, "-t", "--concurrent-ios", (char*)NULL)) {
3799 opts["concurrent-ios"] = val;
3800 } else if (ceph_argparse_witharg(args, i, &val, "--block-size", (char*)NULL)) {
3801 opts["block-size"] = val;
3802 } else if (ceph_argparse_witharg(args, i, &val, "-b", (char*)NULL)) {
3803 opts["block-size"] = val;
3804 } else if (ceph_argparse_witharg(args, i, &val, "--object-size", (char*)NULL)) {
3805 opts["object-size"] = val;
3806 } else if (ceph_argparse_witharg(args, i, &val, "--max-objects", (char*)NULL)) {
3807 opts["max-objects"] = val;
3808 } else if (ceph_argparse_witharg(args, i, &val, "--offset", (char*)NULL)) {
3809 opts["offset"] = val;
3810 } else if (ceph_argparse_witharg(args, i, &val, "-o", (char*)NULL)) {
3811 opts["object-size"] = val;
3812 } else if (ceph_argparse_witharg(args, i, &val, "-s", "--snap", (char*)NULL)) {
3813 opts["snap"] = val;
3814 } else if (ceph_argparse_witharg(args, i, &val, "-S", "--snapid", (char*)NULL)) {
3815 opts["snapid"] = val;
3816 } else if (ceph_argparse_witharg(args, i, &val, "--min-object-size", (char*)NULL)) {
3817 opts["min-object-size"] = val;
3818 } else if (ceph_argparse_witharg(args, i, &val, "--max-object-size", (char*)NULL)) {
3819 opts["max-object-size"] = val;
3820 } else if (ceph_argparse_witharg(args, i, &val, "--min-op-len", (char*)NULL)) {
3821 opts["min-op-len"] = val;
3822 } else if (ceph_argparse_witharg(args, i, &val, "--max-op-len", (char*)NULL)) {
3823 opts["max-op-len"] = val;
3824 } else if (ceph_argparse_witharg(args, i, &val, "--max-ops", (char*)NULL)) {
3825 opts["max-ops"] = val;
3826 } else if (ceph_argparse_witharg(args, i, &val, "--max-backlog", (char*)NULL)) {
3827 opts["max-backlog"] = val;
3828 } else if (ceph_argparse_witharg(args, i, &val, "--target-throughput", (char*)NULL)) {
3829 opts["target-throughput"] = val;
3830 } else if (ceph_argparse_witharg(args, i, &val, "--read-percent", (char*)NULL)) {
3831 opts["read-percent"] = val;
3832 } else if (ceph_argparse_witharg(args, i, &val, "--num-objects", (char*)NULL)) {
3833 opts["num-objects"] = val;
3834 } else if (ceph_argparse_witharg(args, i, &val, "--run-length", (char*)NULL)) {
3835 opts["run-length"] = val;
3836 } else if (ceph_argparse_witharg(args, i, &val, "--workers", (char*)NULL)) {
3837 opts["workers"] = val;
3838 } else if (ceph_argparse_witharg(args, i, &val, "--format", (char*)NULL)) {
3839 opts["format"] = val;
3840 } else if (ceph_argparse_witharg(args, i, &val, "--lock-tag", (char*)NULL)) {
3841 opts["lock-tag"] = val;
3842 } else if (ceph_argparse_witharg(args, i, &val, "--lock-cookie", (char*)NULL)) {
3843 opts["lock-cookie"] = val;
3844 } else if (ceph_argparse_witharg(args, i, &val, "--lock-description", (char*)NULL)) {
3845 opts["lock-description"] = val;
3846 } else if (ceph_argparse_witharg(args, i, &val, "--lock-duration", (char*)NULL)) {
3847 opts["lock-duration"] = val;
3848 } else if (ceph_argparse_witharg(args, i, &val, "--lock-type", (char*)NULL)) {
3849 opts["lock-type"] = val;
3850 } else if (ceph_argparse_witharg(args, i, &val, "-N", "--namespace", (char*)NULL)) {
3851 opts["namespace"] = val;
3852 } else if (ceph_argparse_flag(args, i, "--all", (char*)NULL)) {
3853 opts["all"] = "true";
3854 } else if (ceph_argparse_flag(args, i, "--default", (char*)NULL)) {
3855 opts["default"] = "true";
3856 } else if (ceph_argparse_witharg(args, i, &val, "-o", "--output", (char*)NULL)) {
3857 opts["output"] = val;
3858 } else if (ceph_argparse_flag(args, i, "--write-omap", (char*)NULL)) {
3859 opts["write-dest-omap"] = "true";
3860 } else if (ceph_argparse_flag(args, i, "--write-object", (char*)NULL)) {
3861 opts["write-dest-obj"] = "true";
3862 } else if (ceph_argparse_flag(args, i, "--write-xattr", (char*)NULL)) {
3863 opts["write-dest-xattr"] = "true";
3864 } else if (ceph_argparse_flag(args, i, "--with-clones", (char*)NULL)) {
3865 opts["with-clones"] = "true";
3866 } else if (ceph_argparse_witharg(args, i, &val, "--omap-key-file", (char*)NULL)) {
3867 opts["omap-key-file"] = val;
3868 } else if (ceph_argparse_witharg(args, i, &val, "--pgid", (char*)NULL)) {
3869 opts["pgid"] = val;
3870 } else {
3871 if (val[0] == '-')
3872 usage_exit();
3873 ++i;
3874 }
3875 }
3876
3877 if (args.empty()) {
3878 cerr << "rados: you must give an action. Try --help" << std::endl;
3879 return 1;
3880 }
3881
3882 return rados_tool_common(opts, args);
3883 }