1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2012 Inktank, Inc.
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
13 #include <boost/program_options/variables_map.hpp>
14 #include <boost/program_options/parsers.hpp>
15 #include <boost/scope_exit.hpp>
20 #include "common/Formatter.h"
21 #include "common/errno.h"
23 #include "auth/KeyRing.h"
24 #include "auth/cephx/CephxKeyServer.h"
25 #include "global/global_init.h"
26 #include "include/stringify.h"
27 #include "mgr/mgr_commands.h"
28 #include "mon/AuthMonitor.h"
29 #include "mon/MonitorDBStore.h"
30 #include "mon/Paxos.h"
31 #include "mon/MonMap.h"
32 #include "mds/FSMap.h"
33 #include "mon/MgrMap.h"
34 #include "osd/OSDMap.h"
35 #include "crush/CrushCompiler.h"
37 namespace po
= boost::program_options
;
43 MonitorDBStore::TransactionRef t
;
45 explicit TraceIter(string fname
) : fd(-1), idx(-1) {
46 fd
= ::open(fname
.c_str(), O_RDONLY
);
47 t
.reset(new MonitorDBStore::Transaction
);
52 MonitorDBStore::TransactionRef
cur() {
56 unsigned num() { return idx
; }
60 int r
= bl
.read_fd(fd
, 6);
62 std::cerr
<< "Got error: " << cpp_strerror(r
) << " on read_fd"
67 } else if ((unsigned)r
< 6) {
68 std::cerr
<< "short read" << std::endl
;
73 bufferlist::iterator bliter
= bl
.begin();
75 ::decode(ver
, bliter
);
76 ::decode(ver2
, bliter
);
78 ::decode(len
, bliter
);
79 r
= bl
.read_fd(fd
, len
);
81 std::cerr
<< "Got error: " << cpp_strerror(r
) << " on read_fd"
86 } else if ((unsigned)r
< len
) {
87 std::cerr
<< "short read" << std::endl
;
93 t
.reset(new MonitorDBStore::Transaction
);
109 po::options_description
*desc
, /// < visible options description
110 po::options_description
*hidden_desc
, /// < hidden options description
111 po::positional_options_description
*positional
, /// < positional args
112 vector
<string
> &cmd_args
, /// < arguments to be parsed
113 po::variables_map
*vm
/// > post-parsing variable map
116 // desc_all will aggregate all visible and hidden options for parsing.
118 // From boost's program_options point of view, there is absolutely no
119 // distinction between 'desc' and 'hidden_desc'. This is a distinction
120 // that is only useful to us: 'desc' is whatever we are willing to show
121 // on 'usage()', whereas 'hidden_desc' refers to parameters we wish to
122 // take advantage of but do not wish to show on 'usage()'.
124 // For example, consider that program_options matches positional arguments
125 // (specified via 'positional') against the parameters defined on a
126 // given 'po::options_description' class. This is performed below,
127 // supplying both the description and the positional arguments to the
128 // parser. However, we do not want the parameters that are mapped to
129 // positional arguments to be shown on usage, as that makes for ugly and
130 // confusing usage messages. Therefore we dissociate the options'
131 // description that is to be used as an aid to the user from those options
132 // that are nothing but useful for internal purposes (i.e., mapping options
133 // to positional arguments). We still need to aggregate them before parsing
134 // and that's what 'desc_all' is all about.
137 assert(desc
!= NULL
);
139 po::options_description desc_all
;
141 if (hidden_desc
!= NULL
)
142 desc_all
.add(*hidden_desc
);
145 po::command_line_parser parser
= po::command_line_parser(cmd_args
).
149 parser
= parser
.positional(*positional
);
152 po::parsed_options parsed
= parser
.run();
153 po::store(parsed
, *vm
);
155 } catch (po::error
&e
) {
156 std::cerr
<< "error: " << e
.what() << std::endl
;
164 * usage: ceph-monstore-tool <store-path> <command> [options]
168 * store-copy < --out arg >
171 * getmonmap < --out arg [ --version arg ] >
172 * getosdmap < --out arg [ --version arg ] >
173 * dump-paxos <--dump-start VER> <--dump-end VER>
174 * dump-trace < --trace-file arg >
182 * ceph-monstore-tool PATH CMD [options]
184 * ceph-monstore-tool PATH store-copy <PATH2 | -o PATH2>
185 * ceph-monstore-tool PATH dump-keys
186 * ceph-monstore-tool PATH compact
187 * ceph-monstore-tool PATH get monmap [VER]
188 * ceph-monstore-tool PATH get osdmap [VER]
189 * ceph-monstore-tool PATH dump-paxos STARTVER ENDVER
193 void usage(const char *n
, po::options_description
&d
)
196 "usage: " << n
<< " <store-path> <cmd> [args|options]\n"
199 << " store-copy PATH copies store to PATH\n"
200 << " compact compacts the store\n"
201 << " get monmap [-- options] get monmap (version VER if specified)\n"
202 << " (default: last committed)\n"
203 << " get osdmap [-- options] get osdmap (version VER if specified)\n"
204 << " (default: last committed)\n"
205 << " get mdsmap [-- options] get mdsmap (version VER if specified)\n"
206 << " (default: last committed)\n"
207 << " get mgr [-- options] get mgr map (version VER if specified)\n"
208 << " (default: last committed)\n"
209 << " get crushmap [-- options] get crushmap (version VER if specified)\n"
210 << " (default: last committed)\n"
211 << " show-versions [-- options] show the first&last committed version of map\n"
212 << " (show-versions -- --help for more info)\n"
213 << " dump-keys dumps store keys to FILE\n"
214 << " (default: stdout)\n"
215 << " dump-paxos [-- options] dump paxos transactions\n"
216 << " (dump-paxos -- --help for more info)\n"
217 << " dump-trace FILE [-- options] dump contents of trace file FILE\n"
218 << " (dump-trace -- --help for more info)\n"
219 << " replay-trace FILE [-- options] replay trace from FILE\n"
220 << " (replay-trace -- --help for more info)\n"
221 << " random-gen [-- options] add randomly generated ops to the store\n"
222 << " (random-gen -- --help for more info)\n"
223 << " rewrite-crush [-- options] add a rewrite commit to the store\n"
224 << " (rewrite-crush -- --help for more info)\n"
225 << " inflate-pgmap [-- options] add given number of pgmaps to store\n"
226 << " (inflate-pgmap -- --help for more info)\n"
227 << " rebuild rebuild store\n"
228 << " (rebuild -- --help for more info)\n"
230 std::cerr
<< d
<< std::endl
;
232 << "\nPlease Note:\n"
233 << "* Ceph-specific options should be in the format --option-name=VAL\n"
234 << " (specifically, do not forget the '='!!)\n"
235 << "* Command-specific options need to be passed after a '--'\n"
236 << " e.g., 'get monmap -- --version 10 --out /tmp/foo'"
240 int update_osdmap(MonitorDBStore
& store
, version_t ver
, bool copy
,
241 ceph::shared_ptr
<CrushWrapper
> crush
,
242 MonitorDBStore::Transaction
* t
) {
243 const string
prefix("osdmap");
248 r
= store
.get(prefix
, store
.combine_strings("full", ver
), bl
);
250 std::cerr
<< "Error getting full map: " << cpp_strerror(r
) << std::endl
;
255 osdmap
.crush
= crush
;
260 // be consistent with OSDMonitor::update_from_paxos()
261 osdmap
.encode(bl
, CEPH_FEATURES_ALL
|CEPH_FEATURE_RESERVED
);
262 t
->put(prefix
, store
.combine_strings("full", osdmap
.get_epoch()), bl
);
265 OSDMap::Incremental inc
;
267 inc
.epoch
= osdmap
.get_epoch();
268 inc
.fsid
= osdmap
.get_fsid();
271 r
= store
.get(prefix
, ver
, bl
);
273 std::cerr
<< "Error getting inc map: " << cpp_strerror(r
) << std::endl
;
276 OSDMap::Incremental
inc(bl
);
277 if (inc
.crush
.length()) {
279 crush
->encode(inc
.crush
, CEPH_FEATURES_SUPPORTED_DEFAULT
);
281 if (inc
.fullmap
.length()) {
283 fullmap
.decode(inc
.fullmap
);
284 fullmap
.crush
= crush
;
286 fullmap
.encode(inc
.fullmap
);
289 assert(osdmap
.have_crc());
290 inc
.full_crc
= osdmap
.get_crc();
292 // be consistent with OSDMonitor::update_from_paxos()
293 inc
.encode(bl
, CEPH_FEATURES_ALL
|CEPH_FEATURE_RESERVED
);
294 t
->put(prefix
, inc
.epoch
, bl
);
298 int rewrite_transaction(MonitorDBStore
& store
, int version
,
299 const string
& crush_file
,
300 MonitorDBStore::Transaction
* t
) {
301 const string
prefix("osdmap");
303 // calc the known-good epoch
304 version_t last_committed
= store
.get(prefix
, "last_committed");
305 version_t good_version
= 0;
307 if (last_committed
>= (unsigned)-version
) {
308 good_version
= last_committed
+ version
;
310 std::cerr
<< "osdmap-version is less than: -" << last_committed
<< std::endl
;
314 good_version
= version
;
316 if (good_version
>= last_committed
) {
317 std::cout
<< "good epoch is greater or equal to the last committed one: "
318 << good_version
<< " >= " << last_committed
<< std::endl
;
322 // load/extract the crush map
324 ceph::shared_ptr
<CrushWrapper
> crush(new CrushWrapper
);
325 if (crush_file
.empty()) {
327 r
= store
.get(prefix
, store
.combine_strings("full", good_version
), bl
);
329 std::cerr
<< "Error getting map: " << cpp_strerror(r
) << std::endl
;
334 crush
= osdmap
.crush
;
338 r
= bl
.read_file(crush_file
.c_str(), &err
);
340 std::cerr
<< err
<< ": " << cpp_strerror(r
) << std::endl
;
343 bufferlist::iterator p
= bl
.begin();
347 // prepare a transaction to rewrite the epochs
348 // (good_version, last_committed]
349 // with the good crush map.
350 // XXX: may need to break this into several paxos versions?
351 assert(good_version
< last_committed
);
352 for (version_t v
= good_version
+ 1; v
<= last_committed
; v
++) {
353 cout
<< "rewriting epoch #" << v
<< "/" << last_committed
<< std::endl
;
354 r
= update_osdmap(store
, v
, false, crush
, t
);
359 // add a new osdmap epoch to store, so monitors will update their current osdmap
360 // in addition to the ones stored in epochs.
362 // This is needed due to the way the monitor updates from paxos and the
363 // facilities we are leveraging to push this update to the rest of the
366 // In a nutshell, we are generating a good version of the osdmap, with a
367 // proper crush, and building a transaction that will replace the bad
368 // osdmaps with good osdmaps. But this transaction needs to be applied on
369 // all nodes, so that the monitors will have good osdmaps to share with
370 // clients. We thus leverage Paxos, specifically the recovery mechanism, by
371 // creating a pending value that will be committed once the monitors form an
372 // initial quorum after being brought back to life.
374 // However, the way the monitor works has the paxos services, including the
375 // OSDMonitor, updating their state from disk *before* the recovery phase
376 // begins (so they have an up to date state in memory). This means the
377 // OSDMonitor will see the old, broken map, before the new paxos version is
378 // applied to disk, and the old version is cached. Even though we have the
379 // good map now, and we share the good map with clients, we will still be
380 // working on the old broken map. Instead of mucking around the monitor to
381 // make this work, we instead opt for adding the same osdmap but with a
382 // newer version, so that the OSDMonitor picks up on it when it updates from
383 // paxos after the proposal has been committed. This is not elegant, but
384 // avoids further unpleasantness that would arise from kludging around the
385 // current behavior. Also, it has the added benefit of making sure the clients
386 // get an updated version of the map (because last_committed+1 >
387 // last_committed) :)
389 cout
<< "adding a new epoch #" << last_committed
+1 << std::endl
;
390 r
= update_osdmap(store
, last_committed
++, true, crush
, t
);
393 t
->put(prefix
, store
.combine_strings("full", "latest"), last_committed
);
394 t
->put(prefix
, "last_committed", last_committed
);
399 * Create a new paxos version which carries a proposal to rewrite all epochs
400 * of incremental and full map of "osdmap" after a faulty crush map is injected,
401 * so the leader will trigger a recovery and propagate this fix to its peons
402 * after the proposal is accepted and the transaction in it is applied. All
403 * monitors will rewrite the bad crush map with the good one, and have a new
404 * osdmap epoch with the good crush map in it.
406 int rewrite_crush(const char* progname
,
407 vector
<string
>& subcmds
,
408 MonitorDBStore
& store
) {
409 po::options_description
op_desc("Allowed 'rewrite-crush' options");
412 op_desc
.add_options()
413 ("help,h", "produce this help message")
414 ("crush", po::value
<string
>(&crush_file
),
415 ("path to the crush map file "
416 "(default: will instead extract it from the known-good osdmap)"))
417 ("good-epoch", po::value
<int>(&version
),
418 "known-good epoch of osdmap, if a negative number '-N' is given, the "
419 "$last_committed-N is used instead (default: -1). "
420 "Please note, -1 is not necessarily a good epoch, because there are "
421 "good chance that we have more epochs slipped into the monstore after "
422 "the one where the crushmap is firstly injected.")
424 po::variables_map op_vm
;
425 int r
= parse_cmd_args(&op_desc
, NULL
, NULL
, subcmds
, &op_vm
);
429 if (op_vm
.count("help")) {
430 usage(progname
, op_desc
);
434 MonitorDBStore::Transaction rewrite_txn
;
435 r
= rewrite_transaction(store
, version
, crush_file
, &rewrite_txn
);
440 // store the transaction into store as a proposal
441 const string
prefix("paxos");
442 version_t pending_v
= store
.get(prefix
, "last_committed") + 1;
443 auto t(std::make_shared
<MonitorDBStore::Transaction
>());
445 rewrite_txn
.encode(bl
);
446 cout
<< "adding pending commit " << pending_v
447 << " " << bl
.length() << " bytes" << std::endl
;
448 t
->put(prefix
, pending_v
, bl
);
449 t
->put(prefix
, "pending_v", pending_v
);
450 // a large enough yet unique proposal number will probably do the trick
451 version_t pending_pn
= (store
.get(prefix
, "accepted_pn") / 100 + 4) * 100 + 1;
452 t
->put(prefix
, "pending_pn", pending_pn
);
453 store
.apply_transaction(t
);
457 int inflate_pgmap(MonitorDBStore
& st
, unsigned n
, bool can_be_trimmed
) {
458 // put latest pg map into monstore to bloat it up
459 // only format version == 1 is supported
460 version_t last
= st
.get("pgmap", "last_committed");
463 // get the latest delta
464 int r
= st
.get("pgmap", last
, bl
);
466 std::cerr
<< "Error getting pgmap: " << cpp_strerror(r
) << std::endl
;
470 // try to pull together an idempotent "delta"
471 ceph::unordered_map
<pg_t
, pg_stat_t
> pg_stat
;
472 for (KeyValueDB::Iterator i
= st
.get_iterator("pgmap_pg");
473 i
->valid(); i
->next()) {
475 if (!pgid
.parse(i
->key().c_str())) {
476 std::cerr
<< "unable to parse key " << i
->key() << std::endl
;
479 bufferlist pg_bl
= i
->value();
481 bufferlist::iterator p
= pg_bl
.begin();
483 // will update the last_epoch_clean of all the pgs.
487 version_t first
= st
.get("pgmap", "first_committed");
488 version_t ver
= last
;
489 auto txn(std::make_shared
<MonitorDBStore::Transaction
>());
490 for (unsigned i
= 0; i
< n
; i
++) {
492 bufferlist dirty_pgs
;
493 for (ceph::unordered_map
<pg_t
, pg_stat_t
>::iterator ps
= pg_stat
.begin();
494 ps
!= pg_stat
.end(); ++ps
) {
495 ::encode(ps
->first
, dirty_pgs
);
496 if (!can_be_trimmed
) {
497 ps
->second
.last_epoch_clean
= first
;
499 ::encode(ps
->second
, dirty_pgs
);
501 utime_t inc_stamp
= ceph_clock_now();
502 ::encode(inc_stamp
, trans_bl
);
503 ::encode_destructively(dirty_pgs
, trans_bl
);
504 bufferlist dirty_osds
;
505 ::encode(dirty_osds
, trans_bl
);
506 txn
->put("pgmap", ++ver
, trans_bl
);
507 // update the db in batch
508 if (txn
->size() > 1024) {
509 st
.apply_transaction(txn
);
510 // reset the transaction
511 txn
.reset(new MonitorDBStore::Transaction
);
514 txn
->put("pgmap", "last_committed", ver
);
515 txn
->put("pgmap_meta", "version", ver
);
516 // this will also piggy back the leftover pgmap added in the loop above
517 st
.apply_transaction(txn
);
521 static int update_auth(MonitorDBStore
& st
, const string
& keyring_path
)
523 // import all keyrings stored in the keyring file
525 int r
= keyring
.load(g_ceph_context
, keyring_path
);
527 cerr
<< "unable to load admin keyring: " << keyring_path
<< std::endl
;
535 for (const auto& k
: keyring
.get_keys()) {
536 KeyServerData::Incremental auth_inc
;
537 auth_inc
.name
= k
.first
;
538 auth_inc
.auth
= k
.second
;
539 if (auth_inc
.auth
.caps
.empty()) {
540 cerr
<< "no caps granted to: " << auth_inc
.name
<< std::endl
;
543 auth_inc
.op
= KeyServerData::AUTH_INC_ADD
;
545 AuthMonitor::Incremental inc
;
546 inc
.inc_type
= AuthMonitor::AUTH_DATA
;
547 ::encode(auth_inc
, inc
.auth_data
);
548 inc
.auth_type
= CEPH_AUTH_CEPHX
;
550 inc
.encode(bl
, CEPH_FEATURES_ALL
);
553 const string
prefix("auth");
554 auto last_committed
= st
.get(prefix
, "last_committed") + 1;
555 auto t
= make_shared
<MonitorDBStore::Transaction
>();
556 t
->put(prefix
, last_committed
, bl
);
557 t
->put(prefix
, "last_committed", last_committed
);
558 auto first_committed
= st
.get(prefix
, "first_committed");
559 if (!first_committed
) {
560 t
->put(prefix
, "first_committed", last_committed
);
562 st
.apply_transaction(t
);
566 static int update_mkfs(MonitorDBStore
& st
)
569 int r
= monmap
.build_initial(g_ceph_context
, cerr
);
571 cerr
<< "no initial monitors" << std::endl
;
575 monmap
.encode(bl
, CEPH_FEATURES_ALL
);
577 auto t
= make_shared
<MonitorDBStore::Transaction
>();
578 t
->put("mkfs", "monmap", bl
);
579 st
.apply_transaction(t
);
583 static int update_monitor(MonitorDBStore
& st
)
585 const string
prefix("monitor");
586 // a stripped-down Monitor::mkfs()
588 bl
.append(CEPH_MON_ONDISK_MAGIC
"\n");
589 auto t
= make_shared
<MonitorDBStore::Transaction
>();
590 t
->put(prefix
, "magic", bl
);
591 st
.apply_transaction(t
);
597 // - mgr_command_desc
598 static int update_mgrmap(MonitorDBStore
& st
)
600 auto t
= make_shared
<MonitorDBStore::Transaction
>();
604 // mgr expects epoch > 1
606 auto initial_modules
=
607 get_str_vec(g_ceph_context
->_conf
->get_val
<string
>("mgr_initial_modules"));
608 copy(begin(initial_modules
),
609 end(initial_modules
),
610 inserter(map
.modules
, end(map
.modules
)));
612 map
.encode(bl
, CEPH_FEATURES_ALL
);
613 t
->put("mgr", map
.epoch
, bl
);
614 t
->put("mgr", "last_committed", map
.epoch
);
617 auto mgr_command_descs
= mgr_commands
;
618 for (auto& c
: mgr_command_descs
) {
619 c
.set_flag(MonCommand::FLAG_MGR
);
622 ::encode(mgr_command_descs
, bl
);
623 t
->put("mgr_command_desc", "", bl
);
625 return st
.apply_transaction(t
);
628 static int update_paxos(MonitorDBStore
& st
)
630 // Build a pending paxos proposal from all non-permanent k/v pairs. Once the
631 // proposal is committed, it will get applied. On the sync provider side, it
632 // will be a no-op, but on its peers, the paxos commit will help to build up
633 // the necessary epochs.
634 bufferlist pending_proposal
;
636 MonitorDBStore::Transaction t
;
637 vector
<string
> prefixes
= {"auth", "osdmap",
638 "mgr", "mgr_command_desc",
639 "pgmap", "pgmap_pg", "pgmap_meta"};
640 for (const auto& prefix
: prefixes
) {
641 for (auto i
= st
.get_iterator(prefix
); i
->valid(); i
->next()) {
642 auto key
= i
->raw_key();
643 auto val
= i
->value();
644 t
.put(key
.first
, key
.second
, val
);
647 t
.encode(pending_proposal
);
649 const string
prefix("paxos");
650 auto t
= make_shared
<MonitorDBStore::Transaction
>();
651 t
->put(prefix
, "first_committed", 0);
652 t
->put(prefix
, "last_committed", 0);
654 t
->put(prefix
, pending_v
, pending_proposal
);
655 t
->put(prefix
, "pending_v", pending_v
);
656 t
->put(prefix
, "pending_pn", 400);
657 st
.apply_transaction(t
);
662 // - pgmap_meta/version
663 // - pgmap_meta/last_osdmap_epoch
664 // - pgmap_meta/last_pg_scan
665 // - pgmap_meta/full_ratio
666 // - pgmap_meta/nearfull_ratio
667 // - pgmap_meta/stamp
668 static int update_pgmap_meta(MonitorDBStore
& st
)
670 const string
prefix("pgmap_meta");
671 auto t
= make_shared
<MonitorDBStore::Transaction
>();
672 // stolen from PGMonitor::create_pending()
673 // the first pgmap_meta
674 t
->put(prefix
, "version", 1);
676 auto stamp
= ceph_clock_now();
679 t
->put(prefix
, "stamp", bl
);
682 auto last_osdmap_epoch
= st
.get("osdmap", "last_committed");
683 t
->put(prefix
, "last_osdmap_epoch", last_osdmap_epoch
);
685 // be conservative, so PGMonitor will scan all the pools for pg changes
686 t
->put(prefix
, "last_pg_scan", 1);
688 auto full_ratio
= g_ceph_context
->_conf
->mon_osd_full_ratio
;
689 if (full_ratio
> 1.0)
692 ::encode(full_ratio
, bl
);
693 t
->put(prefix
, "full_ratio", bl
);
696 auto backfillfull_ratio
= g_ceph_context
->_conf
->mon_osd_backfillfull_ratio
;
697 if (backfillfull_ratio
> 1.0)
698 backfillfull_ratio
/= 100.0;
700 ::encode(backfillfull_ratio
, bl
);
701 t
->put(prefix
, "backfillfull_ratio", bl
);
704 auto nearfull_ratio
= g_ceph_context
->_conf
->mon_osd_nearfull_ratio
;
705 if (nearfull_ratio
> 1.0)
706 nearfull_ratio
/= 100.0;
708 ::encode(nearfull_ratio
, bl
);
709 t
->put(prefix
, "nearfull_ratio", bl
);
711 st
.apply_transaction(t
);
715 int rebuild_monstore(const char* progname
,
716 vector
<string
>& subcmds
,
719 po::options_description
op_desc("Allowed 'rebuild' options");
721 op_desc
.add_options()
722 ("keyring", po::value
<string
>(&keyring_path
),
723 "path to the client.admin key");
724 po::variables_map op_vm
;
725 int r
= parse_cmd_args(&op_desc
, nullptr, nullptr, subcmds
, &op_vm
);
729 if (op_vm
.count("help")) {
730 usage(progname
, op_desc
);
733 if (!keyring_path
.empty())
734 update_auth(st
, keyring_path
);
735 if ((r
= update_pgmap_meta(st
))) {
738 if ((r
= update_mgrmap(st
))) {
741 if ((r
= update_paxos(st
))) {
744 if ((r
= update_mkfs(st
))) {
747 if ((r
= update_monitor(st
))) {
753 int main(int argc
, char **argv
) {
755 po::options_description
desc("Allowed options");
756 string store_path
, cmd
;
757 vector
<string
> subcmds
;
759 ("help,h", "produce help message")
762 /* Dear Future Developer:
764 * for further improvement, should you need to pass specific options to
765 * a command (e.g., get osdmap VER --hex), you can expand the current
766 * format by creating additional 'po::option_description' and passing
767 * 'subcmds' to 'po::command_line_parser', much like what is currently
768 * done by default. However, beware: in order to differentiate a
769 * command-specific option from the generic/global options, you will need
770 * to pass '--' in the command line (so that the first parser, the one
771 * below, assumes it has reached the end of all options); e.g.,
772 * 'get osdmap VER -- --hex'. Not pretty; far from intuitive; it was as
773 * far as I got with this library. Improvements on this format will be
774 * left as an exercise for the reader. -Joao
776 po::options_description
positional_desc("Positional argument options");
777 positional_desc
.add_options()
778 ("store-path", po::value
<string
>(&store_path
),
779 "path to monitor's store")
780 ("command", po::value
<string
>(&cmd
),
782 ("subcmd", po::value
<vector
<string
> >(&subcmds
),
783 "Command arguments/Sub-Commands")
785 po::positional_options_description positional
;
786 positional
.add("store-path", 1);
787 positional
.add("command", 1);
788 positional
.add("subcmd", -1);
790 po::options_description
all_desc("All options");
791 all_desc
.add(desc
).add(positional_desc
);
793 vector
<string
> ceph_option_strings
;
794 po::variables_map vm
;
796 po::parsed_options parsed
=
797 po::command_line_parser(argc
, argv
).
799 positional(positional
).
800 allow_unregistered().run();
807 // Specifying po::include_positional would have our positional arguments
808 // being collected (thus being part of ceph_option_strings and eventually
809 // passed on to global_init() below).
810 // Instead we specify po::exclude_positional, which has the upside of
811 // completely avoiding this, but the downside of having to specify ceph
812 // options as --VAR=VAL (note the '='); otherwise we will capture the
813 // positional 'VAL' as belonging to us, never being collected.
814 ceph_option_strings
= po::collect_unrecognized(parsed
.options
,
815 po::exclude_positional
);
817 } catch(po::error
&e
) {
818 std::cerr
<< "error: " << e
.what() << std::endl
;
822 // parse command structure before calling global_init() and friends.
824 if (vm
.empty() || vm
.count("help") ||
825 store_path
.empty() || cmd
.empty() ||
826 *cmd
.begin() == '-') {
827 usage(argv
[0], desc
);
831 vector
<const char *> ceph_options
, def_args
;
832 ceph_options
.reserve(ceph_option_strings
.size());
833 for (vector
<string
>::iterator i
= ceph_option_strings
.begin();
834 i
!= ceph_option_strings
.end();
836 ceph_options
.push_back(i
->c_str());
839 auto cct
= global_init(
840 &def_args
, ceph_options
, CEPH_ENTITY_TYPE_MON
,
841 CODE_ENVIRONMENT_UTILITY
, 0);
842 common_init_finish(g_ceph_context
);
843 g_ceph_context
->_conf
->apply_changes(NULL
);
844 g_conf
= g_ceph_context
->_conf
;
846 // this is where we'll write *whatever*, on a per-command basis.
847 // not all commands require some place to write their things.
848 MonitorDBStore
st(store_path
);
849 if (store_path
.size()) {
853 std::cerr
<< ss
.str() << std::endl
;
858 if (cmd
== "dump-keys") {
859 KeyValueDB::WholeSpaceIterator iter
= st
.get_iterator();
860 while (iter
->valid()) {
861 pair
<string
,string
> key(iter
->raw_key());
862 cout
<< key
.first
<< " / " << key
.second
<< std::endl
;
865 } else if (cmd
== "compact") {
867 } else if (cmd
== "get") {
870 bool readable
= false;
872 // visible options for this command
873 po::options_description
op_desc("Allowed 'get' options");
874 op_desc
.add_options()
875 ("help,h", "produce this help message")
876 ("out,o", po::value
<string
>(&outpath
),
877 "output file (default: stdout)")
878 ("version,v", po::value
<unsigned>(&v
),
879 "map version to obtain")
880 ("readable,r", po::value
<bool>(&readable
)->default_value(false),
881 "print the map infomation in human readable format")
883 // this is going to be a positional argument; we don't want to show
884 // it as an option during --help, but we do want to have it captured
886 po::options_description
hidden_op_desc("Hidden 'get' options");
887 hidden_op_desc
.add_options()
888 ("map-type", po::value
<string
>(&map_type
),
891 po::positional_options_description op_positional
;
892 op_positional
.add("map-type", 1);
894 po::variables_map op_vm
;
895 int r
= parse_cmd_args(&op_desc
, &hidden_op_desc
, &op_positional
,
902 if (op_vm
.count("help") || map_type
.empty()) {
903 usage(argv
[0], op_desc
);
909 if (map_type
== "crushmap") {
910 v
= st
.get("osdmap", "last_committed");
912 v
= st
.get(map_type
, "last_committed");
916 int fd
= STDOUT_FILENO
;
917 if (!outpath
.empty()){
918 fd
= ::open(outpath
.c_str(), O_WRONLY
|O_CREAT
|O_TRUNC
, 0666);
920 std::cerr
<< "error opening output file: "
921 << cpp_strerror(errno
) << std::endl
;
927 BOOST_SCOPE_EXIT((&r
) (&fd
) (&outpath
)) {
929 if (r
< 0 && fd
!= STDOUT_FILENO
) {
930 ::remove(outpath
.c_str());
932 } BOOST_SCOPE_EXIT_END
936 if (map_type
== "osdmap") {
937 r
= st
.get(map_type
, st
.combine_strings("full", v
), bl
);
938 } else if (map_type
== "crushmap") {
940 r
= st
.get("osdmap", st
.combine_strings("full", v
), tmp
);
944 osdmap
.crush
->encode(bl
, CEPH_FEATURES_SUPPORTED_DEFAULT
);
947 r
= st
.get(map_type
, v
, bl
);
950 std::cerr
<< "Error getting map: " << cpp_strerror(r
) << std::endl
;
959 if (map_type
== "monmap") {
963 } else if (map_type
== "osdmap") {
967 } else if (map_type
== "mdsmap") {
971 } else if (map_type
== "mgr") {
976 f
.dump_object("mgrmap", mgr_map
);
978 } else if (map_type
== "crushmap") {
980 bufferlist::iterator it
= bl
.begin();
982 CrushCompiler
cc(cw
, std::cerr
, 0);
985 std::cerr
<< "This type of readable map does not exist: " << map_type
986 << std::endl
<< "You can only specify[osdmap|monmap|mdsmap"
987 "|crushmap|mgr]" << std::endl
;
989 } catch (const buffer::error
&err
) {
990 std::cerr
<< "Could not decode for human readable output (you may still"
991 " use non-readable mode). Detail: " << err
<< std::endl
;
1000 if (!outpath
.empty()) {
1001 std::cout
<< "wrote " << map_type
1002 << " version " << v
<< " to " << outpath
1005 } else if (cmd
== "show-versions") {
1006 string map_type
; //map type:osdmap,monmap...
1007 // visible options for this command
1008 po::options_description
op_desc("Allowed 'show-versions' options");
1009 op_desc
.add_options()
1010 ("help,h", "produce this help message")
1011 ("map-type", po::value
<string
>(&map_type
), "map_type");
1013 po::positional_options_description op_positional
;
1014 op_positional
.add("map-type", 1);
1016 po::variables_map op_vm
;
1017 int r
= parse_cmd_args(&op_desc
, NULL
, &op_positional
,
1024 if (op_vm
.count("help") || map_type
.empty()) {
1025 usage(argv
[0], op_desc
);
1030 unsigned int v_first
= 0;
1031 unsigned int v_last
= 0;
1032 v_first
= st
.get(map_type
, "first_committed");
1033 v_last
= st
.get(map_type
, "last_committed");
1035 std::cout
<< "first committed:\t" << v_first
<< "\n"
1036 << "last committed:\t" << v_last
<< std::endl
;
1037 } else if (cmd
== "dump-paxos") {
1038 unsigned dstart
= 0;
1039 unsigned dstop
= ~0;
1040 po::options_description
op_desc("Allowed 'dump-paxos' options");
1041 op_desc
.add_options()
1042 ("help,h", "produce this help message")
1043 ("start,s", po::value
<unsigned>(&dstart
),
1044 "starting version (default: 0)")
1045 ("end,e", po::value
<unsigned>(&dstop
),
1046 "finish version (default: ~0)")
1049 po::variables_map op_vm
;
1050 int r
= parse_cmd_args(&op_desc
, NULL
, NULL
,
1057 if (op_vm
.count("help")) {
1058 usage(argv
[0], op_desc
);
1063 if (dstart
> dstop
) {
1064 std::cerr
<< "error: 'start' version (value: " << dstart
<< ") "
1065 << " is greater than 'end' version (value: " << dstop
<< ")"
1071 version_t v
= dstart
;
1072 for (; v
<= dstop
; ++v
) {
1074 st
.get("paxos", v
, bl
);
1075 if (bl
.length() == 0)
1077 cout
<< "\n--- " << v
<< " ---" << std::endl
;
1078 auto tx(std::make_shared
<MonitorDBStore::Transaction
>());
1079 Paxos::decode_append_transaction(tx
, bl
);
1080 JSONFormatter
f(true);
1085 std::cout
<< "dumped " << v
<< " paxos versions" << std::endl
;
1087 } else if (cmd
== "dump-trace") {
1088 unsigned dstart
= 0;
1089 unsigned dstop
= ~0;
1092 // visible options for this command
1093 po::options_description
op_desc("Allowed 'dump-trace' options");
1094 op_desc
.add_options()
1095 ("help,h", "produce this help message")
1096 ("start,s", po::value
<unsigned>(&dstart
),
1097 "starting version (default: 0)")
1098 ("end,e", po::value
<unsigned>(&dstop
),
1099 "finish version (default: ~0)")
1101 // this is going to be a positional argument; we don't want to show
1102 // it as an option during --help, but we do want to have it captured
1104 po::options_description
hidden_op_desc("Hidden 'dump-trace' options");
1105 hidden_op_desc
.add_options()
1106 ("out,o", po::value
<string
>(&outpath
),
1107 "file to write the dump to")
1109 po::positional_options_description op_positional
;
1110 op_positional
.add("out", 1);
1112 po::variables_map op_vm
;
1113 int r
= parse_cmd_args(&op_desc
, &hidden_op_desc
, &op_positional
,
1120 if (op_vm
.count("help")) {
1121 usage(argv
[0], op_desc
);
1126 if (outpath
.empty()) {
1127 usage(argv
[0], op_desc
);
1132 if (dstart
> dstop
) {
1133 std::cerr
<< "error: 'start' version (value: " << dstart
<< ") "
1134 << " is greater than 'stop' version (value: " << dstop
<< ")"
1140 TraceIter
iter(outpath
.c_str());
1145 if (iter
.num() >= dstop
) {
1148 if (iter
.num() >= dstart
) {
1149 JSONFormatter
f(true);
1150 iter
.cur()->dump(&f
, false);
1152 std::cout
<< std::endl
;
1156 std::cerr
<< "Read up to transaction " << iter
.num() << std::endl
;
1157 } else if (cmd
== "replay-trace") {
1159 unsigned num_replays
= 1;
1160 // visible options for this command
1161 po::options_description
op_desc("Allowed 'replay-trace' options");
1162 op_desc
.add_options()
1163 ("help,h", "produce this help message")
1164 ("num-replays,n", po::value
<unsigned>(&num_replays
),
1165 "finish version (default: 1)")
1167 // this is going to be a positional argument; we don't want to show
1168 // it as an option during --help, but we do want to have it captured
1170 po::options_description
hidden_op_desc("Hidden 'replay-trace' options");
1171 hidden_op_desc
.add_options()
1172 ("in,i", po::value
<string
>(&inpath
),
1173 "file to write the dump to")
1175 po::positional_options_description op_positional
;
1176 op_positional
.add("in", 1);
1178 // op_desc_all will aggregate all visible and hidden options for parsing.
1179 // when we call 'usage()' we just pass 'op_desc', as that's the description
1180 // holding the visible options.
1181 po::options_description op_desc_all
;
1182 op_desc_all
.add(op_desc
).add(hidden_op_desc
);
1184 po::variables_map op_vm
;
1186 po::parsed_options op_parsed
= po::command_line_parser(subcmds
).
1187 options(op_desc_all
).positional(op_positional
).run();
1188 po::store(op_parsed
, op_vm
);
1190 } catch (po::error
&e
) {
1191 std::cerr
<< "error: " << e
.what() << std::endl
;
1196 if (op_vm
.count("help")) {
1197 usage(argv
[0], op_desc
);
1202 if (inpath
.empty()) {
1203 usage(argv
[0], op_desc
);
1209 for (unsigned i
= 0; i
< num_replays
; ++i
) {
1210 TraceIter
iter(inpath
.c_str());
1215 std::cerr
<< "Replaying trans num " << num
<< std::endl
;
1216 st
.apply_transaction(iter
.cur());
1220 std::cerr
<< "Read up to transaction " << iter
.num() << std::endl
;
1222 } else if (cmd
== "random-gen") {
1223 unsigned tsize
= 200;
1224 unsigned tvalsize
= 1024;
1225 unsigned ntrans
= 100;
1226 po::options_description
op_desc("Allowed 'random-gen' options");
1227 op_desc
.add_options()
1228 ("help,h", "produce this help message")
1229 ("num-keys,k", po::value
<unsigned>(&tsize
),
1230 "keys to write in each transaction (default: 200)")
1231 ("size,s", po::value
<unsigned>(&tvalsize
),
1232 "size (in bytes) of the value to write in each key (default: 1024)")
1233 ("ntrans,n", po::value
<unsigned>(&ntrans
),
1234 "number of transactions to run (default: 100)")
1237 po::variables_map op_vm
;
1239 po::parsed_options op_parsed
= po::command_line_parser(subcmds
).
1240 options(op_desc
).run();
1241 po::store(op_parsed
, op_vm
);
1243 } catch (po::error
&e
) {
1244 std::cerr
<< "error: " << e
.what() << std::endl
;
1249 if (op_vm
.count("help")) {
1250 usage(argv
[0], op_desc
);
1256 for (unsigned i
= 0; i
< ntrans
; ++i
) {
1257 std::cerr
<< "Applying trans " << i
<< std::endl
;
1258 auto t(std::make_shared
<MonitorDBStore::Transaction
>());
1260 prefix
.push_back((i
%26)+'a');
1261 for (unsigned j
= 0; j
< tsize
; ++j
) {
1265 for (unsigned k
= 0; k
< tvalsize
; ++k
) bl
.append(rand());
1266 t
->put(prefix
, os
.str(), bl
);
1269 t
->compact_prefix(prefix
);
1270 st
.apply_transaction(t
);
1272 } else if (cmd
== "store-copy") {
1273 if (subcmds
.size() < 1 || subcmds
[0].empty()) {
1274 usage(argv
[0], desc
);
1279 string out_path
= subcmds
[0];
1281 MonitorDBStore
out_store(out_path
);
1284 int r
= out_store
.create_and_open(ss
);
1286 std::cerr
<< ss
.str() << std::endl
;
1292 KeyValueDB::WholeSpaceIterator it
= st
.get_iterator();
1293 uint64_t total_keys
= 0;
1294 uint64_t total_size
= 0;
1295 uint64_t total_tx
= 0;
1298 uint64_t num_keys
= 0;
1300 auto tx(std::make_shared
<MonitorDBStore::Transaction
>());
1302 while (it
->valid() && num_keys
< 128) {
1303 pair
<string
,string
> k
= it
->raw_key();
1304 bufferlist v
= it
->value();
1305 tx
->put(k
.first
, k
.second
, v
);
1309 total_size
+= v
.length();
1314 total_keys
+= num_keys
;
1317 out_store
.apply_transaction(tx
);
1319 std::cout
<< "copied " << total_keys
<< " keys so far ("
1320 << stringify(si_t(total_size
)) << ")" << std::endl
;
1322 } while (it
->valid());
1324 std::cout
<< "summary: copied " << total_keys
<< " keys, using "
1325 << total_tx
<< " transactions, totalling "
1326 << stringify(si_t(total_size
)) << std::endl
;
1327 std::cout
<< "from '" << store_path
<< "' to '" << out_path
<< "'"
1329 } else if (cmd
== "rewrite-crush") {
1330 err
= rewrite_crush(argv
[0], subcmds
, st
);
1331 } else if (cmd
== "inflate-pgmap") {
1333 bool can_be_trimmed
= false;
1334 po::options_description
op_desc("Allowed 'inflate-pgmap' options");
1335 op_desc
.add_options()
1336 ("num-maps,n", po::value
<unsigned>(&n
),
1337 "number of maps to add (default: 2000)")
1338 ("can-be-trimmed", po::value
<bool>(&can_be_trimmed
),
1339 "can be trimmed (default: false)")
1342 po::variables_map op_vm
;
1344 po::parsed_options op_parsed
= po::command_line_parser(subcmds
).
1345 options(op_desc
).run();
1346 po::store(op_parsed
, op_vm
);
1348 } catch (po::error
&e
) {
1349 std::cerr
<< "error: " << e
.what() << std::endl
;
1353 err
= inflate_pgmap(st
, n
, can_be_trimmed
);
1354 } else if (cmd
== "rebuild") {
1355 err
= rebuild_monstore(argv
[0], subcmds
, st
);
1357 std::cerr
<< "Unrecognized command: " << cmd
<< std::endl
;
1358 usage(argv
[0], desc
);