]> git.proxmox.com Git - ceph.git/blob - ceph/src/tools/ceph_monstore_tool.cc
import ceph 12.2.12
[ceph.git] / ceph / src / tools / ceph_monstore_tool.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2012 Inktank, Inc.
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 */
13 #include <boost/program_options/variables_map.hpp>
14 #include <boost/program_options/parsers.hpp>
15 #include <boost/scope_exit.hpp>
16
17 #include <stdlib.h>
18 #include <string>
19
20 #include "common/Formatter.h"
21 #include "common/errno.h"
22
23 #include "auth/KeyRing.h"
24 #include "auth/cephx/CephxKeyServer.h"
25 #include "global/global_init.h"
26 #include "include/stringify.h"
27 #include "mgr/mgr_commands.h"
28 #include "mon/AuthMonitor.h"
29 #include "mon/MonitorDBStore.h"
30 #include "mon/Paxos.h"
31 #include "mon/MonMap.h"
32 #include "mds/FSMap.h"
33 #include "mon/MgrMap.h"
34 #include "osd/OSDMap.h"
35 #include "crush/CrushCompiler.h"
36 #include "mon/CreatingPGs.h"
37
38 namespace po = boost::program_options;
39 using namespace std;
40
41 class TraceIter {
42 int fd;
43 unsigned idx;
44 MonitorDBStore::TransactionRef t;
45 public:
46 explicit TraceIter(string fname) : fd(-1), idx(-1) {
47 fd = ::open(fname.c_str(), O_RDONLY);
48 t.reset(new MonitorDBStore::Transaction);
49 }
50 bool valid() {
51 return fd != -1;
52 }
53 MonitorDBStore::TransactionRef cur() {
54 assert(valid());
55 return t;
56 }
57 unsigned num() { return idx; }
58 void next() {
59 ++idx;
60 bufferlist bl;
61 int r = bl.read_fd(fd, 6);
62 if (r < 0) {
63 std::cerr << "Got error: " << cpp_strerror(r) << " on read_fd"
64 << std::endl;
65 ::close(fd);
66 fd = -1;
67 return;
68 } else if ((unsigned)r < 6) {
69 std::cerr << "short read" << std::endl;
70 ::close(fd);
71 fd = -1;
72 return;
73 }
74 bufferlist::iterator bliter = bl.begin();
75 uint8_t ver, ver2;
76 ::decode(ver, bliter);
77 ::decode(ver2, bliter);
78 uint32_t len;
79 ::decode(len, bliter);
80 r = bl.read_fd(fd, len);
81 if (r < 0) {
82 std::cerr << "Got error: " << cpp_strerror(r) << " on read_fd"
83 << std::endl;
84 ::close(fd);
85 fd = -1;
86 return;
87 } else if ((unsigned)r < len) {
88 std::cerr << "short read" << std::endl;
89 ::close(fd);
90 fd = -1;
91 return;
92 }
93 bliter = bl.begin();
94 t.reset(new MonitorDBStore::Transaction);
95 t->decode(bliter);
96 }
97 void init() {
98 next();
99 }
100 ~TraceIter() {
101 if (fd != -1) {
102 ::close(fd);
103 fd = -1;
104 }
105 }
106 };
107
108
109 int parse_cmd_args(
110 po::options_description *desc, /// < visible options description
111 po::options_description *hidden_desc, /// < hidden options description
112 po::positional_options_description *positional, /// < positional args
113 vector<string> &cmd_args, /// < arguments to be parsed
114 po::variables_map *vm /// > post-parsing variable map
115 )
116 {
117 // desc_all will aggregate all visible and hidden options for parsing.
118 //
119 // From boost's program_options point of view, there is absolutely no
120 // distinction between 'desc' and 'hidden_desc'. This is a distinction
121 // that is only useful to us: 'desc' is whatever we are willing to show
122 // on 'usage()', whereas 'hidden_desc' refers to parameters we wish to
123 // take advantage of but do not wish to show on 'usage()'.
124 //
125 // For example, consider that program_options matches positional arguments
126 // (specified via 'positional') against the paramenters defined on a
127 // given 'po::options_description' class. This is performed below,
128 // supplying both the description and the positional arguments to the
129 // parser. However, we do not want the parameters that are mapped to
130 // positional arguments to be shown on usage, as that makes for ugly and
131 // confusing usage messages. Therefore we dissociate the options'
132 // description that is to be used as an aid to the user from those options
133 // that are nothing but useful for internal purposes (i.e., mapping options
134 // to positional arguments). We still need to aggregate them before parsing
135 // and that's what 'desc_all' is all about.
136 //
137
138 assert(desc != NULL);
139
140 po::options_description desc_all;
141 desc_all.add(*desc);
142 if (hidden_desc != NULL)
143 desc_all.add(*hidden_desc);
144
145 try {
146 po::command_line_parser parser = po::command_line_parser(cmd_args).
147 options(desc_all);
148
149 if (positional) {
150 parser = parser.positional(*positional);
151 }
152
153 po::parsed_options parsed = parser.run();
154 po::store(parsed, *vm);
155 po::notify(*vm);
156 } catch (po::error &e) {
157 std::cerr << "error: " << e.what() << std::endl;
158 return -EINVAL;
159 }
160 return 0;
161 }
162
163
164 /**
165 * usage: ceph-monstore-tool <store-path> <command> [options]
166 *
167 * commands:
168 *
169 * store-copy < --out arg >
170 * dump-keys
171 * compact
172 * getmonmap < --out arg [ --version arg ] >
173 * getosdmap < --out arg [ --version arg ] >
174 * dump-paxos <--dump-start VER> <--dump-end VER>
175 * dump-trace < --trace-file arg >
176 * replay-trace
177 * random-gen
178 * rewrite-crush
179 *
180 * wanted syntax:
181 *
182 * ceph-monstore-tool PATH CMD [options]
183 *
184 * ceph-monstore-tool PATH store-copy <PATH2 | -o PATH2>
185 * ceph-monstore-tool PATH dump-keys
186 * ceph-monstore-tool PATH compact
187 * ceph-monstore-tool PATH get monmap [VER]
188 * ceph-monstore-tool PATH get osdmap [VER]
189 * ceph-monstore-tool PATH dump-paxos STARTVER ENDVER
190 *
191 *
192 */
193 void usage(const char *n, po::options_description &d)
194 {
195 std::cerr <<
196 "usage: " << n << " <store-path> <cmd> [args|options]\n"
197 << "\n"
198 << "Commands:\n"
199 << " store-copy PATH copies store to PATH\n"
200 << " compact compacts the store\n"
201 << " get monmap [-- options] get monmap (version VER if specified)\n"
202 << " (default: last committed)\n"
203 << " get osdmap [-- options] get osdmap (version VER if specified)\n"
204 << " (default: last committed)\n"
205 << " get mdsmap [-- options] get mdsmap (version VER if specified)\n"
206 << " (default: last committed)\n"
207 << " get mgr [-- options] get mgr map (version VER if specified)\n"
208 << " (default: last committed)\n"
209 << " get crushmap [-- options] get crushmap (version VER if specified)\n"
210 << " (default: last committed)\n"
211 << " show-versions [-- options] show the first&last committed version of map\n"
212 << " (show-versions -- --help for more info)\n"
213 << " dump-keys dumps store keys to FILE\n"
214 << " (default: stdout)\n"
215 << " dump-paxos [-- options] dump paxos transactions\n"
216 << " (dump-paxos -- --help for more info)\n"
217 << " dump-trace FILE [-- options] dump contents of trace file FILE\n"
218 << " (dump-trace -- --help for more info)\n"
219 << " replay-trace FILE [-- options] replay trace from FILE\n"
220 << " (replay-trace -- --help for more info)\n"
221 << " random-gen [-- options] add randomly generated ops to the store\n"
222 << " (random-gen -- --help for more info)\n"
223 << " rewrite-crush [-- options] add a rewrite commit to the store\n"
224 << " (rewrite-crush -- --help for more info)\n"
225 << " rebuild rebuild store\n"
226 << " (rebuild -- --help for more info)\n"
227 << std::endl;
228 std::cerr << d << std::endl;
229 std::cerr
230 << "\nPlease Note:\n"
231 << "* Ceph-specific options should be in the format --option-name=VAL\n"
232 << " (specifically, do not forget the '='!!)\n"
233 << "* Command-specific options need to be passed after a '--'\n"
234 << " e.g., 'get monmap -- --version 10 --out /tmp/foo'"
235 << std::endl;
236 }
237
238 int update_osdmap(MonitorDBStore& store, version_t ver, bool copy,
239 ceph::shared_ptr<CrushWrapper> crush,
240 MonitorDBStore::Transaction* t) {
241 const string prefix("osdmap");
242
243 // full
244 bufferlist bl;
245 int r = 0;
246 r = store.get(prefix, store.combine_strings("full", ver), bl);
247 if (r) {
248 std::cerr << "Error getting full map: " << cpp_strerror(r) << std::endl;
249 return r;
250 }
251 OSDMap osdmap;
252 osdmap.decode(bl);
253 osdmap.crush = crush;
254 if (copy) {
255 osdmap.inc_epoch();
256 }
257 bl.clear();
258 // be consistent with OSDMonitor::update_from_paxos()
259 osdmap.encode(bl, CEPH_FEATURES_ALL|CEPH_FEATURE_RESERVED);
260 t->put(prefix, store.combine_strings("full", osdmap.get_epoch()), bl);
261
262 // incremental
263 OSDMap::Incremental inc;
264 if (copy) {
265 inc.epoch = osdmap.get_epoch();
266 inc.fsid = osdmap.get_fsid();
267 } else {
268 bl.clear();
269 r = store.get(prefix, ver, bl);
270 if (r) {
271 std::cerr << "Error getting inc map: " << cpp_strerror(r) << std::endl;
272 return r;
273 }
274 OSDMap::Incremental inc(bl);
275 if (inc.crush.length()) {
276 inc.crush.clear();
277 crush->encode(inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
278 }
279 if (inc.fullmap.length()) {
280 OSDMap fullmap;
281 fullmap.decode(inc.fullmap);
282 fullmap.crush = crush;
283 inc.fullmap.clear();
284 fullmap.encode(inc.fullmap);
285 }
286 }
287 assert(osdmap.have_crc());
288 inc.full_crc = osdmap.get_crc();
289 bl.clear();
290 // be consistent with OSDMonitor::update_from_paxos()
291 inc.encode(bl, CEPH_FEATURES_ALL|CEPH_FEATURE_RESERVED);
292 t->put(prefix, inc.epoch, bl);
293 return 0;
294 }
295
296 int rewrite_transaction(MonitorDBStore& store, int version,
297 const string& crush_file,
298 MonitorDBStore::Transaction* t) {
299 const string prefix("osdmap");
300
301 // calc the known-good epoch
302 version_t last_committed = store.get(prefix, "last_committed");
303 version_t good_version = 0;
304 if (version <= 0) {
305 if (last_committed >= (unsigned)-version) {
306 good_version = last_committed + version;
307 } else {
308 std::cerr << "osdmap-version is less than: -" << last_committed << std::endl;
309 return EINVAL;
310 }
311 } else {
312 good_version = version;
313 }
314 if (good_version >= last_committed) {
315 std::cout << "good epoch is greater or equal to the last committed one: "
316 << good_version << " >= " << last_committed << std::endl;
317 return 0;
318 }
319
320 // load/extract the crush map
321 int r = 0;
322 ceph::shared_ptr<CrushWrapper> crush(new CrushWrapper);
323 if (crush_file.empty()) {
324 bufferlist bl;
325 r = store.get(prefix, store.combine_strings("full", good_version), bl);
326 if (r) {
327 std::cerr << "Error getting map: " << cpp_strerror(r) << std::endl;
328 return r;
329 }
330 OSDMap osdmap;
331 osdmap.decode(bl);
332 crush = osdmap.crush;
333 } else {
334 string err;
335 bufferlist bl;
336 r = bl.read_file(crush_file.c_str(), &err);
337 if (r) {
338 std::cerr << err << ": " << cpp_strerror(r) << std::endl;
339 return r;
340 }
341 bufferlist::iterator p = bl.begin();
342 crush->decode(p);
343 }
344
345 // prepare a transaction to rewrite the epochs
346 // (good_version, last_committed]
347 // with the good crush map.
348 // XXX: may need to break this into several paxos versions?
349 assert(good_version < last_committed);
350 for (version_t v = good_version + 1; v <= last_committed; v++) {
351 cout << "rewriting epoch #" << v << "/" << last_committed << std::endl;
352 r = update_osdmap(store, v, false, crush, t);
353 if (r)
354 return r;
355 }
356
357 // add a new osdmap epoch to store, so monitors will update their current osdmap
358 // in addition to the ones stored in epochs.
359 //
360 // This is needed due to the way the monitor updates from paxos and the
361 // facilities we are leveraging to push this update to the rest of the
362 // quorum.
363 //
364 // In a nutshell, we are generating a good version of the osdmap, with a
365 // proper crush, and building a transaction that will replace the bad
366 // osdmaps with good osdmaps. But this transaction needs to be applied on
367 // all nodes, so that the monitors will have good osdmaps to share with
368 // clients. We thus leverage Paxos, specifically the recovery mechanism, by
369 // creating a pending value that will be committed once the monitors form an
370 // initial quorum after being brought back to life.
371 //
372 // However, the way the monitor works has the paxos services, including the
373 // OSDMonitor, updating their state from disk *prior* to the recovery phase
374 // begins (so they have an up to date state in memory). This means the
375 // OSDMonitor will see the old, broken map, before the new paxos version is
376 // applied to disk, and the old version is cached. Even though we have the
377 // good map now, and we share the good map with clients, we will still be
378 // working on the old broken map. Instead of mucking around the monitor to
379 // make this work, we instead opt for adding the same osdmap but with a
380 // newer version, so that the OSDMonitor picks up on it when it updates from
381 // paxos after the proposal has been committed. This is not elegant, but
382 // avoids further unpleasantness that would arise from kludging around the
383 // current behavior. Also, has the added benefit of making sure the clients
384 // get an updated version of the map (because last_committed+1 >
385 // last_committed) :)
386 //
387 cout << "adding a new epoch #" << last_committed+1 << std::endl;
388 r = update_osdmap(store, last_committed++, true, crush, t);
389 if (r)
390 return r;
391 t->put(prefix, store.combine_strings("full", "latest"), last_committed);
392 t->put(prefix, "last_committed", last_committed);
393 return 0;
394 }
395
396 /**
397 * create a new paxos version which carries a proposal to rewrite all epochs
398 * of incremental and full map of "osdmap" after a faulty crush map is injected.
399 * so the leader will trigger a recovery and propagate this fix to its peons,
400 * after the proposal is accepted, and the transaction in it is applied. all
401 * monitors will rewrite the bad crush map with the good one, and have a new
402 * osdmap epoch with the good crush map in it.
403 */
404 int rewrite_crush(const char* progname,
405 vector<string>& subcmds,
406 MonitorDBStore& store) {
407 po::options_description op_desc("Allowed 'rewrite-crush' options");
408 int version = -1;
409 string crush_file;
410 op_desc.add_options()
411 ("help,h", "produce this help message")
412 ("crush", po::value<string>(&crush_file),
413 ("path to the crush map file "
414 "(default: will instead extract it from the known-good osdmap)"))
415 ("good-epoch", po::value<int>(&version),
416 "known-good epoch of osdmap, if a negative number '-N' is given, the "
417 "$last_committed-N is used instead (default: -1). "
418 "Please note, -1 is not necessarily a good epoch, because there are "
419 "good chance that we have more epochs slipped into the monstore after "
420 "the one where the crushmap is firstly injected.")
421 ;
422 po::variables_map op_vm;
423 int r = parse_cmd_args(&op_desc, NULL, NULL, subcmds, &op_vm);
424 if (r) {
425 return -r;
426 }
427 if (op_vm.count("help")) {
428 usage(progname, op_desc);
429 return 0;
430 }
431
432 MonitorDBStore::Transaction rewrite_txn;
433 r = rewrite_transaction(store, version, crush_file, &rewrite_txn);
434 if (r) {
435 return r;
436 }
437
438 // store the transaction into store as a proposal
439 const string prefix("paxos");
440 version_t pending_v = store.get(prefix, "last_committed") + 1;
441 auto t(std::make_shared<MonitorDBStore::Transaction>());
442 bufferlist bl;
443 rewrite_txn.encode(bl);
444 cout << "adding pending commit " << pending_v
445 << " " << bl.length() << " bytes" << std::endl;
446 t->put(prefix, pending_v, bl);
447 t->put(prefix, "pending_v", pending_v);
448 // a large enough yet unique proposal number will probably do the trick
449 version_t pending_pn = (store.get(prefix, "accepted_pn") / 100 + 4) * 100 + 1;
450 t->put(prefix, "pending_pn", pending_pn);
451 store.apply_transaction(t);
452 return 0;
453 }
454
455 static int update_auth(MonitorDBStore& st, const string& keyring_path)
456 {
457 // import all keyrings stored in the keyring file
458 KeyRing keyring;
459 int r = keyring.load(g_ceph_context, keyring_path);
460 if (r < 0) {
461 cerr << "unable to load admin keyring: " << keyring_path << std::endl;
462 return r;
463 }
464
465 bufferlist bl;
466 __u8 v = 1;
467 ::encode(v, bl);
468
469 for (const auto& k : keyring.get_keys()) {
470 KeyServerData::Incremental auth_inc;
471 auth_inc.name = k.first;
472 auth_inc.auth = k.second;
473 if (auth_inc.auth.caps.empty()) {
474 cerr << "no caps granted to: " << auth_inc.name << std::endl;
475 return -EINVAL;
476 }
477 auth_inc.op = KeyServerData::AUTH_INC_ADD;
478
479 AuthMonitor::Incremental inc;
480 inc.inc_type = AuthMonitor::AUTH_DATA;
481 ::encode(auth_inc, inc.auth_data);
482 inc.auth_type = CEPH_AUTH_CEPHX;
483
484 inc.encode(bl, CEPH_FEATURES_ALL);
485 }
486
487 const string prefix("auth");
488 auto last_committed = st.get(prefix, "last_committed") + 1;
489 auto t = make_shared<MonitorDBStore::Transaction>();
490 t->put(prefix, last_committed, bl);
491 t->put(prefix, "last_committed", last_committed);
492 auto first_committed = st.get(prefix, "first_committed");
493 if (!first_committed) {
494 t->put(prefix, "first_committed", last_committed);
495 }
496 st.apply_transaction(t);
497 return 0;
498 }
499
500 static int update_mkfs(MonitorDBStore& st)
501 {
502 MonMap monmap;
503 int r = monmap.build_initial(g_ceph_context, cerr);
504 if (r) {
505 cerr << "no initial monitors" << std::endl;
506 return -EINVAL;
507 }
508 bufferlist bl;
509 monmap.encode(bl, CEPH_FEATURES_ALL);
510 monmap.set_epoch(0);
511 auto t = make_shared<MonitorDBStore::Transaction>();
512 t->put("mkfs", "monmap", bl);
513 st.apply_transaction(t);
514 return 0;
515 }
516
517 static int update_monitor(MonitorDBStore& st)
518 {
519 const string prefix("monitor");
520 // a stripped-down Monitor::mkfs()
521 bufferlist bl;
522 bl.append(CEPH_MON_ONDISK_MAGIC "\n");
523 auto t = make_shared<MonitorDBStore::Transaction>();
524 t->put(prefix, "magic", bl);
525 st.apply_transaction(t);
526 return 0;
527 }
528
529 // rebuild
530 // - creating_pgs
531 static int update_creating_pgs(MonitorDBStore& st)
532 {
533 bufferlist bl;
534 auto last_osdmap_epoch = st.get("osdmap", "last_committed");
535 int r = st.get("osdmap", st.combine_strings("full", last_osdmap_epoch), bl);
536 if (r < 0) {
537 cerr << "unable to losd osdmap e" << last_osdmap_epoch << std::endl;
538 return r;
539 }
540
541 OSDMap osdmap;
542 osdmap.decode(bl);
543 creating_pgs_t creating;
544 for (auto& i : osdmap.get_pools()) {
545 creating.created_pools.insert(i.first);
546 }
547 creating.last_scan_epoch = last_osdmap_epoch;
548
549 bufferlist newbl;
550 ::encode(creating, newbl);
551
552 auto t = make_shared<MonitorDBStore::Transaction>();
553 t->put("osd_pg_creating", "creating", newbl);
554 st.apply_transaction(t);
555 return 0;
556 }
557
558 // rebuild
559 // - mgr
560 // - mgr_command_desc
561 static int update_mgrmap(MonitorDBStore& st)
562 {
563 auto t = make_shared<MonitorDBStore::Transaction>();
564
565 {
566 MgrMap map;
567 // mgr expects epoch > 1
568 map.epoch++;
569 auto initial_modules =
570 get_str_vec(g_ceph_context->_conf->get_val<string>("mgr_initial_modules"));
571 copy(begin(initial_modules),
572 end(initial_modules),
573 inserter(map.modules, end(map.modules)));
574 bufferlist bl;
575 map.encode(bl, CEPH_FEATURES_ALL);
576 t->put("mgr", map.epoch, bl);
577 t->put("mgr", "last_committed", map.epoch);
578 }
579 {
580 auto mgr_command_descs = mgr_commands;
581 for (auto& c : mgr_command_descs) {
582 c.set_flag(MonCommand::FLAG_MGR);
583 }
584 bufferlist bl;
585 ::encode(mgr_command_descs, bl);
586 t->put("mgr_command_desc", "", bl);
587 }
588 return st.apply_transaction(t);
589 }
590
591 static int update_paxos(MonitorDBStore& st)
592 {
593 // build a pending paxos proposal from all non-permanent k/v pairs. once the
594 // proposal is committed, it will gets applied. on the sync provider side, it
595 // will be a no-op, but on its peers, the paxos commit will help to build up
596 // the necessary epochs.
597 bufferlist pending_proposal;
598 {
599 MonitorDBStore::Transaction t;
600 vector<string> prefixes = {"auth", "osdmap",
601 "mgr", "mgr_command_desc"};
602 for (const auto& prefix : prefixes) {
603 for (auto i = st.get_iterator(prefix); i->valid(); i->next()) {
604 auto key = i->raw_key();
605 auto val = i->value();
606 t.put(key.first, key.second, val);
607 }
608 }
609 t.encode(pending_proposal);
610 }
611 const string prefix("paxos");
612 auto t = make_shared<MonitorDBStore::Transaction>();
613 t->put(prefix, "first_committed", 0);
614 t->put(prefix, "last_committed", 0);
615 auto pending_v = 1;
616 t->put(prefix, pending_v, pending_proposal);
617 t->put(prefix, "pending_v", pending_v);
618 t->put(prefix, "pending_pn", 400);
619 st.apply_transaction(t);
620 return 0;
621 }
622
623 int rebuild_monstore(const char* progname,
624 vector<string>& subcmds,
625 MonitorDBStore& st)
626 {
627 po::options_description op_desc("Allowed 'rebuild' options");
628 string keyring_path;
629 op_desc.add_options()
630 ("keyring", po::value<string>(&keyring_path),
631 "path to the client.admin key");
632 po::variables_map op_vm;
633 int r = parse_cmd_args(&op_desc, nullptr, nullptr, subcmds, &op_vm);
634 if (r) {
635 return -r;
636 }
637 if (op_vm.count("help")) {
638 usage(progname, op_desc);
639 return 0;
640 }
641 if (!keyring_path.empty())
642 update_auth(st, keyring_path);
643 if ((r = update_creating_pgs(st))) {
644 return r;
645 }
646 if ((r = update_mgrmap(st))) {
647 return r;
648 }
649 if ((r = update_paxos(st))) {
650 return r;
651 }
652 if ((r = update_mkfs(st))) {
653 return r;
654 }
655 if ((r = update_monitor(st))) {
656 return r;
657 }
658 return 0;
659 }
660
661 int main(int argc, char **argv) {
662 int err = 0;
663 po::options_description desc("Allowed options");
664 string store_path, cmd;
665 vector<string> subcmds;
666 desc.add_options()
667 ("help,h", "produce help message")
668 ;
669
670 /* Dear Future Developer:
671 *
672 * for further improvement, should you need to pass specific options to
673 * a command (e.g., get osdmap VER --hex), you can expand the current
674 * format by creating additional 'po::option_description' and passing
675 * 'subcmds' to 'po::command_line_parser', much like what is currently
676 * done by default. However, beware: in order to differentiate a
677 * command-specific option from the generic/global options, you will need
678 * to pass '--' in the command line (so that the first parser, the one
679 * below, assumes it has reached the end of all options); e.g.,
680 * 'get osdmap VER -- --hex'. Not pretty; far from intuitive; it was as
681 * far as I got with this library. Improvements on this format will be
682 * left as an exercise for the reader. -Joao
683 */
684 po::options_description positional_desc("Positional argument options");
685 positional_desc.add_options()
686 ("store-path", po::value<string>(&store_path),
687 "path to monitor's store")
688 ("command", po::value<string>(&cmd),
689 "Command")
690 ("subcmd", po::value<vector<string> >(&subcmds),
691 "Command arguments/Sub-Commands")
692 ;
693 po::positional_options_description positional;
694 positional.add("store-path", 1);
695 positional.add("command", 1);
696 positional.add("subcmd", -1);
697
698 po::options_description all_desc("All options");
699 all_desc.add(desc).add(positional_desc);
700
701 vector<string> ceph_option_strings;
702 po::variables_map vm;
703 try {
704 po::parsed_options parsed =
705 po::command_line_parser(argc, argv).
706 options(all_desc).
707 positional(positional).
708 allow_unregistered().run();
709
710 po::store(
711 parsed,
712 vm);
713 po::notify(vm);
714
715 // Specifying po::include_positional would have our positional arguments
716 // being collected (thus being part of ceph_option_strings and eventually
717 // passed on to global_init() below).
718 // Instead we specify po::exclude_positional, which has the upside of
719 // completely avoid this, but the downside of having to specify ceph
720 // options as --VAR=VAL (note the '='); otherwise we will capture the
721 // positional 'VAL' as belonging to us, never being collected.
722 ceph_option_strings = po::collect_unrecognized(parsed.options,
723 po::exclude_positional);
724
725 } catch(po::error &e) {
726 std::cerr << "error: " << e.what() << std::endl;
727 return 1;
728 }
729
730 // parse command structure before calling global_init() and friends.
731
732 if (vm.empty() || vm.count("help") ||
733 store_path.empty() || cmd.empty() ||
734 *cmd.begin() == '-') {
735 usage(argv[0], desc);
736 return 1;
737 }
738
739 vector<const char *> ceph_options, def_args;
740 ceph_options.reserve(ceph_option_strings.size());
741 for (vector<string>::iterator i = ceph_option_strings.begin();
742 i != ceph_option_strings.end();
743 ++i) {
744 ceph_options.push_back(i->c_str());
745 }
746
747 auto cct = global_init(
748 &def_args, ceph_options, CEPH_ENTITY_TYPE_MON,
749 CODE_ENVIRONMENT_UTILITY, 0);
750 common_init_finish(g_ceph_context);
751 g_ceph_context->_conf->apply_changes(NULL);
752 g_conf = g_ceph_context->_conf;
753
754 // this is where we'll write *whatever*, on a per-command basis.
755 // not all commands require some place to write their things.
756 MonitorDBStore st(store_path);
757 if (store_path.size()) {
758 stringstream ss;
759 int r = st.open(ss);
760 if (r < 0) {
761 std::cerr << ss.str() << std::endl;
762 return EINVAL;
763 }
764 }
765
766 if (cmd == "dump-keys") {
767 KeyValueDB::WholeSpaceIterator iter = st.get_iterator();
768 while (iter->valid()) {
769 pair<string,string> key(iter->raw_key());
770 cout << key.first << " / " << key.second << std::endl;
771 iter->next();
772 }
773 } else if (cmd == "compact") {
774 st.compact();
775 } else if (cmd == "get") {
776 unsigned v = 0;
777 string outpath;
778 bool readable = false;
779 string map_type;
780 // visible options for this command
781 po::options_description op_desc("Allowed 'get' options");
782 op_desc.add_options()
783 ("help,h", "produce this help message")
784 ("out,o", po::value<string>(&outpath),
785 "output file (default: stdout)")
786 ("version,v", po::value<unsigned>(&v),
787 "map version to obtain")
788 ("readable,r", po::value<bool>(&readable)->default_value(false),
789 "print the map infomation in human readable format")
790 ;
791 // this is going to be a positional argument; we don't want to show
792 // it as an option during --help, but we do want to have it captured
793 // when parsing.
794 po::options_description hidden_op_desc("Hidden 'get' options");
795 hidden_op_desc.add_options()
796 ("map-type", po::value<string>(&map_type),
797 "map-type")
798 ;
799 po::positional_options_description op_positional;
800 op_positional.add("map-type", 1);
801
802 po::variables_map op_vm;
803 int r = parse_cmd_args(&op_desc, &hidden_op_desc, &op_positional,
804 subcmds, &op_vm);
805 if (r < 0) {
806 err = -r;
807 goto done;
808 }
809
810 if (op_vm.count("help") || map_type.empty()) {
811 usage(argv[0], op_desc);
812 err = 0;
813 goto done;
814 }
815
816 if (v == 0) {
817 if (map_type == "crushmap") {
818 v = st.get("osdmap", "last_committed");
819 } else {
820 v = st.get(map_type, "last_committed");
821 }
822 }
823
824 int fd = STDOUT_FILENO;
825 if (!outpath.empty()){
826 fd = ::open(outpath.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666);
827 if (fd < 0) {
828 std::cerr << "error opening output file: "
829 << cpp_strerror(errno) << std::endl;
830 err = EINVAL;
831 goto done;
832 }
833 }
834
835 BOOST_SCOPE_EXIT((&r) (&fd) (&outpath)) {
836 ::close(fd);
837 if (r < 0 && fd != STDOUT_FILENO) {
838 ::remove(outpath.c_str());
839 }
840 } BOOST_SCOPE_EXIT_END
841
842 bufferlist bl;
843 r = 0;
844 if (map_type == "osdmap") {
845 r = st.get(map_type, st.combine_strings("full", v), bl);
846 } else if (map_type == "crushmap") {
847 bufferlist tmp;
848 r = st.get("osdmap", st.combine_strings("full", v), tmp);
849 if (r >= 0) {
850 OSDMap osdmap;
851 osdmap.decode(tmp);
852 osdmap.crush->encode(bl, CEPH_FEATURES_SUPPORTED_DEFAULT);
853 }
854 } else {
855 r = st.get(map_type, v, bl);
856 }
857 if (r < 0) {
858 std::cerr << "Error getting map: " << cpp_strerror(r) << std::endl;
859 err = EINVAL;
860 goto done;
861 }
862
863 if (readable) {
864 stringstream ss;
865 bufferlist out;
866 try {
867 if (map_type == "monmap") {
868 MonMap monmap;
869 monmap.decode(bl);
870 monmap.print(ss);
871 } else if (map_type == "osdmap") {
872 OSDMap osdmap;
873 osdmap.decode(bl);
874 osdmap.print(ss);
875 } else if (map_type == "mdsmap") {
876 FSMap fs_map;
877 fs_map.decode(bl);
878 fs_map.print(ss);
879 } else if (map_type == "mgr") {
880 MgrMap mgr_map;
881 auto p = bl.begin();
882 mgr_map.decode(p);
883 JSONFormatter f;
884 f.dump_object("mgrmap", mgr_map);
885 f.flush(ss);
886 } else if (map_type == "crushmap") {
887 CrushWrapper cw;
888 bufferlist::iterator it = bl.begin();
889 cw.decode(it);
890 CrushCompiler cc(cw, std::cerr, 0);
891 cc.decompile(ss);
892 } else {
893 std::cerr << "This type of readable map does not exist: " << map_type
894 << std::endl << "You can only specify[osdmap|monmap|mdsmap"
895 "|crushmap|mgr]" << std::endl;
896 }
897 } catch (const buffer::error &err) {
898 std::cerr << "Could not decode for human readable output (you may still"
899 " use non-readable mode). Detail: " << err << std::endl;
900 }
901
902 out.append(ss);
903 out.write_fd(fd);
904 } else {
905 bl.write_fd(fd);
906 }
907
908 if (!outpath.empty()) {
909 std::cout << "wrote " << map_type
910 << " version " << v << " to " << outpath
911 << std::endl;
912 }
913 } else if (cmd == "show-versions") {
914 string map_type; //map type:osdmap,monmap...
915 // visible options for this command
916 po::options_description op_desc("Allowed 'show-versions' options");
917 op_desc.add_options()
918 ("help,h", "produce this help message")
919 ("map-type", po::value<string>(&map_type), "map_type");
920
921 po::positional_options_description op_positional;
922 op_positional.add("map-type", 1);
923
924 po::variables_map op_vm;
925 int r = parse_cmd_args(&op_desc, NULL, &op_positional,
926 subcmds, &op_vm);
927 if (r < 0) {
928 err = -r;
929 goto done;
930 }
931
932 if (op_vm.count("help") || map_type.empty()) {
933 usage(argv[0], op_desc);
934 err = 0;
935 goto done;
936 }
937
938 unsigned int v_first = 0;
939 unsigned int v_last = 0;
940 v_first = st.get(map_type, "first_committed");
941 v_last = st.get(map_type, "last_committed");
942
943 std::cout << "first committed:\t" << v_first << "\n"
944 << "last committed:\t" << v_last << std::endl;
945 } else if (cmd == "dump-paxos") {
946 unsigned dstart = 0;
947 unsigned dstop = ~0;
948 po::options_description op_desc("Allowed 'dump-paxos' options");
949 op_desc.add_options()
950 ("help,h", "produce this help message")
951 ("start,s", po::value<unsigned>(&dstart),
952 "starting version (default: 0)")
953 ("end,e", po::value<unsigned>(&dstop),
954 "finish version (default: ~0)")
955 ;
956
957 po::variables_map op_vm;
958 int r = parse_cmd_args(&op_desc, NULL, NULL,
959 subcmds, &op_vm);
960 if (r < 0) {
961 err = -r;
962 goto done;
963 }
964
965 if (op_vm.count("help")) {
966 usage(argv[0], op_desc);
967 err = 0;
968 goto done;
969 }
970
971 if (dstart > dstop) {
972 std::cerr << "error: 'start' version (value: " << dstart << ") "
973 << " is greater than 'end' version (value: " << dstop << ")"
974 << std::endl;
975 err = EINVAL;
976 goto done;
977 }
978
979 version_t v = dstart;
980 for (; v <= dstop; ++v) {
981 bufferlist bl;
982 st.get("paxos", v, bl);
983 if (bl.length() == 0)
984 break;
985 cout << "\n--- " << v << " ---" << std::endl;
986 auto tx(std::make_shared<MonitorDBStore::Transaction>());
987 Paxos::decode_append_transaction(tx, bl);
988 JSONFormatter f(true);
989 tx->dump(&f);
990 f.flush(cout);
991 }
992
993 std::cout << "dumped " << v << " paxos versions" << std::endl;
994
995 } else if (cmd == "dump-trace") {
996 unsigned dstart = 0;
997 unsigned dstop = ~0;
998 string outpath;
999
1000 // visible options for this command
1001 po::options_description op_desc("Allowed 'dump-trace' options");
1002 op_desc.add_options()
1003 ("help,h", "produce this help message")
1004 ("start,s", po::value<unsigned>(&dstart),
1005 "starting version (default: 0)")
1006 ("end,e", po::value<unsigned>(&dstop),
1007 "finish version (default: ~0)")
1008 ;
1009 // this is going to be a positional argument; we don't want to show
1010 // it as an option during --help, but we do want to have it captured
1011 // when parsing.
1012 po::options_description hidden_op_desc("Hidden 'dump-trace' options");
1013 hidden_op_desc.add_options()
1014 ("out,o", po::value<string>(&outpath),
1015 "file to write the dump to")
1016 ;
1017 po::positional_options_description op_positional;
1018 op_positional.add("out", 1);
1019
1020 po::variables_map op_vm;
1021 int r = parse_cmd_args(&op_desc, &hidden_op_desc, &op_positional,
1022 subcmds, &op_vm);
1023 if (r < 0) {
1024 err = -r;
1025 goto done;
1026 }
1027
1028 if (op_vm.count("help")) {
1029 usage(argv[0], op_desc);
1030 err = 0;
1031 goto done;
1032 }
1033
1034 if (outpath.empty()) {
1035 usage(argv[0], op_desc);
1036 err = EINVAL;
1037 goto done;
1038 }
1039
1040 if (dstart > dstop) {
1041 std::cerr << "error: 'start' version (value: " << dstart << ") "
1042 << " is greater than 'stop' version (value: " << dstop << ")"
1043 << std::endl;
1044 err = EINVAL;
1045 goto done;
1046 }
1047
1048 TraceIter iter(outpath.c_str());
1049 iter.init();
1050 while (true) {
1051 if (!iter.valid())
1052 break;
1053 if (iter.num() >= dstop) {
1054 break;
1055 }
1056 if (iter.num() >= dstart) {
1057 JSONFormatter f(true);
1058 iter.cur()->dump(&f, false);
1059 f.flush(std::cout);
1060 std::cout << std::endl;
1061 }
1062 iter.next();
1063 }
1064 std::cerr << "Read up to transaction " << iter.num() << std::endl;
1065 } else if (cmd == "replay-trace") {
1066 string inpath;
1067 unsigned num_replays = 1;
1068 // visible options for this command
1069 po::options_description op_desc("Allowed 'replay-trace' options");
1070 op_desc.add_options()
1071 ("help,h", "produce this help message")
1072 ("num-replays,n", po::value<unsigned>(&num_replays),
1073 "finish version (default: 1)")
1074 ;
1075 // this is going to be a positional argument; we don't want to show
1076 // it as an option during --help, but we do want to have it captured
1077 // when parsing.
1078 po::options_description hidden_op_desc("Hidden 'replay-trace' options");
1079 hidden_op_desc.add_options()
1080 ("in,i", po::value<string>(&inpath),
1081 "file to write the dump to")
1082 ;
1083 po::positional_options_description op_positional;
1084 op_positional.add("in", 1);
1085
1086 // op_desc_all will aggregate all visible and hidden options for parsing.
1087 // when we call 'usage()' we just pass 'op_desc', as that's the description
1088 // holding the visible options.
1089 po::options_description op_desc_all;
1090 op_desc_all.add(op_desc).add(hidden_op_desc);
1091
1092 po::variables_map op_vm;
1093 try {
1094 po::parsed_options op_parsed = po::command_line_parser(subcmds).
1095 options(op_desc_all).positional(op_positional).run();
1096 po::store(op_parsed, op_vm);
1097 po::notify(op_vm);
1098 } catch (po::error &e) {
1099 std::cerr << "error: " << e.what() << std::endl;
1100 err = EINVAL;
1101 goto done;
1102 }
1103
1104 if (op_vm.count("help")) {
1105 usage(argv[0], op_desc);
1106 err = 0;
1107 goto done;
1108 }
1109
1110 if (inpath.empty()) {
1111 usage(argv[0], op_desc);
1112 err = EINVAL;
1113 goto done;
1114 }
1115
1116 unsigned num = 0;
1117 for (unsigned i = 0; i < num_replays; ++i) {
1118 TraceIter iter(inpath.c_str());
1119 iter.init();
1120 while (true) {
1121 if (!iter.valid())
1122 break;
1123 std::cerr << "Replaying trans num " << num << std::endl;
1124 st.apply_transaction(iter.cur());
1125 iter.next();
1126 ++num;
1127 }
1128 std::cerr << "Read up to transaction " << iter.num() << std::endl;
1129 }
1130 } else if (cmd == "random-gen") {
1131 unsigned tsize = 200;
1132 unsigned tvalsize = 1024;
1133 unsigned ntrans = 100;
1134 po::options_description op_desc("Allowed 'random-gen' options");
1135 op_desc.add_options()
1136 ("help,h", "produce this help message")
1137 ("num-keys,k", po::value<unsigned>(&tsize),
1138 "keys to write in each transaction (default: 200)")
1139 ("size,s", po::value<unsigned>(&tvalsize),
1140 "size (in bytes) of the value to write in each key (default: 1024)")
1141 ("ntrans,n", po::value<unsigned>(&ntrans),
1142 "number of transactions to run (default: 100)")
1143 ;
1144
1145 po::variables_map op_vm;
1146 try {
1147 po::parsed_options op_parsed = po::command_line_parser(subcmds).
1148 options(op_desc).run();
1149 po::store(op_parsed, op_vm);
1150 po::notify(op_vm);
1151 } catch (po::error &e) {
1152 std::cerr << "error: " << e.what() << std::endl;
1153 err = EINVAL;
1154 goto done;
1155 }
1156
1157 if (op_vm.count("help")) {
1158 usage(argv[0], op_desc);
1159 err = 0;
1160 goto done;
1161 }
1162
1163 unsigned num = 0;
1164 for (unsigned i = 0; i < ntrans; ++i) {
1165 std::cerr << "Applying trans " << i << std::endl;
1166 auto t(std::make_shared<MonitorDBStore::Transaction>());
1167 string prefix;
1168 prefix.push_back((i%26)+'a');
1169 for (unsigned j = 0; j < tsize; ++j) {
1170 stringstream os;
1171 os << num;
1172 bufferlist bl;
1173 for (unsigned k = 0; k < tvalsize; ++k) bl.append(rand());
1174 t->put(prefix, os.str(), bl);
1175 ++num;
1176 }
1177 t->compact_prefix(prefix);
1178 st.apply_transaction(t);
1179 }
1180 } else if (cmd == "store-copy") {
1181 if (subcmds.size() < 1 || subcmds[0].empty()) {
1182 usage(argv[0], desc);
1183 err = EINVAL;
1184 goto done;
1185 }
1186
1187 string out_path = subcmds[0];
1188
1189 MonitorDBStore out_store(out_path);
1190 {
1191 stringstream ss;
1192 int r = out_store.create_and_open(ss);
1193 if (r < 0) {
1194 std::cerr << ss.str() << std::endl;
1195 goto done;
1196 }
1197 }
1198
1199
1200 KeyValueDB::WholeSpaceIterator it = st.get_iterator();
1201 uint64_t total_keys = 0;
1202 uint64_t total_size = 0;
1203 uint64_t total_tx = 0;
1204
1205 do {
1206 uint64_t num_keys = 0;
1207
1208 auto tx(std::make_shared<MonitorDBStore::Transaction>());
1209
1210 while (it->valid() && num_keys < 128) {
1211 pair<string,string> k = it->raw_key();
1212 bufferlist v = it->value();
1213 tx->put(k.first, k.second, v);
1214
1215 num_keys ++;
1216 total_tx ++;
1217 total_size += v.length();
1218
1219 it->next();
1220 }
1221
1222 total_keys += num_keys;
1223
1224 if (!tx->empty())
1225 out_store.apply_transaction(tx);
1226
1227 std::cout << "copied " << total_keys << " keys so far ("
1228 << stringify(byte_u_t(total_size)) << ")" << std::endl;
1229
1230 } while (it->valid());
1231 out_store.close();
1232 std::cout << "summary: copied " << total_keys << " keys, using "
1233 << total_tx << " transactions, totalling "
1234 << stringify(byte_u_t(total_size)) << std::endl;
1235 std::cout << "from '" << store_path << "' to '" << out_path << "'"
1236 << std::endl;
1237 } else if (cmd == "rewrite-crush") {
1238 err = rewrite_crush(argv[0], subcmds, st);
1239 } else if (cmd == "rebuild") {
1240 err = rebuild_monstore(argv[0], subcmds, st);
1241 } else {
1242 std::cerr << "Unrecognized command: " << cmd << std::endl;
1243 usage(argv[0], desc);
1244 goto done;
1245 }
1246
1247 done:
1248 st.close();
1249 return err;
1250 }