]> git.proxmox.com Git - ceph.git/blame - ceph/src/tools/ceph_monstore_tool.cc
update sources to v12.2.3
[ceph.git] / ceph / src / tools / ceph_monstore_tool.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4* Ceph - scalable distributed file system
5*
6* Copyright (C) 2012 Inktank, Inc.
7*
8* This is free software; you can redistribute it and/or
9* modify it under the terms of the GNU Lesser General Public
10* License version 2.1, as published by the Free Software
11* Foundation. See file COPYING.
12*/
13#include <boost/program_options/variables_map.hpp>
14#include <boost/program_options/parsers.hpp>
15#include <boost/scope_exit.hpp>
16
17#include <stdlib.h>
18#include <string>
19
20#include "common/Formatter.h"
21#include "common/errno.h"
22
23#include "auth/KeyRing.h"
24#include "auth/cephx/CephxKeyServer.h"
25#include "global/global_init.h"
26#include "include/stringify.h"
3efd9988 27#include "mgr/mgr_commands.h"
7c673cae
FG
28#include "mon/AuthMonitor.h"
29#include "mon/MonitorDBStore.h"
30#include "mon/Paxos.h"
31#include "mon/MonMap.h"
b32b8144
FG
32#include "mds/FSMap.h"
33#include "mon/MgrMap.h"
7c673cae
FG
34#include "osd/OSDMap.h"
35#include "crush/CrushCompiler.h"
36
37namespace po = boost::program_options;
38using namespace std;
39
40class TraceIter {
41 int fd;
42 unsigned idx;
43 MonitorDBStore::TransactionRef t;
44public:
45 explicit TraceIter(string fname) : fd(-1), idx(-1) {
46 fd = ::open(fname.c_str(), O_RDONLY);
47 t.reset(new MonitorDBStore::Transaction);
48 }
49 bool valid() {
50 return fd != -1;
51 }
52 MonitorDBStore::TransactionRef cur() {
53 assert(valid());
54 return t;
55 }
56 unsigned num() { return idx; }
57 void next() {
58 ++idx;
59 bufferlist bl;
60 int r = bl.read_fd(fd, 6);
61 if (r < 0) {
62 std::cerr << "Got error: " << cpp_strerror(r) << " on read_fd"
63 << std::endl;
64 ::close(fd);
65 fd = -1;
66 return;
67 } else if ((unsigned)r < 6) {
68 std::cerr << "short read" << std::endl;
69 ::close(fd);
70 fd = -1;
71 return;
72 }
73 bufferlist::iterator bliter = bl.begin();
74 uint8_t ver, ver2;
75 ::decode(ver, bliter);
76 ::decode(ver2, bliter);
77 uint32_t len;
78 ::decode(len, bliter);
79 r = bl.read_fd(fd, len);
80 if (r < 0) {
81 std::cerr << "Got error: " << cpp_strerror(r) << " on read_fd"
82 << std::endl;
83 ::close(fd);
84 fd = -1;
85 return;
86 } else if ((unsigned)r < len) {
87 std::cerr << "short read" << std::endl;
88 ::close(fd);
89 fd = -1;
90 return;
91 }
92 bliter = bl.begin();
93 t.reset(new MonitorDBStore::Transaction);
94 t->decode(bliter);
95 }
96 void init() {
97 next();
98 }
99 ~TraceIter() {
100 if (fd != -1) {
101 ::close(fd);
102 fd = -1;
103 }
104 }
105};
106
107
108int parse_cmd_args(
109 po::options_description *desc, /// < visible options description
110 po::options_description *hidden_desc, /// < hidden options description
111 po::positional_options_description *positional, /// < positional args
112 vector<string> &cmd_args, /// < arguments to be parsed
113 po::variables_map *vm /// > post-parsing variable map
114 )
115{
116 // desc_all will aggregate all visible and hidden options for parsing.
117 //
118 // From boost's program_options point of view, there is absolutely no
119 // distinction between 'desc' and 'hidden_desc'. This is a distinction
120 // that is only useful to us: 'desc' is whatever we are willing to show
121 // on 'usage()', whereas 'hidden_desc' refers to parameters we wish to
122 // take advantage of but do not wish to show on 'usage()'.
123 //
124 // For example, consider that program_options matches positional arguments
125 // (specified via 'positional') against the paramenters defined on a
126 // given 'po::options_description' class. This is performed below,
127 // supplying both the description and the positional arguments to the
128 // parser. However, we do not want the parameters that are mapped to
129 // positional arguments to be shown on usage, as that makes for ugly and
130 // confusing usage messages. Therefore we dissociate the options'
131 // description that is to be used as an aid to the user from those options
132 // that are nothing but useful for internal purposes (i.e., mapping options
133 // to positional arguments). We still need to aggregate them before parsing
134 // and that's what 'desc_all' is all about.
135 //
136
137 assert(desc != NULL);
138
139 po::options_description desc_all;
140 desc_all.add(*desc);
141 if (hidden_desc != NULL)
142 desc_all.add(*hidden_desc);
143
144 try {
145 po::command_line_parser parser = po::command_line_parser(cmd_args).
146 options(desc_all);
147
148 if (positional) {
149 parser = parser.positional(*positional);
150 }
151
152 po::parsed_options parsed = parser.run();
153 po::store(parsed, *vm);
154 po::notify(*vm);
155 } catch (po::error &e) {
156 std::cerr << "error: " << e.what() << std::endl;
157 return -EINVAL;
158 }
159 return 0;
160}
161
162
163/**
164 * usage: ceph-monstore-tool <store-path> <command> [options]
165 *
166 * commands:
167 *
168 * store-copy < --out arg >
169 * dump-keys
170 * compact
171 * getmonmap < --out arg [ --version arg ] >
172 * getosdmap < --out arg [ --version arg ] >
173 * dump-paxos <--dump-start VER> <--dump-end VER>
174 * dump-trace < --trace-file arg >
175 * replay-trace
176 * random-gen
177 * rewrite-crush
178 * inflate-pgmap
179 *
180 * wanted syntax:
181 *
182 * ceph-monstore-tool PATH CMD [options]
183 *
184 * ceph-monstore-tool PATH store-copy <PATH2 | -o PATH2>
185 * ceph-monstore-tool PATH dump-keys
186 * ceph-monstore-tool PATH compact
187 * ceph-monstore-tool PATH get monmap [VER]
188 * ceph-monstore-tool PATH get osdmap [VER]
189 * ceph-monstore-tool PATH dump-paxos STARTVER ENDVER
190 *
191 *
192 */
193void usage(const char *n, po::options_description &d)
194{
195 std::cerr <<
196 "usage: " << n << " <store-path> <cmd> [args|options]\n"
197 << "\n"
198 << "Commands:\n"
199 << " store-copy PATH copies store to PATH\n"
200 << " compact compacts the store\n"
201 << " get monmap [-- options] get monmap (version VER if specified)\n"
202 << " (default: last committed)\n"
203 << " get osdmap [-- options] get osdmap (version VER if specified)\n"
204 << " (default: last committed)\n"
205 << " get mdsmap [-- options] get mdsmap (version VER if specified)\n"
206 << " (default: last committed)\n"
b32b8144
FG
207 << " get mgr [-- options] get mgr map (version VER if specified)\n"
208 << " (default: last committed)\n"
7c673cae
FG
209 << " get crushmap [-- options] get crushmap (version VER if specified)\n"
210 << " (default: last committed)\n"
211 << " show-versions [-- options] show the first&last committed version of map\n"
212 << " (show-versions -- --help for more info)\n"
213 << " dump-keys dumps store keys to FILE\n"
214 << " (default: stdout)\n"
215 << " dump-paxos [-- options] dump paxos transactions\n"
216 << " (dump-paxos -- --help for more info)\n"
217 << " dump-trace FILE [-- options] dump contents of trace file FILE\n"
218 << " (dump-trace -- --help for more info)\n"
219 << " replay-trace FILE [-- options] replay trace from FILE\n"
220 << " (replay-trace -- --help for more info)\n"
221 << " random-gen [-- options] add randomly generated ops to the store\n"
222 << " (random-gen -- --help for more info)\n"
223 << " rewrite-crush [-- options] add a rewrite commit to the store\n"
224 << " (rewrite-crush -- --help for more info)\n"
225 << " inflate-pgmap [-- options] add given number of pgmaps to store\n"
226 << " (inflate-pgmap -- --help for more info)\n"
227 << " rebuild rebuild store\n"
228 << " (rebuild -- --help for more info)\n"
229 << std::endl;
230 std::cerr << d << std::endl;
231 std::cerr
232 << "\nPlease Note:\n"
233 << "* Ceph-specific options should be in the format --option-name=VAL\n"
234 << " (specifically, do not forget the '='!!)\n"
235 << "* Command-specific options need to be passed after a '--'\n"
236 << " e.g., 'get monmap -- --version 10 --out /tmp/foo'"
237 << std::endl;
238}
239
240int update_osdmap(MonitorDBStore& store, version_t ver, bool copy,
241 ceph::shared_ptr<CrushWrapper> crush,
242 MonitorDBStore::Transaction* t) {
243 const string prefix("osdmap");
244
245 // full
246 bufferlist bl;
247 int r = 0;
248 r = store.get(prefix, store.combine_strings("full", ver), bl);
249 if (r) {
250 std::cerr << "Error getting full map: " << cpp_strerror(r) << std::endl;
251 return r;
252 }
253 OSDMap osdmap;
254 osdmap.decode(bl);
255 osdmap.crush = crush;
256 if (copy) {
257 osdmap.inc_epoch();
258 }
259 bl.clear();
260 // be consistent with OSDMonitor::update_from_paxos()
261 osdmap.encode(bl, CEPH_FEATURES_ALL|CEPH_FEATURE_RESERVED);
262 t->put(prefix, store.combine_strings("full", osdmap.get_epoch()), bl);
263
264 // incremental
265 OSDMap::Incremental inc;
266 if (copy) {
267 inc.epoch = osdmap.get_epoch();
268 inc.fsid = osdmap.get_fsid();
269 } else {
270 bl.clear();
271 r = store.get(prefix, ver, bl);
272 if (r) {
273 std::cerr << "Error getting inc map: " << cpp_strerror(r) << std::endl;
274 return r;
275 }
276 OSDMap::Incremental inc(bl);
277 if (inc.crush.length()) {
278 inc.crush.clear();
279 crush->encode(inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
280 }
281 if (inc.fullmap.length()) {
282 OSDMap fullmap;
283 fullmap.decode(inc.fullmap);
284 fullmap.crush = crush;
285 inc.fullmap.clear();
286 fullmap.encode(inc.fullmap);
287 }
288 }
289 assert(osdmap.have_crc());
290 inc.full_crc = osdmap.get_crc();
291 bl.clear();
292 // be consistent with OSDMonitor::update_from_paxos()
293 inc.encode(bl, CEPH_FEATURES_ALL|CEPH_FEATURE_RESERVED);
294 t->put(prefix, inc.epoch, bl);
295 return 0;
296}
297
298int rewrite_transaction(MonitorDBStore& store, int version,
299 const string& crush_file,
300 MonitorDBStore::Transaction* t) {
301 const string prefix("osdmap");
302
303 // calc the known-good epoch
304 version_t last_committed = store.get(prefix, "last_committed");
305 version_t good_version = 0;
306 if (version <= 0) {
307 if (last_committed >= (unsigned)-version) {
308 good_version = last_committed + version;
309 } else {
310 std::cerr << "osdmap-version is less than: -" << last_committed << std::endl;
311 return EINVAL;
312 }
313 } else {
314 good_version = version;
315 }
316 if (good_version >= last_committed) {
317 std::cout << "good epoch is greater or equal to the last committed one: "
318 << good_version << " >= " << last_committed << std::endl;
319 return 0;
320 }
321
322 // load/extract the crush map
323 int r = 0;
324 ceph::shared_ptr<CrushWrapper> crush(new CrushWrapper);
325 if (crush_file.empty()) {
326 bufferlist bl;
327 r = store.get(prefix, store.combine_strings("full", good_version), bl);
328 if (r) {
329 std::cerr << "Error getting map: " << cpp_strerror(r) << std::endl;
330 return r;
331 }
332 OSDMap osdmap;
333 osdmap.decode(bl);
334 crush = osdmap.crush;
335 } else {
336 string err;
337 bufferlist bl;
338 r = bl.read_file(crush_file.c_str(), &err);
339 if (r) {
340 std::cerr << err << ": " << cpp_strerror(r) << std::endl;
341 return r;
342 }
343 bufferlist::iterator p = bl.begin();
344 crush->decode(p);
345 }
346
347 // prepare a transaction to rewrite the epochs
348 // (good_version, last_committed]
349 // with the good crush map.
350 // XXX: may need to break this into several paxos versions?
351 assert(good_version < last_committed);
352 for (version_t v = good_version + 1; v <= last_committed; v++) {
353 cout << "rewriting epoch #" << v << "/" << last_committed << std::endl;
354 r = update_osdmap(store, v, false, crush, t);
355 if (r)
356 return r;
357 }
358
359 // add a new osdmap epoch to store, so monitors will update their current osdmap
360 // in addition to the ones stored in epochs.
361 //
362 // This is needed due to the way the monitor updates from paxos and the
363 // facilities we are leveraging to push this update to the rest of the
364 // quorum.
365 //
366 // In a nutshell, we are generating a good version of the osdmap, with a
367 // proper crush, and building a transaction that will replace the bad
368 // osdmaps with good osdmaps. But this transaction needs to be applied on
369 // all nodes, so that the monitors will have good osdmaps to share with
370 // clients. We thus leverage Paxos, specifically the recovery mechanism, by
371 // creating a pending value that will be committed once the monitors form an
372 // initial quorum after being brought back to life.
373 //
374 // However, the way the monitor works has the paxos services, including the
375 // OSDMonitor, updating their state from disk *prior* to the recovery phase
376 // begins (so they have an up to date state in memory). This means the
377 // OSDMonitor will see the old, broken map, before the new paxos version is
378 // applied to disk, and the old version is cached. Even though we have the
379 // good map now, and we share the good map with clients, we will still be
380 // working on the old broken map. Instead of mucking around the monitor to
381 // make this work, we instead opt for adding the same osdmap but with a
382 // newer version, so that the OSDMonitor picks up on it when it updates from
383 // paxos after the proposal has been committed. This is not elegant, but
384 // avoids further unpleasantness that would arise from kludging around the
385 // current behavior. Also, has the added benefit of making sure the clients
386 // get an updated version of the map (because last_committed+1 >
387 // last_committed) :)
388 //
389 cout << "adding a new epoch #" << last_committed+1 << std::endl;
390 r = update_osdmap(store, last_committed++, true, crush, t);
391 if (r)
392 return r;
393 t->put(prefix, store.combine_strings("full", "latest"), last_committed);
394 t->put(prefix, "last_committed", last_committed);
395 return 0;
396}
397
398/**
399 * create a new paxos version which carries a proposal to rewrite all epochs
400 * of incremental and full map of "osdmap" after a faulty crush map is injected.
401 * so the leader will trigger a recovery and propagate this fix to its peons,
402 * after the proposal is accepted, and the transaction in it is applied. all
403 * monitors will rewrite the bad crush map with the good one, and have a new
404 * osdmap epoch with the good crush map in it.
405 */
406int rewrite_crush(const char* progname,
407 vector<string>& subcmds,
408 MonitorDBStore& store) {
409 po::options_description op_desc("Allowed 'rewrite-crush' options");
410 int version = -1;
411 string crush_file;
412 op_desc.add_options()
413 ("help,h", "produce this help message")
414 ("crush", po::value<string>(&crush_file),
415 ("path to the crush map file "
416 "(default: will instead extract it from the known-good osdmap)"))
417 ("good-epoch", po::value<int>(&version),
418 "known-good epoch of osdmap, if a negative number '-N' is given, the "
419 "$last_committed-N is used instead (default: -1). "
420 "Please note, -1 is not necessarily a good epoch, because there are "
421 "good chance that we have more epochs slipped into the monstore after "
422 "the one where the crushmap is firstly injected.")
423 ;
424 po::variables_map op_vm;
425 int r = parse_cmd_args(&op_desc, NULL, NULL, subcmds, &op_vm);
426 if (r) {
427 return -r;
428 }
429 if (op_vm.count("help")) {
430 usage(progname, op_desc);
431 return 0;
432 }
433
434 MonitorDBStore::Transaction rewrite_txn;
435 r = rewrite_transaction(store, version, crush_file, &rewrite_txn);
436 if (r) {
437 return r;
438 }
439
440 // store the transaction into store as a proposal
441 const string prefix("paxos");
442 version_t pending_v = store.get(prefix, "last_committed") + 1;
443 auto t(std::make_shared<MonitorDBStore::Transaction>());
444 bufferlist bl;
445 rewrite_txn.encode(bl);
446 cout << "adding pending commit " << pending_v
447 << " " << bl.length() << " bytes" << std::endl;
448 t->put(prefix, pending_v, bl);
449 t->put(prefix, "pending_v", pending_v);
450 // a large enough yet unique proposal number will probably do the trick
451 version_t pending_pn = (store.get(prefix, "accepted_pn") / 100 + 4) * 100 + 1;
452 t->put(prefix, "pending_pn", pending_pn);
453 store.apply_transaction(t);
454 return 0;
455}
456
457int inflate_pgmap(MonitorDBStore& st, unsigned n, bool can_be_trimmed) {
458 // put latest pg map into monstore to bloat it up
459 // only format version == 1 is supported
460 version_t last = st.get("pgmap", "last_committed");
461 bufferlist bl;
462
463 // get the latest delta
464 int r = st.get("pgmap", last, bl);
465 if (r) {
466 std::cerr << "Error getting pgmap: " << cpp_strerror(r) << std::endl;
467 return r;
468 }
469
470 // try to pull together an idempotent "delta"
471 ceph::unordered_map<pg_t, pg_stat_t> pg_stat;
472 for (KeyValueDB::Iterator i = st.get_iterator("pgmap_pg");
473 i->valid(); i->next()) {
474 pg_t pgid;
475 if (!pgid.parse(i->key().c_str())) {
476 std::cerr << "unable to parse key " << i->key() << std::endl;
477 continue;
478 }
479 bufferlist pg_bl = i->value();
480 pg_stat_t ps;
481 bufferlist::iterator p = pg_bl.begin();
482 ::decode(ps, p);
483 // will update the last_epoch_clean of all the pgs.
484 pg_stat[pgid] = ps;
485 }
486
487 version_t first = st.get("pgmap", "first_committed");
488 version_t ver = last;
489 auto txn(std::make_shared<MonitorDBStore::Transaction>());
490 for (unsigned i = 0; i < n; i++) {
491 bufferlist trans_bl;
492 bufferlist dirty_pgs;
493 for (ceph::unordered_map<pg_t, pg_stat_t>::iterator ps = pg_stat.begin();
494 ps != pg_stat.end(); ++ps) {
495 ::encode(ps->first, dirty_pgs);
496 if (!can_be_trimmed) {
497 ps->second.last_epoch_clean = first;
498 }
499 ::encode(ps->second, dirty_pgs);
500 }
501 utime_t inc_stamp = ceph_clock_now();
502 ::encode(inc_stamp, trans_bl);
503 ::encode_destructively(dirty_pgs, trans_bl);
504 bufferlist dirty_osds;
505 ::encode(dirty_osds, trans_bl);
506 txn->put("pgmap", ++ver, trans_bl);
507 // update the db in batch
508 if (txn->size() > 1024) {
509 st.apply_transaction(txn);
510 // reset the transaction
511 txn.reset(new MonitorDBStore::Transaction);
512 }
513 }
514 txn->put("pgmap", "last_committed", ver);
515 txn->put("pgmap_meta", "version", ver);
516 // this will also piggy back the leftover pgmap added in the loop above
517 st.apply_transaction(txn);
518 return 0;
519}
520
521static int update_auth(MonitorDBStore& st, const string& keyring_path)
522{
523 // import all keyrings stored in the keyring file
524 KeyRing keyring;
525 int r = keyring.load(g_ceph_context, keyring_path);
526 if (r < 0) {
527 cerr << "unable to load admin keyring: " << keyring_path << std::endl;
528 return r;
529 }
530
531 bufferlist bl;
532 __u8 v = 1;
533 ::encode(v, bl);
534
535 for (const auto& k : keyring.get_keys()) {
536 KeyServerData::Incremental auth_inc;
537 auth_inc.name = k.first;
538 auth_inc.auth = k.second;
539 if (auth_inc.auth.caps.empty()) {
540 cerr << "no caps granted to: " << auth_inc.name << std::endl;
541 return -EINVAL;
542 }
543 auth_inc.op = KeyServerData::AUTH_INC_ADD;
544
545 AuthMonitor::Incremental inc;
546 inc.inc_type = AuthMonitor::AUTH_DATA;
547 ::encode(auth_inc, inc.auth_data);
548 inc.auth_type = CEPH_AUTH_CEPHX;
549
550 inc.encode(bl, CEPH_FEATURES_ALL);
551 }
552
553 const string prefix("auth");
554 auto last_committed = st.get(prefix, "last_committed") + 1;
555 auto t = make_shared<MonitorDBStore::Transaction>();
556 t->put(prefix, last_committed, bl);
557 t->put(prefix, "last_committed", last_committed);
558 auto first_committed = st.get(prefix, "first_committed");
559 if (!first_committed) {
560 t->put(prefix, "first_committed", last_committed);
561 }
562 st.apply_transaction(t);
563 return 0;
564}
565
566static int update_mkfs(MonitorDBStore& st)
567{
568 MonMap monmap;
569 int r = monmap.build_initial(g_ceph_context, cerr);
570 if (r) {
571 cerr << "no initial monitors" << std::endl;
572 return -EINVAL;
573 }
574 bufferlist bl;
575 monmap.encode(bl, CEPH_FEATURES_ALL);
576 monmap.set_epoch(0);
577 auto t = make_shared<MonitorDBStore::Transaction>();
578 t->put("mkfs", "monmap", bl);
579 st.apply_transaction(t);
580 return 0;
581}
582
583static int update_monitor(MonitorDBStore& st)
584{
585 const string prefix("monitor");
586 // a stripped-down Monitor::mkfs()
587 bufferlist bl;
588 bl.append(CEPH_MON_ONDISK_MAGIC "\n");
589 auto t = make_shared<MonitorDBStore::Transaction>();
590 t->put(prefix, "magic", bl);
591 st.apply_transaction(t);
592 return 0;
593}
594
b32b8144
FG
595// rebuild
596// - mgr
597// - mgr_command_desc
3efd9988
FG
598static int update_mgrmap(MonitorDBStore& st)
599{
600 auto t = make_shared<MonitorDBStore::Transaction>();
601
602 {
603 MgrMap map;
604 // mgr expects epoch > 1
605 map.epoch++;
606 auto initial_modules =
607 get_str_vec(g_ceph_context->_conf->get_val<string>("mgr_initial_modules"));
608 copy(begin(initial_modules),
609 end(initial_modules),
610 inserter(map.modules, end(map.modules)));
611 bufferlist bl;
612 map.encode(bl, CEPH_FEATURES_ALL);
613 t->put("mgr", map.epoch, bl);
614 t->put("mgr", "last_committed", map.epoch);
615 }
616 {
617 auto mgr_command_descs = mgr_commands;
618 for (auto& c : mgr_command_descs) {
619 c.set_flag(MonCommand::FLAG_MGR);
620 }
621 bufferlist bl;
622 ::encode(mgr_command_descs, bl);
623 t->put("mgr_command_desc", "", bl);
624 }
625 return st.apply_transaction(t);
626}
627
7c673cae
FG
628static int update_paxos(MonitorDBStore& st)
629{
630 // build a pending paxos proposal from all non-permanent k/v pairs. once the
631 // proposal is committed, it will gets applied. on the sync provider side, it
632 // will be a no-op, but on its peers, the paxos commit will help to build up
633 // the necessary epochs.
634 bufferlist pending_proposal;
635 {
636 MonitorDBStore::Transaction t;
637 vector<string> prefixes = {"auth", "osdmap",
3efd9988 638 "mgr", "mgr_command_desc",
7c673cae
FG
639 "pgmap", "pgmap_pg", "pgmap_meta"};
640 for (const auto& prefix : prefixes) {
641 for (auto i = st.get_iterator(prefix); i->valid(); i->next()) {
642 auto key = i->raw_key();
643 auto val = i->value();
644 t.put(key.first, key.second, val);
645 }
646 }
647 t.encode(pending_proposal);
648 }
649 const string prefix("paxos");
650 auto t = make_shared<MonitorDBStore::Transaction>();
651 t->put(prefix, "first_committed", 0);
652 t->put(prefix, "last_committed", 0);
653 auto pending_v = 1;
654 t->put(prefix, pending_v, pending_proposal);
655 t->put(prefix, "pending_v", pending_v);
656 t->put(prefix, "pending_pn", 400);
657 st.apply_transaction(t);
658 return 0;
659}
660
661// rebuild
662// - pgmap_meta/version
663// - pgmap_meta/last_osdmap_epoch
664// - pgmap_meta/last_pg_scan
665// - pgmap_meta/full_ratio
666// - pgmap_meta/nearfull_ratio
667// - pgmap_meta/stamp
668static int update_pgmap_meta(MonitorDBStore& st)
669{
670 const string prefix("pgmap_meta");
671 auto t = make_shared<MonitorDBStore::Transaction>();
672 // stolen from PGMonitor::create_pending()
673 // the first pgmap_meta
674 t->put(prefix, "version", 1);
675 {
676 auto stamp = ceph_clock_now();
677 bufferlist bl;
678 ::encode(stamp, bl);
679 t->put(prefix, "stamp", bl);
680 }
681 {
682 auto last_osdmap_epoch = st.get("osdmap", "last_committed");
683 t->put(prefix, "last_osdmap_epoch", last_osdmap_epoch);
684 }
685 // be conservative, so PGMonitor will scan the all pools for pg changes
686 t->put(prefix, "last_pg_scan", 1);
687 {
688 auto full_ratio = g_ceph_context->_conf->mon_osd_full_ratio;
689 if (full_ratio > 1.0)
690 full_ratio /= 100.0;
691 bufferlist bl;
692 ::encode(full_ratio, bl);
693 t->put(prefix, "full_ratio", bl);
694 }
695 {
696 auto backfillfull_ratio = g_ceph_context->_conf->mon_osd_backfillfull_ratio;
697 if (backfillfull_ratio > 1.0)
698 backfillfull_ratio /= 100.0;
699 bufferlist bl;
700 ::encode(backfillfull_ratio, bl);
701 t->put(prefix, "backfillfull_ratio", bl);
702 }
703 {
704 auto nearfull_ratio = g_ceph_context->_conf->mon_osd_nearfull_ratio;
705 if (nearfull_ratio > 1.0)
706 nearfull_ratio /= 100.0;
707 bufferlist bl;
708 ::encode(nearfull_ratio, bl);
709 t->put(prefix, "nearfull_ratio", bl);
710 }
711 st.apply_transaction(t);
712 return 0;
713}
714
715int rebuild_monstore(const char* progname,
716 vector<string>& subcmds,
717 MonitorDBStore& st)
718{
719 po::options_description op_desc("Allowed 'rebuild' options");
720 string keyring_path;
721 op_desc.add_options()
722 ("keyring", po::value<string>(&keyring_path),
723 "path to the client.admin key");
724 po::variables_map op_vm;
725 int r = parse_cmd_args(&op_desc, nullptr, nullptr, subcmds, &op_vm);
726 if (r) {
727 return -r;
728 }
729 if (op_vm.count("help")) {
730 usage(progname, op_desc);
731 return 0;
732 }
733 if (!keyring_path.empty())
734 update_auth(st, keyring_path);
735 if ((r = update_pgmap_meta(st))) {
736 return r;
737 }
b32b8144
FG
738 if ((r = update_mgrmap(st))) {
739 return r;
740 }
7c673cae
FG
741 if ((r = update_paxos(st))) {
742 return r;
743 }
744 if ((r = update_mkfs(st))) {
745 return r;
746 }
747 if ((r = update_monitor(st))) {
748 return r;
749 }
750 return 0;
751}
752
753int main(int argc, char **argv) {
754 int err = 0;
755 po::options_description desc("Allowed options");
756 string store_path, cmd;
757 vector<string> subcmds;
758 desc.add_options()
759 ("help,h", "produce help message")
760 ;
761
762 /* Dear Future Developer:
763 *
764 * for further improvement, should you need to pass specific options to
765 * a command (e.g., get osdmap VER --hex), you can expand the current
766 * format by creating additional 'po::option_description' and passing
767 * 'subcmds' to 'po::command_line_parser', much like what is currently
768 * done by default. However, beware: in order to differentiate a
769 * command-specific option from the generic/global options, you will need
770 * to pass '--' in the command line (so that the first parser, the one
771 * below, assumes it has reached the end of all options); e.g.,
772 * 'get osdmap VER -- --hex'. Not pretty; far from intuitive; it was as
773 * far as I got with this library. Improvements on this format will be
774 * left as an excercise for the reader. -Joao
775 */
776 po::options_description positional_desc("Positional argument options");
777 positional_desc.add_options()
778 ("store-path", po::value<string>(&store_path),
779 "path to monitor's store")
780 ("command", po::value<string>(&cmd),
781 "Command")
782 ("subcmd", po::value<vector<string> >(&subcmds),
783 "Command arguments/Sub-Commands")
784 ;
785 po::positional_options_description positional;
786 positional.add("store-path", 1);
787 positional.add("command", 1);
788 positional.add("subcmd", -1);
789
790 po::options_description all_desc("All options");
791 all_desc.add(desc).add(positional_desc);
792
793 vector<string> ceph_option_strings;
794 po::variables_map vm;
795 try {
796 po::parsed_options parsed =
797 po::command_line_parser(argc, argv).
798 options(all_desc).
799 positional(positional).
800 allow_unregistered().run();
801
802 po::store(
803 parsed,
804 vm);
805 po::notify(vm);
806
807 // Specifying po::include_positional would have our positional arguments
808 // being collected (thus being part of ceph_option_strings and eventually
809 // passed on to global_init() below).
810 // Instead we specify po::exclude_positional, which has the upside of
811 // completely avoid this, but the downside of having to specify ceph
812 // options as --VAR=VAL (note the '='); otherwise we will capture the
813 // positional 'VAL' as belonging to us, never being collected.
814 ceph_option_strings = po::collect_unrecognized(parsed.options,
815 po::exclude_positional);
816
817 } catch(po::error &e) {
818 std::cerr << "error: " << e.what() << std::endl;
819 return 1;
820 }
821
822 // parse command structure before calling global_init() and friends.
823
824 if (vm.empty() || vm.count("help") ||
825 store_path.empty() || cmd.empty() ||
826 *cmd.begin() == '-') {
827 usage(argv[0], desc);
828 return 1;
829 }
830
831 vector<const char *> ceph_options, def_args;
832 ceph_options.reserve(ceph_option_strings.size());
833 for (vector<string>::iterator i = ceph_option_strings.begin();
834 i != ceph_option_strings.end();
835 ++i) {
836 ceph_options.push_back(i->c_str());
837 }
838
839 auto cct = global_init(
840 &def_args, ceph_options, CEPH_ENTITY_TYPE_MON,
841 CODE_ENVIRONMENT_UTILITY, 0);
842 common_init_finish(g_ceph_context);
843 g_ceph_context->_conf->apply_changes(NULL);
844 g_conf = g_ceph_context->_conf;
845
846 // this is where we'll write *whatever*, on a per-command basis.
847 // not all commands require some place to write their things.
848 MonitorDBStore st(store_path);
849 if (store_path.size()) {
850 stringstream ss;
851 int r = st.open(ss);
852 if (r < 0) {
853 std::cerr << ss.str() << std::endl;
854 return EINVAL;
855 }
856 }
857
858 if (cmd == "dump-keys") {
859 KeyValueDB::WholeSpaceIterator iter = st.get_iterator();
860 while (iter->valid()) {
861 pair<string,string> key(iter->raw_key());
862 cout << key.first << " / " << key.second << std::endl;
863 iter->next();
864 }
865 } else if (cmd == "compact") {
866 st.compact();
867 } else if (cmd == "get") {
868 unsigned v = 0;
869 string outpath;
870 bool readable = false;
871 string map_type;
872 // visible options for this command
873 po::options_description op_desc("Allowed 'get' options");
874 op_desc.add_options()
875 ("help,h", "produce this help message")
876 ("out,o", po::value<string>(&outpath),
877 "output file (default: stdout)")
878 ("version,v", po::value<unsigned>(&v),
879 "map version to obtain")
880 ("readable,r", po::value<bool>(&readable)->default_value(false),
881 "print the map infomation in human readable format")
882 ;
883 // this is going to be a positional argument; we don't want to show
884 // it as an option during --help, but we do want to have it captured
885 // when parsing.
886 po::options_description hidden_op_desc("Hidden 'get' options");
887 hidden_op_desc.add_options()
888 ("map-type", po::value<string>(&map_type),
889 "map-type")
890 ;
891 po::positional_options_description op_positional;
892 op_positional.add("map-type", 1);
893
894 po::variables_map op_vm;
895 int r = parse_cmd_args(&op_desc, &hidden_op_desc, &op_positional,
896 subcmds, &op_vm);
897 if (r < 0) {
898 err = -r;
899 goto done;
900 }
901
902 if (op_vm.count("help") || map_type.empty()) {
903 usage(argv[0], op_desc);
904 err = 0;
905 goto done;
906 }
907
908 if (v == 0) {
909 if (map_type == "crushmap") {
910 v = st.get("osdmap", "last_committed");
911 } else {
912 v = st.get(map_type, "last_committed");
913 }
914 }
915
916 int fd = STDOUT_FILENO;
917 if (!outpath.empty()){
918 fd = ::open(outpath.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666);
919 if (fd < 0) {
920 std::cerr << "error opening output file: "
921 << cpp_strerror(errno) << std::endl;
922 err = EINVAL;
923 goto done;
924 }
925 }
926
927 BOOST_SCOPE_EXIT((&r) (&fd) (&outpath)) {
928 ::close(fd);
929 if (r < 0 && fd != STDOUT_FILENO) {
930 ::remove(outpath.c_str());
931 }
932 } BOOST_SCOPE_EXIT_END
933
934 bufferlist bl;
935 r = 0;
936 if (map_type == "osdmap") {
937 r = st.get(map_type, st.combine_strings("full", v), bl);
938 } else if (map_type == "crushmap") {
939 bufferlist tmp;
940 r = st.get("osdmap", st.combine_strings("full", v), tmp);
941 if (r >= 0) {
942 OSDMap osdmap;
943 osdmap.decode(tmp);
944 osdmap.crush->encode(bl, CEPH_FEATURES_SUPPORTED_DEFAULT);
945 }
946 } else {
947 r = st.get(map_type, v, bl);
948 }
949 if (r < 0) {
950 std::cerr << "Error getting map: " << cpp_strerror(r) << std::endl;
951 err = EINVAL;
952 goto done;
953 }
954
955 if (readable) {
956 stringstream ss;
957 bufferlist out;
b32b8144
FG
958 try {
959 if (map_type == "monmap") {
960 MonMap monmap;
961 monmap.decode(bl);
962 monmap.print(ss);
963 } else if (map_type == "osdmap") {
964 OSDMap osdmap;
965 osdmap.decode(bl);
966 osdmap.print(ss);
967 } else if (map_type == "mdsmap") {
968 FSMap fs_map;
969 fs_map.decode(bl);
970 fs_map.print(ss);
971 } else if (map_type == "mgr") {
972 MgrMap mgr_map;
973 auto p = bl.begin();
974 mgr_map.decode(p);
975 JSONFormatter f;
976 f.dump_object("mgrmap", mgr_map);
977 f.flush(ss);
978 } else if (map_type == "crushmap") {
979 CrushWrapper cw;
980 bufferlist::iterator it = bl.begin();
981 cw.decode(it);
982 CrushCompiler cc(cw, std::cerr, 0);
983 cc.decompile(ss);
984 } else {
985 std::cerr << "This type of readable map does not exist: " << map_type
986 << std::endl << "You can only specify[osdmap|monmap|mdsmap"
987 "|crushmap|mgr]" << std::endl;
988 }
989 } catch (const buffer::error &err) {
990 std::cerr << "Could not decode for human readable output (you may still"
991 " use non-readable mode). Detail: " << err << std::endl;
7c673cae 992 }
b32b8144 993
7c673cae
FG
994 out.append(ss);
995 out.write_fd(fd);
996 } else {
997 bl.write_fd(fd);
998 }
999
1000 if (!outpath.empty()) {
1001 std::cout << "wrote " << map_type
1002 << " version " << v << " to " << outpath
1003 << std::endl;
1004 }
1005 } else if (cmd == "show-versions") {
1006 string map_type; //map type:osdmap,monmap...
1007 // visible options for this command
1008 po::options_description op_desc("Allowed 'show-versions' options");
1009 op_desc.add_options()
1010 ("help,h", "produce this help message")
1011 ("map-type", po::value<string>(&map_type), "map_type");
1012
1013 po::positional_options_description op_positional;
1014 op_positional.add("map-type", 1);
1015
1016 po::variables_map op_vm;
1017 int r = parse_cmd_args(&op_desc, NULL, &op_positional,
1018 subcmds, &op_vm);
1019 if (r < 0) {
1020 err = -r;
1021 goto done;
1022 }
1023
1024 if (op_vm.count("help") || map_type.empty()) {
1025 usage(argv[0], op_desc);
1026 err = 0;
1027 goto done;
1028 }
1029
1030 unsigned int v_first = 0;
1031 unsigned int v_last = 0;
1032 v_first = st.get(map_type, "first_committed");
1033 v_last = st.get(map_type, "last_committed");
1034
1035 std::cout << "first committed:\t" << v_first << "\n"
1036 << "last committed:\t" << v_last << std::endl;
1037 } else if (cmd == "dump-paxos") {
1038 unsigned dstart = 0;
1039 unsigned dstop = ~0;
1040 po::options_description op_desc("Allowed 'dump-paxos' options");
1041 op_desc.add_options()
1042 ("help,h", "produce this help message")
1043 ("start,s", po::value<unsigned>(&dstart),
1044 "starting version (default: 0)")
1045 ("end,e", po::value<unsigned>(&dstop),
1046 "finish version (default: ~0)")
1047 ;
1048
1049 po::variables_map op_vm;
1050 int r = parse_cmd_args(&op_desc, NULL, NULL,
1051 subcmds, &op_vm);
1052 if (r < 0) {
1053 err = -r;
1054 goto done;
1055 }
1056
1057 if (op_vm.count("help")) {
1058 usage(argv[0], op_desc);
1059 err = 0;
1060 goto done;
1061 }
1062
1063 if (dstart > dstop) {
1064 std::cerr << "error: 'start' version (value: " << dstart << ") "
1065 << " is greater than 'end' version (value: " << dstop << ")"
1066 << std::endl;
1067 err = EINVAL;
1068 goto done;
1069 }
1070
1071 version_t v = dstart;
1072 for (; v <= dstop; ++v) {
1073 bufferlist bl;
1074 st.get("paxos", v, bl);
1075 if (bl.length() == 0)
1076 break;
1077 cout << "\n--- " << v << " ---" << std::endl;
1078 auto tx(std::make_shared<MonitorDBStore::Transaction>());
1079 Paxos::decode_append_transaction(tx, bl);
1080 JSONFormatter f(true);
1081 tx->dump(&f);
1082 f.flush(cout);
1083 }
1084
1085 std::cout << "dumped " << v << " paxos versions" << std::endl;
1086
1087 } else if (cmd == "dump-trace") {
1088 unsigned dstart = 0;
1089 unsigned dstop = ~0;
1090 string outpath;
1091
1092 // visible options for this command
1093 po::options_description op_desc("Allowed 'dump-trace' options");
1094 op_desc.add_options()
1095 ("help,h", "produce this help message")
1096 ("start,s", po::value<unsigned>(&dstart),
1097 "starting version (default: 0)")
1098 ("end,e", po::value<unsigned>(&dstop),
1099 "finish version (default: ~0)")
1100 ;
1101 // this is going to be a positional argument; we don't want to show
1102 // it as an option during --help, but we do want to have it captured
1103 // when parsing.
1104 po::options_description hidden_op_desc("Hidden 'dump-trace' options");
1105 hidden_op_desc.add_options()
1106 ("out,o", po::value<string>(&outpath),
1107 "file to write the dump to")
1108 ;
1109 po::positional_options_description op_positional;
1110 op_positional.add("out", 1);
1111
1112 po::variables_map op_vm;
1113 int r = parse_cmd_args(&op_desc, &hidden_op_desc, &op_positional,
1114 subcmds, &op_vm);
1115 if (r < 0) {
1116 err = -r;
1117 goto done;
1118 }
1119
1120 if (op_vm.count("help")) {
1121 usage(argv[0], op_desc);
1122 err = 0;
1123 goto done;
1124 }
1125
1126 if (outpath.empty()) {
1127 usage(argv[0], op_desc);
1128 err = EINVAL;
1129 goto done;
1130 }
1131
1132 if (dstart > dstop) {
1133 std::cerr << "error: 'start' version (value: " << dstart << ") "
1134 << " is greater than 'stop' version (value: " << dstop << ")"
1135 << std::endl;
1136 err = EINVAL;
1137 goto done;
1138 }
1139
1140 TraceIter iter(outpath.c_str());
1141 iter.init();
1142 while (true) {
1143 if (!iter.valid())
1144 break;
1145 if (iter.num() >= dstop) {
1146 break;
1147 }
1148 if (iter.num() >= dstart) {
1149 JSONFormatter f(true);
1150 iter.cur()->dump(&f, false);
1151 f.flush(std::cout);
1152 std::cout << std::endl;
1153 }
1154 iter.next();
1155 }
1156 std::cerr << "Read up to transaction " << iter.num() << std::endl;
1157 } else if (cmd == "replay-trace") {
1158 string inpath;
1159 unsigned num_replays = 1;
1160 // visible options for this command
1161 po::options_description op_desc("Allowed 'replay-trace' options");
1162 op_desc.add_options()
1163 ("help,h", "produce this help message")
1164 ("num-replays,n", po::value<unsigned>(&num_replays),
1165 "finish version (default: 1)")
1166 ;
1167 // this is going to be a positional argument; we don't want to show
1168 // it as an option during --help, but we do want to have it captured
1169 // when parsing.
1170 po::options_description hidden_op_desc("Hidden 'replay-trace' options");
1171 hidden_op_desc.add_options()
1172 ("in,i", po::value<string>(&inpath),
1173 "file to write the dump to")
1174 ;
1175 po::positional_options_description op_positional;
1176 op_positional.add("in", 1);
1177
1178 // op_desc_all will aggregate all visible and hidden options for parsing.
1179 // when we call 'usage()' we just pass 'op_desc', as that's the description
1180 // holding the visible options.
1181 po::options_description op_desc_all;
1182 op_desc_all.add(op_desc).add(hidden_op_desc);
1183
1184 po::variables_map op_vm;
1185 try {
1186 po::parsed_options op_parsed = po::command_line_parser(subcmds).
1187 options(op_desc_all).positional(op_positional).run();
1188 po::store(op_parsed, op_vm);
1189 po::notify(op_vm);
1190 } catch (po::error &e) {
1191 std::cerr << "error: " << e.what() << std::endl;
1192 err = EINVAL;
1193 goto done;
1194 }
1195
1196 if (op_vm.count("help")) {
1197 usage(argv[0], op_desc);
1198 err = 0;
1199 goto done;
1200 }
1201
1202 if (inpath.empty()) {
1203 usage(argv[0], op_desc);
1204 err = EINVAL;
1205 goto done;
1206 }
1207
1208 unsigned num = 0;
1209 for (unsigned i = 0; i < num_replays; ++i) {
1210 TraceIter iter(inpath.c_str());
1211 iter.init();
1212 while (true) {
1213 if (!iter.valid())
1214 break;
1215 std::cerr << "Replaying trans num " << num << std::endl;
1216 st.apply_transaction(iter.cur());
1217 iter.next();
1218 ++num;
1219 }
1220 std::cerr << "Read up to transaction " << iter.num() << std::endl;
1221 }
1222 } else if (cmd == "random-gen") {
1223 unsigned tsize = 200;
1224 unsigned tvalsize = 1024;
1225 unsigned ntrans = 100;
1226 po::options_description op_desc("Allowed 'random-gen' options");
1227 op_desc.add_options()
1228 ("help,h", "produce this help message")
1229 ("num-keys,k", po::value<unsigned>(&tsize),
1230 "keys to write in each transaction (default: 200)")
1231 ("size,s", po::value<unsigned>(&tvalsize),
1232 "size (in bytes) of the value to write in each key (default: 1024)")
1233 ("ntrans,n", po::value<unsigned>(&ntrans),
1234 "number of transactions to run (default: 100)")
1235 ;
1236
1237 po::variables_map op_vm;
1238 try {
1239 po::parsed_options op_parsed = po::command_line_parser(subcmds).
1240 options(op_desc).run();
1241 po::store(op_parsed, op_vm);
1242 po::notify(op_vm);
1243 } catch (po::error &e) {
1244 std::cerr << "error: " << e.what() << std::endl;
1245 err = EINVAL;
1246 goto done;
1247 }
1248
1249 if (op_vm.count("help")) {
1250 usage(argv[0], op_desc);
1251 err = 0;
1252 goto done;
1253 }
1254
1255 unsigned num = 0;
1256 for (unsigned i = 0; i < ntrans; ++i) {
1257 std::cerr << "Applying trans " << i << std::endl;
1258 auto t(std::make_shared<MonitorDBStore::Transaction>());
1259 string prefix;
1260 prefix.push_back((i%26)+'a');
1261 for (unsigned j = 0; j < tsize; ++j) {
1262 stringstream os;
1263 os << num;
1264 bufferlist bl;
1265 for (unsigned k = 0; k < tvalsize; ++k) bl.append(rand());
1266 t->put(prefix, os.str(), bl);
1267 ++num;
1268 }
1269 t->compact_prefix(prefix);
1270 st.apply_transaction(t);
1271 }
1272 } else if (cmd == "store-copy") {
1273 if (subcmds.size() < 1 || subcmds[0].empty()) {
1274 usage(argv[0], desc);
1275 err = EINVAL;
1276 goto done;
1277 }
1278
1279 string out_path = subcmds[0];
1280
1281 MonitorDBStore out_store(out_path);
1282 {
1283 stringstream ss;
1284 int r = out_store.create_and_open(ss);
1285 if (r < 0) {
1286 std::cerr << ss.str() << std::endl;
1287 goto done;
1288 }
1289 }
1290
1291
1292 KeyValueDB::WholeSpaceIterator it = st.get_iterator();
1293 uint64_t total_keys = 0;
1294 uint64_t total_size = 0;
1295 uint64_t total_tx = 0;
1296
1297 do {
1298 uint64_t num_keys = 0;
1299
1300 auto tx(std::make_shared<MonitorDBStore::Transaction>());
1301
1302 while (it->valid() && num_keys < 128) {
1303 pair<string,string> k = it->raw_key();
1304 bufferlist v = it->value();
1305 tx->put(k.first, k.second, v);
1306
1307 num_keys ++;
1308 total_tx ++;
1309 total_size += v.length();
1310
1311 it->next();
1312 }
1313
1314 total_keys += num_keys;
1315
1316 if (!tx->empty())
1317 out_store.apply_transaction(tx);
1318
1319 std::cout << "copied " << total_keys << " keys so far ("
1320 << stringify(si_t(total_size)) << ")" << std::endl;
1321
1322 } while (it->valid());
1323 out_store.close();
1324 std::cout << "summary: copied " << total_keys << " keys, using "
1325 << total_tx << " transactions, totalling "
1326 << stringify(si_t(total_size)) << std::endl;
1327 std::cout << "from '" << store_path << "' to '" << out_path << "'"
1328 << std::endl;
1329 } else if (cmd == "rewrite-crush") {
1330 err = rewrite_crush(argv[0], subcmds, st);
1331 } else if (cmd == "inflate-pgmap") {
1332 unsigned n = 2000;
1333 bool can_be_trimmed = false;
1334 po::options_description op_desc("Allowed 'inflate-pgmap' options");
1335 op_desc.add_options()
1336 ("num-maps,n", po::value<unsigned>(&n),
1337 "number of maps to add (default: 2000)")
1338 ("can-be-trimmed", po::value<bool>(&can_be_trimmed),
1339 "can be trimmed (default: false)")
1340 ;
1341
1342 po::variables_map op_vm;
1343 try {
1344 po::parsed_options op_parsed = po::command_line_parser(subcmds).
1345 options(op_desc).run();
1346 po::store(op_parsed, op_vm);
1347 po::notify(op_vm);
1348 } catch (po::error &e) {
1349 std::cerr << "error: " << e.what() << std::endl;
1350 err = EINVAL;
1351 goto done;
1352 }
1353 err = inflate_pgmap(st, n, can_be_trimmed);
1354 } else if (cmd == "rebuild") {
1355 err = rebuild_monstore(argv[0], subcmds, st);
1356 } else {
1357 std::cerr << "Unrecognized command: " << cmd << std::endl;
1358 usage(argv[0], desc);
1359 goto done;
1360 }
1361
1362 done:
1363 st.close();
1364 return err;
1365}