]> git.proxmox.com Git - ceph.git/blame - ceph/src/tools/ceph_monstore_tool.cc
import 15.2.9
[ceph.git] / ceph / src / tools / ceph_monstore_tool.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4* Ceph - scalable distributed file system
5*
6* Copyright (C) 2012 Inktank, Inc.
7*
8* This is free software; you can redistribute it and/or
9* modify it under the terms of the GNU Lesser General Public
10* License version 2.1, as published by the Free Software
11* Foundation. See file COPYING.
12*/
13#include <boost/program_options/variables_map.hpp>
14#include <boost/program_options/parsers.hpp>
15#include <boost/scope_exit.hpp>
16
17#include <stdlib.h>
18#include <string>
19
20#include "common/Formatter.h"
21#include "common/errno.h"
22
23#include "auth/KeyRing.h"
24#include "auth/cephx/CephxKeyServer.h"
25#include "global/global_init.h"
26#include "include/stringify.h"
3efd9988 27#include "mgr/mgr_commands.h"
7c673cae
FG
28#include "mon/AuthMonitor.h"
29#include "mon/MonitorDBStore.h"
30#include "mon/Paxos.h"
31#include "mon/MonMap.h"
b32b8144
FG
32#include "mds/FSMap.h"
33#include "mon/MgrMap.h"
7c673cae
FG
34#include "osd/OSDMap.h"
35#include "crush/CrushCompiler.h"
a8e16298 36#include "mon/CreatingPGs.h"
7c673cae
FG
37
38namespace po = boost::program_options;
7c673cae
FG
39
40class TraceIter {
41 int fd;
42 unsigned idx;
43 MonitorDBStore::TransactionRef t;
44public:
45 explicit TraceIter(string fname) : fd(-1), idx(-1) {
46 fd = ::open(fname.c_str(), O_RDONLY);
47 t.reset(new MonitorDBStore::Transaction);
48 }
49 bool valid() {
50 return fd != -1;
51 }
52 MonitorDBStore::TransactionRef cur() {
11fdf7f2 53 ceph_assert(valid());
7c673cae
FG
54 return t;
55 }
56 unsigned num() { return idx; }
57 void next() {
58 ++idx;
59 bufferlist bl;
60 int r = bl.read_fd(fd, 6);
61 if (r < 0) {
62 std::cerr << "Got error: " << cpp_strerror(r) << " on read_fd"
63 << std::endl;
64 ::close(fd);
65 fd = -1;
66 return;
67 } else if ((unsigned)r < 6) {
68 std::cerr << "short read" << std::endl;
69 ::close(fd);
70 fd = -1;
71 return;
72 }
11fdf7f2 73 auto bliter = bl.cbegin();
7c673cae 74 uint8_t ver, ver2;
11fdf7f2
TL
75 decode(ver, bliter);
76 decode(ver2, bliter);
7c673cae 77 uint32_t len;
11fdf7f2 78 decode(len, bliter);
7c673cae
FG
79 r = bl.read_fd(fd, len);
80 if (r < 0) {
81 std::cerr << "Got error: " << cpp_strerror(r) << " on read_fd"
82 << std::endl;
83 ::close(fd);
84 fd = -1;
85 return;
86 } else if ((unsigned)r < len) {
87 std::cerr << "short read" << std::endl;
88 ::close(fd);
89 fd = -1;
90 return;
91 }
11fdf7f2 92 bliter = bl.cbegin();
7c673cae
FG
93 t.reset(new MonitorDBStore::Transaction);
94 t->decode(bliter);
95 }
96 void init() {
97 next();
98 }
99 ~TraceIter() {
100 if (fd != -1) {
101 ::close(fd);
102 fd = -1;
103 }
104 }
105};
106
107
108int parse_cmd_args(
109 po::options_description *desc, /// < visible options description
110 po::options_description *hidden_desc, /// < hidden options description
111 po::positional_options_description *positional, /// < positional args
112 vector<string> &cmd_args, /// < arguments to be parsed
113 po::variables_map *vm /// > post-parsing variable map
114 )
115{
116 // desc_all will aggregate all visible and hidden options for parsing.
117 //
118 // From boost's program_options point of view, there is absolutely no
119 // distinction between 'desc' and 'hidden_desc'. This is a distinction
120 // that is only useful to us: 'desc' is whatever we are willing to show
121 // on 'usage()', whereas 'hidden_desc' refers to parameters we wish to
122 // take advantage of but do not wish to show on 'usage()'.
123 //
124 // For example, consider that program_options matches positional arguments
125 // (specified via 'positional') against the paramenters defined on a
126 // given 'po::options_description' class. This is performed below,
127 // supplying both the description and the positional arguments to the
128 // parser. However, we do not want the parameters that are mapped to
129 // positional arguments to be shown on usage, as that makes for ugly and
130 // confusing usage messages. Therefore we dissociate the options'
131 // description that is to be used as an aid to the user from those options
132 // that are nothing but useful for internal purposes (i.e., mapping options
133 // to positional arguments). We still need to aggregate them before parsing
134 // and that's what 'desc_all' is all about.
135 //
136
11fdf7f2 137 ceph_assert(desc != NULL);
7c673cae
FG
138
139 po::options_description desc_all;
140 desc_all.add(*desc);
141 if (hidden_desc != NULL)
142 desc_all.add(*hidden_desc);
143
144 try {
145 po::command_line_parser parser = po::command_line_parser(cmd_args).
146 options(desc_all);
147
148 if (positional) {
149 parser = parser.positional(*positional);
150 }
151
152 po::parsed_options parsed = parser.run();
153 po::store(parsed, *vm);
154 po::notify(*vm);
155 } catch (po::error &e) {
156 std::cerr << "error: " << e.what() << std::endl;
157 return -EINVAL;
158 }
159 return 0;
160}
161
162
163/**
164 * usage: ceph-monstore-tool <store-path> <command> [options]
165 *
166 * commands:
167 *
168 * store-copy < --out arg >
169 * dump-keys
170 * compact
171 * getmonmap < --out arg [ --version arg ] >
172 * getosdmap < --out arg [ --version arg ] >
173 * dump-paxos <--dump-start VER> <--dump-end VER>
174 * dump-trace < --trace-file arg >
175 * replay-trace
176 * random-gen
177 * rewrite-crush
7c673cae
FG
178 *
179 * wanted syntax:
180 *
181 * ceph-monstore-tool PATH CMD [options]
182 *
183 * ceph-monstore-tool PATH store-copy <PATH2 | -o PATH2>
184 * ceph-monstore-tool PATH dump-keys
185 * ceph-monstore-tool PATH compact
186 * ceph-monstore-tool PATH get monmap [VER]
187 * ceph-monstore-tool PATH get osdmap [VER]
188 * ceph-monstore-tool PATH dump-paxos STARTVER ENDVER
189 *
190 *
191 */
192void usage(const char *n, po::options_description &d)
193{
194 std::cerr <<
195 "usage: " << n << " <store-path> <cmd> [args|options]\n"
196 << "\n"
197 << "Commands:\n"
198 << " store-copy PATH copies store to PATH\n"
199 << " compact compacts the store\n"
200 << " get monmap [-- options] get monmap (version VER if specified)\n"
201 << " (default: last committed)\n"
202 << " get osdmap [-- options] get osdmap (version VER if specified)\n"
203 << " (default: last committed)\n"
204 << " get mdsmap [-- options] get mdsmap (version VER if specified)\n"
205 << " (default: last committed)\n"
b32b8144
FG
206 << " get mgr [-- options] get mgr map (version VER if specified)\n"
207 << " (default: last committed)\n"
7c673cae
FG
208 << " get crushmap [-- options] get crushmap (version VER if specified)\n"
209 << " (default: last committed)\n"
210 << " show-versions [-- options] show the first&last committed version of map\n"
211 << " (show-versions -- --help for more info)\n"
212 << " dump-keys dumps store keys to FILE\n"
213 << " (default: stdout)\n"
214 << " dump-paxos [-- options] dump paxos transactions\n"
215 << " (dump-paxos -- --help for more info)\n"
216 << " dump-trace FILE [-- options] dump contents of trace file FILE\n"
217 << " (dump-trace -- --help for more info)\n"
218 << " replay-trace FILE [-- options] replay trace from FILE\n"
219 << " (replay-trace -- --help for more info)\n"
220 << " random-gen [-- options] add randomly generated ops to the store\n"
221 << " (random-gen -- --help for more info)\n"
222 << " rewrite-crush [-- options] add a rewrite commit to the store\n"
223 << " (rewrite-crush -- --help for more info)\n"
7c673cae
FG
224 << " rebuild rebuild store\n"
225 << " (rebuild -- --help for more info)\n"
226 << std::endl;
227 std::cerr << d << std::endl;
228 std::cerr
229 << "\nPlease Note:\n"
230 << "* Ceph-specific options should be in the format --option-name=VAL\n"
231 << " (specifically, do not forget the '='!!)\n"
232 << "* Command-specific options need to be passed after a '--'\n"
233 << " e.g., 'get monmap -- --version 10 --out /tmp/foo'"
234 << std::endl;
235}
236
237int update_osdmap(MonitorDBStore& store, version_t ver, bool copy,
11fdf7f2 238 std::shared_ptr<CrushWrapper> crush,
7c673cae
FG
239 MonitorDBStore::Transaction* t) {
240 const string prefix("osdmap");
241
242 // full
243 bufferlist bl;
244 int r = 0;
245 r = store.get(prefix, store.combine_strings("full", ver), bl);
246 if (r) {
247 std::cerr << "Error getting full map: " << cpp_strerror(r) << std::endl;
248 return r;
249 }
250 OSDMap osdmap;
251 osdmap.decode(bl);
252 osdmap.crush = crush;
253 if (copy) {
254 osdmap.inc_epoch();
255 }
256 bl.clear();
257 // be consistent with OSDMonitor::update_from_paxos()
258 osdmap.encode(bl, CEPH_FEATURES_ALL|CEPH_FEATURE_RESERVED);
259 t->put(prefix, store.combine_strings("full", osdmap.get_epoch()), bl);
260
261 // incremental
262 OSDMap::Incremental inc;
263 if (copy) {
264 inc.epoch = osdmap.get_epoch();
265 inc.fsid = osdmap.get_fsid();
266 } else {
267 bl.clear();
268 r = store.get(prefix, ver, bl);
269 if (r) {
270 std::cerr << "Error getting inc map: " << cpp_strerror(r) << std::endl;
271 return r;
272 }
273 OSDMap::Incremental inc(bl);
274 if (inc.crush.length()) {
275 inc.crush.clear();
276 crush->encode(inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
277 }
278 if (inc.fullmap.length()) {
279 OSDMap fullmap;
280 fullmap.decode(inc.fullmap);
281 fullmap.crush = crush;
282 inc.fullmap.clear();
283 fullmap.encode(inc.fullmap);
284 }
285 }
11fdf7f2 286 ceph_assert(osdmap.have_crc());
7c673cae
FG
287 inc.full_crc = osdmap.get_crc();
288 bl.clear();
289 // be consistent with OSDMonitor::update_from_paxos()
290 inc.encode(bl, CEPH_FEATURES_ALL|CEPH_FEATURE_RESERVED);
291 t->put(prefix, inc.epoch, bl);
292 return 0;
293}
294
295int rewrite_transaction(MonitorDBStore& store, int version,
296 const string& crush_file,
297 MonitorDBStore::Transaction* t) {
298 const string prefix("osdmap");
299
300 // calc the known-good epoch
301 version_t last_committed = store.get(prefix, "last_committed");
302 version_t good_version = 0;
303 if (version <= 0) {
304 if (last_committed >= (unsigned)-version) {
305 good_version = last_committed + version;
306 } else {
307 std::cerr << "osdmap-version is less than: -" << last_committed << std::endl;
308 return EINVAL;
309 }
310 } else {
311 good_version = version;
312 }
313 if (good_version >= last_committed) {
314 std::cout << "good epoch is greater or equal to the last committed one: "
315 << good_version << " >= " << last_committed << std::endl;
316 return 0;
317 }
318
319 // load/extract the crush map
320 int r = 0;
11fdf7f2 321 std::shared_ptr<CrushWrapper> crush(new CrushWrapper);
7c673cae
FG
322 if (crush_file.empty()) {
323 bufferlist bl;
324 r = store.get(prefix, store.combine_strings("full", good_version), bl);
325 if (r) {
326 std::cerr << "Error getting map: " << cpp_strerror(r) << std::endl;
327 return r;
328 }
329 OSDMap osdmap;
330 osdmap.decode(bl);
331 crush = osdmap.crush;
332 } else {
333 string err;
334 bufferlist bl;
335 r = bl.read_file(crush_file.c_str(), &err);
336 if (r) {
337 std::cerr << err << ": " << cpp_strerror(r) << std::endl;
338 return r;
339 }
11fdf7f2 340 auto p = bl.cbegin();
7c673cae
FG
341 crush->decode(p);
342 }
343
344 // prepare a transaction to rewrite the epochs
345 // (good_version, last_committed]
346 // with the good crush map.
347 // XXX: may need to break this into several paxos versions?
11fdf7f2 348 ceph_assert(good_version < last_committed);
7c673cae
FG
349 for (version_t v = good_version + 1; v <= last_committed; v++) {
350 cout << "rewriting epoch #" << v << "/" << last_committed << std::endl;
351 r = update_osdmap(store, v, false, crush, t);
352 if (r)
353 return r;
354 }
355
356 // add a new osdmap epoch to store, so monitors will update their current osdmap
357 // in addition to the ones stored in epochs.
358 //
359 // This is needed due to the way the monitor updates from paxos and the
360 // facilities we are leveraging to push this update to the rest of the
361 // quorum.
362 //
363 // In a nutshell, we are generating a good version of the osdmap, with a
364 // proper crush, and building a transaction that will replace the bad
365 // osdmaps with good osdmaps. But this transaction needs to be applied on
366 // all nodes, so that the monitors will have good osdmaps to share with
367 // clients. We thus leverage Paxos, specifically the recovery mechanism, by
368 // creating a pending value that will be committed once the monitors form an
369 // initial quorum after being brought back to life.
370 //
371 // However, the way the monitor works has the paxos services, including the
372 // OSDMonitor, updating their state from disk *prior* to the recovery phase
373 // begins (so they have an up to date state in memory). This means the
374 // OSDMonitor will see the old, broken map, before the new paxos version is
375 // applied to disk, and the old version is cached. Even though we have the
376 // good map now, and we share the good map with clients, we will still be
377 // working on the old broken map. Instead of mucking around the monitor to
378 // make this work, we instead opt for adding the same osdmap but with a
379 // newer version, so that the OSDMonitor picks up on it when it updates from
380 // paxos after the proposal has been committed. This is not elegant, but
381 // avoids further unpleasantness that would arise from kludging around the
382 // current behavior. Also, has the added benefit of making sure the clients
383 // get an updated version of the map (because last_committed+1 >
384 // last_committed) :)
385 //
386 cout << "adding a new epoch #" << last_committed+1 << std::endl;
387 r = update_osdmap(store, last_committed++, true, crush, t);
388 if (r)
389 return r;
390 t->put(prefix, store.combine_strings("full", "latest"), last_committed);
391 t->put(prefix, "last_committed", last_committed);
392 return 0;
393}
394
395/**
396 * create a new paxos version which carries a proposal to rewrite all epochs
397 * of incremental and full map of "osdmap" after a faulty crush map is injected.
398 * so the leader will trigger a recovery and propagate this fix to its peons,
399 * after the proposal is accepted, and the transaction in it is applied. all
400 * monitors will rewrite the bad crush map with the good one, and have a new
401 * osdmap epoch with the good crush map in it.
402 */
403int rewrite_crush(const char* progname,
404 vector<string>& subcmds,
405 MonitorDBStore& store) {
406 po::options_description op_desc("Allowed 'rewrite-crush' options");
407 int version = -1;
408 string crush_file;
409 op_desc.add_options()
410 ("help,h", "produce this help message")
411 ("crush", po::value<string>(&crush_file),
412 ("path to the crush map file "
413 "(default: will instead extract it from the known-good osdmap)"))
414 ("good-epoch", po::value<int>(&version),
415 "known-good epoch of osdmap, if a negative number '-N' is given, the "
416 "$last_committed-N is used instead (default: -1). "
417 "Please note, -1 is not necessarily a good epoch, because there are "
418 "good chance that we have more epochs slipped into the monstore after "
419 "the one where the crushmap is firstly injected.")
420 ;
421 po::variables_map op_vm;
422 int r = parse_cmd_args(&op_desc, NULL, NULL, subcmds, &op_vm);
423 if (r) {
424 return -r;
425 }
426 if (op_vm.count("help")) {
427 usage(progname, op_desc);
428 return 0;
429 }
430
431 MonitorDBStore::Transaction rewrite_txn;
432 r = rewrite_transaction(store, version, crush_file, &rewrite_txn);
433 if (r) {
434 return r;
435 }
436
437 // store the transaction into store as a proposal
438 const string prefix("paxos");
439 version_t pending_v = store.get(prefix, "last_committed") + 1;
440 auto t(std::make_shared<MonitorDBStore::Transaction>());
441 bufferlist bl;
442 rewrite_txn.encode(bl);
443 cout << "adding pending commit " << pending_v
444 << " " << bl.length() << " bytes" << std::endl;
445 t->put(prefix, pending_v, bl);
446 t->put(prefix, "pending_v", pending_v);
447 // a large enough yet unique proposal number will probably do the trick
448 version_t pending_pn = (store.get(prefix, "accepted_pn") / 100 + 4) * 100 + 1;
449 t->put(prefix, "pending_pn", pending_pn);
450 store.apply_transaction(t);
451 return 0;
452}
453
7c673cae
FG
454static int update_auth(MonitorDBStore& st, const string& keyring_path)
455{
456 // import all keyrings stored in the keyring file
457 KeyRing keyring;
458 int r = keyring.load(g_ceph_context, keyring_path);
459 if (r < 0) {
460 cerr << "unable to load admin keyring: " << keyring_path << std::endl;
461 return r;
462 }
463
464 bufferlist bl;
465 __u8 v = 1;
11fdf7f2 466 encode(v, bl);
7c673cae
FG
467
468 for (const auto& k : keyring.get_keys()) {
469 KeyServerData::Incremental auth_inc;
470 auth_inc.name = k.first;
471 auth_inc.auth = k.second;
472 if (auth_inc.auth.caps.empty()) {
473 cerr << "no caps granted to: " << auth_inc.name << std::endl;
474 return -EINVAL;
475 }
9f95a23c
TL
476 map<string,string> caps;
477 std::transform(begin(auth_inc.auth.caps), end(auth_inc.auth.caps),
478 inserter(caps, end(caps)),
479 [](auto& cap) {
480 string c;
481 auto p = cap.second.cbegin();
482 decode(c, p);
483 return make_pair(cap.first, c);
484 });
485 cout << "adding auth for '"
486 << auth_inc.name << "': " << auth_inc.auth
487 << " with caps(" << caps << ")" << std::endl;
7c673cae
FG
488 auth_inc.op = KeyServerData::AUTH_INC_ADD;
489
490 AuthMonitor::Incremental inc;
491 inc.inc_type = AuthMonitor::AUTH_DATA;
11fdf7f2 492 encode(auth_inc, inc.auth_data);
7c673cae 493 inc.auth_type = CEPH_AUTH_CEPHX;
7c673cae
FG
494 inc.encode(bl, CEPH_FEATURES_ALL);
495 }
496
497 const string prefix("auth");
498 auto last_committed = st.get(prefix, "last_committed") + 1;
499 auto t = make_shared<MonitorDBStore::Transaction>();
500 t->put(prefix, last_committed, bl);
501 t->put(prefix, "last_committed", last_committed);
502 auto first_committed = st.get(prefix, "first_committed");
503 if (!first_committed) {
504 t->put(prefix, "first_committed", last_committed);
505 }
506 st.apply_transaction(t);
507 return 0;
508}
509
92f5a8d4
TL
510static int update_mkfs(MonitorDBStore& st,
511 const string& monmap_path,
512 const vector<string>& mon_ids)
7c673cae
FG
513{
514 MonMap monmap;
11fdf7f2
TL
515 if (!monmap_path.empty()) {
516 cout << __func__ << " pulling initial monmap from " << monmap_path << std::endl;
517 bufferlist bl;
518 string err;
519 int r = bl.read_file(monmap_path.c_str(), &err);
520 if (r < 0) {
521 cerr << "failed to read monmap from " << monmap_path << ": "
522 << cpp_strerror(r) << std::endl;
523 return r;
524 }
525 monmap.decode(bl);
526 } else {
527 cout << __func__ << " generating seed initial monmap" << std::endl;
528 int r = monmap.build_initial(g_ceph_context, true, cerr);
529 if (r) {
530 cerr << "no initial monitors" << std::endl;
531 return -EINVAL;
532 }
92f5a8d4
TL
533 vector<string> new_names;
534 if (!mon_ids.empty()) {
535 if (mon_ids.size() != monmap.size()) {
536 cerr << "Please pass the same number of <mon-ids> to name the hosts "
537 << "listed in 'mon_host'. "
538 << mon_ids.size() << " mon-id(s) specified, "
539 << "while you have " << monmap.size() << " mon hosts." << std::endl;
540 return -EINVAL;
541 }
542 new_names = mon_ids;
543 } else {
544 for (unsigned rank = 0; rank < monmap.size(); rank++) {
545 string new_name{"a"};
546 new_name[0] += rank;
547 new_names.push_back(std::move(new_name));
548 }
549 }
550 for (unsigned rank = 0; rank < monmap.size(); rank++) {
551 auto name = monmap.get_name(rank);
552 if (name.compare(0, 7, "noname-") == 0) {
553 monmap.rename(name, new_names[rank]);
554 }
555 }
7c673cae 556 }
11fdf7f2 557 monmap.print(cout);
7c673cae
FG
558 bufferlist bl;
559 monmap.encode(bl, CEPH_FEATURES_ALL);
560 monmap.set_epoch(0);
561 auto t = make_shared<MonitorDBStore::Transaction>();
562 t->put("mkfs", "monmap", bl);
563 st.apply_transaction(t);
564 return 0;
565}
566
567static int update_monitor(MonitorDBStore& st)
568{
569 const string prefix("monitor");
570 // a stripped-down Monitor::mkfs()
571 bufferlist bl;
572 bl.append(CEPH_MON_ONDISK_MAGIC "\n");
573 auto t = make_shared<MonitorDBStore::Transaction>();
574 t->put(prefix, "magic", bl);
575 st.apply_transaction(t);
576 return 0;
577}
578
a8e16298
TL
579// rebuild
580// - creating_pgs
581static int update_creating_pgs(MonitorDBStore& st)
582{
583 bufferlist bl;
584 auto last_osdmap_epoch = st.get("osdmap", "last_committed");
585 int r = st.get("osdmap", st.combine_strings("full", last_osdmap_epoch), bl);
586 if (r < 0) {
9f95a23c 587 cerr << "unable to load osdmap e" << last_osdmap_epoch << std::endl;
a8e16298
TL
588 return r;
589 }
590
591 OSDMap osdmap;
592 osdmap.decode(bl);
593 creating_pgs_t creating;
594 for (auto& i : osdmap.get_pools()) {
595 creating.created_pools.insert(i.first);
596 }
597 creating.last_scan_epoch = last_osdmap_epoch;
598
599 bufferlist newbl;
9f95a23c 600 encode(creating, newbl, CEPH_FEATURES_ALL);
a8e16298
TL
601
602 auto t = make_shared<MonitorDBStore::Transaction>();
603 t->put("osd_pg_creating", "creating", newbl);
604 st.apply_transaction(t);
605 return 0;
606}
607
b32b8144
FG
608// rebuild
609// - mgr
610// - mgr_command_desc
3efd9988
FG
611static int update_mgrmap(MonitorDBStore& st)
612{
613 auto t = make_shared<MonitorDBStore::Transaction>();
614
615 {
616 MgrMap map;
617 // mgr expects epoch > 1
618 map.epoch++;
619 auto initial_modules =
11fdf7f2 620 get_str_vec(g_ceph_context->_conf.get_val<string>("mgr_initial_modules"));
3efd9988
FG
621 copy(begin(initial_modules),
622 end(initial_modules),
623 inserter(map.modules, end(map.modules)));
624 bufferlist bl;
625 map.encode(bl, CEPH_FEATURES_ALL);
626 t->put("mgr", map.epoch, bl);
627 t->put("mgr", "last_committed", map.epoch);
628 }
629 {
630 auto mgr_command_descs = mgr_commands;
631 for (auto& c : mgr_command_descs) {
632 c.set_flag(MonCommand::FLAG_MGR);
633 }
634 bufferlist bl;
11fdf7f2 635 encode(mgr_command_descs, bl);
92f5a8d4 636 t->put("mgr_command_descs", "", bl);
3efd9988
FG
637 }
638 return st.apply_transaction(t);
639}
640
7c673cae
FG
641static int update_paxos(MonitorDBStore& st)
642{
643 // build a pending paxos proposal from all non-permanent k/v pairs. once the
644 // proposal is committed, it will gets applied. on the sync provider side, it
645 // will be a no-op, but on its peers, the paxos commit will help to build up
646 // the necessary epochs.
647 bufferlist pending_proposal;
648 {
649 MonitorDBStore::Transaction t;
650 vector<string> prefixes = {"auth", "osdmap",
a8e16298 651 "mgr", "mgr_command_desc"};
7c673cae
FG
652 for (const auto& prefix : prefixes) {
653 for (auto i = st.get_iterator(prefix); i->valid(); i->next()) {
654 auto key = i->raw_key();
655 auto val = i->value();
656 t.put(key.first, key.second, val);
657 }
658 }
659 t.encode(pending_proposal);
660 }
661 const string prefix("paxos");
662 auto t = make_shared<MonitorDBStore::Transaction>();
663 t->put(prefix, "first_committed", 0);
664 t->put(prefix, "last_committed", 0);
665 auto pending_v = 1;
666 t->put(prefix, pending_v, pending_proposal);
667 t->put(prefix, "pending_v", pending_v);
668 t->put(prefix, "pending_pn", 400);
669 st.apply_transaction(t);
670 return 0;
671}
672
7c673cae
FG
673int rebuild_monstore(const char* progname,
674 vector<string>& subcmds,
675 MonitorDBStore& st)
676{
677 po::options_description op_desc("Allowed 'rebuild' options");
678 string keyring_path;
11fdf7f2 679 string monmap_path;
92f5a8d4 680 vector<string> mon_ids;
7c673cae
FG
681 op_desc.add_options()
682 ("keyring", po::value<string>(&keyring_path),
11fdf7f2
TL
683 "path to the client.admin key")
684 ("monmap", po::value<string>(&monmap_path),
92f5a8d4
TL
685 "path to the initial monmap")
686 ("mon-ids", po::value<vector<string>>(&mon_ids)->multitoken(),
687 "mon ids, use 'a', 'b', ... if not specified");
688 po::positional_options_description pos_desc;
689 pos_desc.add("mon-ids", -1);
7c673cae 690 po::variables_map op_vm;
92f5a8d4 691 int r = parse_cmd_args(&op_desc, nullptr, &pos_desc, subcmds, &op_vm);
7c673cae
FG
692 if (r) {
693 return -r;
694 }
695 if (op_vm.count("help")) {
696 usage(progname, op_desc);
697 return 0;
698 }
699 if (!keyring_path.empty())
700 update_auth(st, keyring_path);
a8e16298 701 if ((r = update_creating_pgs(st))) {
7c673cae
FG
702 return r;
703 }
b32b8144
FG
704 if ((r = update_mgrmap(st))) {
705 return r;
706 }
7c673cae
FG
707 if ((r = update_paxos(st))) {
708 return r;
709 }
92f5a8d4 710 if ((r = update_mkfs(st, monmap_path, mon_ids))) {
7c673cae
FG
711 return r;
712 }
713 if ((r = update_monitor(st))) {
714 return r;
715 }
716 return 0;
717}
718
719int main(int argc, char **argv) {
720 int err = 0;
721 po::options_description desc("Allowed options");
722 string store_path, cmd;
723 vector<string> subcmds;
724 desc.add_options()
725 ("help,h", "produce help message")
726 ;
727
728 /* Dear Future Developer:
729 *
730 * for further improvement, should you need to pass specific options to
731 * a command (e.g., get osdmap VER --hex), you can expand the current
732 * format by creating additional 'po::option_description' and passing
733 * 'subcmds' to 'po::command_line_parser', much like what is currently
734 * done by default. However, beware: in order to differentiate a
735 * command-specific option from the generic/global options, you will need
736 * to pass '--' in the command line (so that the first parser, the one
737 * below, assumes it has reached the end of all options); e.g.,
738 * 'get osdmap VER -- --hex'. Not pretty; far from intuitive; it was as
739 * far as I got with this library. Improvements on this format will be
740 * left as an excercise for the reader. -Joao
741 */
742 po::options_description positional_desc("Positional argument options");
743 positional_desc.add_options()
744 ("store-path", po::value<string>(&store_path),
745 "path to monitor's store")
746 ("command", po::value<string>(&cmd),
747 "Command")
748 ("subcmd", po::value<vector<string> >(&subcmds),
749 "Command arguments/Sub-Commands")
750 ;
751 po::positional_options_description positional;
752 positional.add("store-path", 1);
753 positional.add("command", 1);
754 positional.add("subcmd", -1);
755
756 po::options_description all_desc("All options");
757 all_desc.add(desc).add(positional_desc);
758
759 vector<string> ceph_option_strings;
760 po::variables_map vm;
761 try {
762 po::parsed_options parsed =
763 po::command_line_parser(argc, argv).
764 options(all_desc).
765 positional(positional).
766 allow_unregistered().run();
767
768 po::store(
769 parsed,
770 vm);
771 po::notify(vm);
772
773 // Specifying po::include_positional would have our positional arguments
774 // being collected (thus being part of ceph_option_strings and eventually
775 // passed on to global_init() below).
776 // Instead we specify po::exclude_positional, which has the upside of
777 // completely avoid this, but the downside of having to specify ceph
778 // options as --VAR=VAL (note the '='); otherwise we will capture the
779 // positional 'VAL' as belonging to us, never being collected.
780 ceph_option_strings = po::collect_unrecognized(parsed.options,
781 po::exclude_positional);
782
783 } catch(po::error &e) {
784 std::cerr << "error: " << e.what() << std::endl;
785 return 1;
786 }
787
788 // parse command structure before calling global_init() and friends.
789
790 if (vm.empty() || vm.count("help") ||
791 store_path.empty() || cmd.empty() ||
792 *cmd.begin() == '-') {
793 usage(argv[0], desc);
794 return 1;
795 }
796
11fdf7f2 797 vector<const char *> ceph_options;
7c673cae
FG
798 ceph_options.reserve(ceph_option_strings.size());
799 for (vector<string>::iterator i = ceph_option_strings.begin();
800 i != ceph_option_strings.end();
801 ++i) {
802 ceph_options.push_back(i->c_str());
803 }
804
805 auto cct = global_init(
11fdf7f2
TL
806 NULL, ceph_options, CEPH_ENTITY_TYPE_MON,
807 CODE_ENVIRONMENT_UTILITY,
808 CINIT_FLAG_NO_MON_CONFIG);
7c673cae 809 common_init_finish(g_ceph_context);
11fdf7f2 810 cct->_conf.apply_changes(nullptr);
7c673cae
FG
811
812 // this is where we'll write *whatever*, on a per-command basis.
813 // not all commands require some place to write their things.
814 MonitorDBStore st(store_path);
815 if (store_path.size()) {
816 stringstream ss;
817 int r = st.open(ss);
818 if (r < 0) {
819 std::cerr << ss.str() << std::endl;
820 return EINVAL;
821 }
822 }
823
824 if (cmd == "dump-keys") {
825 KeyValueDB::WholeSpaceIterator iter = st.get_iterator();
826 while (iter->valid()) {
827 pair<string,string> key(iter->raw_key());
828 cout << key.first << " / " << key.second << std::endl;
829 iter->next();
830 }
831 } else if (cmd == "compact") {
832 st.compact();
833 } else if (cmd == "get") {
834 unsigned v = 0;
835 string outpath;
7c673cae
FG
836 string map_type;
837 // visible options for this command
838 po::options_description op_desc("Allowed 'get' options");
839 op_desc.add_options()
840 ("help,h", "produce this help message")
841 ("out,o", po::value<string>(&outpath),
842 "output file (default: stdout)")
843 ("version,v", po::value<unsigned>(&v),
844 "map version to obtain")
9f95a23c 845 ("readable,r", "print the map information in human readable format")
7c673cae
FG
846 ;
847 // this is going to be a positional argument; we don't want to show
848 // it as an option during --help, but we do want to have it captured
849 // when parsing.
850 po::options_description hidden_op_desc("Hidden 'get' options");
851 hidden_op_desc.add_options()
852 ("map-type", po::value<string>(&map_type),
853 "map-type")
854 ;
855 po::positional_options_description op_positional;
856 op_positional.add("map-type", 1);
857
858 po::variables_map op_vm;
859 int r = parse_cmd_args(&op_desc, &hidden_op_desc, &op_positional,
860 subcmds, &op_vm);
861 if (r < 0) {
862 err = -r;
863 goto done;
864 }
865
866 if (op_vm.count("help") || map_type.empty()) {
867 usage(argv[0], op_desc);
868 err = 0;
869 goto done;
870 }
871
872 if (v == 0) {
873 if (map_type == "crushmap") {
874 v = st.get("osdmap", "last_committed");
875 } else {
876 v = st.get(map_type, "last_committed");
877 }
878 }
879
880 int fd = STDOUT_FILENO;
881 if (!outpath.empty()){
882 fd = ::open(outpath.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666);
883 if (fd < 0) {
884 std::cerr << "error opening output file: "
885 << cpp_strerror(errno) << std::endl;
886 err = EINVAL;
887 goto done;
888 }
889 }
890
891 BOOST_SCOPE_EXIT((&r) (&fd) (&outpath)) {
892 ::close(fd);
893 if (r < 0 && fd != STDOUT_FILENO) {
894 ::remove(outpath.c_str());
895 }
896 } BOOST_SCOPE_EXIT_END
897
898 bufferlist bl;
899 r = 0;
900 if (map_type == "osdmap") {
901 r = st.get(map_type, st.combine_strings("full", v), bl);
902 } else if (map_type == "crushmap") {
903 bufferlist tmp;
904 r = st.get("osdmap", st.combine_strings("full", v), tmp);
905 if (r >= 0) {
906 OSDMap osdmap;
907 osdmap.decode(tmp);
908 osdmap.crush->encode(bl, CEPH_FEATURES_SUPPORTED_DEFAULT);
909 }
910 } else {
911 r = st.get(map_type, v, bl);
912 }
913 if (r < 0) {
914 std::cerr << "Error getting map: " << cpp_strerror(r) << std::endl;
915 err = EINVAL;
916 goto done;
917 }
918
9f95a23c 919 if (op_vm.count("readable")) {
7c673cae
FG
920 stringstream ss;
921 bufferlist out;
b32b8144
FG
922 try {
923 if (map_type == "monmap") {
924 MonMap monmap;
925 monmap.decode(bl);
926 monmap.print(ss);
927 } else if (map_type == "osdmap") {
928 OSDMap osdmap;
929 osdmap.decode(bl);
930 osdmap.print(ss);
931 } else if (map_type == "mdsmap") {
932 FSMap fs_map;
933 fs_map.decode(bl);
934 fs_map.print(ss);
935 } else if (map_type == "mgr") {
936 MgrMap mgr_map;
11fdf7f2 937 auto p = bl.cbegin();
b32b8144
FG
938 mgr_map.decode(p);
939 JSONFormatter f;
940 f.dump_object("mgrmap", mgr_map);
941 f.flush(ss);
942 } else if (map_type == "crushmap") {
943 CrushWrapper cw;
11fdf7f2 944 auto it = bl.cbegin();
b32b8144
FG
945 cw.decode(it);
946 CrushCompiler cc(cw, std::cerr, 0);
947 cc.decompile(ss);
948 } else {
949 std::cerr << "This type of readable map does not exist: " << map_type
950 << std::endl << "You can only specify[osdmap|monmap|mdsmap"
951 "|crushmap|mgr]" << std::endl;
952 }
953 } catch (const buffer::error &err) {
954 std::cerr << "Could not decode for human readable output (you may still"
955 " use non-readable mode). Detail: " << err << std::endl;
7c673cae 956 }
b32b8144 957
7c673cae
FG
958 out.append(ss);
959 out.write_fd(fd);
960 } else {
961 bl.write_fd(fd);
962 }
963
964 if (!outpath.empty()) {
965 std::cout << "wrote " << map_type
966 << " version " << v << " to " << outpath
967 << std::endl;
968 }
969 } else if (cmd == "show-versions") {
970 string map_type; //map type:osdmap,monmap...
971 // visible options for this command
972 po::options_description op_desc("Allowed 'show-versions' options");
973 op_desc.add_options()
974 ("help,h", "produce this help message")
975 ("map-type", po::value<string>(&map_type), "map_type");
976
977 po::positional_options_description op_positional;
978 op_positional.add("map-type", 1);
979
980 po::variables_map op_vm;
981 int r = parse_cmd_args(&op_desc, NULL, &op_positional,
982 subcmds, &op_vm);
983 if (r < 0) {
984 err = -r;
985 goto done;
986 }
987
988 if (op_vm.count("help") || map_type.empty()) {
989 usage(argv[0], op_desc);
990 err = 0;
991 goto done;
992 }
993
994 unsigned int v_first = 0;
995 unsigned int v_last = 0;
996 v_first = st.get(map_type, "first_committed");
997 v_last = st.get(map_type, "last_committed");
998
999 std::cout << "first committed:\t" << v_first << "\n"
1000 << "last committed:\t" << v_last << std::endl;
1001 } else if (cmd == "dump-paxos") {
1002 unsigned dstart = 0;
1003 unsigned dstop = ~0;
1004 po::options_description op_desc("Allowed 'dump-paxos' options");
1005 op_desc.add_options()
1006 ("help,h", "produce this help message")
1007 ("start,s", po::value<unsigned>(&dstart),
1008 "starting version (default: 0)")
1009 ("end,e", po::value<unsigned>(&dstop),
1010 "finish version (default: ~0)")
1011 ;
1012
1013 po::variables_map op_vm;
1014 int r = parse_cmd_args(&op_desc, NULL, NULL,
1015 subcmds, &op_vm);
1016 if (r < 0) {
1017 err = -r;
1018 goto done;
1019 }
1020
1021 if (op_vm.count("help")) {
1022 usage(argv[0], op_desc);
1023 err = 0;
1024 goto done;
1025 }
1026
1027 if (dstart > dstop) {
1028 std::cerr << "error: 'start' version (value: " << dstart << ") "
1029 << " is greater than 'end' version (value: " << dstop << ")"
1030 << std::endl;
1031 err = EINVAL;
1032 goto done;
1033 }
1034
1035 version_t v = dstart;
1036 for (; v <= dstop; ++v) {
1037 bufferlist bl;
1038 st.get("paxos", v, bl);
1039 if (bl.length() == 0)
1040 break;
1041 cout << "\n--- " << v << " ---" << std::endl;
1042 auto tx(std::make_shared<MonitorDBStore::Transaction>());
1043 Paxos::decode_append_transaction(tx, bl);
1044 JSONFormatter f(true);
1045 tx->dump(&f);
1046 f.flush(cout);
1047 }
1048
1049 std::cout << "dumped " << v << " paxos versions" << std::endl;
1050
1051 } else if (cmd == "dump-trace") {
1052 unsigned dstart = 0;
1053 unsigned dstop = ~0;
1054 string outpath;
1055
1056 // visible options for this command
1057 po::options_description op_desc("Allowed 'dump-trace' options");
1058 op_desc.add_options()
1059 ("help,h", "produce this help message")
1060 ("start,s", po::value<unsigned>(&dstart),
1061 "starting version (default: 0)")
1062 ("end,e", po::value<unsigned>(&dstop),
1063 "finish version (default: ~0)")
1064 ;
1065 // this is going to be a positional argument; we don't want to show
1066 // it as an option during --help, but we do want to have it captured
1067 // when parsing.
1068 po::options_description hidden_op_desc("Hidden 'dump-trace' options");
1069 hidden_op_desc.add_options()
1070 ("out,o", po::value<string>(&outpath),
1071 "file to write the dump to")
1072 ;
1073 po::positional_options_description op_positional;
1074 op_positional.add("out", 1);
1075
1076 po::variables_map op_vm;
1077 int r = parse_cmd_args(&op_desc, &hidden_op_desc, &op_positional,
1078 subcmds, &op_vm);
1079 if (r < 0) {
1080 err = -r;
1081 goto done;
1082 }
1083
1084 if (op_vm.count("help")) {
1085 usage(argv[0], op_desc);
1086 err = 0;
1087 goto done;
1088 }
1089
1090 if (outpath.empty()) {
1091 usage(argv[0], op_desc);
1092 err = EINVAL;
1093 goto done;
1094 }
1095
1096 if (dstart > dstop) {
1097 std::cerr << "error: 'start' version (value: " << dstart << ") "
1098 << " is greater than 'stop' version (value: " << dstop << ")"
1099 << std::endl;
1100 err = EINVAL;
1101 goto done;
1102 }
1103
1104 TraceIter iter(outpath.c_str());
1105 iter.init();
1106 while (true) {
1107 if (!iter.valid())
1108 break;
1109 if (iter.num() >= dstop) {
1110 break;
1111 }
1112 if (iter.num() >= dstart) {
1113 JSONFormatter f(true);
1114 iter.cur()->dump(&f, false);
1115 f.flush(std::cout);
1116 std::cout << std::endl;
1117 }
1118 iter.next();
1119 }
1120 std::cerr << "Read up to transaction " << iter.num() << std::endl;
1121 } else if (cmd == "replay-trace") {
1122 string inpath;
1123 unsigned num_replays = 1;
1124 // visible options for this command
1125 po::options_description op_desc("Allowed 'replay-trace' options");
1126 op_desc.add_options()
1127 ("help,h", "produce this help message")
1128 ("num-replays,n", po::value<unsigned>(&num_replays),
1129 "finish version (default: 1)")
1130 ;
1131 // this is going to be a positional argument; we don't want to show
1132 // it as an option during --help, but we do want to have it captured
1133 // when parsing.
1134 po::options_description hidden_op_desc("Hidden 'replay-trace' options");
1135 hidden_op_desc.add_options()
1136 ("in,i", po::value<string>(&inpath),
1137 "file to write the dump to")
1138 ;
1139 po::positional_options_description op_positional;
1140 op_positional.add("in", 1);
1141
1142 // op_desc_all will aggregate all visible and hidden options for parsing.
1143 // when we call 'usage()' we just pass 'op_desc', as that's the description
1144 // holding the visible options.
1145 po::options_description op_desc_all;
1146 op_desc_all.add(op_desc).add(hidden_op_desc);
1147
1148 po::variables_map op_vm;
1149 try {
1150 po::parsed_options op_parsed = po::command_line_parser(subcmds).
1151 options(op_desc_all).positional(op_positional).run();
1152 po::store(op_parsed, op_vm);
1153 po::notify(op_vm);
1154 } catch (po::error &e) {
1155 std::cerr << "error: " << e.what() << std::endl;
1156 err = EINVAL;
1157 goto done;
1158 }
1159
1160 if (op_vm.count("help")) {
1161 usage(argv[0], op_desc);
1162 err = 0;
1163 goto done;
1164 }
1165
1166 if (inpath.empty()) {
1167 usage(argv[0], op_desc);
1168 err = EINVAL;
1169 goto done;
1170 }
1171
1172 unsigned num = 0;
1173 for (unsigned i = 0; i < num_replays; ++i) {
1174 TraceIter iter(inpath.c_str());
1175 iter.init();
1176 while (true) {
1177 if (!iter.valid())
1178 break;
1179 std::cerr << "Replaying trans num " << num << std::endl;
1180 st.apply_transaction(iter.cur());
1181 iter.next();
1182 ++num;
1183 }
1184 std::cerr << "Read up to transaction " << iter.num() << std::endl;
1185 }
1186 } else if (cmd == "random-gen") {
1187 unsigned tsize = 200;
1188 unsigned tvalsize = 1024;
1189 unsigned ntrans = 100;
1190 po::options_description op_desc("Allowed 'random-gen' options");
1191 op_desc.add_options()
1192 ("help,h", "produce this help message")
1193 ("num-keys,k", po::value<unsigned>(&tsize),
1194 "keys to write in each transaction (default: 200)")
1195 ("size,s", po::value<unsigned>(&tvalsize),
1196 "size (in bytes) of the value to write in each key (default: 1024)")
1197 ("ntrans,n", po::value<unsigned>(&ntrans),
1198 "number of transactions to run (default: 100)")
1199 ;
1200
1201 po::variables_map op_vm;
1202 try {
1203 po::parsed_options op_parsed = po::command_line_parser(subcmds).
1204 options(op_desc).run();
1205 po::store(op_parsed, op_vm);
1206 po::notify(op_vm);
1207 } catch (po::error &e) {
1208 std::cerr << "error: " << e.what() << std::endl;
1209 err = EINVAL;
1210 goto done;
1211 }
1212
1213 if (op_vm.count("help")) {
1214 usage(argv[0], op_desc);
1215 err = 0;
1216 goto done;
1217 }
1218
1219 unsigned num = 0;
1220 for (unsigned i = 0; i < ntrans; ++i) {
1221 std::cerr << "Applying trans " << i << std::endl;
1222 auto t(std::make_shared<MonitorDBStore::Transaction>());
1223 string prefix;
1224 prefix.push_back((i%26)+'a');
1225 for (unsigned j = 0; j < tsize; ++j) {
1226 stringstream os;
1227 os << num;
1228 bufferlist bl;
1229 for (unsigned k = 0; k < tvalsize; ++k) bl.append(rand());
1230 t->put(prefix, os.str(), bl);
1231 ++num;
1232 }
1233 t->compact_prefix(prefix);
1234 st.apply_transaction(t);
1235 }
1236 } else if (cmd == "store-copy") {
1237 if (subcmds.size() < 1 || subcmds[0].empty()) {
1238 usage(argv[0], desc);
1239 err = EINVAL;
1240 goto done;
1241 }
1242
1243 string out_path = subcmds[0];
1244
1245 MonitorDBStore out_store(out_path);
1246 {
1247 stringstream ss;
1248 int r = out_store.create_and_open(ss);
1249 if (r < 0) {
1250 std::cerr << ss.str() << std::endl;
1251 goto done;
1252 }
1253 }
1254
1255
1256 KeyValueDB::WholeSpaceIterator it = st.get_iterator();
1257 uint64_t total_keys = 0;
1258 uint64_t total_size = 0;
1259 uint64_t total_tx = 0;
1260
1261 do {
1262 uint64_t num_keys = 0;
1263
1264 auto tx(std::make_shared<MonitorDBStore::Transaction>());
1265
1266 while (it->valid() && num_keys < 128) {
1267 pair<string,string> k = it->raw_key();
1268 bufferlist v = it->value();
1269 tx->put(k.first, k.second, v);
1270
1271 num_keys ++;
1272 total_tx ++;
1273 total_size += v.length();
1274
1275 it->next();
1276 }
1277
1278 total_keys += num_keys;
1279
1280 if (!tx->empty())
1281 out_store.apply_transaction(tx);
1282
1283 std::cout << "copied " << total_keys << " keys so far ("
1adf2230 1284 << stringify(byte_u_t(total_size)) << ")" << std::endl;
7c673cae
FG
1285
1286 } while (it->valid());
1287 out_store.close();
1288 std::cout << "summary: copied " << total_keys << " keys, using "
1289 << total_tx << " transactions, totalling "
1adf2230 1290 << stringify(byte_u_t(total_size)) << std::endl;
7c673cae
FG
1291 std::cout << "from '" << store_path << "' to '" << out_path << "'"
1292 << std::endl;
1293 } else if (cmd == "rewrite-crush") {
1294 err = rewrite_crush(argv[0], subcmds, st);
7c673cae
FG
1295 } else if (cmd == "rebuild") {
1296 err = rebuild_monstore(argv[0], subcmds, st);
1297 } else {
1298 std::cerr << "Unrecognized command: " << cmd << std::endl;
1299 usage(argv[0], desc);
1300 goto done;
1301 }
1302
1303 done:
1304 st.close();
1305 return err;
1306}