]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2012 Inktank, Inc. | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | */ | |
13 | #include <boost/program_options/variables_map.hpp> | |
14 | #include <boost/program_options/parsers.hpp> | |
15 | #include <boost/scope_exit.hpp> | |
16 | ||
17 | #include <stdlib.h> | |
18 | #include <string> | |
19 | ||
20 | #include "common/Formatter.h" | |
21 | #include "common/errno.h" | |
22 | ||
23 | #include "auth/KeyRing.h" | |
24 | #include "auth/cephx/CephxKeyServer.h" | |
25 | #include "global/global_init.h" | |
20effc67 | 26 | #include "include/scope_guard.h" |
7c673cae | 27 | #include "include/stringify.h" |
3efd9988 | 28 | #include "mgr/mgr_commands.h" |
7c673cae FG |
29 | #include "mon/AuthMonitor.h" |
30 | #include "mon/MonitorDBStore.h" | |
31 | #include "mon/Paxos.h" | |
32 | #include "mon/MonMap.h" | |
b32b8144 FG |
33 | #include "mds/FSMap.h" |
34 | #include "mon/MgrMap.h" | |
7c673cae FG |
35 | #include "osd/OSDMap.h" |
36 | #include "crush/CrushCompiler.h" | |
a8e16298 | 37 | #include "mon/CreatingPGs.h" |
7c673cae FG |
38 | |
39 | namespace po = boost::program_options; | |
7c673cae | 40 | |
20effc67 TL |
41 | using namespace std; |
42 | ||
7c673cae FG |
43 | class TraceIter { |
44 | int fd; | |
45 | unsigned idx; | |
46 | MonitorDBStore::TransactionRef t; | |
47 | public: | |
48 | explicit TraceIter(string fname) : fd(-1), idx(-1) { | |
f67539c2 | 49 | fd = ::open(fname.c_str(), O_RDONLY|O_BINARY); |
7c673cae FG |
50 | t.reset(new MonitorDBStore::Transaction); |
51 | } | |
52 | bool valid() { | |
53 | return fd != -1; | |
54 | } | |
55 | MonitorDBStore::TransactionRef cur() { | |
11fdf7f2 | 56 | ceph_assert(valid()); |
7c673cae FG |
57 | return t; |
58 | } | |
59 | unsigned num() { return idx; } | |
60 | void next() { | |
61 | ++idx; | |
62 | bufferlist bl; | |
63 | int r = bl.read_fd(fd, 6); | |
64 | if (r < 0) { | |
65 | std::cerr << "Got error: " << cpp_strerror(r) << " on read_fd" | |
66 | << std::endl; | |
67 | ::close(fd); | |
68 | fd = -1; | |
69 | return; | |
70 | } else if ((unsigned)r < 6) { | |
71 | std::cerr << "short read" << std::endl; | |
72 | ::close(fd); | |
73 | fd = -1; | |
74 | return; | |
75 | } | |
11fdf7f2 | 76 | auto bliter = bl.cbegin(); |
7c673cae | 77 | uint8_t ver, ver2; |
11fdf7f2 TL |
78 | decode(ver, bliter); |
79 | decode(ver2, bliter); | |
7c673cae | 80 | uint32_t len; |
11fdf7f2 | 81 | decode(len, bliter); |
7c673cae FG |
82 | r = bl.read_fd(fd, len); |
83 | if (r < 0) { | |
84 | std::cerr << "Got error: " << cpp_strerror(r) << " on read_fd" | |
85 | << std::endl; | |
86 | ::close(fd); | |
87 | fd = -1; | |
88 | return; | |
89 | } else if ((unsigned)r < len) { | |
90 | std::cerr << "short read" << std::endl; | |
91 | ::close(fd); | |
92 | fd = -1; | |
93 | return; | |
94 | } | |
11fdf7f2 | 95 | bliter = bl.cbegin(); |
7c673cae FG |
96 | t.reset(new MonitorDBStore::Transaction); |
97 | t->decode(bliter); | |
98 | } | |
99 | void init() { | |
100 | next(); | |
101 | } | |
102 | ~TraceIter() { | |
103 | if (fd != -1) { | |
104 | ::close(fd); | |
105 | fd = -1; | |
106 | } | |
107 | } | |
108 | }; | |
109 | ||
110 | ||
111 | int parse_cmd_args( | |
112 | po::options_description *desc, /// < visible options description | |
113 | po::options_description *hidden_desc, /// < hidden options description | |
114 | po::positional_options_description *positional, /// < positional args | |
115 | vector<string> &cmd_args, /// < arguments to be parsed | |
116 | po::variables_map *vm /// > post-parsing variable map | |
117 | ) | |
118 | { | |
119 | // desc_all will aggregate all visible and hidden options for parsing. | |
120 | // | |
121 | // From boost's program_options point of view, there is absolutely no | |
122 | // distinction between 'desc' and 'hidden_desc'. This is a distinction | |
123 | // that is only useful to us: 'desc' is whatever we are willing to show | |
124 | // on 'usage()', whereas 'hidden_desc' refers to parameters we wish to | |
125 | // take advantage of but do not wish to show on 'usage()'. | |
126 | // | |
127 | // For example, consider that program_options matches positional arguments | |
128 | // (specified via 'positional') against the paramenters defined on a | |
129 | // given 'po::options_description' class. This is performed below, | |
130 | // supplying both the description and the positional arguments to the | |
131 | // parser. However, we do not want the parameters that are mapped to | |
132 | // positional arguments to be shown on usage, as that makes for ugly and | |
133 | // confusing usage messages. Therefore we dissociate the options' | |
134 | // description that is to be used as an aid to the user from those options | |
135 | // that are nothing but useful for internal purposes (i.e., mapping options | |
136 | // to positional arguments). We still need to aggregate them before parsing | |
137 | // and that's what 'desc_all' is all about. | |
138 | // | |
139 | ||
11fdf7f2 | 140 | ceph_assert(desc != NULL); |
7c673cae FG |
141 | |
142 | po::options_description desc_all; | |
143 | desc_all.add(*desc); | |
144 | if (hidden_desc != NULL) | |
145 | desc_all.add(*hidden_desc); | |
146 | ||
147 | try { | |
148 | po::command_line_parser parser = po::command_line_parser(cmd_args). | |
149 | options(desc_all); | |
150 | ||
151 | if (positional) { | |
152 | parser = parser.positional(*positional); | |
153 | } | |
154 | ||
155 | po::parsed_options parsed = parser.run(); | |
156 | po::store(parsed, *vm); | |
157 | po::notify(*vm); | |
158 | } catch (po::error &e) { | |
159 | std::cerr << "error: " << e.what() << std::endl; | |
160 | return -EINVAL; | |
161 | } | |
162 | return 0; | |
163 | } | |
164 | ||
165 | ||
166 | /** | |
167 | * usage: ceph-monstore-tool <store-path> <command> [options] | |
168 | * | |
169 | * commands: | |
170 | * | |
171 | * store-copy < --out arg > | |
172 | * dump-keys | |
173 | * compact | |
174 | * getmonmap < --out arg [ --version arg ] > | |
175 | * getosdmap < --out arg [ --version arg ] > | |
176 | * dump-paxos <--dump-start VER> <--dump-end VER> | |
177 | * dump-trace < --trace-file arg > | |
178 | * replay-trace | |
179 | * random-gen | |
180 | * rewrite-crush | |
7c673cae FG |
181 | * |
182 | * wanted syntax: | |
183 | * | |
184 | * ceph-monstore-tool PATH CMD [options] | |
185 | * | |
186 | * ceph-monstore-tool PATH store-copy <PATH2 | -o PATH2> | |
187 | * ceph-monstore-tool PATH dump-keys | |
188 | * ceph-monstore-tool PATH compact | |
189 | * ceph-monstore-tool PATH get monmap [VER] | |
190 | * ceph-monstore-tool PATH get osdmap [VER] | |
191 | * ceph-monstore-tool PATH dump-paxos STARTVER ENDVER | |
192 | * | |
193 | * | |
194 | */ | |
195 | void usage(const char *n, po::options_description &d) | |
196 | { | |
197 | std::cerr << | |
198 | "usage: " << n << " <store-path> <cmd> [args|options]\n" | |
199 | << "\n" | |
200 | << "Commands:\n" | |
201 | << " store-copy PATH copies store to PATH\n" | |
202 | << " compact compacts the store\n" | |
203 | << " get monmap [-- options] get monmap (version VER if specified)\n" | |
204 | << " (default: last committed)\n" | |
205 | << " get osdmap [-- options] get osdmap (version VER if specified)\n" | |
206 | << " (default: last committed)\n" | |
207 | << " get mdsmap [-- options] get mdsmap (version VER if specified)\n" | |
208 | << " (default: last committed)\n" | |
b32b8144 FG |
209 | << " get mgr [-- options] get mgr map (version VER if specified)\n" |
210 | << " (default: last committed)\n" | |
7c673cae FG |
211 | << " get crushmap [-- options] get crushmap (version VER if specified)\n" |
212 | << " (default: last committed)\n" | |
213 | << " show-versions [-- options] show the first&last committed version of map\n" | |
214 | << " (show-versions -- --help for more info)\n" | |
215 | << " dump-keys dumps store keys to FILE\n" | |
216 | << " (default: stdout)\n" | |
217 | << " dump-paxos [-- options] dump paxos transactions\n" | |
218 | << " (dump-paxos -- --help for more info)\n" | |
219 | << " dump-trace FILE [-- options] dump contents of trace file FILE\n" | |
220 | << " (dump-trace -- --help for more info)\n" | |
221 | << " replay-trace FILE [-- options] replay trace from FILE\n" | |
222 | << " (replay-trace -- --help for more info)\n" | |
223 | << " random-gen [-- options] add randomly generated ops to the store\n" | |
224 | << " (random-gen -- --help for more info)\n" | |
225 | << " rewrite-crush [-- options] add a rewrite commit to the store\n" | |
226 | << " (rewrite-crush -- --help for more info)\n" | |
7c673cae FG |
227 | << " rebuild rebuild store\n" |
228 | << " (rebuild -- --help for more info)\n" | |
229 | << std::endl; | |
230 | std::cerr << d << std::endl; | |
231 | std::cerr | |
232 | << "\nPlease Note:\n" | |
233 | << "* Ceph-specific options should be in the format --option-name=VAL\n" | |
234 | << " (specifically, do not forget the '='!!)\n" | |
235 | << "* Command-specific options need to be passed after a '--'\n" | |
236 | << " e.g., 'get monmap -- --version 10 --out /tmp/foo'" | |
237 | << std::endl; | |
238 | } | |
239 | ||
240 | int update_osdmap(MonitorDBStore& store, version_t ver, bool copy, | |
11fdf7f2 | 241 | std::shared_ptr<CrushWrapper> crush, |
7c673cae FG |
242 | MonitorDBStore::Transaction* t) { |
243 | const string prefix("osdmap"); | |
244 | ||
245 | // full | |
246 | bufferlist bl; | |
247 | int r = 0; | |
248 | r = store.get(prefix, store.combine_strings("full", ver), bl); | |
249 | if (r) { | |
250 | std::cerr << "Error getting full map: " << cpp_strerror(r) << std::endl; | |
251 | return r; | |
252 | } | |
253 | OSDMap osdmap; | |
254 | osdmap.decode(bl); | |
255 | osdmap.crush = crush; | |
256 | if (copy) { | |
257 | osdmap.inc_epoch(); | |
258 | } | |
259 | bl.clear(); | |
260 | // be consistent with OSDMonitor::update_from_paxos() | |
261 | osdmap.encode(bl, CEPH_FEATURES_ALL|CEPH_FEATURE_RESERVED); | |
262 | t->put(prefix, store.combine_strings("full", osdmap.get_epoch()), bl); | |
263 | ||
264 | // incremental | |
265 | OSDMap::Incremental inc; | |
266 | if (copy) { | |
267 | inc.epoch = osdmap.get_epoch(); | |
268 | inc.fsid = osdmap.get_fsid(); | |
269 | } else { | |
270 | bl.clear(); | |
271 | r = store.get(prefix, ver, bl); | |
272 | if (r) { | |
273 | std::cerr << "Error getting inc map: " << cpp_strerror(r) << std::endl; | |
274 | return r; | |
275 | } | |
276 | OSDMap::Incremental inc(bl); | |
277 | if (inc.crush.length()) { | |
278 | inc.crush.clear(); | |
279 | crush->encode(inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); | |
280 | } | |
281 | if (inc.fullmap.length()) { | |
282 | OSDMap fullmap; | |
283 | fullmap.decode(inc.fullmap); | |
284 | fullmap.crush = crush; | |
285 | inc.fullmap.clear(); | |
286 | fullmap.encode(inc.fullmap); | |
287 | } | |
288 | } | |
11fdf7f2 | 289 | ceph_assert(osdmap.have_crc()); |
7c673cae FG |
290 | inc.full_crc = osdmap.get_crc(); |
291 | bl.clear(); | |
292 | // be consistent with OSDMonitor::update_from_paxos() | |
293 | inc.encode(bl, CEPH_FEATURES_ALL|CEPH_FEATURE_RESERVED); | |
294 | t->put(prefix, inc.epoch, bl); | |
295 | return 0; | |
296 | } | |
297 | ||
298 | int rewrite_transaction(MonitorDBStore& store, int version, | |
299 | const string& crush_file, | |
300 | MonitorDBStore::Transaction* t) { | |
301 | const string prefix("osdmap"); | |
302 | ||
303 | // calc the known-good epoch | |
304 | version_t last_committed = store.get(prefix, "last_committed"); | |
305 | version_t good_version = 0; | |
306 | if (version <= 0) { | |
307 | if (last_committed >= (unsigned)-version) { | |
308 | good_version = last_committed + version; | |
309 | } else { | |
310 | std::cerr << "osdmap-version is less than: -" << last_committed << std::endl; | |
311 | return EINVAL; | |
312 | } | |
313 | } else { | |
314 | good_version = version; | |
315 | } | |
316 | if (good_version >= last_committed) { | |
317 | std::cout << "good epoch is greater or equal to the last committed one: " | |
318 | << good_version << " >= " << last_committed << std::endl; | |
319 | return 0; | |
320 | } | |
321 | ||
322 | // load/extract the crush map | |
323 | int r = 0; | |
11fdf7f2 | 324 | std::shared_ptr<CrushWrapper> crush(new CrushWrapper); |
7c673cae FG |
325 | if (crush_file.empty()) { |
326 | bufferlist bl; | |
327 | r = store.get(prefix, store.combine_strings("full", good_version), bl); | |
328 | if (r) { | |
329 | std::cerr << "Error getting map: " << cpp_strerror(r) << std::endl; | |
330 | return r; | |
331 | } | |
332 | OSDMap osdmap; | |
333 | osdmap.decode(bl); | |
334 | crush = osdmap.crush; | |
335 | } else { | |
336 | string err; | |
337 | bufferlist bl; | |
338 | r = bl.read_file(crush_file.c_str(), &err); | |
339 | if (r) { | |
340 | std::cerr << err << ": " << cpp_strerror(r) << std::endl; | |
341 | return r; | |
342 | } | |
11fdf7f2 | 343 | auto p = bl.cbegin(); |
7c673cae FG |
344 | crush->decode(p); |
345 | } | |
346 | ||
347 | // prepare a transaction to rewrite the epochs | |
348 | // (good_version, last_committed] | |
349 | // with the good crush map. | |
350 | // XXX: may need to break this into several paxos versions? | |
11fdf7f2 | 351 | ceph_assert(good_version < last_committed); |
7c673cae FG |
352 | for (version_t v = good_version + 1; v <= last_committed; v++) { |
353 | cout << "rewriting epoch #" << v << "/" << last_committed << std::endl; | |
354 | r = update_osdmap(store, v, false, crush, t); | |
355 | if (r) | |
356 | return r; | |
357 | } | |
358 | ||
359 | // add a new osdmap epoch to store, so monitors will update their current osdmap | |
360 | // in addition to the ones stored in epochs. | |
361 | // | |
362 | // This is needed due to the way the monitor updates from paxos and the | |
363 | // facilities we are leveraging to push this update to the rest of the | |
364 | // quorum. | |
365 | // | |
366 | // In a nutshell, we are generating a good version of the osdmap, with a | |
367 | // proper crush, and building a transaction that will replace the bad | |
368 | // osdmaps with good osdmaps. But this transaction needs to be applied on | |
369 | // all nodes, so that the monitors will have good osdmaps to share with | |
370 | // clients. We thus leverage Paxos, specifically the recovery mechanism, by | |
371 | // creating a pending value that will be committed once the monitors form an | |
372 | // initial quorum after being brought back to life. | |
373 | // | |
374 | // However, the way the monitor works has the paxos services, including the | |
375 | // OSDMonitor, updating their state from disk *prior* to the recovery phase | |
376 | // begins (so they have an up to date state in memory). This means the | |
377 | // OSDMonitor will see the old, broken map, before the new paxos version is | |
378 | // applied to disk, and the old version is cached. Even though we have the | |
379 | // good map now, and we share the good map with clients, we will still be | |
380 | // working on the old broken map. Instead of mucking around the monitor to | |
381 | // make this work, we instead opt for adding the same osdmap but with a | |
382 | // newer version, so that the OSDMonitor picks up on it when it updates from | |
383 | // paxos after the proposal has been committed. This is not elegant, but | |
384 | // avoids further unpleasantness that would arise from kludging around the | |
385 | // current behavior. Also, has the added benefit of making sure the clients | |
386 | // get an updated version of the map (because last_committed+1 > | |
387 | // last_committed) :) | |
388 | // | |
389 | cout << "adding a new epoch #" << last_committed+1 << std::endl; | |
390 | r = update_osdmap(store, last_committed++, true, crush, t); | |
391 | if (r) | |
392 | return r; | |
393 | t->put(prefix, store.combine_strings("full", "latest"), last_committed); | |
394 | t->put(prefix, "last_committed", last_committed); | |
395 | return 0; | |
396 | } | |
397 | ||
398 | /** | |
399 | * create a new paxos version which carries a proposal to rewrite all epochs | |
400 | * of incremental and full map of "osdmap" after a faulty crush map is injected. | |
401 | * so the leader will trigger a recovery and propagate this fix to its peons, | |
402 | * after the proposal is accepted, and the transaction in it is applied. all | |
403 | * monitors will rewrite the bad crush map with the good one, and have a new | |
404 | * osdmap epoch with the good crush map in it. | |
405 | */ | |
406 | int rewrite_crush(const char* progname, | |
407 | vector<string>& subcmds, | |
408 | MonitorDBStore& store) { | |
409 | po::options_description op_desc("Allowed 'rewrite-crush' options"); | |
410 | int version = -1; | |
411 | string crush_file; | |
412 | op_desc.add_options() | |
413 | ("help,h", "produce this help message") | |
414 | ("crush", po::value<string>(&crush_file), | |
415 | ("path to the crush map file " | |
416 | "(default: will instead extract it from the known-good osdmap)")) | |
417 | ("good-epoch", po::value<int>(&version), | |
418 | "known-good epoch of osdmap, if a negative number '-N' is given, the " | |
419 | "$last_committed-N is used instead (default: -1). " | |
420 | "Please note, -1 is not necessarily a good epoch, because there are " | |
421 | "good chance that we have more epochs slipped into the monstore after " | |
422 | "the one where the crushmap is firstly injected.") | |
423 | ; | |
424 | po::variables_map op_vm; | |
425 | int r = parse_cmd_args(&op_desc, NULL, NULL, subcmds, &op_vm); | |
426 | if (r) { | |
427 | return -r; | |
428 | } | |
429 | if (op_vm.count("help")) { | |
430 | usage(progname, op_desc); | |
431 | return 0; | |
432 | } | |
433 | ||
434 | MonitorDBStore::Transaction rewrite_txn; | |
435 | r = rewrite_transaction(store, version, crush_file, &rewrite_txn); | |
436 | if (r) { | |
437 | return r; | |
438 | } | |
439 | ||
440 | // store the transaction into store as a proposal | |
441 | const string prefix("paxos"); | |
442 | version_t pending_v = store.get(prefix, "last_committed") + 1; | |
443 | auto t(std::make_shared<MonitorDBStore::Transaction>()); | |
444 | bufferlist bl; | |
445 | rewrite_txn.encode(bl); | |
446 | cout << "adding pending commit " << pending_v | |
447 | << " " << bl.length() << " bytes" << std::endl; | |
448 | t->put(prefix, pending_v, bl); | |
449 | t->put(prefix, "pending_v", pending_v); | |
450 | // a large enough yet unique proposal number will probably do the trick | |
451 | version_t pending_pn = (store.get(prefix, "accepted_pn") / 100 + 4) * 100 + 1; | |
452 | t->put(prefix, "pending_pn", pending_pn); | |
453 | store.apply_transaction(t); | |
454 | return 0; | |
455 | } | |
456 | ||
7c673cae FG |
457 | static int update_auth(MonitorDBStore& st, const string& keyring_path) |
458 | { | |
459 | // import all keyrings stored in the keyring file | |
460 | KeyRing keyring; | |
461 | int r = keyring.load(g_ceph_context, keyring_path); | |
462 | if (r < 0) { | |
463 | cerr << "unable to load admin keyring: " << keyring_path << std::endl; | |
464 | return r; | |
465 | } | |
466 | ||
467 | bufferlist bl; | |
468 | __u8 v = 1; | |
11fdf7f2 | 469 | encode(v, bl); |
7c673cae FG |
470 | |
471 | for (const auto& k : keyring.get_keys()) { | |
472 | KeyServerData::Incremental auth_inc; | |
473 | auth_inc.name = k.first; | |
474 | auth_inc.auth = k.second; | |
475 | if (auth_inc.auth.caps.empty()) { | |
476 | cerr << "no caps granted to: " << auth_inc.name << std::endl; | |
477 | return -EINVAL; | |
478 | } | |
9f95a23c TL |
479 | map<string,string> caps; |
480 | std::transform(begin(auth_inc.auth.caps), end(auth_inc.auth.caps), | |
481 | inserter(caps, end(caps)), | |
482 | [](auto& cap) { | |
483 | string c; | |
484 | auto p = cap.second.cbegin(); | |
485 | decode(c, p); | |
486 | return make_pair(cap.first, c); | |
487 | }); | |
488 | cout << "adding auth for '" | |
489 | << auth_inc.name << "': " << auth_inc.auth | |
490 | << " with caps(" << caps << ")" << std::endl; | |
7c673cae FG |
491 | auth_inc.op = KeyServerData::AUTH_INC_ADD; |
492 | ||
493 | AuthMonitor::Incremental inc; | |
494 | inc.inc_type = AuthMonitor::AUTH_DATA; | |
11fdf7f2 | 495 | encode(auth_inc, inc.auth_data); |
7c673cae | 496 | inc.auth_type = CEPH_AUTH_CEPHX; |
7c673cae FG |
497 | inc.encode(bl, CEPH_FEATURES_ALL); |
498 | } | |
499 | ||
a4b75251 TL |
500 | // prime rotating secrets |
501 | { | |
502 | KeyServer ks(g_ceph_context, nullptr); | |
503 | KeyServerData::Incremental auth_inc; | |
504 | auth_inc.op = KeyServerData::AUTH_INC_SET_ROTATING; | |
505 | bool r = ks.prepare_rotating_update(auth_inc.rotating_bl); | |
506 | ceph_assert(r); | |
507 | AuthMonitor::Incremental inc; | |
508 | inc.inc_type = AuthMonitor::AUTH_DATA; | |
509 | encode(auth_inc, inc.auth_data); | |
510 | inc.auth_type = CEPH_AUTH_CEPHX; | |
511 | inc.encode(bl, CEPH_FEATURES_ALL); | |
512 | } | |
513 | ||
7c673cae FG |
514 | const string prefix("auth"); |
515 | auto last_committed = st.get(prefix, "last_committed") + 1; | |
516 | auto t = make_shared<MonitorDBStore::Transaction>(); | |
517 | t->put(prefix, last_committed, bl); | |
518 | t->put(prefix, "last_committed", last_committed); | |
519 | auto first_committed = st.get(prefix, "first_committed"); | |
520 | if (!first_committed) { | |
521 | t->put(prefix, "first_committed", last_committed); | |
522 | } | |
523 | st.apply_transaction(t); | |
524 | return 0; | |
525 | } | |
526 | ||
92f5a8d4 TL |
527 | static int update_mkfs(MonitorDBStore& st, |
528 | const string& monmap_path, | |
529 | const vector<string>& mon_ids) | |
7c673cae FG |
530 | { |
531 | MonMap monmap; | |
11fdf7f2 TL |
532 | if (!monmap_path.empty()) { |
533 | cout << __func__ << " pulling initial monmap from " << monmap_path << std::endl; | |
534 | bufferlist bl; | |
535 | string err; | |
536 | int r = bl.read_file(monmap_path.c_str(), &err); | |
537 | if (r < 0) { | |
538 | cerr << "failed to read monmap from " << monmap_path << ": " | |
539 | << cpp_strerror(r) << std::endl; | |
540 | return r; | |
541 | } | |
542 | monmap.decode(bl); | |
543 | } else { | |
544 | cout << __func__ << " generating seed initial monmap" << std::endl; | |
545 | int r = monmap.build_initial(g_ceph_context, true, cerr); | |
546 | if (r) { | |
547 | cerr << "no initial monitors" << std::endl; | |
548 | return -EINVAL; | |
549 | } | |
92f5a8d4 TL |
550 | vector<string> new_names; |
551 | if (!mon_ids.empty()) { | |
552 | if (mon_ids.size() != monmap.size()) { | |
553 | cerr << "Please pass the same number of <mon-ids> to name the hosts " | |
554 | << "listed in 'mon_host'. " | |
555 | << mon_ids.size() << " mon-id(s) specified, " | |
556 | << "while you have " << monmap.size() << " mon hosts." << std::endl; | |
557 | return -EINVAL; | |
558 | } | |
559 | new_names = mon_ids; | |
560 | } else { | |
561 | for (unsigned rank = 0; rank < monmap.size(); rank++) { | |
562 | string new_name{"a"}; | |
563 | new_name[0] += rank; | |
564 | new_names.push_back(std::move(new_name)); | |
565 | } | |
566 | } | |
567 | for (unsigned rank = 0; rank < monmap.size(); rank++) { | |
568 | auto name = monmap.get_name(rank); | |
569 | if (name.compare(0, 7, "noname-") == 0) { | |
570 | monmap.rename(name, new_names[rank]); | |
571 | } | |
572 | } | |
7c673cae | 573 | } |
11fdf7f2 | 574 | monmap.print(cout); |
7c673cae FG |
575 | bufferlist bl; |
576 | monmap.encode(bl, CEPH_FEATURES_ALL); | |
577 | monmap.set_epoch(0); | |
578 | auto t = make_shared<MonitorDBStore::Transaction>(); | |
579 | t->put("mkfs", "monmap", bl); | |
580 | st.apply_transaction(t); | |
581 | return 0; | |
582 | } | |
583 | ||
584 | static int update_monitor(MonitorDBStore& st) | |
585 | { | |
586 | const string prefix("monitor"); | |
587 | // a stripped-down Monitor::mkfs() | |
588 | bufferlist bl; | |
589 | bl.append(CEPH_MON_ONDISK_MAGIC "\n"); | |
590 | auto t = make_shared<MonitorDBStore::Transaction>(); | |
591 | t->put(prefix, "magic", bl); | |
592 | st.apply_transaction(t); | |
593 | return 0; | |
594 | } | |
595 | ||
a8e16298 TL |
596 | // rebuild |
597 | // - creating_pgs | |
598 | static int update_creating_pgs(MonitorDBStore& st) | |
599 | { | |
600 | bufferlist bl; | |
601 | auto last_osdmap_epoch = st.get("osdmap", "last_committed"); | |
602 | int r = st.get("osdmap", st.combine_strings("full", last_osdmap_epoch), bl); | |
603 | if (r < 0) { | |
9f95a23c | 604 | cerr << "unable to load osdmap e" << last_osdmap_epoch << std::endl; |
a8e16298 TL |
605 | return r; |
606 | } | |
607 | ||
608 | OSDMap osdmap; | |
609 | osdmap.decode(bl); | |
610 | creating_pgs_t creating; | |
611 | for (auto& i : osdmap.get_pools()) { | |
612 | creating.created_pools.insert(i.first); | |
613 | } | |
614 | creating.last_scan_epoch = last_osdmap_epoch; | |
615 | ||
616 | bufferlist newbl; | |
9f95a23c | 617 | encode(creating, newbl, CEPH_FEATURES_ALL); |
a8e16298 TL |
618 | |
619 | auto t = make_shared<MonitorDBStore::Transaction>(); | |
620 | t->put("osd_pg_creating", "creating", newbl); | |
621 | st.apply_transaction(t); | |
622 | return 0; | |
623 | } | |
624 | ||
b32b8144 FG |
625 | // rebuild |
626 | // - mgr | |
627 | // - mgr_command_desc | |
3efd9988 FG |
628 | static int update_mgrmap(MonitorDBStore& st) |
629 | { | |
630 | auto t = make_shared<MonitorDBStore::Transaction>(); | |
631 | ||
632 | { | |
633 | MgrMap map; | |
634 | // mgr expects epoch > 1 | |
635 | map.epoch++; | |
636 | auto initial_modules = | |
11fdf7f2 | 637 | get_str_vec(g_ceph_context->_conf.get_val<string>("mgr_initial_modules")); |
3efd9988 FG |
638 | copy(begin(initial_modules), |
639 | end(initial_modules), | |
640 | inserter(map.modules, end(map.modules))); | |
641 | bufferlist bl; | |
642 | map.encode(bl, CEPH_FEATURES_ALL); | |
643 | t->put("mgr", map.epoch, bl); | |
644 | t->put("mgr", "last_committed", map.epoch); | |
645 | } | |
646 | { | |
647 | auto mgr_command_descs = mgr_commands; | |
648 | for (auto& c : mgr_command_descs) { | |
649 | c.set_flag(MonCommand::FLAG_MGR); | |
650 | } | |
651 | bufferlist bl; | |
11fdf7f2 | 652 | encode(mgr_command_descs, bl); |
92f5a8d4 | 653 | t->put("mgr_command_descs", "", bl); |
3efd9988 FG |
654 | } |
655 | return st.apply_transaction(t); | |
656 | } | |
657 | ||
7c673cae FG |
658 | static int update_paxos(MonitorDBStore& st) |
659 | { | |
522d829b TL |
660 | const string prefix("paxos"); |
661 | // a large enough version greater than the maximum possible `last_committed` | |
662 | // that could be replied by the peons when the leader is collecting paxos | |
663 | // transactions during recovery | |
664 | constexpr version_t first_committed = 0x42; | |
665 | constexpr version_t last_committed = first_committed; | |
666 | for (version_t v = first_committed; v < last_committed + 1; v++) { | |
667 | auto t = make_shared<MonitorDBStore::Transaction>(); | |
668 | if (v == first_committed) { | |
669 | t->put(prefix, "first_committed", v); | |
670 | } | |
671 | bufferlist proposal; | |
672 | MonitorDBStore::Transaction empty_txn; | |
673 | empty_txn.encode(proposal); | |
674 | t->put(prefix, v, proposal); | |
675 | t->put(prefix, "last_committed", v); | |
676 | st.apply_transaction(t); | |
677 | } | |
7c673cae FG |
678 | // build a pending paxos proposal from all non-permanent k/v pairs. once the |
679 | // proposal is committed, it will gets applied. on the sync provider side, it | |
680 | // will be a no-op, but on its peers, the paxos commit will help to build up | |
681 | // the necessary epochs. | |
682 | bufferlist pending_proposal; | |
683 | { | |
684 | MonitorDBStore::Transaction t; | |
685 | vector<string> prefixes = {"auth", "osdmap", | |
a8e16298 | 686 | "mgr", "mgr_command_desc"}; |
7c673cae FG |
687 | for (const auto& prefix : prefixes) { |
688 | for (auto i = st.get_iterator(prefix); i->valid(); i->next()) { | |
689 | auto key = i->raw_key(); | |
690 | auto val = i->value(); | |
691 | t.put(key.first, key.second, val); | |
692 | } | |
693 | } | |
694 | t.encode(pending_proposal); | |
695 | } | |
522d829b | 696 | auto pending_v = last_committed + 1; |
7c673cae | 697 | auto t = make_shared<MonitorDBStore::Transaction>(); |
7c673cae FG |
698 | t->put(prefix, pending_v, pending_proposal); |
699 | t->put(prefix, "pending_v", pending_v); | |
700 | t->put(prefix, "pending_pn", 400); | |
701 | st.apply_transaction(t); | |
702 | return 0; | |
703 | } | |
704 | ||
7c673cae FG |
705 | int rebuild_monstore(const char* progname, |
706 | vector<string>& subcmds, | |
707 | MonitorDBStore& st) | |
708 | { | |
709 | po::options_description op_desc("Allowed 'rebuild' options"); | |
710 | string keyring_path; | |
11fdf7f2 | 711 | string monmap_path; |
92f5a8d4 | 712 | vector<string> mon_ids; |
7c673cae FG |
713 | op_desc.add_options() |
714 | ("keyring", po::value<string>(&keyring_path), | |
11fdf7f2 TL |
715 | "path to the client.admin key") |
716 | ("monmap", po::value<string>(&monmap_path), | |
92f5a8d4 TL |
717 | "path to the initial monmap") |
718 | ("mon-ids", po::value<vector<string>>(&mon_ids)->multitoken(), | |
719 | "mon ids, use 'a', 'b', ... if not specified"); | |
720 | po::positional_options_description pos_desc; | |
721 | pos_desc.add("mon-ids", -1); | |
7c673cae | 722 | po::variables_map op_vm; |
92f5a8d4 | 723 | int r = parse_cmd_args(&op_desc, nullptr, &pos_desc, subcmds, &op_vm); |
7c673cae FG |
724 | if (r) { |
725 | return -r; | |
726 | } | |
727 | if (op_vm.count("help")) { | |
728 | usage(progname, op_desc); | |
729 | return 0; | |
730 | } | |
731 | if (!keyring_path.empty()) | |
732 | update_auth(st, keyring_path); | |
a8e16298 | 733 | if ((r = update_creating_pgs(st))) { |
7c673cae FG |
734 | return r; |
735 | } | |
b32b8144 FG |
736 | if ((r = update_mgrmap(st))) { |
737 | return r; | |
738 | } | |
7c673cae FG |
739 | if ((r = update_paxos(st))) { |
740 | return r; | |
741 | } | |
92f5a8d4 | 742 | if ((r = update_mkfs(st, monmap_path, mon_ids))) { |
7c673cae FG |
743 | return r; |
744 | } | |
745 | if ((r = update_monitor(st))) { | |
746 | return r; | |
747 | } | |
748 | return 0; | |
749 | } | |
750 | ||
751 | int main(int argc, char **argv) { | |
752 | int err = 0; | |
753 | po::options_description desc("Allowed options"); | |
754 | string store_path, cmd; | |
755 | vector<string> subcmds; | |
756 | desc.add_options() | |
757 | ("help,h", "produce help message") | |
758 | ; | |
759 | ||
760 | /* Dear Future Developer: | |
761 | * | |
762 | * for further improvement, should you need to pass specific options to | |
763 | * a command (e.g., get osdmap VER --hex), you can expand the current | |
764 | * format by creating additional 'po::option_description' and passing | |
765 | * 'subcmds' to 'po::command_line_parser', much like what is currently | |
766 | * done by default. However, beware: in order to differentiate a | |
767 | * command-specific option from the generic/global options, you will need | |
768 | * to pass '--' in the command line (so that the first parser, the one | |
769 | * below, assumes it has reached the end of all options); e.g., | |
770 | * 'get osdmap VER -- --hex'. Not pretty; far from intuitive; it was as | |
771 | * far as I got with this library. Improvements on this format will be | |
772 | * left as an exercise for the reader. -Joao | |
773 | */ | |
774 | po::options_description positional_desc("Positional argument options"); | |
775 | positional_desc.add_options() | |
776 | ("store-path", po::value<string>(&store_path), | |
777 | "path to monitor's store") | |
778 | ("command", po::value<string>(&cmd), | |
779 | "Command") | |
780 | ("subcmd", po::value<vector<string> >(&subcmds), | |
781 | "Command arguments/Sub-Commands") | |
782 | ; | |
783 | po::positional_options_description positional; | |
784 | positional.add("store-path", 1); | |
785 | positional.add("command", 1); | |
786 | positional.add("subcmd", -1); | |
787 | ||
788 | po::options_description all_desc("All options"); | |
789 | all_desc.add(desc).add(positional_desc); | |
790 | ||
791 | vector<string> ceph_option_strings; | |
792 | po::variables_map vm; | |
793 | try { | |
794 | po::parsed_options parsed = | |
795 | po::command_line_parser(argc, argv). | |
796 | options(all_desc). | |
797 | positional(positional). | |
798 | allow_unregistered().run(); | |
799 | ||
800 | po::store( | |
801 | parsed, | |
802 | vm); | |
803 | po::notify(vm); | |
804 | ||
805 | // Specifying po::include_positional would have our positional arguments | |
806 | // being collected (thus being part of ceph_option_strings and eventually | |
807 | // passed on to global_init() below). | |
808 | // Instead we specify po::exclude_positional, which has the upside of | |
809 | // completely avoid this, but the downside of having to specify ceph | |
810 | // options as --VAR=VAL (note the '='); otherwise we will capture the | |
811 | // positional 'VAL' as belonging to us, never being collected. | |
812 | ceph_option_strings = po::collect_unrecognized(parsed.options, | |
813 | po::exclude_positional); | |
814 | ||
815 | } catch(po::error &e) { | |
816 | std::cerr << "error: " << e.what() << std::endl; | |
817 | return 1; | |
818 | } | |
819 | ||
820 | // parse command structure before calling global_init() and friends. | |
821 | ||
822 | if (vm.empty() || vm.count("help") || | |
823 | store_path.empty() || cmd.empty() || | |
824 | *cmd.begin() == '-') { | |
825 | usage(argv[0], desc); | |
826 | return 1; | |
827 | } | |
828 | ||
11fdf7f2 | 829 | vector<const char *> ceph_options; |
7c673cae FG |
830 | ceph_options.reserve(ceph_option_strings.size()); |
831 | for (vector<string>::iterator i = ceph_option_strings.begin(); | |
832 | i != ceph_option_strings.end(); | |
833 | ++i) { | |
834 | ceph_options.push_back(i->c_str()); | |
835 | } | |
836 | ||
837 | auto cct = global_init( | |
11fdf7f2 TL |
838 | NULL, ceph_options, CEPH_ENTITY_TYPE_MON, |
839 | CODE_ENVIRONMENT_UTILITY, | |
840 | CINIT_FLAG_NO_MON_CONFIG); | |
7c673cae | 841 | common_init_finish(g_ceph_context); |
11fdf7f2 | 842 | cct->_conf.apply_changes(nullptr); |
7c673cae FG |
843 | |
844 | // this is where we'll write *whatever*, on a per-command basis. | |
845 | // not all commands require some place to write their things. | |
846 | MonitorDBStore st(store_path); | |
847 | if (store_path.size()) { | |
848 | stringstream ss; | |
849 | int r = st.open(ss); | |
850 | if (r < 0) { | |
851 | std::cerr << ss.str() << std::endl; | |
852 | return EINVAL; | |
853 | } | |
854 | } | |
855 | ||
20effc67 TL |
856 | auto close_store = make_scope_guard([&] { |
857 | st.close(); | |
858 | }); | |
859 | ||
7c673cae FG |
860 | if (cmd == "dump-keys") { |
861 | KeyValueDB::WholeSpaceIterator iter = st.get_iterator(); | |
862 | while (iter->valid()) { | |
863 | pair<string,string> key(iter->raw_key()); | |
864 | cout << key.first << " / " << key.second << std::endl; | |
865 | iter->next(); | |
866 | } | |
867 | } else if (cmd == "compact") { | |
868 | st.compact(); | |
869 | } else if (cmd == "get") { | |
870 | unsigned v = 0; | |
871 | string outpath; | |
7c673cae FG |
872 | string map_type; |
873 | // visible options for this command | |
874 | po::options_description op_desc("Allowed 'get' options"); | |
875 | op_desc.add_options() | |
876 | ("help,h", "produce this help message") | |
877 | ("out,o", po::value<string>(&outpath), | |
878 | "output file (default: stdout)") | |
879 | ("version,v", po::value<unsigned>(&v), | |
880 | "map version to obtain") | |
9f95a23c | 881 | ("readable,r", "print the map information in human readable format") |
7c673cae FG |
882 | ; |
883 | // this is going to be a positional argument; we don't want to show | |
884 | // it as an option during --help, but we do want to have it captured | |
885 | // when parsing. | |
886 | po::options_description hidden_op_desc("Hidden 'get' options"); | |
887 | hidden_op_desc.add_options() | |
888 | ("map-type", po::value<string>(&map_type), | |
889 | "map-type") | |
890 | ; | |
891 | po::positional_options_description op_positional; | |
892 | op_positional.add("map-type", 1); | |
893 | ||
894 | po::variables_map op_vm; | |
895 | int r = parse_cmd_args(&op_desc, &hidden_op_desc, &op_positional, | |
896 | subcmds, &op_vm); | |
897 | if (r < 0) { | |
20effc67 | 898 | return -r; |
7c673cae FG |
899 | } |
900 | ||
901 | if (op_vm.count("help") || map_type.empty()) { | |
902 | usage(argv[0], op_desc); | |
20effc67 | 903 | return 0; |
7c673cae FG |
904 | } |
905 | ||
906 | if (v == 0) { | |
907 | if (map_type == "crushmap") { | |
908 | v = st.get("osdmap", "last_committed"); | |
909 | } else { | |
910 | v = st.get(map_type, "last_committed"); | |
911 | } | |
912 | } | |
913 | ||
914 | int fd = STDOUT_FILENO; | |
915 | if (!outpath.empty()){ | |
f67539c2 | 916 | fd = ::open(outpath.c_str(), O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0666); |
7c673cae FG |
917 | if (fd < 0) { |
918 | std::cerr << "error opening output file: " | |
919 | << cpp_strerror(errno) << std::endl; | |
20effc67 | 920 | return EINVAL; |
7c673cae FG |
921 | } |
922 | } | |
923 | ||
20effc67 | 924 | auto close_fd = make_scope_guard([&] { |
7c673cae FG |
925 | ::close(fd); |
926 | if (r < 0 && fd != STDOUT_FILENO) { | |
927 | ::remove(outpath.c_str()); | |
928 | } | |
20effc67 | 929 | }); |
7c673cae FG |
930 | |
931 | bufferlist bl; | |
932 | r = 0; | |
933 | if (map_type == "osdmap") { | |
934 | r = st.get(map_type, st.combine_strings("full", v), bl); | |
935 | } else if (map_type == "crushmap") { | |
936 | bufferlist tmp; | |
937 | r = st.get("osdmap", st.combine_strings("full", v), tmp); | |
938 | if (r >= 0) { | |
939 | OSDMap osdmap; | |
940 | osdmap.decode(tmp); | |
941 | osdmap.crush->encode(bl, CEPH_FEATURES_SUPPORTED_DEFAULT); | |
942 | } | |
943 | } else { | |
944 | r = st.get(map_type, v, bl); | |
945 | } | |
946 | if (r < 0) { | |
947 | std::cerr << "Error getting map: " << cpp_strerror(r) << std::endl; | |
20effc67 | 948 | return EINVAL; |
7c673cae FG |
949 | } |
950 | ||
9f95a23c | 951 | if (op_vm.count("readable")) { |
7c673cae FG |
952 | stringstream ss; |
953 | bufferlist out; | |
b32b8144 FG |
954 | try { |
955 | if (map_type == "monmap") { | |
956 | MonMap monmap; | |
957 | monmap.decode(bl); | |
958 | monmap.print(ss); | |
959 | } else if (map_type == "osdmap") { | |
960 | OSDMap osdmap; | |
961 | osdmap.decode(bl); | |
962 | osdmap.print(ss); | |
963 | } else if (map_type == "mdsmap") { | |
964 | FSMap fs_map; | |
965 | fs_map.decode(bl); | |
966 | fs_map.print(ss); | |
967 | } else if (map_type == "mgr") { | |
968 | MgrMap mgr_map; | |
11fdf7f2 | 969 | auto p = bl.cbegin(); |
b32b8144 FG |
970 | mgr_map.decode(p); |
971 | JSONFormatter f; | |
972 | f.dump_object("mgrmap", mgr_map); | |
973 | f.flush(ss); | |
974 | } else if (map_type == "crushmap") { | |
975 | CrushWrapper cw; | |
11fdf7f2 | 976 | auto it = bl.cbegin(); |
b32b8144 FG |
977 | cw.decode(it); |
978 | CrushCompiler cc(cw, std::cerr, 0); | |
979 | cc.decompile(ss); | |
980 | } else { | |
981 | std::cerr << "This type of readable map does not exist: " << map_type | |
982 | << std::endl << "You can only specify[osdmap|monmap|mdsmap" | |
983 | "|crushmap|mgr]" << std::endl; | |
984 | } | |
985 | } catch (const buffer::error &err) { | |
986 | std::cerr << "Could not decode for human readable output (you may still" | |
f67539c2 | 987 | " use non-readable mode). Detail: " << err.what() << std::endl; |
7c673cae | 988 | } |
b32b8144 | 989 | |
7c673cae FG |
990 | out.append(ss); |
991 | out.write_fd(fd); | |
992 | } else { | |
993 | bl.write_fd(fd); | |
994 | } | |
995 | ||
996 | if (!outpath.empty()) { | |
997 | std::cout << "wrote " << map_type | |
998 | << " version " << v << " to " << outpath | |
999 | << std::endl; | |
1000 | } | |
1001 | } else if (cmd == "show-versions") { | |
1002 | string map_type; //map type:osdmap,monmap... | |
1003 | // visible options for this command | |
1004 | po::options_description op_desc("Allowed 'show-versions' options"); | |
1005 | op_desc.add_options() | |
1006 | ("help,h", "produce this help message") | |
1007 | ("map-type", po::value<string>(&map_type), "map_type"); | |
1008 | ||
1009 | po::positional_options_description op_positional; | |
1010 | op_positional.add("map-type", 1); | |
1011 | ||
1012 | po::variables_map op_vm; | |
1013 | int r = parse_cmd_args(&op_desc, NULL, &op_positional, | |
1014 | subcmds, &op_vm); | |
1015 | if (r < 0) { | |
20effc67 | 1016 | return -r; |
7c673cae FG |
1017 | } |
1018 | ||
1019 | if (op_vm.count("help") || map_type.empty()) { | |
1020 | usage(argv[0], op_desc); | |
20effc67 | 1021 | return 0; |
7c673cae FG |
1022 | } |
1023 | ||
1024 | unsigned int v_first = 0; | |
1025 | unsigned int v_last = 0; | |
1026 | v_first = st.get(map_type, "first_committed"); | |
1027 | v_last = st.get(map_type, "last_committed"); | |
1028 | ||
1029 | std::cout << "first committed:\t" << v_first << "\n" | |
1030 | << "last committed:\t" << v_last << std::endl; | |
1031 | } else if (cmd == "dump-paxos") { | |
1032 | unsigned dstart = 0; | |
1033 | unsigned dstop = ~0; | |
1034 | po::options_description op_desc("Allowed 'dump-paxos' options"); | |
1035 | op_desc.add_options() | |
1036 | ("help,h", "produce this help message") | |
1037 | ("start,s", po::value<unsigned>(&dstart), | |
1038 | "starting version (default: 0)") | |
1039 | ("end,e", po::value<unsigned>(&dstop), | |
1040 | "finish version (default: ~0)") | |
1041 | ; | |
1042 | ||
1043 | po::variables_map op_vm; | |
1044 | int r = parse_cmd_args(&op_desc, NULL, NULL, | |
1045 | subcmds, &op_vm); | |
1046 | if (r < 0) { | |
20effc67 | 1047 | return -r; |
7c673cae FG |
1048 | } |
1049 | ||
1050 | if (op_vm.count("help")) { | |
1051 | usage(argv[0], op_desc); | |
20effc67 | 1052 | return 0; |
7c673cae FG |
1053 | } |
1054 | ||
1055 | if (dstart > dstop) { | |
1056 | std::cerr << "error: 'start' version (value: " << dstart << ") " | |
1057 | << " is greater than 'end' version (value: " << dstop << ")" | |
1058 | << std::endl; | |
20effc67 | 1059 | return EINVAL; |
7c673cae FG |
1060 | } |
1061 | ||
1062 | version_t v = dstart; | |
1063 | for (; v <= dstop; ++v) { | |
1064 | bufferlist bl; | |
1065 | st.get("paxos", v, bl); | |
1066 | if (bl.length() == 0) | |
1067 | break; | |
1068 | cout << "\n--- " << v << " ---" << std::endl; | |
1069 | auto tx(std::make_shared<MonitorDBStore::Transaction>()); | |
1070 | Paxos::decode_append_transaction(tx, bl); | |
1071 | JSONFormatter f(true); | |
1072 | tx->dump(&f); | |
1073 | f.flush(cout); | |
1074 | } | |
1075 | ||
1076 | std::cout << "dumped " << v << " paxos versions" << std::endl; | |
1077 | ||
1078 | } else if (cmd == "dump-trace") { | |
1079 | unsigned dstart = 0; | |
1080 | unsigned dstop = ~0; | |
1081 | string outpath; | |
1082 | ||
1083 | // visible options for this command | |
1084 | po::options_description op_desc("Allowed 'dump-trace' options"); | |
1085 | op_desc.add_options() | |
1086 | ("help,h", "produce this help message") | |
1087 | ("start,s", po::value<unsigned>(&dstart), | |
1088 | "starting version (default: 0)") | |
1089 | ("end,e", po::value<unsigned>(&dstop), | |
1090 | "finish version (default: ~0)") | |
1091 | ; | |
1092 | // this is going to be a positional argument; we don't want to show | |
1093 | // it as an option during --help, but we do want to have it captured | |
1094 | // when parsing. | |
1095 | po::options_description hidden_op_desc("Hidden 'dump-trace' options"); | |
1096 | hidden_op_desc.add_options() | |
1097 | ("out,o", po::value<string>(&outpath), | |
1098 | "file to write the dump to") | |
1099 | ; | |
1100 | po::positional_options_description op_positional; | |
1101 | op_positional.add("out", 1); | |
1102 | ||
1103 | po::variables_map op_vm; | |
1104 | int r = parse_cmd_args(&op_desc, &hidden_op_desc, &op_positional, | |
1105 | subcmds, &op_vm); | |
1106 | if (r < 0) { | |
20effc67 | 1107 | return -r; |
7c673cae FG |
1108 | } |
1109 | ||
1110 | if (op_vm.count("help")) { | |
1111 | usage(argv[0], op_desc); | |
20effc67 | 1112 | return 0; |
7c673cae FG |
1113 | } |
1114 | ||
1115 | if (outpath.empty()) { | |
1116 | usage(argv[0], op_desc); | |
20effc67 | 1117 | return EINVAL; |
7c673cae FG |
1118 | } |
1119 | ||
1120 | if (dstart > dstop) { | |
1121 | std::cerr << "error: 'start' version (value: " << dstart << ") " | |
1122 | << " is greater than 'stop' version (value: " << dstop << ")" | |
1123 | << std::endl; | |
20effc67 | 1124 | return EINVAL; |
7c673cae FG |
1125 | } |
1126 | ||
1127 | TraceIter iter(outpath.c_str()); | |
1128 | iter.init(); | |
1129 | while (true) { | |
1130 | if (!iter.valid()) | |
1131 | break; | |
1132 | if (iter.num() >= dstop) { | |
1133 | break; | |
1134 | } | |
1135 | if (iter.num() >= dstart) { | |
1136 | JSONFormatter f(true); | |
1137 | iter.cur()->dump(&f, false); | |
1138 | f.flush(std::cout); | |
1139 | std::cout << std::endl; | |
1140 | } | |
1141 | iter.next(); | |
1142 | } | |
1143 | std::cerr << "Read up to transaction " << iter.num() << std::endl; | |
1144 | } else if (cmd == "replay-trace") { | |
1145 | string inpath; | |
1146 | unsigned num_replays = 1; | |
1147 | // visible options for this command | |
1148 | po::options_description op_desc("Allowed 'replay-trace' options"); | |
1149 | op_desc.add_options() | |
1150 | ("help,h", "produce this help message") | |
1151 | ("num-replays,n", po::value<unsigned>(&num_replays), | |
1152 | "finish version (default: 1)") | |
1153 | ; | |
1154 | // this is going to be a positional argument; we don't want to show | |
1155 | // it as an option during --help, but we do want to have it captured | |
1156 | // when parsing. | |
1157 | po::options_description hidden_op_desc("Hidden 'replay-trace' options"); | |
1158 | hidden_op_desc.add_options() | |
1159 | ("in,i", po::value<string>(&inpath), | |
1160 | "file to write the dump to") | |
1161 | ; | |
1162 | po::positional_options_description op_positional; | |
1163 | op_positional.add("in", 1); | |
1164 | ||
1165 | // op_desc_all will aggregate all visible and hidden options for parsing. | |
1166 | // when we call 'usage()' we just pass 'op_desc', as that's the description | |
1167 | // holding the visible options. | |
1168 | po::options_description op_desc_all; | |
1169 | op_desc_all.add(op_desc).add(hidden_op_desc); | |
1170 | ||
1171 | po::variables_map op_vm; | |
1172 | try { | |
1173 | po::parsed_options op_parsed = po::command_line_parser(subcmds). | |
1174 | options(op_desc_all).positional(op_positional).run(); | |
1175 | po::store(op_parsed, op_vm); | |
1176 | po::notify(op_vm); | |
1177 | } catch (po::error &e) { | |
1178 | std::cerr << "error: " << e.what() << std::endl; | |
20effc67 | 1179 | return EINVAL; |
7c673cae FG |
1180 | } |
1181 | ||
1182 | if (op_vm.count("help")) { | |
1183 | usage(argv[0], op_desc); | |
20effc67 | 1184 | return 0; |
7c673cae FG |
1185 | } |
1186 | ||
1187 | if (inpath.empty()) { | |
1188 | usage(argv[0], op_desc); | |
20effc67 | 1189 | return EINVAL; |
7c673cae FG |
1190 | } |
1191 | ||
1192 | unsigned num = 0; | |
1193 | for (unsigned i = 0; i < num_replays; ++i) { | |
1194 | TraceIter iter(inpath.c_str()); | |
1195 | iter.init(); | |
1196 | while (true) { | |
1197 | if (!iter.valid()) | |
1198 | break; | |
1199 | std::cerr << "Replaying trans num " << num << std::endl; | |
1200 | st.apply_transaction(iter.cur()); | |
1201 | iter.next(); | |
1202 | ++num; | |
1203 | } | |
1204 | std::cerr << "Read up to transaction " << iter.num() << std::endl; | |
1205 | } | |
1206 | } else if (cmd == "random-gen") { | |
1207 | unsigned tsize = 200; | |
1208 | unsigned tvalsize = 1024; | |
1209 | unsigned ntrans = 100; | |
1210 | po::options_description op_desc("Allowed 'random-gen' options"); | |
1211 | op_desc.add_options() | |
1212 | ("help,h", "produce this help message") | |
1213 | ("num-keys,k", po::value<unsigned>(&tsize), | |
1214 | "keys to write in each transaction (default: 200)") | |
1215 | ("size,s", po::value<unsigned>(&tvalsize), | |
1216 | "size (in bytes) of the value to write in each key (default: 1024)") | |
1217 | ("ntrans,n", po::value<unsigned>(&ntrans), | |
1218 | "number of transactions to run (default: 100)") | |
1219 | ; | |
1220 | ||
1221 | po::variables_map op_vm; | |
1222 | try { | |
1223 | po::parsed_options op_parsed = po::command_line_parser(subcmds). | |
1224 | options(op_desc).run(); | |
1225 | po::store(op_parsed, op_vm); | |
1226 | po::notify(op_vm); | |
1227 | } catch (po::error &e) { | |
1228 | std::cerr << "error: " << e.what() << std::endl; | |
20effc67 | 1229 | return EINVAL; |
7c673cae FG |
1230 | } |
1231 | ||
1232 | if (op_vm.count("help")) { | |
1233 | usage(argv[0], op_desc); | |
20effc67 | 1234 | return 0; |
7c673cae FG |
1235 | } |
1236 | ||
1237 | unsigned num = 0; | |
1238 | for (unsigned i = 0; i < ntrans; ++i) { | |
1239 | std::cerr << "Applying trans " << i << std::endl; | |
1240 | auto t(std::make_shared<MonitorDBStore::Transaction>()); | |
1241 | string prefix; | |
1242 | prefix.push_back((i%26)+'a'); | |
1243 | for (unsigned j = 0; j < tsize; ++j) { | |
1244 | stringstream os; | |
1245 | os << num; | |
1246 | bufferlist bl; | |
1247 | for (unsigned k = 0; k < tvalsize; ++k) bl.append(rand()); | |
1248 | t->put(prefix, os.str(), bl); | |
1249 | ++num; | |
1250 | } | |
1251 | t->compact_prefix(prefix); | |
1252 | st.apply_transaction(t); | |
1253 | } | |
1254 | } else if (cmd == "store-copy") { | |
1255 | if (subcmds.size() < 1 || subcmds[0].empty()) { | |
1256 | usage(argv[0], desc); | |
20effc67 | 1257 | return EINVAL; |
7c673cae FG |
1258 | } |
1259 | ||
1260 | string out_path = subcmds[0]; | |
1261 | ||
1262 | MonitorDBStore out_store(out_path); | |
1263 | { | |
1264 | stringstream ss; | |
1265 | int r = out_store.create_and_open(ss); | |
1266 | if (r < 0) { | |
1267 | std::cerr << ss.str() << std::endl; | |
20effc67 | 1268 | return err; |
7c673cae FG |
1269 | } |
1270 | } | |
1271 | ||
1272 | ||
1273 | KeyValueDB::WholeSpaceIterator it = st.get_iterator(); | |
1274 | uint64_t total_keys = 0; | |
1275 | uint64_t total_size = 0; | |
1276 | uint64_t total_tx = 0; | |
1277 | ||
1278 | do { | |
1279 | uint64_t num_keys = 0; | |
1280 | ||
1281 | auto tx(std::make_shared<MonitorDBStore::Transaction>()); | |
1282 | ||
1283 | while (it->valid() && num_keys < 128) { | |
1284 | pair<string,string> k = it->raw_key(); | |
1285 | bufferlist v = it->value(); | |
1286 | tx->put(k.first, k.second, v); | |
1287 | ||
1288 | num_keys ++; | |
1289 | total_tx ++; | |
1290 | total_size += v.length(); | |
1291 | ||
1292 | it->next(); | |
1293 | } | |
1294 | ||
1295 | total_keys += num_keys; | |
1296 | ||
1297 | if (!tx->empty()) | |
1298 | out_store.apply_transaction(tx); | |
1299 | ||
1300 | std::cout << "copied " << total_keys << " keys so far (" | |
1adf2230 | 1301 | << stringify(byte_u_t(total_size)) << ")" << std::endl; |
7c673cae FG |
1302 | |
1303 | } while (it->valid()); | |
1304 | out_store.close(); | |
1305 | std::cout << "summary: copied " << total_keys << " keys, using " | |
1306 | << total_tx << " transactions, totalling " | |
1adf2230 | 1307 | << stringify(byte_u_t(total_size)) << std::endl; |
7c673cae FG |
1308 | std::cout << "from '" << store_path << "' to '" << out_path << "'" |
1309 | << std::endl; | |
1310 | } else if (cmd == "rewrite-crush") { | |
1311 | err = rewrite_crush(argv[0], subcmds, st); | |
7c673cae FG |
1312 | } else if (cmd == "rebuild") { |
1313 | err = rebuild_monstore(argv[0], subcmds, st); | |
1314 | } else { | |
1315 | std::cerr << "Unrecognized command: " << cmd << std::endl; | |
1316 | usage(argv[0], desc); | |
20effc67 | 1317 | return err; |
7c673cae | 1318 | } |
7c673cae | 1319 | } |