]> git.proxmox.com Git - ceph.git/blame - ceph/src/mon/MonmapMonitor.cc
bump version to 15.2.6-pve1
[ceph.git] / ceph / src / mon / MonmapMonitor.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2009 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include "MonmapMonitor.h"
16#include "Monitor.h"
17#include "messages/MMonCommand.h"
18#include "messages/MMonJoin.h"
19
20#include "common/ceph_argparse.h"
21#include "common/errno.h"
22#include <sstream>
23#include "common/config.h"
24#include "common/cmdparse.h"
25
11fdf7f2 26#include "include/ceph_assert.h"
7c673cae
FG
27#include "include/stringify.h"
28
29#define dout_subsys ceph_subsys_mon
30#undef dout_prefix
31#define dout_prefix _prefix(_dout, mon)
9f95a23c 32using namespace TOPNSPC::common;
7c673cae
FG
33static ostream& _prefix(std::ostream *_dout, Monitor *mon) {
34 return *_dout << "mon." << mon->name << "@" << mon->rank
35 << "(" << mon->get_state_name()
36 << ").monmap v" << mon->monmap->epoch << " ";
37}
38
39void MonmapMonitor::create_initial()
40{
224ce89b 41 dout(10) << __func__ << " using current monmap" << dendl;
7c673cae
FG
42 pending_map = *mon->monmap;
43 pending_map.epoch = 1;
31f18b77 44
11fdf7f2 45 if (g_conf()->mon_debug_no_initial_persistent_features) {
31f18b77
FG
46 derr << __func__ << " mon_debug_no_initial_persistent_features=true"
47 << dendl;
48 } else {
49 // initialize with default persistent features for new clusters
50 pending_map.persistent_features = ceph::features::mon::get_persistent();
11fdf7f2 51 pending_map.min_mon_release = ceph_release();
31f18b77 52 }
7c673cae
FG
53}
54
55void MonmapMonitor::update_from_paxos(bool *need_bootstrap)
56{
57 version_t version = get_last_committed();
58 if (version <= mon->monmap->get_epoch())
59 return;
60
61 dout(10) << __func__ << " version " << version
62 << ", my v " << mon->monmap->epoch << dendl;
63
64 if (need_bootstrap && version != mon->monmap->get_epoch()) {
65 dout(10) << " signaling that we need a bootstrap" << dendl;
66 *need_bootstrap = true;
67 }
68
69 // read and decode
70 monmap_bl.clear();
71 int ret = get_version(version, monmap_bl);
11fdf7f2
TL
72 ceph_assert(ret == 0);
73 ceph_assert(monmap_bl.length());
7c673cae 74
224ce89b 75 dout(10) << __func__ << " got " << version << dendl;
7c673cae
FG
76 mon->monmap->decode(monmap_bl);
77
78 if (mon->store->exists("mkfs", "monmap")) {
79 auto t(std::make_shared<MonitorDBStore::Transaction>());
80 t->erase("mkfs", "monmap");
81 mon->store->apply_transaction(t);
82 }
83
84 check_subs();
11fdf7f2
TL
85
86 // make sure we've recorded min_mon_release
87 string val;
88 if (mon->store->read_meta("min_mon_release", &val) < 0 ||
89 val.size() == 0 ||
90 atoi(val.c_str()) != (int)ceph_release()) {
91 dout(10) << __func__ << " updating min_mon_release meta" << dendl;
92 mon->store->write_meta("min_mon_release",
93 stringify(ceph_release()));
94 }
7c673cae
FG
95}
96
97void MonmapMonitor::create_pending()
98{
99 pending_map = *mon->monmap;
100 pending_map.epoch++;
101 pending_map.last_changed = ceph_clock_now();
224ce89b 102 dout(10) << __func__ << " monmap epoch " << pending_map.epoch << dendl;
7c673cae
FG
103}
104
105void MonmapMonitor::encode_pending(MonitorDBStore::TransactionRef t)
106{
224ce89b 107 dout(10) << __func__ << " epoch " << pending_map.epoch << dendl;
7c673cae 108
11fdf7f2 109 ceph_assert(mon->monmap->epoch + 1 == pending_map.epoch ||
7c673cae
FG
110 pending_map.epoch == 1); // special case mkfs!
111 bufferlist bl;
112 pending_map.encode(bl, mon->get_quorum_con_features());
113
114 put_version(t, pending_map.epoch, bl);
115 put_last_committed(t, pending_map.epoch);
116
117 // generate a cluster fingerprint, too?
118 if (pending_map.epoch == 1) {
119 mon->prepare_new_fingerprint(t);
120 }
121}
122
123class C_ApplyFeatures : public Context {
124 MonmapMonitor *svc;
125 mon_feature_t features;
9f95a23c 126 ceph_release_t min_mon_release;
11fdf7f2 127public:
9f95a23c 128 C_ApplyFeatures(MonmapMonitor *s, const mon_feature_t& f, ceph_release_t mmr) :
11fdf7f2 129 svc(s), features(f), min_mon_release(mmr) { }
7c673cae
FG
130 void finish(int r) override {
131 if (r >= 0) {
11fdf7f2 132 svc->apply_mon_features(features, min_mon_release);
7c673cae
FG
133 } else if (r == -EAGAIN || r == -ECANCELED) {
134 // discard features if we're no longer on the quorum that
135 // established them in the first place.
136 return;
137 } else {
11fdf7f2 138 ceph_abort_msg("bad C_ApplyFeatures return value");
7c673cae
FG
139 }
140 }
141};
142
11fdf7f2 143void MonmapMonitor::apply_mon_features(const mon_feature_t& features,
9f95a23c 144 ceph_release_t min_mon_release)
7c673cae
FG
145{
146 if (!is_writeable()) {
147 dout(5) << __func__ << " wait for service to be writeable" << dendl;
11fdf7f2 148 wait_for_writeable_ctx(new C_ApplyFeatures(this, features, min_mon_release));
7c673cae
FG
149 return;
150 }
151
11fdf7f2
TL
152 // do nothing here unless we have a full quorum
153 if (mon->get_quorum().size() < mon->monmap->size()) {
154 return;
155 }
156
157 ceph_assert(is_writeable());
158 ceph_assert(features.contains_all(pending_map.persistent_features));
7c673cae
FG
159 // we should never hit this because `features` should be the result
160 // of the quorum's supported features. But if it happens, die.
11fdf7f2 161 ceph_assert(ceph::features::mon::get_supported().contains_all(features));
7c673cae
FG
162
163 mon_feature_t new_features =
164 (pending_map.persistent_features ^
165 (features & ceph::features::mon::get_persistent()));
166
11fdf7f2
TL
167 if (new_features.empty() &&
168 pending_map.min_mon_release == min_mon_release) {
81eedcae 169 dout(10) << __func__ << " min_mon_release (" << (int)min_mon_release
11fdf7f2 170 << ") and features (" << features << ") match" << dendl;
7c673cae
FG
171 return;
172 }
173
11fdf7f2
TL
174 if (!new_features.empty()) {
175 dout(1) << __func__ << " applying new features "
176 << new_features << ", had " << pending_map.persistent_features
177 << ", will have "
178 << (new_features | pending_map.persistent_features)
179 << dendl;
180 pending_map.persistent_features |= new_features;
181 }
182 if (min_mon_release > pending_map.min_mon_release) {
183 dout(1) << __func__ << " increasing min_mon_release to "
9f95a23c 184 << ceph::to_integer<int>(min_mon_release) << " (" << min_mon_release
11fdf7f2
TL
185 << ")" << dendl;
186 pending_map.min_mon_release = min_mon_release;
7c673cae
FG
187 }
188
7c673cae
FG
189 propose_pending();
190}
191
192void MonmapMonitor::on_active()
193{
194 if (get_last_committed() >= 1 && !mon->has_ever_joined) {
195 // make note of the fact that i was, once, part of the quorum.
196 dout(10) << "noting that i was, once, part of an active quorum." << dendl;
197
198 /* This is some form of nasty in-breeding we have between the MonmapMonitor
199 and the Monitor itself. We should find a way to get rid of it given our
200 new architecture. Until then, stick with it since we are a
201 single-threaded process and, truth be told, no one else relies on this
202 thing besides us.
203 */
204 auto t(std::make_shared<MonitorDBStore::Transaction>());
205 t->put(Monitor::MONITOR_NAME, "joined", 1);
206 mon->store->apply_transaction(t);
207 mon->has_ever_joined = true;
208 }
209
b32b8144
FG
210 if (mon->is_leader()) {
211 mon->clog->debug() << "monmap " << *mon->monmap;
212 }
7c673cae 213
11fdf7f2
TL
214 apply_mon_features(mon->get_quorum_mon_features(),
215 mon->quorum_min_mon_release);
7c673cae
FG
216}
217
218bool MonmapMonitor::preprocess_query(MonOpRequestRef op)
219{
9f95a23c 220 auto m = op->get_req<PaxosServiceMessage>();
7c673cae
FG
221 switch (m->get_type()) {
222 // READs
223 case MSG_MON_COMMAND:
f64942e4
AA
224 try {
225 return preprocess_command(op);
226 }
227 catch (const bad_cmd_get& e) {
228 bufferlist bl;
229 mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed());
230 return true;
231 }
7c673cae
FG
232 case MSG_MON_JOIN:
233 return preprocess_join(op);
234 default:
235 ceph_abort();
236 return true;
237 }
238}
239
240void MonmapMonitor::dump_info(Formatter *f)
241{
242 f->dump_unsigned("monmap_first_committed", get_first_committed());
243 f->dump_unsigned("monmap_last_committed", get_last_committed());
244 f->open_object_section("monmap");
245 mon->monmap->dump(f);
246 f->close_section();
247 f->open_array_section("quorum");
248 for (set<int>::iterator q = mon->get_quorum().begin(); q != mon->get_quorum().end(); ++q)
249 f->dump_int("mon", *q);
250 f->close_section();
251}
252
253bool MonmapMonitor::preprocess_command(MonOpRequestRef op)
254{
9f95a23c 255 auto m = op->get_req<MMonCommand>();
7c673cae
FG
256 int r = -1;
257 bufferlist rdata;
258 stringstream ss;
259
11fdf7f2 260 cmdmap_t cmdmap;
7c673cae
FG
261 if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) {
262 string rs = ss.str();
263 mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed());
264 return true;
265 }
266
267 string prefix;
9f95a23c 268 cmd_getval(cmdmap, "prefix", prefix);
7c673cae 269
11fdf7f2 270 MonSession *session = op->get_session();
7c673cae
FG
271 if (!session) {
272 mon->reply_command(op, -EACCES, "access denied", get_last_committed());
273 return true;
274 }
275
276 string format;
9f95a23c 277 cmd_getval(cmdmap, "format", format, string("plain"));
7c673cae
FG
278 boost::scoped_ptr<Formatter> f(Formatter::create(format));
279
280 if (prefix == "mon stat") {
281 mon->monmap->print_summary(ss);
224ce89b
WB
282 ss << ", election epoch " << mon->get_epoch() << ", leader "
283 << mon->get_leader() << " " << mon->get_leader_name()
284 << ", quorum " << mon->get_quorum() << " " << mon->get_quorum_names();
7c673cae
FG
285 rdata.append(ss);
286 ss.str("");
287 r = 0;
288
289 } else if (prefix == "mon getmap" ||
290 prefix == "mon dump") {
291
292 epoch_t epoch;
293 int64_t epochnum;
9f95a23c 294 cmd_getval(cmdmap, "epoch", epochnum, (int64_t)0);
7c673cae
FG
295 epoch = epochnum;
296
297 MonMap *p = mon->monmap;
298 if (epoch) {
299 bufferlist bl;
300 r = get_version(epoch, bl);
301 if (r == -ENOENT) {
302 ss << "there is no map for epoch " << epoch;
303 goto reply;
304 }
11fdf7f2
TL
305 ceph_assert(r == 0);
306 ceph_assert(bl.length() > 0);
7c673cae
FG
307 p = new MonMap;
308 p->decode(bl);
309 }
310
11fdf7f2 311 ceph_assert(p);
7c673cae
FG
312
313 if (prefix == "mon getmap") {
314 p->encode(rdata, m->get_connection()->get_features());
315 r = 0;
316 ss << "got monmap epoch " << p->get_epoch();
317 } else if (prefix == "mon dump") {
318 stringstream ds;
319 if (f) {
320 f->open_object_section("monmap");
321 p->dump(f.get());
322 f->open_array_section("quorum");
323 for (set<int>::iterator q = mon->get_quorum().begin();
324 q != mon->get_quorum().end(); ++q) {
325 f->dump_int("mon", *q);
326 }
327 f->close_section();
328 f->close_section();
329 f->flush(ds);
330 r = 0;
331 } else {
332 p->print(ds);
333 r = 0;
334 }
335 rdata.append(ds);
336 ss << "dumped monmap epoch " << p->get_epoch();
337 }
11fdf7f2 338 if (p != mon->monmap) {
7c673cae 339 delete p;
11fdf7f2
TL
340 p = nullptr;
341 }
7c673cae 342
224ce89b 343 } else if (prefix == "mon feature ls") {
7c673cae
FG
344
345 bool list_with_value = false;
346 string with_value;
9f95a23c 347 if (cmd_getval(cmdmap, "with_value", with_value) &&
7c673cae
FG
348 with_value == "--with-value") {
349 list_with_value = true;
350 }
351
352 MonMap *p = mon->monmap;
353
354 // list features
355 mon_feature_t supported = ceph::features::mon::get_supported();
356 mon_feature_t persistent = ceph::features::mon::get_persistent();
357 mon_feature_t required = p->get_required_features();
358
359 stringstream ds;
360 auto print_feature = [&](mon_feature_t& m_features, const char* m_str) {
361 if (f) {
362 if (list_with_value)
363 m_features.dump_with_value(f.get(), m_str);
364 else
365 m_features.dump(f.get(), m_str);
366 } else {
367 if (list_with_value)
368 m_features.print_with_value(ds);
369 else
370 m_features.print(ds);
371 }
372 };
373
374 if (f) {
375 f->open_object_section("features");
376
377 f->open_object_section("all");
378 print_feature(supported, "supported");
379 print_feature(persistent, "persistent");
380 f->close_section(); // all
381
382 f->open_object_section("monmap");
383 print_feature(p->persistent_features, "persistent");
384 print_feature(p->optional_features, "optional");
385 print_feature(required, "required");
386 f->close_section(); // monmap
387
388 f->close_section(); // features
389 f->flush(ds);
390
391 } else {
392 ds << "all features" << std::endl
393 << "\tsupported: ";
394 print_feature(supported, nullptr);
395 ds << std::endl
396 << "\tpersistent: ";
397 print_feature(persistent, nullptr);
398 ds << std::endl
399 << std::endl;
400
401 ds << "on current monmap (epoch "
402 << p->get_epoch() << ")" << std::endl
403 << "\tpersistent: ";
404 print_feature(p->persistent_features, nullptr);
405 ds << std::endl
406 // omit optional features in plain-text
407 // makes it easier to read, and they're, currently, empty.
408 << "\trequired: ";
409 print_feature(required, nullptr);
410 ds << std::endl;
411 }
412 rdata.append(ds);
413 r = 0;
414 }
415
416reply:
417 if (r != -1) {
418 string rs;
419 getline(ss, rs);
420
421 mon->reply_command(op, r, rs, rdata, get_last_committed());
422 return true;
423 } else
424 return false;
425}
426
427
428bool MonmapMonitor::prepare_update(MonOpRequestRef op)
429{
9f95a23c 430 auto m = op->get_req<PaxosServiceMessage>();
224ce89b 431 dout(7) << __func__ << " " << *m << " from " << m->get_orig_source_inst() << dendl;
7c673cae
FG
432
433 switch (m->get_type()) {
434 case MSG_MON_COMMAND:
f64942e4
AA
435 try {
436 return prepare_command(op);
11fdf7f2 437 } catch (const bad_cmd_get& e) {
f64942e4
AA
438 bufferlist bl;
439 mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed());
440 return true;
441 }
7c673cae
FG
442 case MSG_MON_JOIN:
443 return prepare_join(op);
444 default:
445 ceph_abort();
446 }
447
448 return false;
449}
450
451bool MonmapMonitor::prepare_command(MonOpRequestRef op)
452{
9f95a23c 453 auto m = op->get_req<MMonCommand>();
7c673cae
FG
454 stringstream ss;
455 string rs;
456 int err = -EINVAL;
457
11fdf7f2 458 cmdmap_t cmdmap;
7c673cae
FG
459 if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) {
460 string rs = ss.str();
461 mon->reply_command(op, -EINVAL, rs, get_last_committed());
462 return true;
463 }
464
465 string prefix;
9f95a23c 466 cmd_getval(cmdmap, "prefix", prefix);
7c673cae 467
11fdf7f2 468 MonSession *session = op->get_session();
7c673cae
FG
469 if (!session) {
470 mon->reply_command(op, -EACCES, "access denied", get_last_committed());
471 return true;
472 }
473
474 /* We should follow the following rules:
475 *
476 * - 'monmap' is the current, consistent version of the monmap
477 * - 'pending_map' is the uncommitted version of the monmap
478 *
479 * All checks for the current state must be made against 'monmap'.
480 * All changes are made against 'pending_map'.
481 *
482 * If there are concurrent operations modifying 'pending_map', please
483 * follow the following rules.
484 *
485 * - if pending_map has already been changed, the second operation must
486 * wait for the proposal to finish and be run again; This is the easiest
487 * path to guarantee correctness but may impact performance (i.e., it
488 * will take longer for the user to get a reply).
489 *
490 * - if the result of the second operation can be guaranteed to be
491 * idempotent, the operation may reply to the user once the proposal
492 * finishes; still needs to wait for the proposal to finish.
493 *
494 * - An operation _NEVER_ returns to the user based on pending state.
495 *
496 * If an operation does not modify current stable monmap, it may be
497 * serialized before current pending map, regardless of any change that
498 * has been made to the pending map -- remember, pending is uncommitted
499 * state, thus we are not bound by it.
500 */
501
11fdf7f2 502 ceph_assert(mon->monmap);
7c673cae
FG
503 MonMap &monmap = *mon->monmap;
504
505
506 /* Please note:
507 *
508 * Adding or removing monitors may lead to loss of quorum.
509 *
510 * Because quorum may be lost, it's important to reply something
511 * to the user, lest she end up waiting forever for a reply. And
512 * no reply will ever be sent until quorum is formed again.
513 *
514 * On the other hand, this means we're leaking uncommitted state
515 * to the user. As such, please be mindful of the reply message.
516 *
517 * e.g., 'adding monitor mon.foo' is okay ('adding' is an on-going
518 * operation and conveys its not-yet-permanent nature); whereas
519 * 'added monitor mon.foo' presumes the action has successfully
520 * completed and state has been committed, which may not be true.
521 */
522
523
524 bool propose = false;
525 if (prefix == "mon add") {
526 string name;
9f95a23c 527 cmd_getval(cmdmap, "name", name);
7c673cae 528 string addrstr;
9f95a23c 529 cmd_getval(cmdmap, "addr", addrstr);
7c673cae
FG
530 entity_addr_t addr;
531 bufferlist rdata;
532
533 if (!addr.parse(addrstr.c_str())) {
534 err = -EINVAL;
535 ss << "addr " << addrstr << "does not parse";
536 goto reply;
537 }
538
11fdf7f2
TL
539 entity_addrvec_t addrs;
540 if (monmap.persistent_features.contains_all(
541 ceph::features::mon::FEATURE_NAUTILUS)) {
542 if (addr.get_port() == CEPH_MON_PORT_IANA) {
543 addr.set_type(entity_addr_t::TYPE_MSGR2);
544 }
545 if (addr.get_port() == CEPH_MON_PORT_LEGACY) {
546 // if they specified the *old* default they probably don't care
547 addr.set_port(0);
548 }
549 if (addr.get_port()) {
550 addrs.v.push_back(addr);
551 } else {
552 addr.set_type(entity_addr_t::TYPE_MSGR2);
553 addr.set_port(CEPH_MON_PORT_IANA);
554 addrs.v.push_back(addr);
555 addr.set_type(entity_addr_t::TYPE_LEGACY);
556 addr.set_port(CEPH_MON_PORT_LEGACY);
557 addrs.v.push_back(addr);
558 }
559 } else {
560 if (addr.get_port() == 0) {
561 addr.set_port(CEPH_MON_PORT_LEGACY);
562 }
563 addr.set_type(entity_addr_t::TYPE_LEGACY);
564 addrs.v.push_back(addr);
7c673cae 565 }
11fdf7f2 566 dout(20) << __func__ << " addr " << addr << " -> addrs " << addrs << dendl;
7c673cae
FG
567
568 /**
569 * If we have a monitor with the same name and different addr, then EEXIST
570 * If we have a monitor with the same addr and different name, then EEXIST
571 * If we have a monitor with the same addr and same name, then wait for
572 * the proposal to finish and return success.
573 * If we don't have the monitor, add it.
574 */
575
576 err = 0;
577 if (!ss.str().empty())
578 ss << "; ";
579
580 do {
581 if (monmap.contains(name)) {
11fdf7f2 582 if (monmap.get_addrs(name) == addrs) {
7c673cae
FG
583 // stable map contains monitor with the same name at the same address.
584 // serialize before current pending map.
585 err = 0; // for clarity; this has already been set above.
11fdf7f2 586 ss << "mon." << name << " at " << addrs << " already exists";
7c673cae
FG
587 goto reply;
588 } else {
589 ss << "mon." << name
11fdf7f2 590 << " already exists at address " << monmap.get_addrs(name);
7c673cae 591 }
11fdf7f2 592 } else if (monmap.contains(addrs)) {
7c673cae 593 // we established on the previous branch that name is different
11fdf7f2 594 ss << "mon." << monmap.get_name(addrs)
7c673cae
FG
595 << " already exists at address " << addr;
596 } else {
597 // go ahead and add
598 break;
599 }
600 err = -EEXIST;
601 goto reply;
602 } while (false);
603
604 /* Given there's no delay between proposals on the MonmapMonitor (see
605 * MonmapMonitor::should_propose()), there is no point in checking for
606 * a mismatch between name and addr on pending_map.
607 *
608 * Once we established the monitor does not exist in the committed state,
609 * we can simply go ahead and add the monitor.
610 */
611
11fdf7f2 612 pending_map.add(name, addrs);
7c673cae 613 pending_map.last_changed = ceph_clock_now();
11fdf7f2 614 ss << "adding mon." << name << " at " << addrs;
7c673cae
FG
615 propose = true;
616 dout(0) << __func__ << " proposing new mon." << name << dendl;
617
618 } else if (prefix == "mon remove" ||
619 prefix == "mon rm") {
620 string name;
9f95a23c 621 cmd_getval(cmdmap, "name", name);
7c673cae
FG
622 if (!monmap.contains(name)) {
623 err = 0;
624 ss << "mon." << name << " does not exist or has already been removed";
625 goto reply;
626 }
627
628 if (monmap.size() == 1) {
629 err = -EINVAL;
630 ss << "error: refusing removal of last monitor " << name;
631 goto reply;
632 }
633
634 /* At the time of writing, there is no risk of races when multiple clients
635 * attempt to use the same name. The reason is simple but may not be
636 * obvious.
637 *
638 * In a nutshell, we do not collate proposals on the MonmapMonitor. As
639 * soon as we return 'true' below, PaxosService::dispatch() will check if
640 * the service should propose, and - if so - the service will be marked as
641 * 'proposing' and a proposal will be triggered. The PaxosService class
642 * guarantees that once a service is marked 'proposing' no further writes
643 * will be handled.
644 *
645 * The decision on whether the service should propose or not is, in this
646 * case, made by MonmapMonitor::should_propose(), which always considers
647 * the proposal delay being 0.0 seconds. This is key for PaxosService to
648 * trigger the proposal immediately.
649 * 0.0 seconds of delay.
650 *
651 * From the above, there's no point in performing further checks on the
652 * pending_map, as we don't ever have multiple proposals in-flight in
653 * this service. As we've established the committed state contains the
654 * monitor, we can simply go ahead and remove it.
655 *
656 * Please note that the code hinges on all of the above to be true. It
657 * has been true since time immemorial and we don't see a good reason
658 * to make it sturdier at this time - mainly because we don't think it's
659 * going to change any time soon, lest for any bug that may be unwillingly
660 * introduced.
661 */
662
11fdf7f2 663 entity_addrvec_t addrs = pending_map.get_addrs(name);
7c673cae
FG
664 pending_map.remove(name);
665 pending_map.last_changed = ceph_clock_now();
11fdf7f2 666 ss << "removing mon." << name << " at " << addrs
7c673cae
FG
667 << ", there will be " << pending_map.size() << " monitors" ;
668 propose = true;
669 err = 0;
670
671 } else if (prefix == "mon feature set") {
672
673 /* PLEASE NOTE:
674 *
675 * We currently only support setting/unsetting persistent features.
676 * This is by design, given at the moment we still don't have optional
677 * features, and, as such, there is no point introducing an interface
678 * to manipulate them. This allows us to provide a cleaner, more
679 * intuitive interface to the user, modifying solely persistent
680 * features.
681 *
682 * In the future we should consider adding another interface to handle
683 * optional features/flags; e.g., 'mon feature flag set/unset', or
684 * 'mon flag set/unset'.
685 */
686 string feature_name;
9f95a23c 687 if (!cmd_getval(cmdmap, "feature_name", feature_name)) {
7c673cae
FG
688 ss << "missing required feature name";
689 err = -EINVAL;
690 goto reply;
691 }
692
693 mon_feature_t feature;
694 feature = ceph::features::mon::get_feature_by_name(feature_name);
695 if (feature == ceph::features::mon::FEATURE_NONE) {
696 ss << "unknown feature '" << feature_name << "'";
697 err = -ENOENT;
698 goto reply;
699 }
700
11fdf7f2 701 bool sure = false;
9f95a23c 702 cmd_getval(cmdmap, "yes_i_really_mean_it", sure);
11fdf7f2 703 if (!sure) {
7c673cae
FG
704 ss << "please specify '--yes-i-really-mean-it' if you "
705 << "really, **really** want to set feature '"
706 << feature << "' in the monmap.";
707 err = -EPERM;
708 goto reply;
709 }
710
711 if (!mon->get_quorum_mon_features().contains_all(feature)) {
712 ss << "current quorum does not support feature '" << feature
713 << "'; supported features: "
714 << mon->get_quorum_mon_features();
715 err = -EINVAL;
716 goto reply;
717 }
718
719 ss << "setting feature '" << feature << "'";
720
721 err = 0;
722 if (monmap.persistent_features.contains_all(feature)) {
723 dout(10) << __func__ << " feature '" << feature
724 << "' already set on monmap; no-op." << dendl;
725 goto reply;
726 }
727
728 pending_map.persistent_features.set_feature(feature);
729 pending_map.last_changed = ceph_clock_now();
730 propose = true;
731
11fdf7f2 732 dout(1) << __func__ << " " << ss.str() << "; new features will be: "
7c673cae
FG
733 << "persistent = " << pending_map.persistent_features
734 // output optional nevertheless, for auditing purposes.
735 << ", optional = " << pending_map.optional_features << dendl;
11fdf7f2
TL
736
737 } else if (prefix == "mon set-rank") {
738 string name;
739 int64_t rank;
9f95a23c
TL
740 if (!cmd_getval(cmdmap, "name", name) ||
741 !cmd_getval(cmdmap, "rank", rank)) {
11fdf7f2
TL
742 err = -EINVAL;
743 goto reply;
744 }
745 int oldrank = pending_map.get_rank(name);
746 if (oldrank < 0) {
747 ss << "mon." << name << " does not exist in monmap";
748 err = -ENOENT;
749 goto reply;
750 }
751 err = 0;
752 pending_map.set_rank(name, rank);
753 pending_map.last_changed = ceph_clock_now();
754 propose = true;
755 } else if (prefix == "mon set-addrs") {
756 string name;
757 string addrs;
9f95a23c
TL
758 if (!cmd_getval(cmdmap, "name", name) ||
759 !cmd_getval(cmdmap, "addrs", addrs)) {
11fdf7f2
TL
760 err = -EINVAL;
761 goto reply;
762 }
763 if (!pending_map.contains(name)) {
764 ss << "mon." << name << " does not exist";
765 err = -ENOENT;
766 goto reply;
767 }
768 entity_addrvec_t av;
769 if (!av.parse(addrs.c_str(), nullptr)) {
770 ss << "failed to parse addrs '" << addrs << "'";
771 err = -EINVAL;
772 goto reply;
773 }
774 for (auto& a : av.v) {
775 a.set_nonce(0);
776 if (!a.get_port()) {
777 ss << "monitor must bind to a non-zero port, not " << a;
778 err = -EINVAL;
779 goto reply;
780 }
781 }
782 err = 0;
783 pending_map.set_addrvec(name, av);
784 pending_map.last_changed = ceph_clock_now();
785 propose = true;
9f95a23c
TL
786 } else if (prefix == "mon set-weight") {
787 string name;
788 int64_t weight;
789 if (!cmd_getval(cmdmap, "name", name) ||
790 !cmd_getval(cmdmap, "weight", weight)) {
791 err = -EINVAL;
792 goto reply;
793 }
794 if (!pending_map.contains(name)) {
795 ss << "mon." << name << " does not exist";
796 err = -ENOENT;
797 goto reply;
798 }
799 err = 0;
800 pending_map.set_weight(name, weight);
801 pending_map.last_changed = ceph_clock_now();
802 propose = true;
11fdf7f2
TL
803 } else if (prefix == "mon enable-msgr2") {
804 if (!monmap.get_required_features().contains_all(
805 ceph::features::mon::FEATURE_NAUTILUS)) {
806 err = -EACCES;
807 ss << "all monitors must be running nautilus to enable v2";
808 goto reply;
809 }
810 for (auto& i : pending_map.mon_info) {
811 if (i.second.public_addrs.v.size() == 1 &&
812 i.second.public_addrs.front().is_legacy() &&
813 i.second.public_addrs.front().get_port() == CEPH_MON_PORT_LEGACY) {
814 entity_addrvec_t av;
815 entity_addr_t a = i.second.public_addrs.front();
816 a.set_type(entity_addr_t::TYPE_MSGR2);
817 a.set_port(CEPH_MON_PORT_IANA);
818 av.v.push_back(a);
819 av.v.push_back(i.second.public_addrs.front());
820 dout(10) << " setting mon." << i.first
821 << " addrs " << i.second.public_addrs
822 << " -> " << av << dendl;
823 pending_map.set_addrvec(i.first, av);
824 propose = true;
825 pending_map.last_changed = ceph_clock_now();
826 }
827 }
828 err = 0;
7c673cae
FG
829 } else {
830 ss << "unknown command " << prefix;
831 err = -EINVAL;
832 }
833
834reply:
835 getline(ss, rs);
836 mon->reply_command(op, err, rs, get_last_committed());
837 // we are returning to the user; do not propose.
838 return propose;
839}
840
841bool MonmapMonitor::preprocess_join(MonOpRequestRef op)
842{
9f95a23c 843 auto join = op->get_req<MMonJoin>();
11fdf7f2 844 dout(10) << __func__ << " " << join->name << " at " << join->addrs << dendl;
7c673cae 845
11fdf7f2 846 MonSession *session = op->get_session();
7c673cae
FG
847 if (!session ||
848 !session->is_capable("mon", MON_CAP_W | MON_CAP_X)) {
849 dout(10) << " insufficient caps" << dendl;
850 return true;
851 }
852
11fdf7f2
TL
853 if (pending_map.contains(join->name) &&
854 !pending_map.get_addrs(join->name).front().is_blank_ip()) {
7c673cae
FG
855 dout(10) << " already have " << join->name << dendl;
856 return true;
857 }
11fdf7f2
TL
858 if (pending_map.contains(join->addrs) &&
859 pending_map.get_name(join->addrs) == join->name) {
860 dout(10) << " already have " << join->addrs << dendl;
7c673cae
FG
861 return true;
862 }
863 return false;
864}
865bool MonmapMonitor::prepare_join(MonOpRequestRef op)
866{
9f95a23c 867 auto join = op->get_req<MMonJoin>();
11fdf7f2
TL
868 dout(0) << "adding/updating " << join->name
869 << " at " << join->addrs << " to monitor cluster" << dendl;
7c673cae
FG
870 if (pending_map.contains(join->name))
871 pending_map.remove(join->name);
11fdf7f2
TL
872 if (pending_map.contains(join->addrs))
873 pending_map.remove(pending_map.get_name(join->addrs));
874 pending_map.add(join->name, join->addrs);
7c673cae
FG
875 pending_map.last_changed = ceph_clock_now();
876 return true;
877}
878
879bool MonmapMonitor::should_propose(double& delay)
880{
881 delay = 0.0;
882 return true;
883}
884
7c673cae
FG
885int MonmapMonitor::get_monmap(bufferlist &bl)
886{
887 version_t latest_ver = get_last_committed();
888 dout(10) << __func__ << " ver " << latest_ver << dendl;
889
890 if (!mon->store->exists(get_service_name(), stringify(latest_ver)))
891 return -ENOENT;
892
893 int err = get_version(latest_ver, bl);
894 if (err < 0) {
895 dout(1) << __func__ << " error obtaining monmap: "
896 << cpp_strerror(err) << dendl;
897 return err;
898 }
899 return 0;
900}
901
902void MonmapMonitor::check_subs()
903{
904 const string type = "monmap";
905 mon->with_session_map([this, &type](const MonSessionMap& session_map) {
906 auto subs = session_map.subs.find(type);
907 if (subs == session_map.subs.end())
908 return;
909 for (auto sub : *subs->second) {
910 check_sub(sub);
911 }
912 });
913}
914
915void MonmapMonitor::check_sub(Subscription *sub)
916{
917 const auto epoch = mon->monmap->get_epoch();
918 dout(10) << __func__
919 << " monmap next " << sub->next
920 << " have " << epoch << dendl;
921 if (sub->next <= epoch) {
922 mon->send_latest_monmap(sub->session->con.get());
923 if (sub->onetime) {
11fdf7f2 924 mon->with_session_map([sub](MonSessionMap& session_map) {
7c673cae
FG
925 session_map.remove_sub(sub);
926 });
927 } else {
928 sub->next = epoch + 1;
929 }
930 }
931}
11fdf7f2
TL
932
933void MonmapMonitor::tick()
934{
935 if (!is_active() ||
936 !mon->is_leader()) {
937 return;
938 }
939
940 if (mon->monmap->created.is_zero()) {
941 dout(10) << __func__ << " detected empty created stamp" << dendl;
942 utime_t ctime;
943 for (version_t v = 1; v <= get_last_committed(); v++) {
944 bufferlist bl;
945 int r = get_version(v, bl);
946 if (r < 0) {
947 continue;
948 }
949 MonMap m;
950 auto p = bl.cbegin();
951 decode(m, p);
952 if (!m.last_changed.is_zero()) {
953 dout(10) << __func__ << " first monmap with last_changed is "
954 << v << " with " << m.last_changed << dendl;
955 ctime = m.last_changed;
956 break;
957 }
958 }
959 if (ctime.is_zero()) {
960 ctime = ceph_clock_now();
961 }
962 dout(10) << __func__ << " updating created stamp to " << ctime << dendl;
963 pending_map.created = ctime;
964 propose_pending();
965 }
966}