]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2009 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "MonmapMonitor.h" | |
16 | #include "Monitor.h" | |
17 | #include "messages/MMonCommand.h" | |
18 | #include "messages/MMonJoin.h" | |
19 | ||
20 | #include "common/ceph_argparse.h" | |
21 | #include "common/errno.h" | |
22 | #include <sstream> | |
23 | #include "common/config.h" | |
24 | #include "common/cmdparse.h" | |
25 | ||
26 | #include "include/assert.h" | |
27 | #include "include/stringify.h" | |
28 | ||
29 | #define dout_subsys ceph_subsys_mon | |
30 | #undef dout_prefix | |
31 | #define dout_prefix _prefix(_dout, mon) | |
32 | static ostream& _prefix(std::ostream *_dout, Monitor *mon) { | |
33 | return *_dout << "mon." << mon->name << "@" << mon->rank | |
34 | << "(" << mon->get_state_name() | |
35 | << ").monmap v" << mon->monmap->epoch << " "; | |
36 | } | |
37 | ||
38 | void MonmapMonitor::create_initial() | |
39 | { | |
224ce89b | 40 | dout(10) << __func__ << " using current monmap" << dendl; |
7c673cae FG |
41 | pending_map = *mon->monmap; |
42 | pending_map.epoch = 1; | |
31f18b77 FG |
43 | |
44 | if (g_conf->mon_debug_no_initial_persistent_features) { | |
45 | derr << __func__ << " mon_debug_no_initial_persistent_features=true" | |
46 | << dendl; | |
47 | } else { | |
48 | // initialize with default persistent features for new clusters | |
49 | pending_map.persistent_features = ceph::features::mon::get_persistent(); | |
50 | } | |
7c673cae FG |
51 | } |
52 | ||
53 | void MonmapMonitor::update_from_paxos(bool *need_bootstrap) | |
54 | { | |
55 | version_t version = get_last_committed(); | |
56 | if (version <= mon->monmap->get_epoch()) | |
57 | return; | |
58 | ||
59 | dout(10) << __func__ << " version " << version | |
60 | << ", my v " << mon->monmap->epoch << dendl; | |
61 | ||
62 | if (need_bootstrap && version != mon->monmap->get_epoch()) { | |
63 | dout(10) << " signaling that we need a bootstrap" << dendl; | |
64 | *need_bootstrap = true; | |
65 | } | |
66 | ||
67 | // read and decode | |
68 | monmap_bl.clear(); | |
69 | int ret = get_version(version, monmap_bl); | |
70 | assert(ret == 0); | |
71 | assert(monmap_bl.length()); | |
72 | ||
224ce89b | 73 | dout(10) << __func__ << " got " << version << dendl; |
7c673cae FG |
74 | mon->monmap->decode(monmap_bl); |
75 | ||
76 | if (mon->store->exists("mkfs", "monmap")) { | |
77 | auto t(std::make_shared<MonitorDBStore::Transaction>()); | |
78 | t->erase("mkfs", "monmap"); | |
79 | mon->store->apply_transaction(t); | |
80 | } | |
81 | ||
82 | check_subs(); | |
83 | } | |
84 | ||
85 | void MonmapMonitor::create_pending() | |
86 | { | |
87 | pending_map = *mon->monmap; | |
88 | pending_map.epoch++; | |
89 | pending_map.last_changed = ceph_clock_now(); | |
224ce89b | 90 | dout(10) << __func__ << " monmap epoch " << pending_map.epoch << dendl; |
7c673cae FG |
91 | } |
92 | ||
93 | void MonmapMonitor::encode_pending(MonitorDBStore::TransactionRef t) | |
94 | { | |
224ce89b | 95 | dout(10) << __func__ << " epoch " << pending_map.epoch << dendl; |
7c673cae FG |
96 | |
97 | assert(mon->monmap->epoch + 1 == pending_map.epoch || | |
98 | pending_map.epoch == 1); // special case mkfs! | |
99 | bufferlist bl; | |
100 | pending_map.encode(bl, mon->get_quorum_con_features()); | |
101 | ||
102 | put_version(t, pending_map.epoch, bl); | |
103 | put_last_committed(t, pending_map.epoch); | |
104 | ||
105 | // generate a cluster fingerprint, too? | |
106 | if (pending_map.epoch == 1) { | |
107 | mon->prepare_new_fingerprint(t); | |
108 | } | |
109 | } | |
110 | ||
111 | class C_ApplyFeatures : public Context { | |
112 | MonmapMonitor *svc; | |
113 | mon_feature_t features; | |
114 | public: | |
115 | C_ApplyFeatures(MonmapMonitor *s, const mon_feature_t& f) : | |
116 | svc(s), features(f) { } | |
117 | void finish(int r) override { | |
118 | if (r >= 0) { | |
119 | svc->apply_mon_features(features); | |
120 | } else if (r == -EAGAIN || r == -ECANCELED) { | |
121 | // discard features if we're no longer on the quorum that | |
122 | // established them in the first place. | |
123 | return; | |
124 | } else { | |
125 | assert(0 == "bad C_ApplyFeatures return value"); | |
126 | } | |
127 | } | |
128 | }; | |
129 | ||
130 | void MonmapMonitor::apply_mon_features(const mon_feature_t& features) | |
131 | { | |
132 | if (!is_writeable()) { | |
133 | dout(5) << __func__ << " wait for service to be writeable" << dendl; | |
134 | wait_for_writeable_ctx(new C_ApplyFeatures(this, features)); | |
135 | return; | |
136 | } | |
137 | ||
138 | assert(is_writeable()); | |
139 | assert(features.contains_all(pending_map.persistent_features)); | |
140 | // we should never hit this because `features` should be the result | |
141 | // of the quorum's supported features. But if it happens, die. | |
142 | assert(ceph::features::mon::get_supported().contains_all(features)); | |
143 | ||
144 | mon_feature_t new_features = | |
145 | (pending_map.persistent_features ^ | |
146 | (features & ceph::features::mon::get_persistent())); | |
147 | ||
148 | if (new_features.empty()) { | |
149 | dout(10) << __func__ << " features match current pending: " | |
150 | << features << dendl; | |
151 | return; | |
152 | } | |
153 | ||
154 | if (mon->get_quorum().size() < mon->monmap->size()) { | |
155 | dout(1) << __func__ << " new features " << new_features | |
156 | << " contains features that require a full quorum" | |
157 | << " (quorum size is " << mon->get_quorum().size() | |
158 | << ", requires " << mon->monmap->size() << "): " | |
159 | << new_features | |
160 | << " -- do not enable them!" << dendl; | |
161 | return; | |
162 | } | |
163 | ||
164 | new_features |= pending_map.persistent_features; | |
165 | ||
166 | dout(5) << __func__ << " applying new features to monmap;" | |
167 | << " had " << pending_map.persistent_features | |
168 | << ", will have " << new_features << dendl; | |
169 | pending_map.persistent_features = new_features; | |
170 | propose_pending(); | |
171 | } | |
172 | ||
173 | void MonmapMonitor::on_active() | |
174 | { | |
175 | if (get_last_committed() >= 1 && !mon->has_ever_joined) { | |
176 | // make note of the fact that i was, once, part of the quorum. | |
177 | dout(10) << "noting that i was, once, part of an active quorum." << dendl; | |
178 | ||
179 | /* This is some form of nasty in-breeding we have between the MonmapMonitor | |
180 | and the Monitor itself. We should find a way to get rid of it given our | |
181 | new architecture. Until then, stick with it since we are a | |
182 | single-threaded process and, truth be told, no one else relies on this | |
183 | thing besides us. | |
184 | */ | |
185 | auto t(std::make_shared<MonitorDBStore::Transaction>()); | |
186 | t->put(Monitor::MONITOR_NAME, "joined", 1); | |
187 | mon->store->apply_transaction(t); | |
188 | mon->has_ever_joined = true; | |
189 | } | |
190 | ||
191 | if (mon->is_leader()) | |
192 | mon->clog->info() << "monmap " << *mon->monmap; | |
193 | ||
194 | apply_mon_features(mon->get_quorum_mon_features()); | |
195 | } | |
196 | ||
197 | bool MonmapMonitor::preprocess_query(MonOpRequestRef op) | |
198 | { | |
199 | PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req()); | |
200 | switch (m->get_type()) { | |
201 | // READs | |
202 | case MSG_MON_COMMAND: | |
203 | return preprocess_command(op); | |
204 | case MSG_MON_JOIN: | |
205 | return preprocess_join(op); | |
206 | default: | |
207 | ceph_abort(); | |
208 | return true; | |
209 | } | |
210 | } | |
211 | ||
212 | void MonmapMonitor::dump_info(Formatter *f) | |
213 | { | |
214 | f->dump_unsigned("monmap_first_committed", get_first_committed()); | |
215 | f->dump_unsigned("monmap_last_committed", get_last_committed()); | |
216 | f->open_object_section("monmap"); | |
217 | mon->monmap->dump(f); | |
218 | f->close_section(); | |
219 | f->open_array_section("quorum"); | |
220 | for (set<int>::iterator q = mon->get_quorum().begin(); q != mon->get_quorum().end(); ++q) | |
221 | f->dump_int("mon", *q); | |
222 | f->close_section(); | |
223 | } | |
224 | ||
225 | bool MonmapMonitor::preprocess_command(MonOpRequestRef op) | |
226 | { | |
227 | MMonCommand *m = static_cast<MMonCommand*>(op->get_req()); | |
228 | int r = -1; | |
229 | bufferlist rdata; | |
230 | stringstream ss; | |
231 | ||
232 | map<string, cmd_vartype> cmdmap; | |
233 | if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { | |
234 | string rs = ss.str(); | |
235 | mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed()); | |
236 | return true; | |
237 | } | |
238 | ||
239 | string prefix; | |
240 | cmd_getval(g_ceph_context, cmdmap, "prefix", prefix); | |
241 | ||
242 | MonSession *session = m->get_session(); | |
243 | if (!session) { | |
244 | mon->reply_command(op, -EACCES, "access denied", get_last_committed()); | |
245 | return true; | |
246 | } | |
247 | ||
248 | string format; | |
249 | cmd_getval(g_ceph_context, cmdmap, "format", format, string("plain")); | |
250 | boost::scoped_ptr<Formatter> f(Formatter::create(format)); | |
251 | ||
252 | if (prefix == "mon stat") { | |
253 | mon->monmap->print_summary(ss); | |
224ce89b WB |
254 | ss << ", election epoch " << mon->get_epoch() << ", leader " |
255 | << mon->get_leader() << " " << mon->get_leader_name() | |
256 | << ", quorum " << mon->get_quorum() << " " << mon->get_quorum_names(); | |
7c673cae FG |
257 | rdata.append(ss); |
258 | ss.str(""); | |
259 | r = 0; | |
260 | ||
261 | } else if (prefix == "mon getmap" || | |
262 | prefix == "mon dump") { | |
263 | ||
264 | epoch_t epoch; | |
265 | int64_t epochnum; | |
266 | cmd_getval(g_ceph_context, cmdmap, "epoch", epochnum, (int64_t)0); | |
267 | epoch = epochnum; | |
268 | ||
269 | MonMap *p = mon->monmap; | |
270 | if (epoch) { | |
271 | bufferlist bl; | |
272 | r = get_version(epoch, bl); | |
273 | if (r == -ENOENT) { | |
274 | ss << "there is no map for epoch " << epoch; | |
275 | goto reply; | |
276 | } | |
277 | assert(r == 0); | |
278 | assert(bl.length() > 0); | |
279 | p = new MonMap; | |
280 | p->decode(bl); | |
281 | } | |
282 | ||
283 | assert(p != NULL); | |
284 | ||
285 | if (prefix == "mon getmap") { | |
286 | p->encode(rdata, m->get_connection()->get_features()); | |
287 | r = 0; | |
288 | ss << "got monmap epoch " << p->get_epoch(); | |
289 | } else if (prefix == "mon dump") { | |
290 | stringstream ds; | |
291 | if (f) { | |
292 | f->open_object_section("monmap"); | |
293 | p->dump(f.get()); | |
294 | f->open_array_section("quorum"); | |
295 | for (set<int>::iterator q = mon->get_quorum().begin(); | |
296 | q != mon->get_quorum().end(); ++q) { | |
297 | f->dump_int("mon", *q); | |
298 | } | |
299 | f->close_section(); | |
300 | f->close_section(); | |
301 | f->flush(ds); | |
302 | r = 0; | |
303 | } else { | |
304 | p->print(ds); | |
305 | r = 0; | |
306 | } | |
307 | rdata.append(ds); | |
308 | ss << "dumped monmap epoch " << p->get_epoch(); | |
309 | } | |
310 | if (p != mon->monmap) | |
311 | delete p; | |
312 | ||
224ce89b | 313 | } else if (prefix == "mon feature ls") { |
7c673cae FG |
314 | |
315 | bool list_with_value = false; | |
316 | string with_value; | |
317 | if (cmd_getval(g_ceph_context, cmdmap, "with_value", with_value) && | |
318 | with_value == "--with-value") { | |
319 | list_with_value = true; | |
320 | } | |
321 | ||
322 | MonMap *p = mon->monmap; | |
323 | ||
324 | // list features | |
325 | mon_feature_t supported = ceph::features::mon::get_supported(); | |
326 | mon_feature_t persistent = ceph::features::mon::get_persistent(); | |
327 | mon_feature_t required = p->get_required_features(); | |
328 | ||
329 | stringstream ds; | |
330 | auto print_feature = [&](mon_feature_t& m_features, const char* m_str) { | |
331 | if (f) { | |
332 | if (list_with_value) | |
333 | m_features.dump_with_value(f.get(), m_str); | |
334 | else | |
335 | m_features.dump(f.get(), m_str); | |
336 | } else { | |
337 | if (list_with_value) | |
338 | m_features.print_with_value(ds); | |
339 | else | |
340 | m_features.print(ds); | |
341 | } | |
342 | }; | |
343 | ||
344 | if (f) { | |
345 | f->open_object_section("features"); | |
346 | ||
347 | f->open_object_section("all"); | |
348 | print_feature(supported, "supported"); | |
349 | print_feature(persistent, "persistent"); | |
350 | f->close_section(); // all | |
351 | ||
352 | f->open_object_section("monmap"); | |
353 | print_feature(p->persistent_features, "persistent"); | |
354 | print_feature(p->optional_features, "optional"); | |
355 | print_feature(required, "required"); | |
356 | f->close_section(); // monmap | |
357 | ||
358 | f->close_section(); // features | |
359 | f->flush(ds); | |
360 | ||
361 | } else { | |
362 | ds << "all features" << std::endl | |
363 | << "\tsupported: "; | |
364 | print_feature(supported, nullptr); | |
365 | ds << std::endl | |
366 | << "\tpersistent: "; | |
367 | print_feature(persistent, nullptr); | |
368 | ds << std::endl | |
369 | << std::endl; | |
370 | ||
371 | ds << "on current monmap (epoch " | |
372 | << p->get_epoch() << ")" << std::endl | |
373 | << "\tpersistent: "; | |
374 | print_feature(p->persistent_features, nullptr); | |
375 | ds << std::endl | |
376 | // omit optional features in plain-text | |
377 | // makes it easier to read, and they're, currently, empty. | |
378 | << "\trequired: "; | |
379 | print_feature(required, nullptr); | |
380 | ds << std::endl; | |
381 | } | |
382 | rdata.append(ds); | |
383 | r = 0; | |
384 | } | |
385 | ||
386 | reply: | |
387 | if (r != -1) { | |
388 | string rs; | |
389 | getline(ss, rs); | |
390 | ||
391 | mon->reply_command(op, r, rs, rdata, get_last_committed()); | |
392 | return true; | |
393 | } else | |
394 | return false; | |
395 | } | |
396 | ||
397 | ||
398 | bool MonmapMonitor::prepare_update(MonOpRequestRef op) | |
399 | { | |
400 | PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req()); | |
224ce89b | 401 | dout(7) << __func__ << " " << *m << " from " << m->get_orig_source_inst() << dendl; |
7c673cae FG |
402 | |
403 | switch (m->get_type()) { | |
404 | case MSG_MON_COMMAND: | |
405 | return prepare_command(op); | |
406 | case MSG_MON_JOIN: | |
407 | return prepare_join(op); | |
408 | default: | |
409 | ceph_abort(); | |
410 | } | |
411 | ||
412 | return false; | |
413 | } | |
414 | ||
415 | bool MonmapMonitor::prepare_command(MonOpRequestRef op) | |
416 | { | |
417 | MMonCommand *m = static_cast<MMonCommand*>(op->get_req()); | |
418 | stringstream ss; | |
419 | string rs; | |
420 | int err = -EINVAL; | |
421 | ||
422 | map<string, cmd_vartype> cmdmap; | |
423 | if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { | |
424 | string rs = ss.str(); | |
425 | mon->reply_command(op, -EINVAL, rs, get_last_committed()); | |
426 | return true; | |
427 | } | |
428 | ||
429 | string prefix; | |
430 | cmd_getval(g_ceph_context, cmdmap, "prefix", prefix); | |
431 | ||
432 | MonSession *session = m->get_session(); | |
433 | if (!session) { | |
434 | mon->reply_command(op, -EACCES, "access denied", get_last_committed()); | |
435 | return true; | |
436 | } | |
437 | ||
438 | /* We should follow the following rules: | |
439 | * | |
440 | * - 'monmap' is the current, consistent version of the monmap | |
441 | * - 'pending_map' is the uncommitted version of the monmap | |
442 | * | |
443 | * All checks for the current state must be made against 'monmap'. | |
444 | * All changes are made against 'pending_map'. | |
445 | * | |
446 | * If there are concurrent operations modifying 'pending_map', please | |
447 | * follow the following rules. | |
448 | * | |
449 | * - if pending_map has already been changed, the second operation must | |
450 | * wait for the proposal to finish and be run again; This is the easiest | |
451 | * path to guarantee correctness but may impact performance (i.e., it | |
452 | * will take longer for the user to get a reply). | |
453 | * | |
454 | * - if the result of the second operation can be guaranteed to be | |
455 | * idempotent, the operation may reply to the user once the proposal | |
456 | * finishes; still needs to wait for the proposal to finish. | |
457 | * | |
458 | * - An operation _NEVER_ returns to the user based on pending state. | |
459 | * | |
460 | * If an operation does not modify current stable monmap, it may be | |
461 | * serialized before current pending map, regardless of any change that | |
462 | * has been made to the pending map -- remember, pending is uncommitted | |
463 | * state, thus we are not bound by it. | |
464 | */ | |
465 | ||
466 | assert(mon->monmap); | |
467 | MonMap &monmap = *mon->monmap; | |
468 | ||
469 | ||
470 | /* Please note: | |
471 | * | |
472 | * Adding or removing monitors may lead to loss of quorum. | |
473 | * | |
474 | * Because quorum may be lost, it's important to reply something | |
475 | * to the user, lest she end up waiting forever for a reply. And | |
476 | * no reply will ever be sent until quorum is formed again. | |
477 | * | |
478 | * On the other hand, this means we're leaking uncommitted state | |
479 | * to the user. As such, please be mindful of the reply message. | |
480 | * | |
481 | * e.g., 'adding monitor mon.foo' is okay ('adding' is an on-going | |
482 | * operation and conveys its not-yet-permanent nature); whereas | |
483 | * 'added monitor mon.foo' presumes the action has successfully | |
484 | * completed and state has been committed, which may not be true. | |
485 | */ | |
486 | ||
487 | ||
488 | bool propose = false; | |
489 | if (prefix == "mon add") { | |
490 | string name; | |
491 | cmd_getval(g_ceph_context, cmdmap, "name", name); | |
492 | string addrstr; | |
493 | cmd_getval(g_ceph_context, cmdmap, "addr", addrstr); | |
494 | entity_addr_t addr; | |
495 | bufferlist rdata; | |
496 | ||
497 | if (!addr.parse(addrstr.c_str())) { | |
498 | err = -EINVAL; | |
499 | ss << "addr " << addrstr << "does not parse"; | |
500 | goto reply; | |
501 | } | |
502 | ||
503 | if (addr.get_port() == 0) { | |
504 | ss << "port defaulted to " << CEPH_MON_PORT; | |
505 | addr.set_port(CEPH_MON_PORT); | |
506 | } | |
507 | ||
508 | /** | |
509 | * If we have a monitor with the same name and different addr, then EEXIST | |
510 | * If we have a monitor with the same addr and different name, then EEXIST | |
511 | * If we have a monitor with the same addr and same name, then wait for | |
512 | * the proposal to finish and return success. | |
513 | * If we don't have the monitor, add it. | |
514 | */ | |
515 | ||
516 | err = 0; | |
517 | if (!ss.str().empty()) | |
518 | ss << "; "; | |
519 | ||
520 | do { | |
521 | if (monmap.contains(name)) { | |
522 | if (monmap.get_addr(name) == addr) { | |
523 | // stable map contains monitor with the same name at the same address. | |
524 | // serialize before current pending map. | |
525 | err = 0; // for clarity; this has already been set above. | |
526 | ss << "mon." << name << " at " << addr << " already exists"; | |
527 | goto reply; | |
528 | } else { | |
529 | ss << "mon." << name | |
530 | << " already exists at address " << monmap.get_addr(name); | |
531 | } | |
532 | } else if (monmap.contains(addr)) { | |
533 | // we established on the previous branch that name is different | |
534 | ss << "mon." << monmap.get_name(addr) | |
535 | << " already exists at address " << addr; | |
536 | } else { | |
537 | // go ahead and add | |
538 | break; | |
539 | } | |
540 | err = -EEXIST; | |
541 | goto reply; | |
542 | } while (false); | |
543 | ||
544 | /* Given there's no delay between proposals on the MonmapMonitor (see | |
545 | * MonmapMonitor::should_propose()), there is no point in checking for | |
546 | * a mismatch between name and addr on pending_map. | |
547 | * | |
548 | * Once we established the monitor does not exist in the committed state, | |
549 | * we can simply go ahead and add the monitor. | |
550 | */ | |
551 | ||
552 | pending_map.add(name, addr); | |
553 | pending_map.last_changed = ceph_clock_now(); | |
554 | ss << "adding mon." << name << " at " << addr; | |
555 | propose = true; | |
556 | dout(0) << __func__ << " proposing new mon." << name << dendl; | |
557 | ||
558 | } else if (prefix == "mon remove" || | |
559 | prefix == "mon rm") { | |
560 | string name; | |
561 | cmd_getval(g_ceph_context, cmdmap, "name", name); | |
562 | if (!monmap.contains(name)) { | |
563 | err = 0; | |
564 | ss << "mon." << name << " does not exist or has already been removed"; | |
565 | goto reply; | |
566 | } | |
567 | ||
568 | if (monmap.size() == 1) { | |
569 | err = -EINVAL; | |
570 | ss << "error: refusing removal of last monitor " << name; | |
571 | goto reply; | |
572 | } | |
573 | ||
574 | /* At the time of writing, there is no risk of races when multiple clients | |
575 | * attempt to use the same name. The reason is simple but may not be | |
576 | * obvious. | |
577 | * | |
578 | * In a nutshell, we do not collate proposals on the MonmapMonitor. As | |
579 | * soon as we return 'true' below, PaxosService::dispatch() will check if | |
580 | * the service should propose, and - if so - the service will be marked as | |
581 | * 'proposing' and a proposal will be triggered. The PaxosService class | |
582 | * guarantees that once a service is marked 'proposing' no further writes | |
583 | * will be handled. | |
584 | * | |
585 | * The decision on whether the service should propose or not is, in this | |
586 | * case, made by MonmapMonitor::should_propose(), which always considers | |
587 | * the proposal delay being 0.0 seconds. This is key for PaxosService to | |
588 | * trigger the proposal immediately. | |
589 | * 0.0 seconds of delay. | |
590 | * | |
591 | * From the above, there's no point in performing further checks on the | |
592 | * pending_map, as we don't ever have multiple proposals in-flight in | |
593 | * this service. As we've established the committed state contains the | |
594 | * monitor, we can simply go ahead and remove it. | |
595 | * | |
596 | * Please note that the code hinges on all of the above to be true. It | |
597 | * has been true since time immemorial and we don't see a good reason | |
598 | * to make it sturdier at this time - mainly because we don't think it's | |
599 | * going to change any time soon, lest for any bug that may be unwillingly | |
600 | * introduced. | |
601 | */ | |
602 | ||
603 | entity_addr_t addr = pending_map.get_addr(name); | |
604 | pending_map.remove(name); | |
605 | pending_map.last_changed = ceph_clock_now(); | |
606 | ss << "removing mon." << name << " at " << addr | |
607 | << ", there will be " << pending_map.size() << " monitors" ; | |
608 | propose = true; | |
609 | err = 0; | |
610 | ||
611 | } else if (prefix == "mon feature set") { | |
612 | ||
613 | /* PLEASE NOTE: | |
614 | * | |
615 | * We currently only support setting/unsetting persistent features. | |
616 | * This is by design, given at the moment we still don't have optional | |
617 | * features, and, as such, there is no point introducing an interface | |
618 | * to manipulate them. This allows us to provide a cleaner, more | |
619 | * intuitive interface to the user, modifying solely persistent | |
620 | * features. | |
621 | * | |
622 | * In the future we should consider adding another interface to handle | |
623 | * optional features/flags; e.g., 'mon feature flag set/unset', or | |
624 | * 'mon flag set/unset'. | |
625 | */ | |
626 | string feature_name; | |
627 | if (!cmd_getval(g_ceph_context, cmdmap, "feature_name", feature_name)) { | |
628 | ss << "missing required feature name"; | |
629 | err = -EINVAL; | |
630 | goto reply; | |
631 | } | |
632 | ||
633 | mon_feature_t feature; | |
634 | feature = ceph::features::mon::get_feature_by_name(feature_name); | |
635 | if (feature == ceph::features::mon::FEATURE_NONE) { | |
636 | ss << "unknown feature '" << feature_name << "'"; | |
637 | err = -ENOENT; | |
638 | goto reply; | |
639 | } | |
640 | ||
641 | string sure; | |
642 | if (!cmd_getval(g_ceph_context, cmdmap, "sure", sure) || | |
643 | sure != "--yes-i-really-mean-it") { | |
644 | ss << "please specify '--yes-i-really-mean-it' if you " | |
645 | << "really, **really** want to set feature '" | |
646 | << feature << "' in the monmap."; | |
647 | err = -EPERM; | |
648 | goto reply; | |
649 | } | |
650 | ||
651 | if (!mon->get_quorum_mon_features().contains_all(feature)) { | |
652 | ss << "current quorum does not support feature '" << feature | |
653 | << "'; supported features: " | |
654 | << mon->get_quorum_mon_features(); | |
655 | err = -EINVAL; | |
656 | goto reply; | |
657 | } | |
658 | ||
659 | ss << "setting feature '" << feature << "'"; | |
660 | ||
661 | err = 0; | |
662 | if (monmap.persistent_features.contains_all(feature)) { | |
663 | dout(10) << __func__ << " feature '" << feature | |
664 | << "' already set on monmap; no-op." << dendl; | |
665 | goto reply; | |
666 | } | |
667 | ||
668 | pending_map.persistent_features.set_feature(feature); | |
669 | pending_map.last_changed = ceph_clock_now(); | |
670 | propose = true; | |
671 | ||
672 | dout(1) << __func__ << ss.str() << "; new features will be: " | |
673 | << "persistent = " << pending_map.persistent_features | |
674 | // output optional nevertheless, for auditing purposes. | |
675 | << ", optional = " << pending_map.optional_features << dendl; | |
676 | ||
677 | } else { | |
678 | ss << "unknown command " << prefix; | |
679 | err = -EINVAL; | |
680 | } | |
681 | ||
682 | reply: | |
683 | getline(ss, rs); | |
684 | mon->reply_command(op, err, rs, get_last_committed()); | |
685 | // we are returning to the user; do not propose. | |
686 | return propose; | |
687 | } | |
688 | ||
689 | bool MonmapMonitor::preprocess_join(MonOpRequestRef op) | |
690 | { | |
691 | MMonJoin *join = static_cast<MMonJoin*>(op->get_req()); | |
224ce89b | 692 | dout(10) << __func__ << " " << join->name << " at " << join->addr << dendl; |
7c673cae FG |
693 | |
694 | MonSession *session = join->get_session(); | |
695 | if (!session || | |
696 | !session->is_capable("mon", MON_CAP_W | MON_CAP_X)) { | |
697 | dout(10) << " insufficient caps" << dendl; | |
698 | return true; | |
699 | } | |
700 | ||
701 | if (pending_map.contains(join->name) && !pending_map.get_addr(join->name).is_blank_ip()) { | |
702 | dout(10) << " already have " << join->name << dendl; | |
703 | return true; | |
704 | } | |
705 | if (pending_map.contains(join->addr) && pending_map.get_name(join->addr) == join->name) { | |
706 | dout(10) << " already have " << join->addr << dendl; | |
707 | return true; | |
708 | } | |
709 | return false; | |
710 | } | |
711 | bool MonmapMonitor::prepare_join(MonOpRequestRef op) | |
712 | { | |
713 | MMonJoin *join = static_cast<MMonJoin*>(op->get_req()); | |
714 | dout(0) << "adding/updating " << join->name << " at " << join->addr << " to monitor cluster" << dendl; | |
715 | if (pending_map.contains(join->name)) | |
716 | pending_map.remove(join->name); | |
717 | if (pending_map.contains(join->addr)) | |
718 | pending_map.remove(pending_map.get_name(join->addr)); | |
719 | pending_map.add(join->name, join->addr); | |
720 | pending_map.last_changed = ceph_clock_now(); | |
721 | return true; | |
722 | } | |
723 | ||
724 | bool MonmapMonitor::should_propose(double& delay) | |
725 | { | |
726 | delay = 0.0; | |
727 | return true; | |
728 | } | |
729 | ||
7c673cae FG |
730 | void MonmapMonitor::get_health(list<pair<health_status_t, string> >& summary, |
731 | list<pair<health_status_t, string> > *detail, | |
732 | CephContext *cct) const | |
733 | { | |
734 | int max = mon->monmap->size(); | |
735 | int actual = mon->get_quorum().size(); | |
736 | if (actual < max) { | |
737 | ostringstream ss; | |
738 | ss << (max-actual) << " mons down, quorum " << mon->get_quorum() << " " << mon->get_quorum_names(); | |
739 | summary.push_back(make_pair(HEALTH_WARN, ss.str())); | |
740 | if (detail) { | |
741 | set<int> q = mon->get_quorum(); | |
742 | for (int i=0; i<max; i++) { | |
743 | if (q.count(i) == 0) { | |
744 | ostringstream ss; | |
745 | ss << "mon." << mon->monmap->get_name(i) << " (rank " << i | |
746 | << ") addr " << mon->monmap->get_addr(i) | |
747 | << " is down (out of quorum)"; | |
748 | detail->push_back(make_pair(HEALTH_WARN, ss.str())); | |
749 | } | |
750 | } | |
751 | } | |
752 | } | |
753 | } | |
754 | ||
755 | int MonmapMonitor::get_monmap(bufferlist &bl) | |
756 | { | |
757 | version_t latest_ver = get_last_committed(); | |
758 | dout(10) << __func__ << " ver " << latest_ver << dendl; | |
759 | ||
760 | if (!mon->store->exists(get_service_name(), stringify(latest_ver))) | |
761 | return -ENOENT; | |
762 | ||
763 | int err = get_version(latest_ver, bl); | |
764 | if (err < 0) { | |
765 | dout(1) << __func__ << " error obtaining monmap: " | |
766 | << cpp_strerror(err) << dendl; | |
767 | return err; | |
768 | } | |
769 | return 0; | |
770 | } | |
771 | ||
772 | void MonmapMonitor::check_subs() | |
773 | { | |
774 | const string type = "monmap"; | |
775 | mon->with_session_map([this, &type](const MonSessionMap& session_map) { | |
776 | auto subs = session_map.subs.find(type); | |
777 | if (subs == session_map.subs.end()) | |
778 | return; | |
779 | for (auto sub : *subs->second) { | |
780 | check_sub(sub); | |
781 | } | |
782 | }); | |
783 | } | |
784 | ||
785 | void MonmapMonitor::check_sub(Subscription *sub) | |
786 | { | |
787 | const auto epoch = mon->monmap->get_epoch(); | |
788 | dout(10) << __func__ | |
789 | << " monmap next " << sub->next | |
790 | << " have " << epoch << dendl; | |
791 | if (sub->next <= epoch) { | |
792 | mon->send_latest_monmap(sub->session->con.get()); | |
793 | if (sub->onetime) { | |
794 | mon->with_session_map([this, sub](MonSessionMap& session_map) { | |
795 | session_map.remove_sub(sub); | |
796 | }); | |
797 | } else { | |
798 | sub->next = epoch + 1; | |
799 | } | |
800 | } | |
801 | } |