]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2009 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "MonmapMonitor.h" | |
16 | #include "Monitor.h" | |
17 | #include "messages/MMonCommand.h" | |
18 | #include "messages/MMonJoin.h" | |
19 | ||
20 | #include "common/ceph_argparse.h" | |
21 | #include "common/errno.h" | |
22 | #include <sstream> | |
23 | #include "common/config.h" | |
24 | #include "common/cmdparse.h" | |
25 | ||
26 | #include "include/assert.h" | |
27 | #include "include/stringify.h" | |
28 | ||
29 | #define dout_subsys ceph_subsys_mon | |
30 | #undef dout_prefix | |
31 | #define dout_prefix _prefix(_dout, mon) | |
32 | static ostream& _prefix(std::ostream *_dout, Monitor *mon) { | |
33 | return *_dout << "mon." << mon->name << "@" << mon->rank | |
34 | << "(" << mon->get_state_name() | |
35 | << ").monmap v" << mon->monmap->epoch << " "; | |
36 | } | |
37 | ||
38 | void MonmapMonitor::create_initial() | |
39 | { | |
224ce89b | 40 | dout(10) << __func__ << " using current monmap" << dendl; |
7c673cae FG |
41 | pending_map = *mon->monmap; |
42 | pending_map.epoch = 1; | |
31f18b77 FG |
43 | |
44 | if (g_conf->mon_debug_no_initial_persistent_features) { | |
45 | derr << __func__ << " mon_debug_no_initial_persistent_features=true" | |
46 | << dendl; | |
47 | } else { | |
48 | // initialize with default persistent features for new clusters | |
49 | pending_map.persistent_features = ceph::features::mon::get_persistent(); | |
50 | } | |
7c673cae FG |
51 | } |
52 | ||
53 | void MonmapMonitor::update_from_paxos(bool *need_bootstrap) | |
54 | { | |
55 | version_t version = get_last_committed(); | |
56 | if (version <= mon->monmap->get_epoch()) | |
57 | return; | |
58 | ||
59 | dout(10) << __func__ << " version " << version | |
60 | << ", my v " << mon->monmap->epoch << dendl; | |
61 | ||
62 | if (need_bootstrap && version != mon->monmap->get_epoch()) { | |
63 | dout(10) << " signaling that we need a bootstrap" << dendl; | |
64 | *need_bootstrap = true; | |
65 | } | |
66 | ||
67 | // read and decode | |
68 | monmap_bl.clear(); | |
69 | int ret = get_version(version, monmap_bl); | |
70 | assert(ret == 0); | |
71 | assert(monmap_bl.length()); | |
72 | ||
224ce89b | 73 | dout(10) << __func__ << " got " << version << dendl; |
7c673cae FG |
74 | mon->monmap->decode(monmap_bl); |
75 | ||
76 | if (mon->store->exists("mkfs", "monmap")) { | |
77 | auto t(std::make_shared<MonitorDBStore::Transaction>()); | |
78 | t->erase("mkfs", "monmap"); | |
79 | mon->store->apply_transaction(t); | |
80 | } | |
81 | ||
82 | check_subs(); | |
83 | } | |
84 | ||
85 | void MonmapMonitor::create_pending() | |
86 | { | |
87 | pending_map = *mon->monmap; | |
88 | pending_map.epoch++; | |
89 | pending_map.last_changed = ceph_clock_now(); | |
224ce89b | 90 | dout(10) << __func__ << " monmap epoch " << pending_map.epoch << dendl; |
7c673cae FG |
91 | } |
92 | ||
93 | void MonmapMonitor::encode_pending(MonitorDBStore::TransactionRef t) | |
94 | { | |
224ce89b | 95 | dout(10) << __func__ << " epoch " << pending_map.epoch << dendl; |
7c673cae FG |
96 | |
97 | assert(mon->monmap->epoch + 1 == pending_map.epoch || | |
98 | pending_map.epoch == 1); // special case mkfs! | |
99 | bufferlist bl; | |
100 | pending_map.encode(bl, mon->get_quorum_con_features()); | |
101 | ||
102 | put_version(t, pending_map.epoch, bl); | |
103 | put_last_committed(t, pending_map.epoch); | |
104 | ||
105 | // generate a cluster fingerprint, too? | |
106 | if (pending_map.epoch == 1) { | |
107 | mon->prepare_new_fingerprint(t); | |
108 | } | |
109 | } | |
110 | ||
111 | class C_ApplyFeatures : public Context { | |
112 | MonmapMonitor *svc; | |
113 | mon_feature_t features; | |
114 | public: | |
115 | C_ApplyFeatures(MonmapMonitor *s, const mon_feature_t& f) : | |
116 | svc(s), features(f) { } | |
117 | void finish(int r) override { | |
118 | if (r >= 0) { | |
119 | svc->apply_mon_features(features); | |
120 | } else if (r == -EAGAIN || r == -ECANCELED) { | |
121 | // discard features if we're no longer on the quorum that | |
122 | // established them in the first place. | |
123 | return; | |
124 | } else { | |
125 | assert(0 == "bad C_ApplyFeatures return value"); | |
126 | } | |
127 | } | |
128 | }; | |
129 | ||
130 | void MonmapMonitor::apply_mon_features(const mon_feature_t& features) | |
131 | { | |
132 | if (!is_writeable()) { | |
133 | dout(5) << __func__ << " wait for service to be writeable" << dendl; | |
134 | wait_for_writeable_ctx(new C_ApplyFeatures(this, features)); | |
135 | return; | |
136 | } | |
137 | ||
138 | assert(is_writeable()); | |
139 | assert(features.contains_all(pending_map.persistent_features)); | |
140 | // we should never hit this because `features` should be the result | |
141 | // of the quorum's supported features. But if it happens, die. | |
142 | assert(ceph::features::mon::get_supported().contains_all(features)); | |
143 | ||
144 | mon_feature_t new_features = | |
145 | (pending_map.persistent_features ^ | |
146 | (features & ceph::features::mon::get_persistent())); | |
147 | ||
148 | if (new_features.empty()) { | |
149 | dout(10) << __func__ << " features match current pending: " | |
150 | << features << dendl; | |
151 | return; | |
152 | } | |
153 | ||
154 | if (mon->get_quorum().size() < mon->monmap->size()) { | |
155 | dout(1) << __func__ << " new features " << new_features | |
156 | << " contains features that require a full quorum" | |
157 | << " (quorum size is " << mon->get_quorum().size() | |
158 | << ", requires " << mon->monmap->size() << "): " | |
159 | << new_features | |
160 | << " -- do not enable them!" << dendl; | |
161 | return; | |
162 | } | |
163 | ||
164 | new_features |= pending_map.persistent_features; | |
165 | ||
166 | dout(5) << __func__ << " applying new features to monmap;" | |
167 | << " had " << pending_map.persistent_features | |
168 | << ", will have " << new_features << dendl; | |
169 | pending_map.persistent_features = new_features; | |
170 | propose_pending(); | |
171 | } | |
172 | ||
173 | void MonmapMonitor::on_active() | |
174 | { | |
175 | if (get_last_committed() >= 1 && !mon->has_ever_joined) { | |
176 | // make note of the fact that i was, once, part of the quorum. | |
177 | dout(10) << "noting that i was, once, part of an active quorum." << dendl; | |
178 | ||
179 | /* This is some form of nasty in-breeding we have between the MonmapMonitor | |
180 | and the Monitor itself. We should find a way to get rid of it given our | |
181 | new architecture. Until then, stick with it since we are a | |
182 | single-threaded process and, truth be told, no one else relies on this | |
183 | thing besides us. | |
184 | */ | |
185 | auto t(std::make_shared<MonitorDBStore::Transaction>()); | |
186 | t->put(Monitor::MONITOR_NAME, "joined", 1); | |
187 | mon->store->apply_transaction(t); | |
188 | mon->has_ever_joined = true; | |
189 | } | |
190 | ||
b32b8144 FG |
191 | if (mon->is_leader()) { |
192 | mon->clog->debug() << "monmap " << *mon->monmap; | |
193 | } | |
7c673cae FG |
194 | |
195 | apply_mon_features(mon->get_quorum_mon_features()); | |
196 | } | |
197 | ||
198 | bool MonmapMonitor::preprocess_query(MonOpRequestRef op) | |
199 | { | |
200 | PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req()); | |
201 | switch (m->get_type()) { | |
202 | // READs | |
203 | case MSG_MON_COMMAND: | |
f64942e4 AA |
204 | try { |
205 | return preprocess_command(op); | |
206 | } | |
207 | catch (const bad_cmd_get& e) { | |
208 | bufferlist bl; | |
209 | mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); | |
210 | return true; | |
211 | } | |
7c673cae FG |
212 | case MSG_MON_JOIN: |
213 | return preprocess_join(op); | |
214 | default: | |
215 | ceph_abort(); | |
216 | return true; | |
217 | } | |
218 | } | |
219 | ||
220 | void MonmapMonitor::dump_info(Formatter *f) | |
221 | { | |
222 | f->dump_unsigned("monmap_first_committed", get_first_committed()); | |
223 | f->dump_unsigned("monmap_last_committed", get_last_committed()); | |
224 | f->open_object_section("monmap"); | |
225 | mon->monmap->dump(f); | |
226 | f->close_section(); | |
227 | f->open_array_section("quorum"); | |
228 | for (set<int>::iterator q = mon->get_quorum().begin(); q != mon->get_quorum().end(); ++q) | |
229 | f->dump_int("mon", *q); | |
230 | f->close_section(); | |
231 | } | |
232 | ||
233 | bool MonmapMonitor::preprocess_command(MonOpRequestRef op) | |
234 | { | |
235 | MMonCommand *m = static_cast<MMonCommand*>(op->get_req()); | |
236 | int r = -1; | |
237 | bufferlist rdata; | |
238 | stringstream ss; | |
239 | ||
240 | map<string, cmd_vartype> cmdmap; | |
241 | if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { | |
242 | string rs = ss.str(); | |
243 | mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed()); | |
244 | return true; | |
245 | } | |
246 | ||
247 | string prefix; | |
f64942e4 | 248 | cmd_getval_throws(g_ceph_context, cmdmap, "prefix", prefix); |
7c673cae FG |
249 | |
250 | MonSession *session = m->get_session(); | |
251 | if (!session) { | |
252 | mon->reply_command(op, -EACCES, "access denied", get_last_committed()); | |
253 | return true; | |
254 | } | |
255 | ||
256 | string format; | |
f64942e4 | 257 | cmd_getval_throws(g_ceph_context, cmdmap, "format", format, string("plain")); |
7c673cae FG |
258 | boost::scoped_ptr<Formatter> f(Formatter::create(format)); |
259 | ||
260 | if (prefix == "mon stat") { | |
261 | mon->monmap->print_summary(ss); | |
224ce89b WB |
262 | ss << ", election epoch " << mon->get_epoch() << ", leader " |
263 | << mon->get_leader() << " " << mon->get_leader_name() | |
264 | << ", quorum " << mon->get_quorum() << " " << mon->get_quorum_names(); | |
7c673cae FG |
265 | rdata.append(ss); |
266 | ss.str(""); | |
267 | r = 0; | |
268 | ||
269 | } else if (prefix == "mon getmap" || | |
270 | prefix == "mon dump") { | |
271 | ||
272 | epoch_t epoch; | |
273 | int64_t epochnum; | |
f64942e4 | 274 | cmd_getval_throws(g_ceph_context, cmdmap, "epoch", epochnum, (int64_t)0); |
7c673cae FG |
275 | epoch = epochnum; |
276 | ||
277 | MonMap *p = mon->monmap; | |
278 | if (epoch) { | |
279 | bufferlist bl; | |
280 | r = get_version(epoch, bl); | |
281 | if (r == -ENOENT) { | |
282 | ss << "there is no map for epoch " << epoch; | |
283 | goto reply; | |
284 | } | |
285 | assert(r == 0); | |
286 | assert(bl.length() > 0); | |
287 | p = new MonMap; | |
288 | p->decode(bl); | |
289 | } | |
290 | ||
291 | assert(p != NULL); | |
292 | ||
293 | if (prefix == "mon getmap") { | |
294 | p->encode(rdata, m->get_connection()->get_features()); | |
295 | r = 0; | |
296 | ss << "got monmap epoch " << p->get_epoch(); | |
297 | } else if (prefix == "mon dump") { | |
298 | stringstream ds; | |
299 | if (f) { | |
300 | f->open_object_section("monmap"); | |
301 | p->dump(f.get()); | |
302 | f->open_array_section("quorum"); | |
303 | for (set<int>::iterator q = mon->get_quorum().begin(); | |
304 | q != mon->get_quorum().end(); ++q) { | |
305 | f->dump_int("mon", *q); | |
306 | } | |
307 | f->close_section(); | |
308 | f->close_section(); | |
309 | f->flush(ds); | |
310 | r = 0; | |
311 | } else { | |
312 | p->print(ds); | |
313 | r = 0; | |
314 | } | |
315 | rdata.append(ds); | |
316 | ss << "dumped monmap epoch " << p->get_epoch(); | |
317 | } | |
318 | if (p != mon->monmap) | |
319 | delete p; | |
320 | ||
224ce89b | 321 | } else if (prefix == "mon feature ls") { |
7c673cae FG |
322 | |
323 | bool list_with_value = false; | |
324 | string with_value; | |
f64942e4 | 325 | if (cmd_getval_throws(g_ceph_context, cmdmap, "with_value", with_value) && |
7c673cae FG |
326 | with_value == "--with-value") { |
327 | list_with_value = true; | |
328 | } | |
329 | ||
330 | MonMap *p = mon->monmap; | |
331 | ||
332 | // list features | |
333 | mon_feature_t supported = ceph::features::mon::get_supported(); | |
334 | mon_feature_t persistent = ceph::features::mon::get_persistent(); | |
335 | mon_feature_t required = p->get_required_features(); | |
336 | ||
337 | stringstream ds; | |
338 | auto print_feature = [&](mon_feature_t& m_features, const char* m_str) { | |
339 | if (f) { | |
340 | if (list_with_value) | |
341 | m_features.dump_with_value(f.get(), m_str); | |
342 | else | |
343 | m_features.dump(f.get(), m_str); | |
344 | } else { | |
345 | if (list_with_value) | |
346 | m_features.print_with_value(ds); | |
347 | else | |
348 | m_features.print(ds); | |
349 | } | |
350 | }; | |
351 | ||
352 | if (f) { | |
353 | f->open_object_section("features"); | |
354 | ||
355 | f->open_object_section("all"); | |
356 | print_feature(supported, "supported"); | |
357 | print_feature(persistent, "persistent"); | |
358 | f->close_section(); // all | |
359 | ||
360 | f->open_object_section("monmap"); | |
361 | print_feature(p->persistent_features, "persistent"); | |
362 | print_feature(p->optional_features, "optional"); | |
363 | print_feature(required, "required"); | |
364 | f->close_section(); // monmap | |
365 | ||
366 | f->close_section(); // features | |
367 | f->flush(ds); | |
368 | ||
369 | } else { | |
370 | ds << "all features" << std::endl | |
371 | << "\tsupported: "; | |
372 | print_feature(supported, nullptr); | |
373 | ds << std::endl | |
374 | << "\tpersistent: "; | |
375 | print_feature(persistent, nullptr); | |
376 | ds << std::endl | |
377 | << std::endl; | |
378 | ||
379 | ds << "on current monmap (epoch " | |
380 | << p->get_epoch() << ")" << std::endl | |
381 | << "\tpersistent: "; | |
382 | print_feature(p->persistent_features, nullptr); | |
383 | ds << std::endl | |
384 | // omit optional features in plain-text | |
385 | // makes it easier to read, and they're, currently, empty. | |
386 | << "\trequired: "; | |
387 | print_feature(required, nullptr); | |
388 | ds << std::endl; | |
389 | } | |
390 | rdata.append(ds); | |
391 | r = 0; | |
392 | } | |
393 | ||
394 | reply: | |
395 | if (r != -1) { | |
396 | string rs; | |
397 | getline(ss, rs); | |
398 | ||
399 | mon->reply_command(op, r, rs, rdata, get_last_committed()); | |
400 | return true; | |
401 | } else | |
402 | return false; | |
403 | } | |
404 | ||
405 | ||
406 | bool MonmapMonitor::prepare_update(MonOpRequestRef op) | |
407 | { | |
408 | PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req()); | |
224ce89b | 409 | dout(7) << __func__ << " " << *m << " from " << m->get_orig_source_inst() << dendl; |
7c673cae FG |
410 | |
411 | switch (m->get_type()) { | |
412 | case MSG_MON_COMMAND: | |
f64942e4 AA |
413 | try { |
414 | return prepare_command(op); | |
415 | } | |
416 | catch (const bad_cmd_get& e) { | |
417 | bufferlist bl; | |
418 | mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); | |
419 | return true; | |
420 | } | |
7c673cae FG |
421 | case MSG_MON_JOIN: |
422 | return prepare_join(op); | |
423 | default: | |
424 | ceph_abort(); | |
425 | } | |
426 | ||
427 | return false; | |
428 | } | |
429 | ||
430 | bool MonmapMonitor::prepare_command(MonOpRequestRef op) | |
431 | { | |
432 | MMonCommand *m = static_cast<MMonCommand*>(op->get_req()); | |
433 | stringstream ss; | |
434 | string rs; | |
435 | int err = -EINVAL; | |
436 | ||
437 | map<string, cmd_vartype> cmdmap; | |
438 | if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { | |
439 | string rs = ss.str(); | |
440 | mon->reply_command(op, -EINVAL, rs, get_last_committed()); | |
441 | return true; | |
442 | } | |
443 | ||
444 | string prefix; | |
f64942e4 | 445 | cmd_getval_throws(g_ceph_context, cmdmap, "prefix", prefix); |
7c673cae FG |
446 | |
447 | MonSession *session = m->get_session(); | |
448 | if (!session) { | |
449 | mon->reply_command(op, -EACCES, "access denied", get_last_committed()); | |
450 | return true; | |
451 | } | |
452 | ||
453 | /* We should follow the following rules: | |
454 | * | |
455 | * - 'monmap' is the current, consistent version of the monmap | |
456 | * - 'pending_map' is the uncommitted version of the monmap | |
457 | * | |
458 | * All checks for the current state must be made against 'monmap'. | |
459 | * All changes are made against 'pending_map'. | |
460 | * | |
461 | * If there are concurrent operations modifying 'pending_map', please | |
462 | * follow the following rules. | |
463 | * | |
464 | * - if pending_map has already been changed, the second operation must | |
465 | * wait for the proposal to finish and be run again; This is the easiest | |
466 | * path to guarantee correctness but may impact performance (i.e., it | |
467 | * will take longer for the user to get a reply). | |
468 | * | |
469 | * - if the result of the second operation can be guaranteed to be | |
470 | * idempotent, the operation may reply to the user once the proposal | |
471 | * finishes; still needs to wait for the proposal to finish. | |
472 | * | |
473 | * - An operation _NEVER_ returns to the user based on pending state. | |
474 | * | |
475 | * If an operation does not modify current stable monmap, it may be | |
476 | * serialized before current pending map, regardless of any change that | |
477 | * has been made to the pending map -- remember, pending is uncommitted | |
478 | * state, thus we are not bound by it. | |
479 | */ | |
480 | ||
481 | assert(mon->monmap); | |
482 | MonMap &monmap = *mon->monmap; | |
483 | ||
484 | ||
485 | /* Please note: | |
486 | * | |
487 | * Adding or removing monitors may lead to loss of quorum. | |
488 | * | |
489 | * Because quorum may be lost, it's important to reply something | |
490 | * to the user, lest she end up waiting forever for a reply. And | |
491 | * no reply will ever be sent until quorum is formed again. | |
492 | * | |
493 | * On the other hand, this means we're leaking uncommitted state | |
494 | * to the user. As such, please be mindful of the reply message. | |
495 | * | |
496 | * e.g., 'adding monitor mon.foo' is okay ('adding' is an on-going | |
497 | * operation and conveys its not-yet-permanent nature); whereas | |
498 | * 'added monitor mon.foo' presumes the action has successfully | |
499 | * completed and state has been committed, which may not be true. | |
500 | */ | |
501 | ||
502 | ||
503 | bool propose = false; | |
504 | if (prefix == "mon add") { | |
505 | string name; | |
f64942e4 | 506 | cmd_getval_throws(g_ceph_context, cmdmap, "name", name); |
7c673cae | 507 | string addrstr; |
f64942e4 | 508 | cmd_getval_throws(g_ceph_context, cmdmap, "addr", addrstr); |
7c673cae FG |
509 | entity_addr_t addr; |
510 | bufferlist rdata; | |
511 | ||
512 | if (!addr.parse(addrstr.c_str())) { | |
513 | err = -EINVAL; | |
514 | ss << "addr " << addrstr << "does not parse"; | |
515 | goto reply; | |
516 | } | |
517 | ||
518 | if (addr.get_port() == 0) { | |
519 | ss << "port defaulted to " << CEPH_MON_PORT; | |
520 | addr.set_port(CEPH_MON_PORT); | |
521 | } | |
522 | ||
523 | /** | |
524 | * If we have a monitor with the same name and different addr, then EEXIST | |
525 | * If we have a monitor with the same addr and different name, then EEXIST | |
526 | * If we have a monitor with the same addr and same name, then wait for | |
527 | * the proposal to finish and return success. | |
528 | * If we don't have the monitor, add it. | |
529 | */ | |
530 | ||
531 | err = 0; | |
532 | if (!ss.str().empty()) | |
533 | ss << "; "; | |
534 | ||
535 | do { | |
536 | if (monmap.contains(name)) { | |
537 | if (monmap.get_addr(name) == addr) { | |
538 | // stable map contains monitor with the same name at the same address. | |
539 | // serialize before current pending map. | |
540 | err = 0; // for clarity; this has already been set above. | |
541 | ss << "mon." << name << " at " << addr << " already exists"; | |
542 | goto reply; | |
543 | } else { | |
544 | ss << "mon." << name | |
545 | << " already exists at address " << monmap.get_addr(name); | |
546 | } | |
547 | } else if (monmap.contains(addr)) { | |
548 | // we established on the previous branch that name is different | |
549 | ss << "mon." << monmap.get_name(addr) | |
550 | << " already exists at address " << addr; | |
551 | } else { | |
552 | // go ahead and add | |
553 | break; | |
554 | } | |
555 | err = -EEXIST; | |
556 | goto reply; | |
557 | } while (false); | |
558 | ||
559 | /* Given there's no delay between proposals on the MonmapMonitor (see | |
560 | * MonmapMonitor::should_propose()), there is no point in checking for | |
561 | * a mismatch between name and addr on pending_map. | |
562 | * | |
563 | * Once we established the monitor does not exist in the committed state, | |
564 | * we can simply go ahead and add the monitor. | |
565 | */ | |
566 | ||
567 | pending_map.add(name, addr); | |
568 | pending_map.last_changed = ceph_clock_now(); | |
569 | ss << "adding mon." << name << " at " << addr; | |
570 | propose = true; | |
571 | dout(0) << __func__ << " proposing new mon." << name << dendl; | |
572 | ||
573 | } else if (prefix == "mon remove" || | |
574 | prefix == "mon rm") { | |
575 | string name; | |
f64942e4 | 576 | cmd_getval_throws(g_ceph_context, cmdmap, "name", name); |
7c673cae FG |
577 | if (!monmap.contains(name)) { |
578 | err = 0; | |
579 | ss << "mon." << name << " does not exist or has already been removed"; | |
580 | goto reply; | |
581 | } | |
582 | ||
583 | if (monmap.size() == 1) { | |
584 | err = -EINVAL; | |
585 | ss << "error: refusing removal of last monitor " << name; | |
586 | goto reply; | |
587 | } | |
588 | ||
589 | /* At the time of writing, there is no risk of races when multiple clients | |
590 | * attempt to use the same name. The reason is simple but may not be | |
591 | * obvious. | |
592 | * | |
593 | * In a nutshell, we do not collate proposals on the MonmapMonitor. As | |
594 | * soon as we return 'true' below, PaxosService::dispatch() will check if | |
595 | * the service should propose, and - if so - the service will be marked as | |
596 | * 'proposing' and a proposal will be triggered. The PaxosService class | |
597 | * guarantees that once a service is marked 'proposing' no further writes | |
598 | * will be handled. | |
599 | * | |
600 | * The decision on whether the service should propose or not is, in this | |
601 | * case, made by MonmapMonitor::should_propose(), which always considers | |
602 | * the proposal delay being 0.0 seconds. This is key for PaxosService to | |
603 | * trigger the proposal immediately. | |
604 | * 0.0 seconds of delay. | |
605 | * | |
606 | * From the above, there's no point in performing further checks on the | |
607 | * pending_map, as we don't ever have multiple proposals in-flight in | |
608 | * this service. As we've established the committed state contains the | |
609 | * monitor, we can simply go ahead and remove it. | |
610 | * | |
611 | * Please note that the code hinges on all of the above to be true. It | |
612 | * has been true since time immemorial and we don't see a good reason | |
613 | * to make it sturdier at this time - mainly because we don't think it's | |
614 | * going to change any time soon, lest for any bug that may be unwillingly | |
615 | * introduced. | |
616 | */ | |
617 | ||
618 | entity_addr_t addr = pending_map.get_addr(name); | |
619 | pending_map.remove(name); | |
620 | pending_map.last_changed = ceph_clock_now(); | |
621 | ss << "removing mon." << name << " at " << addr | |
622 | << ", there will be " << pending_map.size() << " monitors" ; | |
623 | propose = true; | |
624 | err = 0; | |
625 | ||
626 | } else if (prefix == "mon feature set") { | |
627 | ||
628 | /* PLEASE NOTE: | |
629 | * | |
630 | * We currently only support setting/unsetting persistent features. | |
631 | * This is by design, given at the moment we still don't have optional | |
632 | * features, and, as such, there is no point introducing an interface | |
633 | * to manipulate them. This allows us to provide a cleaner, more | |
634 | * intuitive interface to the user, modifying solely persistent | |
635 | * features. | |
636 | * | |
637 | * In the future we should consider adding another interface to handle | |
638 | * optional features/flags; e.g., 'mon feature flag set/unset', or | |
639 | * 'mon flag set/unset'. | |
640 | */ | |
641 | string feature_name; | |
f64942e4 | 642 | if (!cmd_getval_throws(g_ceph_context, cmdmap, "feature_name", feature_name)) { |
7c673cae FG |
643 | ss << "missing required feature name"; |
644 | err = -EINVAL; | |
645 | goto reply; | |
646 | } | |
647 | ||
648 | mon_feature_t feature; | |
649 | feature = ceph::features::mon::get_feature_by_name(feature_name); | |
650 | if (feature == ceph::features::mon::FEATURE_NONE) { | |
651 | ss << "unknown feature '" << feature_name << "'"; | |
652 | err = -ENOENT; | |
653 | goto reply; | |
654 | } | |
655 | ||
656 | string sure; | |
f64942e4 | 657 | if (!cmd_getval_throws(g_ceph_context, cmdmap, "sure", sure) || |
7c673cae FG |
658 | sure != "--yes-i-really-mean-it") { |
659 | ss << "please specify '--yes-i-really-mean-it' if you " | |
660 | << "really, **really** want to set feature '" | |
661 | << feature << "' in the monmap."; | |
662 | err = -EPERM; | |
663 | goto reply; | |
664 | } | |
665 | ||
666 | if (!mon->get_quorum_mon_features().contains_all(feature)) { | |
667 | ss << "current quorum does not support feature '" << feature | |
668 | << "'; supported features: " | |
669 | << mon->get_quorum_mon_features(); | |
670 | err = -EINVAL; | |
671 | goto reply; | |
672 | } | |
673 | ||
674 | ss << "setting feature '" << feature << "'"; | |
675 | ||
676 | err = 0; | |
677 | if (monmap.persistent_features.contains_all(feature)) { | |
678 | dout(10) << __func__ << " feature '" << feature | |
679 | << "' already set on monmap; no-op." << dendl; | |
680 | goto reply; | |
681 | } | |
682 | ||
683 | pending_map.persistent_features.set_feature(feature); | |
684 | pending_map.last_changed = ceph_clock_now(); | |
685 | propose = true; | |
686 | ||
687 | dout(1) << __func__ << ss.str() << "; new features will be: " | |
688 | << "persistent = " << pending_map.persistent_features | |
689 | // output optional nevertheless, for auditing purposes. | |
690 | << ", optional = " << pending_map.optional_features << dendl; | |
7c673cae FG |
691 | } else { |
692 | ss << "unknown command " << prefix; | |
693 | err = -EINVAL; | |
694 | } | |
695 | ||
696 | reply: | |
697 | getline(ss, rs); | |
698 | mon->reply_command(op, err, rs, get_last_committed()); | |
699 | // we are returning to the user; do not propose. | |
700 | return propose; | |
701 | } | |
702 | ||
703 | bool MonmapMonitor::preprocess_join(MonOpRequestRef op) | |
704 | { | |
705 | MMonJoin *join = static_cast<MMonJoin*>(op->get_req()); | |
224ce89b | 706 | dout(10) << __func__ << " " << join->name << " at " << join->addr << dendl; |
7c673cae FG |
707 | |
708 | MonSession *session = join->get_session(); | |
709 | if (!session || | |
710 | !session->is_capable("mon", MON_CAP_W | MON_CAP_X)) { | |
711 | dout(10) << " insufficient caps" << dendl; | |
712 | return true; | |
713 | } | |
714 | ||
715 | if (pending_map.contains(join->name) && !pending_map.get_addr(join->name).is_blank_ip()) { | |
716 | dout(10) << " already have " << join->name << dendl; | |
717 | return true; | |
718 | } | |
719 | if (pending_map.contains(join->addr) && pending_map.get_name(join->addr) == join->name) { | |
720 | dout(10) << " already have " << join->addr << dendl; | |
721 | return true; | |
722 | } | |
723 | return false; | |
724 | } | |
725 | bool MonmapMonitor::prepare_join(MonOpRequestRef op) | |
726 | { | |
727 | MMonJoin *join = static_cast<MMonJoin*>(op->get_req()); | |
728 | dout(0) << "adding/updating " << join->name << " at " << join->addr << " to monitor cluster" << dendl; | |
729 | if (pending_map.contains(join->name)) | |
730 | pending_map.remove(join->name); | |
731 | if (pending_map.contains(join->addr)) | |
732 | pending_map.remove(pending_map.get_name(join->addr)); | |
733 | pending_map.add(join->name, join->addr); | |
734 | pending_map.last_changed = ceph_clock_now(); | |
735 | return true; | |
736 | } | |
737 | ||
738 | bool MonmapMonitor::should_propose(double& delay) | |
739 | { | |
740 | delay = 0.0; | |
741 | return true; | |
742 | } | |
743 | ||
7c673cae FG |
744 | void MonmapMonitor::get_health(list<pair<health_status_t, string> >& summary, |
745 | list<pair<health_status_t, string> > *detail, | |
746 | CephContext *cct) const | |
747 | { | |
748 | int max = mon->monmap->size(); | |
749 | int actual = mon->get_quorum().size(); | |
750 | if (actual < max) { | |
751 | ostringstream ss; | |
752 | ss << (max-actual) << " mons down, quorum " << mon->get_quorum() << " " << mon->get_quorum_names(); | |
753 | summary.push_back(make_pair(HEALTH_WARN, ss.str())); | |
754 | if (detail) { | |
755 | set<int> q = mon->get_quorum(); | |
756 | for (int i=0; i<max; i++) { | |
757 | if (q.count(i) == 0) { | |
758 | ostringstream ss; | |
759 | ss << "mon." << mon->monmap->get_name(i) << " (rank " << i | |
760 | << ") addr " << mon->monmap->get_addr(i) | |
761 | << " is down (out of quorum)"; | |
762 | detail->push_back(make_pair(HEALTH_WARN, ss.str())); | |
763 | } | |
764 | } | |
765 | } | |
766 | } | |
767 | } | |
768 | ||
769 | int MonmapMonitor::get_monmap(bufferlist &bl) | |
770 | { | |
771 | version_t latest_ver = get_last_committed(); | |
772 | dout(10) << __func__ << " ver " << latest_ver << dendl; | |
773 | ||
774 | if (!mon->store->exists(get_service_name(), stringify(latest_ver))) | |
775 | return -ENOENT; | |
776 | ||
777 | int err = get_version(latest_ver, bl); | |
778 | if (err < 0) { | |
779 | dout(1) << __func__ << " error obtaining monmap: " | |
780 | << cpp_strerror(err) << dendl; | |
781 | return err; | |
782 | } | |
783 | return 0; | |
784 | } | |
785 | ||
786 | void MonmapMonitor::check_subs() | |
787 | { | |
788 | const string type = "monmap"; | |
789 | mon->with_session_map([this, &type](const MonSessionMap& session_map) { | |
790 | auto subs = session_map.subs.find(type); | |
791 | if (subs == session_map.subs.end()) | |
792 | return; | |
793 | for (auto sub : *subs->second) { | |
794 | check_sub(sub); | |
795 | } | |
796 | }); | |
797 | } | |
798 | ||
799 | void MonmapMonitor::check_sub(Subscription *sub) | |
800 | { | |
801 | const auto epoch = mon->monmap->get_epoch(); | |
802 | dout(10) << __func__ | |
803 | << " monmap next " << sub->next | |
804 | << " have " << epoch << dendl; | |
805 | if (sub->next <= epoch) { | |
806 | mon->send_latest_monmap(sub->session->con.get()); | |
807 | if (sub->onetime) { | |
808 | mon->with_session_map([this, sub](MonSessionMap& session_map) { | |
809 | session_map.remove_sub(sub); | |
810 | }); | |
811 | } else { | |
812 | sub->next = epoch + 1; | |
813 | } | |
814 | } | |
815 | } |