]>
Commit | Line | Data |
---|---|---|
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- | |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2009 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "MonmapMonitor.h" | |
16 | #include "Monitor.h" | |
17 | #include "messages/MMonCommand.h" | |
18 | #include "messages/MMonJoin.h" | |
19 | ||
20 | #include "common/ceph_argparse.h" | |
21 | #include "common/errno.h" | |
22 | #include <sstream> | |
23 | #include "common/config.h" | |
24 | #include "common/cmdparse.h" | |
25 | ||
26 | #include "include/assert.h" | |
27 | #include "include/stringify.h" | |
28 | ||
29 | #define dout_subsys ceph_subsys_mon | |
30 | #undef dout_prefix | |
31 | #define dout_prefix _prefix(_dout, mon) | |
32 | static ostream& _prefix(std::ostream *_dout, Monitor *mon) { | |
33 | return *_dout << "mon." << mon->name << "@" << mon->rank | |
34 | << "(" << mon->get_state_name() | |
35 | << ").monmap v" << mon->monmap->epoch << " "; | |
36 | } | |
37 | ||
38 | void MonmapMonitor::create_initial() | |
39 | { | |
40 | dout(10) << "create_initial using current monmap" << dendl; | |
41 | pending_map = *mon->monmap; | |
42 | pending_map.epoch = 1; | |
43 | ||
44 | if (g_conf->mon_debug_no_initial_persistent_features) { | |
45 | derr << __func__ << " mon_debug_no_initial_persistent_features=true" | |
46 | << dendl; | |
47 | } else { | |
48 | // initialize with default persistent features for new clusters | |
49 | pending_map.persistent_features = ceph::features::mon::get_persistent(); | |
50 | } | |
51 | } | |
52 | ||
53 | void MonmapMonitor::update_from_paxos(bool *need_bootstrap) | |
54 | { | |
55 | version_t version = get_last_committed(); | |
56 | if (version <= mon->monmap->get_epoch()) | |
57 | return; | |
58 | ||
59 | dout(10) << __func__ << " version " << version | |
60 | << ", my v " << mon->monmap->epoch << dendl; | |
61 | ||
62 | if (need_bootstrap && version != mon->monmap->get_epoch()) { | |
63 | dout(10) << " signaling that we need a bootstrap" << dendl; | |
64 | *need_bootstrap = true; | |
65 | } | |
66 | ||
67 | // read and decode | |
68 | monmap_bl.clear(); | |
69 | int ret = get_version(version, monmap_bl); | |
70 | assert(ret == 0); | |
71 | assert(monmap_bl.length()); | |
72 | ||
73 | dout(10) << "update_from_paxos got " << version << dendl; | |
74 | mon->monmap->decode(monmap_bl); | |
75 | ||
76 | if (mon->store->exists("mkfs", "monmap")) { | |
77 | auto t(std::make_shared<MonitorDBStore::Transaction>()); | |
78 | t->erase("mkfs", "monmap"); | |
79 | mon->store->apply_transaction(t); | |
80 | } | |
81 | ||
82 | check_subs(); | |
83 | } | |
84 | ||
85 | void MonmapMonitor::create_pending() | |
86 | { | |
87 | pending_map = *mon->monmap; | |
88 | pending_map.epoch++; | |
89 | pending_map.last_changed = ceph_clock_now(); | |
90 | dout(10) << "create_pending monmap epoch " << pending_map.epoch << dendl; | |
91 | } | |
92 | ||
93 | void MonmapMonitor::encode_pending(MonitorDBStore::TransactionRef t) | |
94 | { | |
95 | dout(10) << "encode_pending epoch " << pending_map.epoch << dendl; | |
96 | ||
97 | assert(mon->monmap->epoch + 1 == pending_map.epoch || | |
98 | pending_map.epoch == 1); // special case mkfs! | |
99 | bufferlist bl; | |
100 | pending_map.encode(bl, mon->get_quorum_con_features()); | |
101 | ||
102 | put_version(t, pending_map.epoch, bl); | |
103 | put_last_committed(t, pending_map.epoch); | |
104 | ||
105 | // generate a cluster fingerprint, too? | |
106 | if (pending_map.epoch == 1) { | |
107 | mon->prepare_new_fingerprint(t); | |
108 | } | |
109 | } | |
110 | ||
111 | class C_ApplyFeatures : public Context { | |
112 | MonmapMonitor *svc; | |
113 | mon_feature_t features; | |
114 | public: | |
115 | C_ApplyFeatures(MonmapMonitor *s, const mon_feature_t& f) : | |
116 | svc(s), features(f) { } | |
117 | void finish(int r) override { | |
118 | if (r >= 0) { | |
119 | svc->apply_mon_features(features); | |
120 | } else if (r == -EAGAIN || r == -ECANCELED) { | |
121 | // discard features if we're no longer on the quorum that | |
122 | // established them in the first place. | |
123 | return; | |
124 | } else { | |
125 | assert(0 == "bad C_ApplyFeatures return value"); | |
126 | } | |
127 | } | |
128 | }; | |
129 | ||
130 | void MonmapMonitor::apply_mon_features(const mon_feature_t& features) | |
131 | { | |
132 | if (!is_writeable()) { | |
133 | dout(5) << __func__ << " wait for service to be writeable" << dendl; | |
134 | wait_for_writeable_ctx(new C_ApplyFeatures(this, features)); | |
135 | return; | |
136 | } | |
137 | ||
138 | assert(is_writeable()); | |
139 | assert(features.contains_all(pending_map.persistent_features)); | |
140 | // we should never hit this because `features` should be the result | |
141 | // of the quorum's supported features. But if it happens, die. | |
142 | assert(ceph::features::mon::get_supported().contains_all(features)); | |
143 | ||
144 | mon_feature_t new_features = | |
145 | (pending_map.persistent_features ^ | |
146 | (features & ceph::features::mon::get_persistent())); | |
147 | ||
148 | if (new_features.empty()) { | |
149 | dout(10) << __func__ << " features match current pending: " | |
150 | << features << dendl; | |
151 | return; | |
152 | } | |
153 | ||
154 | if (mon->get_quorum().size() < mon->monmap->size()) { | |
155 | dout(1) << __func__ << " new features " << new_features | |
156 | << " contains features that require a full quorum" | |
157 | << " (quorum size is " << mon->get_quorum().size() | |
158 | << ", requires " << mon->monmap->size() << "): " | |
159 | << new_features | |
160 | << " -- do not enable them!" << dendl; | |
161 | return; | |
162 | } | |
163 | ||
164 | new_features |= pending_map.persistent_features; | |
165 | ||
166 | dout(5) << __func__ << " applying new features to monmap;" | |
167 | << " had " << pending_map.persistent_features | |
168 | << ", will have " << new_features << dendl; | |
169 | pending_map.persistent_features = new_features; | |
170 | propose_pending(); | |
171 | } | |
172 | ||
173 | void MonmapMonitor::on_active() | |
174 | { | |
175 | if (get_last_committed() >= 1 && !mon->has_ever_joined) { | |
176 | // make note of the fact that i was, once, part of the quorum. | |
177 | dout(10) << "noting that i was, once, part of an active quorum." << dendl; | |
178 | ||
179 | /* This is some form of nasty in-breeding we have between the MonmapMonitor | |
180 | and the Monitor itself. We should find a way to get rid of it given our | |
181 | new architecture. Until then, stick with it since we are a | |
182 | single-threaded process and, truth be told, no one else relies on this | |
183 | thing besides us. | |
184 | */ | |
185 | auto t(std::make_shared<MonitorDBStore::Transaction>()); | |
186 | t->put(Monitor::MONITOR_NAME, "joined", 1); | |
187 | mon->store->apply_transaction(t); | |
188 | mon->has_ever_joined = true; | |
189 | } | |
190 | ||
191 | if (mon->is_leader()) | |
192 | mon->clog->info() << "monmap " << *mon->monmap; | |
193 | ||
194 | apply_mon_features(mon->get_quorum_mon_features()); | |
195 | } | |
196 | ||
197 | bool MonmapMonitor::preprocess_query(MonOpRequestRef op) | |
198 | { | |
199 | PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req()); | |
200 | switch (m->get_type()) { | |
201 | // READs | |
202 | case MSG_MON_COMMAND: | |
203 | return preprocess_command(op); | |
204 | case MSG_MON_JOIN: | |
205 | return preprocess_join(op); | |
206 | default: | |
207 | ceph_abort(); | |
208 | return true; | |
209 | } | |
210 | } | |
211 | ||
212 | void MonmapMonitor::dump_info(Formatter *f) | |
213 | { | |
214 | f->dump_unsigned("monmap_first_committed", get_first_committed()); | |
215 | f->dump_unsigned("monmap_last_committed", get_last_committed()); | |
216 | f->open_object_section("monmap"); | |
217 | mon->monmap->dump(f); | |
218 | f->close_section(); | |
219 | f->open_array_section("quorum"); | |
220 | for (set<int>::iterator q = mon->get_quorum().begin(); q != mon->get_quorum().end(); ++q) | |
221 | f->dump_int("mon", *q); | |
222 | f->close_section(); | |
223 | } | |
224 | ||
225 | bool MonmapMonitor::preprocess_command(MonOpRequestRef op) | |
226 | { | |
227 | MMonCommand *m = static_cast<MMonCommand*>(op->get_req()); | |
228 | int r = -1; | |
229 | bufferlist rdata; | |
230 | stringstream ss; | |
231 | ||
232 | map<string, cmd_vartype> cmdmap; | |
233 | if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { | |
234 | string rs = ss.str(); | |
235 | mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed()); | |
236 | return true; | |
237 | } | |
238 | ||
239 | string prefix; | |
240 | cmd_getval(g_ceph_context, cmdmap, "prefix", prefix); | |
241 | ||
242 | MonSession *session = m->get_session(); | |
243 | if (!session) { | |
244 | mon->reply_command(op, -EACCES, "access denied", get_last_committed()); | |
245 | return true; | |
246 | } | |
247 | ||
248 | string format; | |
249 | cmd_getval(g_ceph_context, cmdmap, "format", format, string("plain")); | |
250 | boost::scoped_ptr<Formatter> f(Formatter::create(format)); | |
251 | ||
252 | if (prefix == "mon stat") { | |
253 | mon->monmap->print_summary(ss); | |
254 | ss << ", election epoch " << mon->get_epoch() << ", quorum " << mon->get_quorum() | |
255 | << " " << mon->get_quorum_names(); | |
256 | rdata.append(ss); | |
257 | ss.str(""); | |
258 | r = 0; | |
259 | ||
260 | } else if (prefix == "mon getmap" || | |
261 | prefix == "mon dump") { | |
262 | ||
263 | epoch_t epoch; | |
264 | int64_t epochnum; | |
265 | cmd_getval(g_ceph_context, cmdmap, "epoch", epochnum, (int64_t)0); | |
266 | epoch = epochnum; | |
267 | ||
268 | MonMap *p = mon->monmap; | |
269 | if (epoch) { | |
270 | bufferlist bl; | |
271 | r = get_version(epoch, bl); | |
272 | if (r == -ENOENT) { | |
273 | ss << "there is no map for epoch " << epoch; | |
274 | goto reply; | |
275 | } | |
276 | assert(r == 0); | |
277 | assert(bl.length() > 0); | |
278 | p = new MonMap; | |
279 | p->decode(bl); | |
280 | } | |
281 | ||
282 | assert(p != NULL); | |
283 | ||
284 | if (prefix == "mon getmap") { | |
285 | p->encode(rdata, m->get_connection()->get_features()); | |
286 | r = 0; | |
287 | ss << "got monmap epoch " << p->get_epoch(); | |
288 | } else if (prefix == "mon dump") { | |
289 | stringstream ds; | |
290 | if (f) { | |
291 | f->open_object_section("monmap"); | |
292 | p->dump(f.get()); | |
293 | f->open_array_section("quorum"); | |
294 | for (set<int>::iterator q = mon->get_quorum().begin(); | |
295 | q != mon->get_quorum().end(); ++q) { | |
296 | f->dump_int("mon", *q); | |
297 | } | |
298 | f->close_section(); | |
299 | f->close_section(); | |
300 | f->flush(ds); | |
301 | r = 0; | |
302 | } else { | |
303 | p->print(ds); | |
304 | r = 0; | |
305 | } | |
306 | rdata.append(ds); | |
307 | ss << "dumped monmap epoch " << p->get_epoch(); | |
308 | } | |
309 | if (p != mon->monmap) | |
310 | delete p; | |
311 | ||
312 | } else if (prefix == "mon feature list") { | |
313 | ||
314 | bool list_with_value = false; | |
315 | string with_value; | |
316 | if (cmd_getval(g_ceph_context, cmdmap, "with_value", with_value) && | |
317 | with_value == "--with-value") { | |
318 | list_with_value = true; | |
319 | } | |
320 | ||
321 | MonMap *p = mon->monmap; | |
322 | ||
323 | // list features | |
324 | mon_feature_t supported = ceph::features::mon::get_supported(); | |
325 | mon_feature_t persistent = ceph::features::mon::get_persistent(); | |
326 | mon_feature_t required = p->get_required_features(); | |
327 | ||
328 | stringstream ds; | |
329 | auto print_feature = [&](mon_feature_t& m_features, const char* m_str) { | |
330 | if (f) { | |
331 | if (list_with_value) | |
332 | m_features.dump_with_value(f.get(), m_str); | |
333 | else | |
334 | m_features.dump(f.get(), m_str); | |
335 | } else { | |
336 | if (list_with_value) | |
337 | m_features.print_with_value(ds); | |
338 | else | |
339 | m_features.print(ds); | |
340 | } | |
341 | }; | |
342 | ||
343 | if (f) { | |
344 | f->open_object_section("features"); | |
345 | ||
346 | f->open_object_section("all"); | |
347 | print_feature(supported, "supported"); | |
348 | print_feature(persistent, "persistent"); | |
349 | f->close_section(); // all | |
350 | ||
351 | f->open_object_section("monmap"); | |
352 | print_feature(p->persistent_features, "persistent"); | |
353 | print_feature(p->optional_features, "optional"); | |
354 | print_feature(required, "required"); | |
355 | f->close_section(); // monmap | |
356 | ||
357 | f->close_section(); // features | |
358 | f->flush(ds); | |
359 | ||
360 | } else { | |
361 | ds << "all features" << std::endl | |
362 | << "\tsupported: "; | |
363 | print_feature(supported, nullptr); | |
364 | ds << std::endl | |
365 | << "\tpersistent: "; | |
366 | print_feature(persistent, nullptr); | |
367 | ds << std::endl | |
368 | << std::endl; | |
369 | ||
370 | ds << "on current monmap (epoch " | |
371 | << p->get_epoch() << ")" << std::endl | |
372 | << "\tpersistent: "; | |
373 | print_feature(p->persistent_features, nullptr); | |
374 | ds << std::endl | |
375 | // omit optional features in plain-text | |
376 | // makes it easier to read, and they're, currently, empty. | |
377 | << "\trequired: "; | |
378 | print_feature(required, nullptr); | |
379 | ds << std::endl; | |
380 | } | |
381 | rdata.append(ds); | |
382 | r = 0; | |
383 | } | |
384 | ||
385 | reply: | |
386 | if (r != -1) { | |
387 | string rs; | |
388 | getline(ss, rs); | |
389 | ||
390 | mon->reply_command(op, r, rs, rdata, get_last_committed()); | |
391 | return true; | |
392 | } else | |
393 | return false; | |
394 | } | |
395 | ||
396 | ||
397 | bool MonmapMonitor::prepare_update(MonOpRequestRef op) | |
398 | { | |
399 | PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req()); | |
400 | dout(7) << "prepare_update " << *m << " from " << m->get_orig_source_inst() << dendl; | |
401 | ||
402 | switch (m->get_type()) { | |
403 | case MSG_MON_COMMAND: | |
404 | return prepare_command(op); | |
405 | case MSG_MON_JOIN: | |
406 | return prepare_join(op); | |
407 | default: | |
408 | ceph_abort(); | |
409 | } | |
410 | ||
411 | return false; | |
412 | } | |
413 | ||
414 | bool MonmapMonitor::prepare_command(MonOpRequestRef op) | |
415 | { | |
416 | MMonCommand *m = static_cast<MMonCommand*>(op->get_req()); | |
417 | stringstream ss; | |
418 | string rs; | |
419 | int err = -EINVAL; | |
420 | ||
421 | map<string, cmd_vartype> cmdmap; | |
422 | if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { | |
423 | string rs = ss.str(); | |
424 | mon->reply_command(op, -EINVAL, rs, get_last_committed()); | |
425 | return true; | |
426 | } | |
427 | ||
428 | string prefix; | |
429 | cmd_getval(g_ceph_context, cmdmap, "prefix", prefix); | |
430 | ||
431 | MonSession *session = m->get_session(); | |
432 | if (!session) { | |
433 | mon->reply_command(op, -EACCES, "access denied", get_last_committed()); | |
434 | return true; | |
435 | } | |
436 | ||
437 | /* We should follow the following rules: | |
438 | * | |
439 | * - 'monmap' is the current, consistent version of the monmap | |
440 | * - 'pending_map' is the uncommitted version of the monmap | |
441 | * | |
442 | * All checks for the current state must be made against 'monmap'. | |
443 | * All changes are made against 'pending_map'. | |
444 | * | |
445 | * If there are concurrent operations modifying 'pending_map', please | |
446 | * follow the following rules. | |
447 | * | |
448 | * - if pending_map has already been changed, the second operation must | |
449 | * wait for the proposal to finish and be run again; This is the easiest | |
450 | * path to guarantee correctness but may impact performance (i.e., it | |
451 | * will take longer for the user to get a reply). | |
452 | * | |
453 | * - if the result of the second operation can be guaranteed to be | |
454 | * idempotent, the operation may reply to the user once the proposal | |
455 | * finishes; still needs to wait for the proposal to finish. | |
456 | * | |
457 | * - An operation _NEVER_ returns to the user based on pending state. | |
458 | * | |
459 | * If an operation does not modify current stable monmap, it may be | |
460 | * serialized before current pending map, regardless of any change that | |
461 | * has been made to the pending map -- remember, pending is uncommitted | |
462 | * state, thus we are not bound by it. | |
463 | */ | |
464 | ||
465 | assert(mon->monmap); | |
466 | MonMap &monmap = *mon->monmap; | |
467 | ||
468 | ||
469 | /* Please note: | |
470 | * | |
471 | * Adding or removing monitors may lead to loss of quorum. | |
472 | * | |
473 | * Because quorum may be lost, it's important to reply something | |
474 | * to the user, lest she end up waiting forever for a reply. And | |
475 | * no reply will ever be sent until quorum is formed again. | |
476 | * | |
477 | * On the other hand, this means we're leaking uncommitted state | |
478 | * to the user. As such, please be mindful of the reply message. | |
479 | * | |
480 | * e.g., 'adding monitor mon.foo' is okay ('adding' is an on-going | |
481 | * operation and conveys its not-yet-permanent nature); whereas | |
482 | * 'added monitor mon.foo' presumes the action has successfully | |
483 | * completed and state has been committed, which may not be true. | |
484 | */ | |
485 | ||
486 | ||
487 | bool propose = false; | |
488 | if (prefix == "mon add") { | |
489 | string name; | |
490 | cmd_getval(g_ceph_context, cmdmap, "name", name); | |
491 | string addrstr; | |
492 | cmd_getval(g_ceph_context, cmdmap, "addr", addrstr); | |
493 | entity_addr_t addr; | |
494 | bufferlist rdata; | |
495 | ||
496 | if (!addr.parse(addrstr.c_str())) { | |
497 | err = -EINVAL; | |
498 | ss << "addr " << addrstr << "does not parse"; | |
499 | goto reply; | |
500 | } | |
501 | ||
502 | if (addr.get_port() == 0) { | |
503 | ss << "port defaulted to " << CEPH_MON_PORT; | |
504 | addr.set_port(CEPH_MON_PORT); | |
505 | } | |
506 | ||
507 | /** | |
508 | * If we have a monitor with the same name and different addr, then EEXIST | |
509 | * If we have a monitor with the same addr and different name, then EEXIST | |
510 | * If we have a monitor with the same addr and same name, then wait for | |
511 | * the proposal to finish and return success. | |
512 | * If we don't have the monitor, add it. | |
513 | */ | |
514 | ||
515 | err = 0; | |
516 | if (!ss.str().empty()) | |
517 | ss << "; "; | |
518 | ||
519 | do { | |
520 | if (monmap.contains(name)) { | |
521 | if (monmap.get_addr(name) == addr) { | |
522 | // stable map contains monitor with the same name at the same address. | |
523 | // serialize before current pending map. | |
524 | err = 0; // for clarity; this has already been set above. | |
525 | ss << "mon." << name << " at " << addr << " already exists"; | |
526 | goto reply; | |
527 | } else { | |
528 | ss << "mon." << name | |
529 | << " already exists at address " << monmap.get_addr(name); | |
530 | } | |
531 | } else if (monmap.contains(addr)) { | |
532 | // we established on the previous branch that name is different | |
533 | ss << "mon." << monmap.get_name(addr) | |
534 | << " already exists at address " << addr; | |
535 | } else { | |
536 | // go ahead and add | |
537 | break; | |
538 | } | |
539 | err = -EEXIST; | |
540 | goto reply; | |
541 | } while (false); | |
542 | ||
543 | /* Given there's no delay between proposals on the MonmapMonitor (see | |
544 | * MonmapMonitor::should_propose()), there is no point in checking for | |
545 | * a mismatch between name and addr on pending_map. | |
546 | * | |
547 | * Once we established the monitor does not exist in the committed state, | |
548 | * we can simply go ahead and add the monitor. | |
549 | */ | |
550 | ||
551 | pending_map.add(name, addr); | |
552 | pending_map.last_changed = ceph_clock_now(); | |
553 | ss << "adding mon." << name << " at " << addr; | |
554 | propose = true; | |
555 | dout(0) << __func__ << " proposing new mon." << name << dendl; | |
556 | ||
557 | } else if (prefix == "mon remove" || | |
558 | prefix == "mon rm") { | |
559 | string name; | |
560 | cmd_getval(g_ceph_context, cmdmap, "name", name); | |
561 | if (!monmap.contains(name)) { | |
562 | err = 0; | |
563 | ss << "mon." << name << " does not exist or has already been removed"; | |
564 | goto reply; | |
565 | } | |
566 | ||
567 | if (monmap.size() == 1) { | |
568 | err = -EINVAL; | |
569 | ss << "error: refusing removal of last monitor " << name; | |
570 | goto reply; | |
571 | } | |
572 | ||
573 | /* At the time of writing, there is no risk of races when multiple clients | |
574 | * attempt to use the same name. The reason is simple but may not be | |
575 | * obvious. | |
576 | * | |
577 | * In a nutshell, we do not collate proposals on the MonmapMonitor. As | |
578 | * soon as we return 'true' below, PaxosService::dispatch() will check if | |
579 | * the service should propose, and - if so - the service will be marked as | |
580 | * 'proposing' and a proposal will be triggered. The PaxosService class | |
581 | * guarantees that once a service is marked 'proposing' no further writes | |
582 | * will be handled. | |
583 | * | |
584 | * The decision on whether the service should propose or not is, in this | |
585 | * case, made by MonmapMonitor::should_propose(), which always considers | |
586 | * the proposal delay being 0.0 seconds. This is key for PaxosService to | |
587 | * trigger the proposal immediately. | |
588 | * 0.0 seconds of delay. | |
589 | * | |
590 | * From the above, there's no point in performing further checks on the | |
591 | * pending_map, as we don't ever have multiple proposals in-flight in | |
592 | * this service. As we've established the committed state contains the | |
593 | * monitor, we can simply go ahead and remove it. | |
594 | * | |
595 | * Please note that the code hinges on all of the above to be true. It | |
596 | * has been true since time immemorial and we don't see a good reason | |
597 | * to make it sturdier at this time - mainly because we don't think it's | |
598 | * going to change any time soon, lest for any bug that may be unwillingly | |
599 | * introduced. | |
600 | */ | |
601 | ||
602 | entity_addr_t addr = pending_map.get_addr(name); | |
603 | pending_map.remove(name); | |
604 | pending_map.last_changed = ceph_clock_now(); | |
605 | ss << "removing mon." << name << " at " << addr | |
606 | << ", there will be " << pending_map.size() << " monitors" ; | |
607 | propose = true; | |
608 | err = 0; | |
609 | ||
610 | } else if (prefix == "mon feature set") { | |
611 | ||
612 | /* PLEASE NOTE: | |
613 | * | |
614 | * We currently only support setting/unsetting persistent features. | |
615 | * This is by design, given at the moment we still don't have optional | |
616 | * features, and, as such, there is no point introducing an interface | |
617 | * to manipulate them. This allows us to provide a cleaner, more | |
618 | * intuitive interface to the user, modifying solely persistent | |
619 | * features. | |
620 | * | |
621 | * In the future we should consider adding another interface to handle | |
622 | * optional features/flags; e.g., 'mon feature flag set/unset', or | |
623 | * 'mon flag set/unset'. | |
624 | */ | |
625 | string feature_name; | |
626 | if (!cmd_getval(g_ceph_context, cmdmap, "feature_name", feature_name)) { | |
627 | ss << "missing required feature name"; | |
628 | err = -EINVAL; | |
629 | goto reply; | |
630 | } | |
631 | ||
632 | mon_feature_t feature; | |
633 | feature = ceph::features::mon::get_feature_by_name(feature_name); | |
634 | if (feature == ceph::features::mon::FEATURE_NONE) { | |
635 | ss << "unknown feature '" << feature_name << "'"; | |
636 | err = -ENOENT; | |
637 | goto reply; | |
638 | } | |
639 | ||
640 | string sure; | |
641 | if (!cmd_getval(g_ceph_context, cmdmap, "sure", sure) || | |
642 | sure != "--yes-i-really-mean-it") { | |
643 | ss << "please specify '--yes-i-really-mean-it' if you " | |
644 | << "really, **really** want to set feature '" | |
645 | << feature << "' in the monmap."; | |
646 | err = -EPERM; | |
647 | goto reply; | |
648 | } | |
649 | ||
650 | if (!mon->get_quorum_mon_features().contains_all(feature)) { | |
651 | ss << "current quorum does not support feature '" << feature | |
652 | << "'; supported features: " | |
653 | << mon->get_quorum_mon_features(); | |
654 | err = -EINVAL; | |
655 | goto reply; | |
656 | } | |
657 | ||
658 | ss << "setting feature '" << feature << "'"; | |
659 | ||
660 | err = 0; | |
661 | if (monmap.persistent_features.contains_all(feature)) { | |
662 | dout(10) << __func__ << " feature '" << feature | |
663 | << "' already set on monmap; no-op." << dendl; | |
664 | goto reply; | |
665 | } | |
666 | ||
667 | pending_map.persistent_features.set_feature(feature); | |
668 | pending_map.last_changed = ceph_clock_now(); | |
669 | propose = true; | |
670 | ||
671 | dout(1) << __func__ << ss.str() << "; new features will be: " | |
672 | << "persistent = " << pending_map.persistent_features | |
673 | // output optional nevertheless, for auditing purposes. | |
674 | << ", optional = " << pending_map.optional_features << dendl; | |
675 | ||
676 | } else { | |
677 | ss << "unknown command " << prefix; | |
678 | err = -EINVAL; | |
679 | } | |
680 | ||
681 | reply: | |
682 | getline(ss, rs); | |
683 | mon->reply_command(op, err, rs, get_last_committed()); | |
684 | // we are returning to the user; do not propose. | |
685 | return propose; | |
686 | } | |
687 | ||
688 | bool MonmapMonitor::preprocess_join(MonOpRequestRef op) | |
689 | { | |
690 | MMonJoin *join = static_cast<MMonJoin*>(op->get_req()); | |
691 | dout(10) << "preprocess_join " << join->name << " at " << join->addr << dendl; | |
692 | ||
693 | MonSession *session = join->get_session(); | |
694 | if (!session || | |
695 | !session->is_capable("mon", MON_CAP_W | MON_CAP_X)) { | |
696 | dout(10) << " insufficient caps" << dendl; | |
697 | return true; | |
698 | } | |
699 | ||
700 | if (pending_map.contains(join->name) && !pending_map.get_addr(join->name).is_blank_ip()) { | |
701 | dout(10) << " already have " << join->name << dendl; | |
702 | return true; | |
703 | } | |
704 | if (pending_map.contains(join->addr) && pending_map.get_name(join->addr) == join->name) { | |
705 | dout(10) << " already have " << join->addr << dendl; | |
706 | return true; | |
707 | } | |
708 | return false; | |
709 | } | |
710 | bool MonmapMonitor::prepare_join(MonOpRequestRef op) | |
711 | { | |
712 | MMonJoin *join = static_cast<MMonJoin*>(op->get_req()); | |
713 | dout(0) << "adding/updating " << join->name << " at " << join->addr << " to monitor cluster" << dendl; | |
714 | if (pending_map.contains(join->name)) | |
715 | pending_map.remove(join->name); | |
716 | if (pending_map.contains(join->addr)) | |
717 | pending_map.remove(pending_map.get_name(join->addr)); | |
718 | pending_map.add(join->name, join->addr); | |
719 | pending_map.last_changed = ceph_clock_now(); | |
720 | return true; | |
721 | } | |
722 | ||
723 | bool MonmapMonitor::should_propose(double& delay) | |
724 | { | |
725 | delay = 0.0; | |
726 | return true; | |
727 | } | |
728 | ||
729 | void MonmapMonitor::get_health(list<pair<health_status_t, string> >& summary, | |
730 | list<pair<health_status_t, string> > *detail, | |
731 | CephContext *cct) const | |
732 | { | |
733 | int max = mon->monmap->size(); | |
734 | int actual = mon->get_quorum().size(); | |
735 | if (actual < max) { | |
736 | ostringstream ss; | |
737 | ss << (max-actual) << " mons down, quorum " << mon->get_quorum() << " " << mon->get_quorum_names(); | |
738 | summary.push_back(make_pair(HEALTH_WARN, ss.str())); | |
739 | if (detail) { | |
740 | set<int> q = mon->get_quorum(); | |
741 | for (int i=0; i<max; i++) { | |
742 | if (q.count(i) == 0) { | |
743 | ostringstream ss; | |
744 | ss << "mon." << mon->monmap->get_name(i) << " (rank " << i | |
745 | << ") addr " << mon->monmap->get_addr(i) | |
746 | << " is down (out of quorum)"; | |
747 | detail->push_back(make_pair(HEALTH_WARN, ss.str())); | |
748 | } | |
749 | } | |
750 | } | |
751 | } | |
752 | } | |
753 | ||
754 | int MonmapMonitor::get_monmap(bufferlist &bl) | |
755 | { | |
756 | version_t latest_ver = get_last_committed(); | |
757 | dout(10) << __func__ << " ver " << latest_ver << dendl; | |
758 | ||
759 | if (!mon->store->exists(get_service_name(), stringify(latest_ver))) | |
760 | return -ENOENT; | |
761 | ||
762 | int err = get_version(latest_ver, bl); | |
763 | if (err < 0) { | |
764 | dout(1) << __func__ << " error obtaining monmap: " | |
765 | << cpp_strerror(err) << dendl; | |
766 | return err; | |
767 | } | |
768 | return 0; | |
769 | } | |
770 | ||
771 | void MonmapMonitor::check_subs() | |
772 | { | |
773 | const string type = "monmap"; | |
774 | mon->with_session_map([this, &type](const MonSessionMap& session_map) { | |
775 | auto subs = session_map.subs.find(type); | |
776 | if (subs == session_map.subs.end()) | |
777 | return; | |
778 | for (auto sub : *subs->second) { | |
779 | check_sub(sub); | |
780 | } | |
781 | }); | |
782 | } | |
783 | ||
784 | void MonmapMonitor::check_sub(Subscription *sub) | |
785 | { | |
786 | const auto epoch = mon->monmap->get_epoch(); | |
787 | dout(10) << __func__ | |
788 | << " monmap next " << sub->next | |
789 | << " have " << epoch << dendl; | |
790 | if (sub->next <= epoch) { | |
791 | mon->send_latest_monmap(sub->session->con.get()); | |
792 | if (sub->onetime) { | |
793 | mon->with_session_map([this, sub](MonSessionMap& session_map) { | |
794 | session_map.remove_sub(sub); | |
795 | }); | |
796 | } else { | |
797 | sub->next = epoch + 1; | |
798 | } | |
799 | } | |
800 | } |