]> git.proxmox.com Git - ceph.git/blob - ceph/src/mon/MgrMonitor.cc
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / mon / MgrMonitor.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2016 John Spray <john.spray@redhat.com>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 */
13
14 #include "messages/MMgrBeacon.h"
15 #include "messages/MMgrMap.h"
16 #include "messages/MMgrDigest.h"
17
18 #include "PGMap.h"
19 #include "PGMonitor.h"
20 #include "include/stringify.h"
21 #include "mgr/MgrContext.h"
22 #include "OSDMonitor.h"
23
24 #include "MgrMonitor.h"
25
26 #define dout_subsys ceph_subsys_mon
27 #undef dout_prefix
28 #define dout_prefix _prefix(_dout, mon, map)
29 static ostream& _prefix(std::ostream *_dout, Monitor *mon,
30 const MgrMap& mgrmap) {
31 return *_dout << "mon." << mon->name << "@" << mon->rank
32 << "(" << mon->get_state_name()
33 << ").mgr e" << mgrmap.get_epoch() << " ";
34 }
35
36 void MgrMonitor::create_initial()
37 {
38 }
39
40 void MgrMonitor::update_from_paxos(bool *need_bootstrap)
41 {
42 version_t version = get_last_committed();
43 if (version != map.epoch) {
44 dout(4) << "loading version " << version << dendl;
45
46 bufferlist bl;
47 int err = get_version(version, bl);
48 assert(err == 0);
49
50 bufferlist::iterator p = bl.begin();
51 map.decode(p);
52
53 dout(4) << "active server: " << map.active_addr
54 << "(" << map.active_gid << ")" << dendl;
55
56 if (map.available) {
57 first_seen_inactive = utime_t();
58 } else {
59 first_seen_inactive = ceph_clock_now();
60 }
61
62 check_subs();
63 }
64
65 // feed our pet MgrClient
66 mon->mgr_client.ms_dispatch(new MMgrMap(map));
67 }
68
69 void MgrMonitor::create_pending()
70 {
71 pending_map = map;
72 pending_map.epoch++;
73 }
74
75 void MgrMonitor::encode_pending(MonitorDBStore::TransactionRef t)
76 {
77 dout(10) << __func__ << " " << pending_map << dendl;
78 bufferlist bl;
79 pending_map.encode(bl, mon->get_quorum_con_features());
80 put_version(t, pending_map.epoch, bl);
81 put_last_committed(t, pending_map.epoch);
82 }
83
84 bool MgrMonitor::check_caps(MonOpRequestRef op, const uuid_d& fsid)
85 {
86 // check permissions
87 MonSession *session = op->get_session();
88 if (!session)
89 return false;
90 if (!session->is_capable("mgr", MON_CAP_X)) {
91 dout(1) << __func__ << " insufficient caps " << session->caps << dendl;
92 return false;
93 }
94 if (fsid != mon->monmap->fsid) {
95 dout(1) << __func__ << " op fsid " << fsid
96 << " != " << mon->monmap->fsid << dendl;
97 return false;
98 }
99 return true;
100 }
101
102 bool MgrMonitor::preprocess_query(MonOpRequestRef op)
103 {
104 PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req());
105 switch (m->get_type()) {
106 case MSG_MGR_BEACON:
107 return preprocess_beacon(op);
108 case MSG_MON_COMMAND:
109 return preprocess_command(op);
110 default:
111 mon->no_reply(op);
112 derr << "Unhandled message type " << m->get_type() << dendl;
113 return true;
114 }
115 }
116
117 bool MgrMonitor::prepare_update(MonOpRequestRef op)
118 {
119 PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req());
120 switch (m->get_type()) {
121 case MSG_MGR_BEACON:
122 return prepare_beacon(op);
123
124 case MSG_MON_COMMAND:
125 return prepare_command(op);
126
127 default:
128 mon->no_reply(op);
129 derr << "Unhandled message type " << m->get_type() << dendl;
130 return true;
131 }
132 }
133
134
135
136 class C_Updated : public Context {
137 MgrMonitor *mm;
138 MonOpRequestRef op;
139 public:
140 C_Updated(MgrMonitor *a, MonOpRequestRef c) :
141 mm(a), op(c) {}
142 void finish(int r) override {
143 if (r >= 0) {
144 // Success
145 } else if (r == -ECANCELED) {
146 mm->mon->no_reply(op);
147 } else {
148 mm->dispatch(op); // try again
149 }
150 }
151 };
152
153 bool MgrMonitor::preprocess_beacon(MonOpRequestRef op)
154 {
155 MMgrBeacon *m = static_cast<MMgrBeacon*>(op->get_req());
156 dout(4) << "beacon from " << m->get_gid() << dendl;
157
158 if (!check_caps(op, m->get_fsid())) {
159 // drop it on the floor
160 return true;
161 }
162
163 // always send this to the leader's prepare_beacon()
164 return false;
165 }
166
167 bool MgrMonitor::prepare_beacon(MonOpRequestRef op)
168 {
169 MMgrBeacon *m = static_cast<MMgrBeacon*>(op->get_req());
170 dout(4) << "beacon from " << m->get_gid() << dendl;
171
172 // See if we are seeing same name, new GID for the active daemon
173 if (m->get_name() == pending_map.active_name
174 && m->get_gid() != pending_map.active_gid)
175 {
176 dout(4) << "Active daemon restart (mgr." << m->get_name() << ")" << dendl;
177 drop_active();
178 }
179
180 // See if we are seeing same name, new GID for any standbys
181 for (const auto &i : pending_map.standbys) {
182 const StandbyInfo &s = i.second;
183 if (s.name == m->get_name() && s.gid != m->get_gid()) {
184 dout(4) << "Standby daemon restart (mgr." << m->get_name() << ")" << dendl;
185 drop_standby(i.first);
186 break;
187 }
188 }
189
190 last_beacon[m->get_gid()] = ceph_clock_now();
191
192 // Track whether we modified pending_map
193 bool updated = false;
194
195 if (pending_map.active_gid == m->get_gid()) {
196 // A beacon from the currently active daemon
197 if (pending_map.active_addr != m->get_server_addr()) {
198 dout(4) << "learned address " << m->get_server_addr()
199 << " (was " << pending_map.active_addr << ")" << dendl;
200 pending_map.active_addr = m->get_server_addr();
201 updated = true;
202 }
203
204 if (pending_map.get_available() != m->get_available()) {
205 dout(4) << "available " << m->get_gid() << dendl;
206 pending_map.available = m->get_available();
207 updated = true;
208 }
209 } else if (pending_map.active_gid == 0) {
210 // There is no currently active daemon, select this one.
211 if (pending_map.standbys.count(m->get_gid())) {
212 drop_standby(m->get_gid());
213 }
214 dout(4) << "selecting new active " << m->get_gid()
215 << " " << m->get_name()
216 << " (was " << pending_map.active_gid << " "
217 << pending_map.active_name << ")" << dendl;
218 pending_map.active_gid = m->get_gid();
219 pending_map.active_name = m->get_name();
220
221 updated = true;
222 } else {
223 if (pending_map.standbys.count(m->get_gid()) > 0) {
224 dout(10) << "from existing standby " << m->get_gid() << dendl;
225 } else {
226 dout(10) << "new standby " << m->get_gid() << dendl;
227 pending_map.standbys[m->get_gid()] = {m->get_gid(), m->get_name()};
228 updated = true;
229 }
230 }
231
232 if (updated) {
233 dout(4) << "updating map" << dendl;
234 wait_for_finished_proposal(op, new C_Updated(this, op));
235 } else {
236 dout(10) << "no change" << dendl;
237 }
238
239 return updated;
240 }
241
242 void MgrMonitor::check_subs()
243 {
244 const std::string type = "mgrmap";
245 if (mon->session_map.subs.count(type) == 0)
246 return;
247 for (auto sub : *(mon->session_map.subs[type])) {
248 check_sub(sub);
249 }
250 }
251
252 void MgrMonitor::check_sub(Subscription *sub)
253 {
254 if (sub->type == "mgrmap") {
255 if (sub->next <= map.get_epoch()) {
256 dout(20) << "Sending map to subscriber " << sub->session->con << dendl;
257 sub->session->con->send_message(new MMgrMap(map));
258 if (sub->onetime) {
259 mon->session_map.remove_sub(sub);
260 } else {
261 sub->next = map.get_epoch() + 1;
262 }
263 }
264 } else {
265 assert(sub->type == "mgrdigest");
266 if (digest_callback == nullptr) {
267 send_digests();
268 }
269 }
270 }
271
272 /**
273 * Handle digest subscriptions separately (outside of check_sub) because
274 * they are going to be periodic rather than version-driven.
275 */
276 void MgrMonitor::send_digests()
277 {
278 digest_callback = nullptr;
279
280 const std::string type = "mgrdigest";
281 if (mon->session_map.subs.count(type) == 0)
282 return;
283
284 for (auto sub : *(mon->session_map.subs[type])) {
285 MMgrDigest *mdigest = new MMgrDigest;
286
287 JSONFormatter f;
288 std::list<std::string> health_strs;
289 mon->get_health(health_strs, nullptr, &f);
290 f.flush(mdigest->health_json);
291 f.reset();
292
293 std::ostringstream ss;
294 mon->get_mon_status(&f, ss);
295 f.flush(mdigest->mon_status_json);
296 f.reset();
297
298 sub->session->con->send_message(mdigest);
299 }
300
301 digest_callback = new C_MonContext(mon, [this](int){
302 send_digests();
303 });
304 mon->timer.add_event_after(g_conf->mon_mgr_digest_period, digest_callback);
305 }
306
307 void MgrMonitor::on_active()
308 {
309 if (mon->is_leader())
310 mon->clog->info() << "mgrmap e" << map.epoch << ": " << map;
311 }
312
313 void MgrMonitor::get_health(
314 list<pair<health_status_t,string> >& summary,
315 list<pair<health_status_t,string> > *detail,
316 CephContext *cct) const
317 {
318 // start mgr warnings as soon as the mons and osds are all upgraded,
319 // but before the require_luminous osdmap flag is set. this way the
320 // user gets some warning before the osd flag is set and mgr is
321 // actually *required*.
322 if (!mon->monmap->get_required_features().contains_all(
323 ceph::features::mon::FEATURE_LUMINOUS) ||
324 !HAVE_FEATURE(mon->osdmon()->osdmap.get_up_osd_features(),
325 SERVER_LUMINOUS)) {
326 return;
327 }
328
329 if (!map.available) {
330 auto level = HEALTH_WARN;
331 // do not escalate to ERR if they are still upgrading to jewel.
332 if (mon->osdmon()->osdmap.test_flag(CEPH_OSDMAP_REQUIRE_LUMINOUS)) {
333 utime_t now = ceph_clock_now();
334 if (first_seen_inactive != utime_t() &&
335 now - first_seen_inactive > g_conf->mon_mgr_inactive_grace) {
336 level = HEALTH_ERR;
337 }
338 }
339 summary.push_back(make_pair(level, "no active mgr"));
340 }
341 }
342
343 void MgrMonitor::tick()
344 {
345 if (!is_active() || !mon->is_leader())
346 return;
347
348 const utime_t now = ceph_clock_now();
349 utime_t cutoff = now;
350 cutoff -= g_conf->mon_mgr_beacon_grace;
351
352 // Populate any missing beacons (i.e. no beacon since MgrMonitor
353 // instantiation) with the current time, so that they will
354 // eventually look laggy if they fail to give us a beacon.
355 if (pending_map.active_gid != 0
356 && last_beacon.count(pending_map.active_gid) == 0) {
357 last_beacon[pending_map.active_gid] = now;
358 }
359 for (auto s : pending_map.standbys) {
360 if (last_beacon.count(s.first) == 0) {
361 last_beacon[s.first] = now;
362 }
363 }
364
365 // Cull standbys first so that any remaining standbys
366 // will be eligible to take over from the active if we cull him.
367 std::list<uint64_t> dead_standbys;
368 for (const auto &i : pending_map.standbys) {
369 auto last_beacon_time = last_beacon.at(i.first);
370 if (last_beacon_time < cutoff) {
371 dead_standbys.push_back(i.first);
372 }
373 }
374
375 bool propose = false;
376
377 for (auto i : dead_standbys) {
378 dout(4) << "Dropping laggy standby " << i << dendl;
379 drop_standby(i);
380 propose = true;
381 }
382
383 if (pending_map.active_gid != 0
384 && last_beacon.at(pending_map.active_gid) < cutoff) {
385
386 drop_active();
387 propose = true;
388 dout(4) << "Dropping active" << pending_map.active_gid << dendl;
389 if (promote_standby()) {
390 dout(4) << "Promoted standby " << pending_map.active_gid << dendl;
391 } else {
392 dout(4) << "Active is laggy but have no standbys to replace it" << dendl;
393 }
394 } else if (pending_map.active_gid == 0) {
395 if (promote_standby()) {
396 dout(4) << "Promoted standby " << pending_map.active_gid << dendl;
397 propose = true;
398 }
399 }
400
401 if (propose) {
402 propose_pending();
403 }
404 }
405
406 bool MgrMonitor::promote_standby()
407 {
408 assert(pending_map.active_gid == 0);
409 if (pending_map.standbys.size()) {
410 // Promote a replacement (arbitrary choice of standby)
411 auto replacement_gid = pending_map.standbys.begin()->first;
412 pending_map.active_gid = replacement_gid;
413 pending_map.active_name = pending_map.standbys.at(replacement_gid).name;
414 pending_map.available = false;
415 pending_map.active_addr = entity_addr_t();
416
417 drop_standby(replacement_gid);
418 return true;
419 } else {
420 return false;
421 }
422 }
423
424 void MgrMonitor::drop_active()
425 {
426 if (last_beacon.count(pending_map.active_gid) > 0) {
427 last_beacon.erase(pending_map.active_gid);
428 }
429
430 pending_map.active_name = "";
431 pending_map.active_gid = 0;
432 pending_map.available = false;
433 pending_map.active_addr = entity_addr_t();
434 }
435
436 void MgrMonitor::drop_standby(uint64_t gid)
437 {
438 pending_map.standbys.erase(gid);
439 if (last_beacon.count(gid) > 0) {
440 last_beacon.erase(gid);
441 }
442
443 }
444
445 bool MgrMonitor::preprocess_command(MonOpRequestRef op)
446 {
447 return false;
448
449 }
450
451 bool MgrMonitor::prepare_command(MonOpRequestRef op)
452 {
453 MMonCommand *m = static_cast<MMonCommand*>(op->get_req());
454
455 std::stringstream ss;
456 bufferlist rdata;
457
458 std::map<std::string, cmd_vartype> cmdmap;
459 if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) {
460 string rs = ss.str();
461 mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed());
462 return true;
463 }
464
465 MonSession *session = m->get_session();
466 if (!session) {
467 mon->reply_command(op, -EACCES, "access denied", rdata, get_last_committed());
468 return true;
469 }
470
471 string prefix;
472 cmd_getval(g_ceph_context, cmdmap, "prefix", prefix);
473
474 int r = 0;
475
476 if (prefix == "mgr fail") {
477 string who;
478 cmd_getval(g_ceph_context, cmdmap, "who", who);
479
480 std::string err;
481 uint64_t gid = strict_strtol(who.c_str(), 10, &err);
482 bool changed = false;
483 if (!err.empty()) {
484 // Does not parse as a gid, treat it as a name
485 if (pending_map.active_name == who) {
486 drop_active();
487 changed = true;
488 } else {
489 gid = 0;
490 for (const auto &i : pending_map.standbys) {
491 if (i.second.name == who) {
492 gid = i.first;
493 break;
494 }
495 }
496 if (gid != 0) {
497 drop_standby(gid);
498 changed = true;
499 } else {
500 ss << "Daemon not found '" << who << "', already failed?";
501 }
502 }
503 } else {
504 if (pending_map.active_gid == gid) {
505 drop_active();
506 changed = true;
507 } else if (pending_map.standbys.count(gid) > 0) {
508 drop_standby(gid);
509 changed = true;
510 } else {
511 ss << "Daemon not found '" << gid << "', already failed?";
512 }
513 }
514
515 if (changed && pending_map.active_gid == 0) {
516 promote_standby();
517 }
518 } else {
519 r = -ENOSYS;
520 }
521
522 dout(4) << __func__ << " done, r=" << r << dendl;
523 /* Compose response */
524 string rs;
525 getline(ss, rs);
526
527 if (r >= 0) {
528 // success.. delay reply
529 wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, r, rs,
530 get_last_committed() + 1));
531 return true;
532 } else {
533 // reply immediately
534 mon->reply_command(op, r, rs, rdata, get_last_committed());
535 return false;
536 }
537 }
538
539 void MgrMonitor::init()
540 {
541 if (digest_callback == nullptr) {
542 send_digests(); // To get it to schedule its own event
543 }
544 }
545
546 void MgrMonitor::on_shutdown()
547 {
548 if (digest_callback) {
549 mon->timer.cancel_event(digest_callback);
550 }
551 }
552