]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2016 John Spray <john.spray@redhat.com> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | */ | |
13 | ||
224ce89b WB |
14 | #include <boost/tokenizer.hpp> |
15 | ||
7c673cae FG |
16 | #include "messages/MMgrBeacon.h" |
17 | #include "messages/MMgrMap.h" | |
18 | #include "messages/MMgrDigest.h" | |
19 | ||
31f18b77 | 20 | #include "PGStatService.h" |
7c673cae FG |
21 | #include "include/stringify.h" |
22 | #include "mgr/MgrContext.h" | |
c07f9fc5 | 23 | #include "mgr/mgr_commands.h" |
7c673cae FG |
24 | #include "OSDMonitor.h" |
25 | ||
26 | #include "MgrMonitor.h" | |
27 | ||
c07f9fc5 FG |
28 | #define MGR_METADATA_PREFIX "mgr_metadata" |
29 | ||
7c673cae FG |
30 | #define dout_subsys ceph_subsys_mon |
31 | #undef dout_prefix | |
32 | #define dout_prefix _prefix(_dout, mon, map) | |
33 | static ostream& _prefix(std::ostream *_dout, Monitor *mon, | |
34 | const MgrMap& mgrmap) { | |
35 | return *_dout << "mon." << mon->name << "@" << mon->rank | |
36 | << "(" << mon->get_state_name() | |
37 | << ").mgr e" << mgrmap.get_epoch() << " "; | |
38 | } | |
39 | ||
c07f9fc5 FG |
40 | // Prefix for mon store of active mgr's command descriptions |
41 | const static std::string command_descs_prefix = "mgr_command_descs"; | |
42 | ||
31f18b77 | 43 | |
b32b8144 FG |
44 | version_t MgrMonitor::get_trim_to() |
45 | { | |
46 | int64_t max = g_conf->get_val<int64_t>("mon_max_mgrmap_epochs"); | |
47 | if (map.epoch > max) { | |
48 | return map.epoch - max; | |
49 | } | |
50 | return 0; | |
51 | } | |
52 | ||
7c673cae FG |
53 | void MgrMonitor::create_initial() |
54 | { | |
3efd9988 FG |
55 | // Take a local copy of initial_modules for tokenizer to iterate over. |
56 | auto initial_modules = g_conf->get_val<std::string>("mgr_initial_modules"); | |
57 | boost::tokenizer<> tok(initial_modules); | |
224ce89b WB |
58 | for (auto& m : tok) { |
59 | pending_map.modules.insert(m); | |
60 | } | |
c07f9fc5 FG |
61 | pending_command_descs = mgr_commands; |
62 | dout(10) << __func__ << " initial modules " << pending_map.modules | |
63 | << ", " << pending_command_descs.size() << " commands" | |
64 | << dendl; | |
7c673cae FG |
65 | } |
66 | ||
3efd9988 FG |
67 | void MgrMonitor::get_store_prefixes(std::set<string>& s) |
68 | { | |
69 | s.insert(service_name); | |
70 | s.insert(command_descs_prefix); | |
71 | s.insert(MGR_METADATA_PREFIX); | |
72 | } | |
73 | ||
7c673cae FG |
74 | void MgrMonitor::update_from_paxos(bool *need_bootstrap) |
75 | { | |
76 | version_t version = get_last_committed(); | |
77 | if (version != map.epoch) { | |
78 | dout(4) << "loading version " << version << dendl; | |
79 | ||
80 | bufferlist bl; | |
81 | int err = get_version(version, bl); | |
82 | assert(err == 0); | |
83 | ||
c07f9fc5 FG |
84 | bool old_available = map.get_available(); |
85 | uint64_t old_gid = map.get_active_gid(); | |
86 | ||
7c673cae FG |
87 | bufferlist::iterator p = bl.begin(); |
88 | map.decode(p); | |
89 | ||
90 | dout(4) << "active server: " << map.active_addr | |
91 | << "(" << map.active_gid << ")" << dendl; | |
92 | ||
224ce89b WB |
93 | ever_had_active_mgr = get_value("ever_had_active_mgr"); |
94 | ||
95 | load_health(); | |
96 | ||
7c673cae FG |
97 | if (map.available) { |
98 | first_seen_inactive = utime_t(); | |
99 | } else { | |
100 | first_seen_inactive = ceph_clock_now(); | |
101 | } | |
102 | ||
103 | check_subs(); | |
c07f9fc5 FG |
104 | |
105 | if (version == 1 | |
3efd9988 FG |
106 | || command_descs.empty() |
107 | || (map.get_available() | |
108 | && (!old_available || old_gid != map.get_active_gid()))) { | |
c07f9fc5 FG |
109 | dout(4) << "mkfs or daemon transitioned to available, loading commands" |
110 | << dendl; | |
111 | bufferlist loaded_commands; | |
112 | int r = mon->store->get(command_descs_prefix, "", loaded_commands); | |
113 | if (r < 0) { | |
114 | derr << "Failed to load mgr commands: " << cpp_strerror(r) << dendl; | |
115 | } else { | |
116 | auto p = loaded_commands.begin(); | |
117 | ::decode(command_descs, p); | |
118 | } | |
119 | } | |
7c673cae FG |
120 | } |
121 | ||
122 | // feed our pet MgrClient | |
123 | mon->mgr_client.ms_dispatch(new MMgrMap(map)); | |
124 | } | |
125 | ||
126 | void MgrMonitor::create_pending() | |
127 | { | |
128 | pending_map = map; | |
129 | pending_map.epoch++; | |
3efd9988 FG |
130 | |
131 | if (map.get_epoch() == 1 && | |
132 | command_descs.empty() && | |
133 | pending_command_descs.empty()) { | |
134 | // we've been through the initial map and we haven't populated the | |
135 | // command_descs vector. This likely means we came from kraken, where | |
136 | // we wouldn't populate the vector, nor would we write it to disk, on | |
137 | // create_initial(). | |
138 | create_initial(); | |
139 | } | |
7c673cae FG |
140 | } |
141 | ||
224ce89b WB |
142 | health_status_t MgrMonitor::should_warn_about_mgr_down() |
143 | { | |
144 | utime_t now = ceph_clock_now(); | |
145 | // we warn if | |
146 | // - we've ever had an active mgr, or | |
147 | // - we have osds AND we've exceeded the grace period | |
148 | // which means a new mon cluster and be HEALTH_OK indefinitely as long as | |
149 | // no OSDs are ever created. | |
150 | if (ever_had_active_mgr || | |
151 | (mon->osdmon()->osdmap.get_num_osds() > 0 && | |
3efd9988 | 152 | now > mon->monmap->created + g_conf->get_val<int64_t>("mon_mgr_mkfs_grace"))) { |
224ce89b WB |
153 | health_status_t level = HEALTH_WARN; |
154 | if (first_seen_inactive != utime_t() && | |
3efd9988 | 155 | now - first_seen_inactive > g_conf->get_val<int64_t>("mon_mgr_inactive_grace")) { |
224ce89b WB |
156 | level = HEALTH_ERR; |
157 | } | |
158 | return level; | |
159 | } | |
160 | return HEALTH_OK; | |
161 | } | |
162 | ||
7c673cae FG |
163 | void MgrMonitor::encode_pending(MonitorDBStore::TransactionRef t) |
164 | { | |
165 | dout(10) << __func__ << " " << pending_map << dendl; | |
166 | bufferlist bl; | |
167 | pending_map.encode(bl, mon->get_quorum_con_features()); | |
168 | put_version(t, pending_map.epoch, bl); | |
169 | put_last_committed(t, pending_map.epoch); | |
224ce89b | 170 | |
c07f9fc5 FG |
171 | for (auto& p : pending_metadata) { |
172 | dout(10) << __func__ << " set metadata for " << p.first << dendl; | |
173 | t->put(MGR_METADATA_PREFIX, p.first, p.second); | |
174 | } | |
175 | for (auto& name : pending_metadata_rm) { | |
176 | dout(10) << __func__ << " rm metadata for " << name << dendl; | |
177 | t->erase(MGR_METADATA_PREFIX, name); | |
178 | } | |
179 | pending_metadata.clear(); | |
180 | pending_metadata_rm.clear(); | |
181 | ||
224ce89b WB |
182 | health_check_map_t next; |
183 | if (pending_map.active_gid == 0) { | |
184 | auto level = should_warn_about_mgr_down(); | |
185 | if (level != HEALTH_OK) { | |
186 | next.add("MGR_DOWN", level, "no active mgr"); | |
187 | } else { | |
188 | dout(10) << __func__ << " no health warning (never active and new cluster)" | |
189 | << dendl; | |
190 | } | |
191 | } else { | |
192 | put_value(t, "ever_had_active_mgr", 1); | |
193 | } | |
194 | encode_health(next, t); | |
c07f9fc5 FG |
195 | |
196 | if (pending_command_descs.size()) { | |
197 | dout(4) << __func__ << " encoding " << pending_command_descs.size() | |
198 | << " command_descs" << dendl; | |
199 | for (auto& p : pending_command_descs) { | |
200 | p.set_flag(MonCommand::FLAG_MGR); | |
201 | } | |
202 | bufferlist bl; | |
203 | ::encode(pending_command_descs, bl); | |
204 | t->put(command_descs_prefix, "", bl); | |
205 | pending_command_descs.clear(); | |
206 | } | |
7c673cae FG |
207 | } |
208 | ||
209 | bool MgrMonitor::check_caps(MonOpRequestRef op, const uuid_d& fsid) | |
210 | { | |
211 | // check permissions | |
212 | MonSession *session = op->get_session(); | |
213 | if (!session) | |
214 | return false; | |
215 | if (!session->is_capable("mgr", MON_CAP_X)) { | |
216 | dout(1) << __func__ << " insufficient caps " << session->caps << dendl; | |
217 | return false; | |
218 | } | |
219 | if (fsid != mon->monmap->fsid) { | |
220 | dout(1) << __func__ << " op fsid " << fsid | |
221 | << " != " << mon->monmap->fsid << dendl; | |
222 | return false; | |
223 | } | |
224 | return true; | |
225 | } | |
226 | ||
227 | bool MgrMonitor::preprocess_query(MonOpRequestRef op) | |
228 | { | |
229 | PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req()); | |
230 | switch (m->get_type()) { | |
231 | case MSG_MGR_BEACON: | |
232 | return preprocess_beacon(op); | |
233 | case MSG_MON_COMMAND: | |
234 | return preprocess_command(op); | |
235 | default: | |
236 | mon->no_reply(op); | |
237 | derr << "Unhandled message type " << m->get_type() << dendl; | |
238 | return true; | |
239 | } | |
240 | } | |
241 | ||
242 | bool MgrMonitor::prepare_update(MonOpRequestRef op) | |
243 | { | |
244 | PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req()); | |
245 | switch (m->get_type()) { | |
246 | case MSG_MGR_BEACON: | |
247 | return prepare_beacon(op); | |
248 | ||
249 | case MSG_MON_COMMAND: | |
250 | return prepare_command(op); | |
251 | ||
252 | default: | |
253 | mon->no_reply(op); | |
254 | derr << "Unhandled message type " << m->get_type() << dendl; | |
255 | return true; | |
256 | } | |
257 | } | |
258 | ||
259 | ||
260 | ||
261 | class C_Updated : public Context { | |
262 | MgrMonitor *mm; | |
263 | MonOpRequestRef op; | |
264 | public: | |
265 | C_Updated(MgrMonitor *a, MonOpRequestRef c) : | |
266 | mm(a), op(c) {} | |
267 | void finish(int r) override { | |
268 | if (r >= 0) { | |
269 | // Success | |
270 | } else if (r == -ECANCELED) { | |
271 | mm->mon->no_reply(op); | |
272 | } else { | |
273 | mm->dispatch(op); // try again | |
274 | } | |
275 | } | |
276 | }; | |
277 | ||
278 | bool MgrMonitor::preprocess_beacon(MonOpRequestRef op) | |
279 | { | |
280 | MMgrBeacon *m = static_cast<MMgrBeacon*>(op->get_req()); | |
94b18763 | 281 | mon->no_reply(op); // we never reply to beacons |
7c673cae FG |
282 | dout(4) << "beacon from " << m->get_gid() << dendl; |
283 | ||
284 | if (!check_caps(op, m->get_fsid())) { | |
285 | // drop it on the floor | |
286 | return true; | |
287 | } | |
288 | ||
289 | // always send this to the leader's prepare_beacon() | |
290 | return false; | |
291 | } | |
292 | ||
293 | bool MgrMonitor::prepare_beacon(MonOpRequestRef op) | |
294 | { | |
295 | MMgrBeacon *m = static_cast<MMgrBeacon*>(op->get_req()); | |
296 | dout(4) << "beacon from " << m->get_gid() << dendl; | |
297 | ||
298 | // See if we are seeing same name, new GID for the active daemon | |
299 | if (m->get_name() == pending_map.active_name | |
300 | && m->get_gid() != pending_map.active_gid) | |
301 | { | |
302 | dout(4) << "Active daemon restart (mgr." << m->get_name() << ")" << dendl; | |
224ce89b WB |
303 | mon->clog->info() << "Active manager daemon " << m->get_name() |
304 | << " restarted"; | |
7c673cae FG |
305 | drop_active(); |
306 | } | |
307 | ||
308 | // See if we are seeing same name, new GID for any standbys | |
309 | for (const auto &i : pending_map.standbys) { | |
310 | const StandbyInfo &s = i.second; | |
311 | if (s.name == m->get_name() && s.gid != m->get_gid()) { | |
312 | dout(4) << "Standby daemon restart (mgr." << m->get_name() << ")" << dendl; | |
224ce89b WB |
313 | mon->clog->debug() << "Standby manager daemon " << m->get_name() |
314 | << " restarted"; | |
7c673cae FG |
315 | drop_standby(i.first); |
316 | break; | |
317 | } | |
318 | } | |
319 | ||
31f18b77 | 320 | last_beacon[m->get_gid()] = ceph::coarse_mono_clock::now(); |
7c673cae FG |
321 | |
322 | // Track whether we modified pending_map | |
323 | bool updated = false; | |
324 | ||
325 | if (pending_map.active_gid == m->get_gid()) { | |
3efd9988 FG |
326 | if (pending_map.services != m->get_services()) { |
327 | dout(4) << "updated services from mgr." << m->get_name() | |
328 | << ": " << m->get_services() << dendl; | |
329 | pending_map.services = m->get_services(); | |
330 | updated = true; | |
331 | } | |
332 | ||
7c673cae FG |
333 | // A beacon from the currently active daemon |
334 | if (pending_map.active_addr != m->get_server_addr()) { | |
335 | dout(4) << "learned address " << m->get_server_addr() | |
336 | << " (was " << pending_map.active_addr << ")" << dendl; | |
337 | pending_map.active_addr = m->get_server_addr(); | |
338 | updated = true; | |
339 | } | |
340 | ||
341 | if (pending_map.get_available() != m->get_available()) { | |
342 | dout(4) << "available " << m->get_gid() << dendl; | |
224ce89b WB |
343 | mon->clog->info() << "Manager daemon " << pending_map.active_name |
344 | << " is now available"; | |
c07f9fc5 FG |
345 | |
346 | // This beacon should include command descriptions | |
347 | pending_command_descs = m->get_command_descs(); | |
348 | if (pending_command_descs.empty()) { | |
349 | // This should not happen, but it also isn't fatal: we just | |
350 | // won't successfully update our list of commands. | |
351 | dout(4) << "First available beacon from " << pending_map.active_name | |
352 | << "(" << m->get_gid() << ") does not include command descs" | |
353 | << dendl; | |
354 | } else { | |
355 | dout(4) << "First available beacon from " << pending_map.active_name | |
356 | << "(" << m->get_gid() << ") includes " | |
357 | << pending_command_descs.size() << " command descs" << dendl; | |
358 | } | |
359 | ||
7c673cae FG |
360 | pending_map.available = m->get_available(); |
361 | updated = true; | |
362 | } | |
224ce89b WB |
363 | if (pending_map.available_modules != m->get_available_modules()) { |
364 | dout(4) << "available_modules " << m->get_available_modules() | |
365 | << " (was " << pending_map.available_modules << ")" << dendl; | |
366 | pending_map.available_modules = m->get_available_modules(); | |
367 | updated = true; | |
368 | } | |
7c673cae FG |
369 | } else if (pending_map.active_gid == 0) { |
370 | // There is no currently active daemon, select this one. | |
371 | if (pending_map.standbys.count(m->get_gid())) { | |
181888fb | 372 | drop_standby(m->get_gid(), false); |
7c673cae FG |
373 | } |
374 | dout(4) << "selecting new active " << m->get_gid() | |
375 | << " " << m->get_name() | |
376 | << " (was " << pending_map.active_gid << " " | |
377 | << pending_map.active_name << ")" << dendl; | |
378 | pending_map.active_gid = m->get_gid(); | |
379 | pending_map.active_name = m->get_name(); | |
224ce89b | 380 | pending_map.available_modules = m->get_available_modules(); |
c07f9fc5 FG |
381 | ::encode(m->get_metadata(), pending_metadata[m->get_name()]); |
382 | pending_metadata_rm.erase(m->get_name()); | |
224ce89b WB |
383 | |
384 | mon->clog->info() << "Activating manager daemon " | |
385 | << pending_map.active_name; | |
7c673cae FG |
386 | |
387 | updated = true; | |
388 | } else { | |
389 | if (pending_map.standbys.count(m->get_gid()) > 0) { | |
390 | dout(10) << "from existing standby " << m->get_gid() << dendl; | |
224ce89b WB |
391 | if (pending_map.standbys[m->get_gid()].available_modules != |
392 | m->get_available_modules()) { | |
393 | dout(10) << "existing standby " << m->get_gid() << " available_modules " | |
394 | << m->get_available_modules() << " (was " | |
395 | << pending_map.standbys[m->get_gid()].available_modules << ")" | |
396 | << dendl; | |
397 | pending_map.standbys[m->get_gid()].available_modules = | |
398 | m->get_available_modules(); | |
399 | updated = true; | |
400 | } | |
7c673cae FG |
401 | } else { |
402 | dout(10) << "new standby " << m->get_gid() << dendl; | |
224ce89b WB |
403 | mon->clog->debug() << "Standby manager daemon " << m->get_name() |
404 | << " started"; | |
c07f9fc5 FG |
405 | pending_map.standbys[m->get_gid()] = {m->get_gid(), m->get_name(), |
406 | m->get_available_modules()}; | |
407 | ::encode(m->get_metadata(), pending_metadata[m->get_name()]); | |
408 | pending_metadata_rm.erase(m->get_name()); | |
7c673cae FG |
409 | updated = true; |
410 | } | |
411 | } | |
412 | ||
413 | if (updated) { | |
414 | dout(4) << "updating map" << dendl; | |
415 | wait_for_finished_proposal(op, new C_Updated(this, op)); | |
416 | } else { | |
417 | dout(10) << "no change" << dendl; | |
418 | } | |
419 | ||
420 | return updated; | |
421 | } | |
422 | ||
423 | void MgrMonitor::check_subs() | |
424 | { | |
425 | const std::string type = "mgrmap"; | |
426 | if (mon->session_map.subs.count(type) == 0) | |
427 | return; | |
428 | for (auto sub : *(mon->session_map.subs[type])) { | |
429 | check_sub(sub); | |
430 | } | |
431 | } | |
432 | ||
433 | void MgrMonitor::check_sub(Subscription *sub) | |
434 | { | |
435 | if (sub->type == "mgrmap") { | |
436 | if (sub->next <= map.get_epoch()) { | |
224ce89b WB |
437 | dout(20) << "Sending map to subscriber " << sub->session->con |
438 | << " " << sub->session->con->get_peer_addr() << dendl; | |
7c673cae FG |
439 | sub->session->con->send_message(new MMgrMap(map)); |
440 | if (sub->onetime) { | |
441 | mon->session_map.remove_sub(sub); | |
442 | } else { | |
443 | sub->next = map.get_epoch() + 1; | |
444 | } | |
445 | } | |
446 | } else { | |
447 | assert(sub->type == "mgrdigest"); | |
c07f9fc5 FG |
448 | if (sub->next == 0) { |
449 | // new registration; cancel previous timer | |
450 | cancel_timer(); | |
451 | } | |
31f18b77 | 452 | if (digest_event == nullptr) { |
7c673cae FG |
453 | send_digests(); |
454 | } | |
455 | } | |
456 | } | |
457 | ||
458 | /** | |
459 | * Handle digest subscriptions separately (outside of check_sub) because | |
460 | * they are going to be periodic rather than version-driven. | |
461 | */ | |
462 | void MgrMonitor::send_digests() | |
463 | { | |
31f18b77 FG |
464 | cancel_timer(); |
465 | ||
7c673cae FG |
466 | const std::string type = "mgrdigest"; |
467 | if (mon->session_map.subs.count(type) == 0) | |
468 | return; | |
469 | ||
b32b8144 FG |
470 | if (!is_active()) { |
471 | // if paxos is currently not active, don't send a digest but reenable timer | |
472 | goto timer; | |
473 | } | |
474 | dout(10) << __func__ << dendl; | |
475 | ||
7c673cae | 476 | for (auto sub : *(mon->session_map.subs[type])) { |
224ce89b WB |
477 | dout(10) << __func__ << " sending digest to subscriber " << sub->session->con |
478 | << " " << sub->session->con->get_peer_addr() << dendl; | |
7c673cae FG |
479 | MMgrDigest *mdigest = new MMgrDigest; |
480 | ||
481 | JSONFormatter f; | |
224ce89b | 482 | mon->get_health_status(true, &f, nullptr, nullptr, nullptr); |
7c673cae FG |
483 | f.flush(mdigest->health_json); |
484 | f.reset(); | |
485 | ||
486 | std::ostringstream ss; | |
487 | mon->get_mon_status(&f, ss); | |
488 | f.flush(mdigest->mon_status_json); | |
489 | f.reset(); | |
490 | ||
491 | sub->session->con->send_message(mdigest); | |
492 | } | |
493 | ||
b32b8144 | 494 | timer: |
3efd9988 FG |
495 | digest_event = mon->timer.add_event_after( |
496 | g_conf->get_val<int64_t>("mon_mgr_digest_period"), | |
497 | new C_MonContext(mon, [this](int) { | |
7c673cae | 498 | send_digests(); |
3efd9988 | 499 | })); |
31f18b77 FG |
500 | } |
501 | ||
502 | void MgrMonitor::cancel_timer() | |
503 | { | |
504 | if (digest_event) { | |
505 | mon->timer.cancel_event(digest_event); | |
506 | digest_event = nullptr; | |
507 | } | |
7c673cae FG |
508 | } |
509 | ||
510 | void MgrMonitor::on_active() | |
511 | { | |
224ce89b WB |
512 | if (mon->is_leader()) { |
513 | mon->clog->debug() << "mgrmap e" << map.epoch << ": " << map; | |
514 | } | |
7c673cae FG |
515 | } |
516 | ||
517 | void MgrMonitor::get_health( | |
518 | list<pair<health_status_t,string> >& summary, | |
519 | list<pair<health_status_t,string> > *detail, | |
520 | CephContext *cct) const | |
521 | { | |
522 | // start mgr warnings as soon as the mons and osds are all upgraded, | |
523 | // but before the require_luminous osdmap flag is set. this way the | |
524 | // user gets some warning before the osd flag is set and mgr is | |
525 | // actually *required*. | |
526 | if (!mon->monmap->get_required_features().contains_all( | |
527 | ceph::features::mon::FEATURE_LUMINOUS) || | |
528 | !HAVE_FEATURE(mon->osdmon()->osdmap.get_up_osd_features(), | |
529 | SERVER_LUMINOUS)) { | |
530 | return; | |
531 | } | |
532 | ||
224ce89b | 533 | if (map.active_gid == 0) { |
7c673cae FG |
534 | auto level = HEALTH_WARN; |
535 | // do not escalate to ERR if they are still upgrading to jewel. | |
31f18b77 | 536 | if (mon->osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) { |
7c673cae FG |
537 | utime_t now = ceph_clock_now(); |
538 | if (first_seen_inactive != utime_t() && | |
3efd9988 | 539 | now - first_seen_inactive > g_conf->get_val<int64_t>("mon_mgr_inactive_grace")) { |
7c673cae FG |
540 | level = HEALTH_ERR; |
541 | } | |
542 | } | |
543 | summary.push_back(make_pair(level, "no active mgr")); | |
544 | } | |
545 | } | |
546 | ||
547 | void MgrMonitor::tick() | |
548 | { | |
549 | if (!is_active() || !mon->is_leader()) | |
550 | return; | |
551 | ||
31f18b77 | 552 | const auto now = ceph::coarse_mono_clock::now(); |
3efd9988 FG |
553 | |
554 | const auto mgr_beacon_grace = std::chrono::seconds( | |
555 | g_conf->get_val<int64_t>("mon_mgr_beacon_grace")); | |
556 | ||
557 | // Note that this is the mgr daemon's tick period, not ours (the | |
558 | // beacon is sent with this period). | |
559 | const auto mgr_tick_period = std::chrono::seconds( | |
560 | g_conf->get_val<int64_t>("mgr_tick_period")); | |
561 | ||
562 | if (last_tick != ceph::coarse_mono_clock::time_point::min() | |
563 | && (now - last_tick > (mgr_beacon_grace - mgr_tick_period))) { | |
564 | // This case handles either local slowness (calls being delayed | |
565 | // for whatever reason) or cluster election slowness (a long gap | |
566 | // between calls while an election happened) | |
567 | dout(4) << __func__ << ": resetting beacon timeouts due to mon delay " | |
568 | "(slow election?) of " << now - last_tick << " seconds" << dendl; | |
569 | for (auto &i : last_beacon) { | |
570 | i.second = now; | |
571 | } | |
572 | } | |
573 | ||
574 | last_tick = now; | |
7c673cae FG |
575 | |
576 | // Populate any missing beacons (i.e. no beacon since MgrMonitor | |
577 | // instantiation) with the current time, so that they will | |
578 | // eventually look laggy if they fail to give us a beacon. | |
579 | if (pending_map.active_gid != 0 | |
580 | && last_beacon.count(pending_map.active_gid) == 0) { | |
581 | last_beacon[pending_map.active_gid] = now; | |
582 | } | |
583 | for (auto s : pending_map.standbys) { | |
584 | if (last_beacon.count(s.first) == 0) { | |
585 | last_beacon[s.first] = now; | |
586 | } | |
587 | } | |
588 | ||
589 | // Cull standbys first so that any remaining standbys | |
590 | // will be eligible to take over from the active if we cull him. | |
591 | std::list<uint64_t> dead_standbys; | |
3efd9988 | 592 | const auto cutoff = now - mgr_beacon_grace; |
7c673cae FG |
593 | for (const auto &i : pending_map.standbys) { |
594 | auto last_beacon_time = last_beacon.at(i.first); | |
595 | if (last_beacon_time < cutoff) { | |
596 | dead_standbys.push_back(i.first); | |
597 | } | |
598 | } | |
599 | ||
600 | bool propose = false; | |
601 | ||
602 | for (auto i : dead_standbys) { | |
603 | dout(4) << "Dropping laggy standby " << i << dendl; | |
604 | drop_standby(i); | |
605 | propose = true; | |
606 | } | |
607 | ||
608 | if (pending_map.active_gid != 0 | |
609 | && last_beacon.at(pending_map.active_gid) < cutoff) { | |
224ce89b | 610 | const std::string old_active_name = pending_map.active_name; |
7c673cae FG |
611 | drop_active(); |
612 | propose = true; | |
613 | dout(4) << "Dropping active" << pending_map.active_gid << dendl; | |
614 | if (promote_standby()) { | |
615 | dout(4) << "Promoted standby " << pending_map.active_gid << dendl; | |
224ce89b WB |
616 | mon->clog->info() << "Manager daemon " << old_active_name |
617 | << " is unresponsive, replacing it with standby" | |
618 | << " daemon " << pending_map.active_name; | |
7c673cae FG |
619 | } else { |
620 | dout(4) << "Active is laggy but have no standbys to replace it" << dendl; | |
28e407b8 | 621 | mon->clog->info() << "Manager daemon " << old_active_name |
224ce89b | 622 | << " is unresponsive. No standby daemons available."; |
7c673cae FG |
623 | } |
624 | } else if (pending_map.active_gid == 0) { | |
625 | if (promote_standby()) { | |
626 | dout(4) << "Promoted standby " << pending_map.active_gid << dendl; | |
224ce89b | 627 | mon->clog->info() << "Activating manager daemon " |
3efd9988 | 628 | << pending_map.active_name; |
7c673cae FG |
629 | propose = true; |
630 | } | |
631 | } | |
632 | ||
224ce89b | 633 | if (!pending_map.available && |
c07f9fc5 | 634 | !ever_had_active_mgr && |
224ce89b | 635 | should_warn_about_mgr_down() != HEALTH_OK) { |
3efd9988 FG |
636 | dout(10) << " exceeded mon_mgr_mkfs_grace " |
637 | << g_conf->get_val<int64_t>("mon_mgr_mkfs_grace") | |
638 | << " seconds" << dendl; | |
224ce89b WB |
639 | propose = true; |
640 | } | |
641 | ||
7c673cae FG |
642 | if (propose) { |
643 | propose_pending(); | |
644 | } | |
645 | } | |
646 | ||
224ce89b WB |
647 | void MgrMonitor::on_restart() |
648 | { | |
649 | // Clear out the leader-specific state. | |
650 | last_beacon.clear(); | |
3efd9988 | 651 | last_tick = ceph::coarse_mono_clock::now(); |
224ce89b WB |
652 | } |
653 | ||
654 | ||
7c673cae FG |
655 | bool MgrMonitor::promote_standby() |
656 | { | |
657 | assert(pending_map.active_gid == 0); | |
658 | if (pending_map.standbys.size()) { | |
659 | // Promote a replacement (arbitrary choice of standby) | |
660 | auto replacement_gid = pending_map.standbys.begin()->first; | |
661 | pending_map.active_gid = replacement_gid; | |
662 | pending_map.active_name = pending_map.standbys.at(replacement_gid).name; | |
663 | pending_map.available = false; | |
664 | pending_map.active_addr = entity_addr_t(); | |
665 | ||
181888fb FG |
666 | drop_standby(replacement_gid, false); |
667 | ||
7c673cae FG |
668 | return true; |
669 | } else { | |
670 | return false; | |
671 | } | |
672 | } | |
673 | ||
674 | void MgrMonitor::drop_active() | |
675 | { | |
676 | if (last_beacon.count(pending_map.active_gid) > 0) { | |
677 | last_beacon.erase(pending_map.active_gid); | |
678 | } | |
679 | ||
c07f9fc5 FG |
680 | pending_metadata_rm.insert(pending_map.active_name); |
681 | pending_metadata.erase(pending_map.active_name); | |
7c673cae FG |
682 | pending_map.active_name = ""; |
683 | pending_map.active_gid = 0; | |
684 | pending_map.available = false; | |
685 | pending_map.active_addr = entity_addr_t(); | |
3efd9988 | 686 | pending_map.services.clear(); |
224ce89b WB |
687 | |
688 | // So that when new active mgr subscribes to mgrdigest, it will | |
689 | // get an immediate response instead of waiting for next timer | |
690 | cancel_timer(); | |
7c673cae FG |
691 | } |
692 | ||
181888fb | 693 | void MgrMonitor::drop_standby(uint64_t gid, bool drop_meta) |
7c673cae | 694 | { |
181888fb FG |
695 | if (drop_meta) { |
696 | pending_metadata_rm.insert(pending_map.standbys[gid].name); | |
697 | pending_metadata.erase(pending_map.standbys[gid].name); | |
698 | } | |
7c673cae FG |
699 | pending_map.standbys.erase(gid); |
700 | if (last_beacon.count(gid) > 0) { | |
701 | last_beacon.erase(gid); | |
702 | } | |
7c673cae FG |
703 | } |
704 | ||
705 | bool MgrMonitor::preprocess_command(MonOpRequestRef op) | |
706 | { | |
31f18b77 FG |
707 | MMonCommand *m = static_cast<MMonCommand*>(op->get_req()); |
708 | std::stringstream ss; | |
709 | bufferlist rdata; | |
710 | ||
711 | std::map<std::string, cmd_vartype> cmdmap; | |
712 | if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { | |
713 | string rs = ss.str(); | |
714 | mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed()); | |
715 | return true; | |
716 | } | |
7c673cae | 717 | |
31f18b77 FG |
718 | MonSession *session = m->get_session(); |
719 | if (!session) { | |
720 | mon->reply_command(op, -EACCES, "access denied", rdata, | |
721 | get_last_committed()); | |
722 | return true; | |
723 | } | |
724 | ||
725 | string format; | |
726 | cmd_getval(g_ceph_context, cmdmap, "format", format, string("json-pretty")); | |
727 | boost::scoped_ptr<Formatter> f(Formatter::create(format)); | |
728 | ||
729 | string prefix; | |
730 | cmd_getval(g_ceph_context, cmdmap, "prefix", prefix); | |
731 | int r = 0; | |
732 | ||
733 | if (prefix == "mgr dump") { | |
734 | int64_t epoch = 0; | |
735 | cmd_getval(g_ceph_context, cmdmap, "epoch", epoch, (int64_t)map.get_epoch()); | |
736 | if (epoch == (int64_t)map.get_epoch()) { | |
737 | f->dump_object("mgrmap", map); | |
738 | } else { | |
739 | bufferlist bl; | |
740 | int err = get_version(epoch, bl); | |
741 | if (err == -ENOENT) { | |
742 | r = -ENOENT; | |
743 | ss << "there is no map for epoch " << epoch; | |
744 | goto reply; | |
745 | } | |
746 | MgrMap m; | |
747 | auto p = bl.begin(); | |
748 | m.decode(p); | |
749 | f->dump_object("mgrmap", m); | |
750 | } | |
751 | f->flush(rdata); | |
224ce89b | 752 | } else if (prefix == "mgr module ls") { |
3efd9988 FG |
753 | f->open_object_section("modules"); |
754 | { | |
755 | f->open_array_section("enabled_modules"); | |
756 | for (auto& p : map.modules) { | |
757 | f->dump_string("module", p); | |
758 | } | |
759 | f->close_section(); | |
760 | f->open_array_section("disabled_modules"); | |
761 | for (auto& p : map.available_modules) { | |
762 | if (map.modules.count(p) == 0) { | |
763 | f->dump_string("module", p); | |
764 | } | |
765 | } | |
766 | f->close_section(); | |
767 | } | |
768 | f->close_section(); | |
769 | f->flush(rdata); | |
770 | } else if (prefix == "mgr services") { | |
771 | f->open_object_section("services"); | |
772 | for (const auto &i : map.services) { | |
773 | f->dump_string(i.first.c_str(), i.second); | |
224ce89b WB |
774 | } |
775 | f->close_section(); | |
776 | f->flush(rdata); | |
c07f9fc5 FG |
777 | } else if (prefix == "mgr metadata") { |
778 | string name; | |
779 | cmd_getval(g_ceph_context, cmdmap, "id", name); | |
780 | if (name.size() > 0 && !map.have_name(name)) { | |
781 | ss << "mgr." << name << " does not exist"; | |
782 | r = -ENOENT; | |
783 | goto reply; | |
784 | } | |
785 | string format; | |
786 | cmd_getval(g_ceph_context, cmdmap, "format", format); | |
787 | boost::scoped_ptr<Formatter> f(Formatter::create(format, "json-pretty", "json-pretty")); | |
788 | if (name.size()) { | |
789 | f->open_object_section("mgr_metadata"); | |
790 | f->dump_string("id", name); | |
791 | r = dump_metadata(name, f.get(), &ss); | |
792 | if (r < 0) | |
793 | goto reply; | |
794 | f->close_section(); | |
795 | } else { | |
796 | r = 0; | |
797 | f->open_array_section("mgr_metadata"); | |
798 | for (auto& i : map.get_all_names()) { | |
799 | f->open_object_section("mgr"); | |
800 | f->dump_string("id", i); | |
801 | r = dump_metadata(i, f.get(), NULL); | |
802 | if (r == -EINVAL || r == -ENOENT) { | |
803 | // Drop error, continue to get other daemons' metadata | |
804 | dout(4) << "No metadata for mgr." << i << dendl; | |
805 | r = 0; | |
806 | } else if (r < 0) { | |
807 | // Unexpected error | |
808 | goto reply; | |
809 | } | |
810 | f->close_section(); | |
811 | } | |
812 | f->close_section(); | |
813 | } | |
814 | f->flush(rdata); | |
815 | } else if (prefix == "mgr versions") { | |
816 | if (!f) | |
817 | f.reset(Formatter::create("json-pretty")); | |
818 | count_metadata("ceph_version", f.get()); | |
819 | f->flush(rdata); | |
820 | r = 0; | |
821 | } else if (prefix == "mgr count-metadata") { | |
822 | if (!f) | |
823 | f.reset(Formatter::create("json-pretty")); | |
824 | string field; | |
825 | cmd_getval(g_ceph_context, cmdmap, "property", field); | |
826 | count_metadata(field, f.get()); | |
827 | f->flush(rdata); | |
828 | r = 0; | |
31f18b77 FG |
829 | } else { |
830 | return false; | |
831 | } | |
832 | ||
833 | reply: | |
834 | string rs; | |
835 | getline(ss, rs); | |
836 | mon->reply_command(op, r, rs, rdata, get_last_committed()); | |
837 | return true; | |
7c673cae FG |
838 | } |
839 | ||
840 | bool MgrMonitor::prepare_command(MonOpRequestRef op) | |
841 | { | |
842 | MMonCommand *m = static_cast<MMonCommand*>(op->get_req()); | |
843 | ||
844 | std::stringstream ss; | |
845 | bufferlist rdata; | |
846 | ||
847 | std::map<std::string, cmd_vartype> cmdmap; | |
848 | if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { | |
849 | string rs = ss.str(); | |
850 | mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed()); | |
851 | return true; | |
852 | } | |
853 | ||
854 | MonSession *session = m->get_session(); | |
855 | if (!session) { | |
856 | mon->reply_command(op, -EACCES, "access denied", rdata, get_last_committed()); | |
857 | return true; | |
858 | } | |
859 | ||
224ce89b WB |
860 | string format; |
861 | cmd_getval(g_ceph_context, cmdmap, "format", format, string("plain")); | |
862 | boost::scoped_ptr<Formatter> f(Formatter::create(format)); | |
863 | ||
7c673cae FG |
864 | string prefix; |
865 | cmd_getval(g_ceph_context, cmdmap, "prefix", prefix); | |
866 | ||
867 | int r = 0; | |
868 | ||
869 | if (prefix == "mgr fail") { | |
870 | string who; | |
871 | cmd_getval(g_ceph_context, cmdmap, "who", who); | |
872 | ||
873 | std::string err; | |
874 | uint64_t gid = strict_strtol(who.c_str(), 10, &err); | |
875 | bool changed = false; | |
876 | if (!err.empty()) { | |
877 | // Does not parse as a gid, treat it as a name | |
878 | if (pending_map.active_name == who) { | |
879 | drop_active(); | |
880 | changed = true; | |
881 | } else { | |
882 | gid = 0; | |
883 | for (const auto &i : pending_map.standbys) { | |
884 | if (i.second.name == who) { | |
885 | gid = i.first; | |
886 | break; | |
887 | } | |
888 | } | |
889 | if (gid != 0) { | |
890 | drop_standby(gid); | |
891 | changed = true; | |
892 | } else { | |
893 | ss << "Daemon not found '" << who << "', already failed?"; | |
894 | } | |
895 | } | |
896 | } else { | |
897 | if (pending_map.active_gid == gid) { | |
898 | drop_active(); | |
899 | changed = true; | |
900 | } else if (pending_map.standbys.count(gid) > 0) { | |
901 | drop_standby(gid); | |
902 | changed = true; | |
903 | } else { | |
904 | ss << "Daemon not found '" << gid << "', already failed?"; | |
905 | } | |
906 | } | |
907 | ||
908 | if (changed && pending_map.active_gid == 0) { | |
909 | promote_standby(); | |
910 | } | |
224ce89b WB |
911 | } else if (prefix == "mgr module enable") { |
912 | string module; | |
913 | cmd_getval(g_ceph_context, cmdmap, "module", module); | |
914 | if (module.empty()) { | |
915 | r = -EINVAL; | |
916 | goto out; | |
917 | } | |
918 | string force; | |
919 | cmd_getval(g_ceph_context, cmdmap, "force", force); | |
920 | if (!pending_map.all_support_module(module) && | |
921 | force != "--force") { | |
922 | ss << "all mgr daemons do not support module '" << module << "', pass " | |
923 | << "--force to force enablement"; | |
924 | r = -ENOENT; | |
925 | goto out; | |
926 | } | |
927 | pending_map.modules.insert(module); | |
928 | } else if (prefix == "mgr module disable") { | |
929 | string module; | |
930 | cmd_getval(g_ceph_context, cmdmap, "module", module); | |
931 | if (module.empty()) { | |
932 | r = -EINVAL; | |
933 | goto out; | |
934 | } | |
935 | pending_map.modules.erase(module); | |
7c673cae | 936 | } else { |
224ce89b | 937 | ss << "Command '" << prefix << "' not implemented!"; |
7c673cae FG |
938 | r = -ENOSYS; |
939 | } | |
940 | ||
224ce89b | 941 | out: |
7c673cae FG |
942 | dout(4) << __func__ << " done, r=" << r << dendl; |
943 | /* Compose response */ | |
944 | string rs; | |
945 | getline(ss, rs); | |
946 | ||
947 | if (r >= 0) { | |
948 | // success.. delay reply | |
949 | wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, r, rs, | |
950 | get_last_committed() + 1)); | |
951 | return true; | |
952 | } else { | |
953 | // reply immediately | |
954 | mon->reply_command(op, r, rs, rdata, get_last_committed()); | |
955 | return false; | |
956 | } | |
957 | } | |
958 | ||
959 | void MgrMonitor::init() | |
960 | { | |
31f18b77 | 961 | if (digest_event == nullptr) { |
7c673cae FG |
962 | send_digests(); // To get it to schedule its own event |
963 | } | |
964 | } | |
965 | ||
966 | void MgrMonitor::on_shutdown() | |
967 | { | |
31f18b77 | 968 | cancel_timer(); |
7c673cae FG |
969 | } |
970 | ||
c07f9fc5 FG |
971 | int MgrMonitor::load_metadata(const string& name, std::map<string, string>& m, |
972 | ostream *err) | |
973 | { | |
974 | bufferlist bl; | |
975 | int r = mon->store->get(MGR_METADATA_PREFIX, name, bl); | |
976 | if (r < 0) | |
977 | return r; | |
978 | try { | |
979 | bufferlist::iterator p = bl.begin(); | |
980 | ::decode(m, p); | |
981 | } | |
982 | catch (buffer::error& e) { | |
983 | if (err) | |
984 | *err << "mgr." << name << " metadata is corrupt"; | |
985 | return -EIO; | |
986 | } | |
987 | return 0; | |
988 | } | |
989 | ||
990 | void MgrMonitor::count_metadata(const string& field, std::map<string,int> *out) | |
991 | { | |
992 | std::set<string> ls = map.get_all_names(); | |
993 | for (auto& name : ls) { | |
994 | std::map<string,string> meta; | |
995 | load_metadata(name, meta, nullptr); | |
996 | auto p = meta.find(field); | |
997 | if (p == meta.end()) { | |
998 | (*out)["unknown"]++; | |
999 | } else { | |
1000 | (*out)[p->second]++; | |
1001 | } | |
1002 | } | |
1003 | } | |
1004 | ||
1005 | void MgrMonitor::count_metadata(const string& field, Formatter *f) | |
1006 | { | |
1007 | std::map<string,int> by_val; | |
1008 | count_metadata(field, &by_val); | |
1009 | f->open_object_section(field.c_str()); | |
1010 | for (auto& p : by_val) { | |
1011 | f->dump_int(p.first.c_str(), p.second); | |
1012 | } | |
1013 | f->close_section(); | |
1014 | } | |
1015 | ||
1016 | int MgrMonitor::dump_metadata(const string& name, Formatter *f, ostream *err) | |
1017 | { | |
1018 | std::map<string,string> m; | |
1019 | if (int r = load_metadata(name, m, err)) | |
1020 | return r; | |
1021 | for (auto& p : m) { | |
1022 | f->dump_string(p.first.c_str(), p.second); | |
1023 | } | |
1024 | return 0; | |
1025 | } | |
31f18b77 | 1026 | |
d2e6a577 FG |
1027 | const std::vector<MonCommand> &MgrMonitor::get_command_descs() const |
1028 | { | |
1029 | if (command_descs.empty()) { | |
1030 | // must have just upgraded; fallback to static commands | |
1031 | return mgr_commands; | |
1032 | } else { | |
1033 | return command_descs; | |
1034 | } | |
1035 | } |