]> git.proxmox.com Git - ceph.git/blame - ceph/src/mgr/MgrStandby.cc
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / mgr / MgrStandby.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2016 John Spray <john.spray@redhat.com>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 */
13
14#include <Python.h>
20effc67 15#include <boost/algorithm/string/replace.hpp>
7c673cae
FG
16
17#include "common/errno.h"
31f18b77
FG
18#include "common/signal.h"
19#include "include/compat.h"
7c673cae
FG
20
21#include "include/stringify.h"
22#include "global/global_context.h"
23#include "global/signal_handler.h"
24
25#include "mgr/MgrContext.h"
11fdf7f2 26#include "mgr/mgr_commands.h"
20effc67 27#include "mgr/mgr_perf_counters.h"
7c673cae
FG
28
29#include "messages/MMgrBeacon.h"
30#include "messages/MMgrMap.h"
31#include "Mgr.h"
32
33#include "MgrStandby.h"
34
35#define dout_context g_ceph_context
36#define dout_subsys ceph_subsys_mgr
37#undef dout_prefix
38#define dout_prefix *_dout << "mgr " << __func__ << " "
39
20effc67
TL
40using std::map;
41using std::string;
42using std::vector;
7c673cae 43
31f18b77 44MgrStandby::MgrStandby(int argc, const char **argv) :
7c673cae 45 Dispatcher(g_ceph_context),
f67539c2 46 monc{g_ceph_context, poolctx},
11fdf7f2
TL
47 client_messenger(Messenger::create(
48 g_ceph_context,
20effc67
TL
49 cct->_conf.get_val<std::string>("ms_public_type").empty() ?
50 cct->_conf.get_val<std::string>("ms_type") : cct->_conf.get_val<std::string>("ms_public_type"),
11fdf7f2
TL
51 entity_name_t::MGR(),
52 "mgr",
f67539c2
TL
53 Messenger::get_pid_nonce())),
54 objecter{g_ceph_context, client_messenger.get(), &monc, poolctx},
7c673cae 55 client{client_messenger.get(), &monc, &objecter},
9f95a23c 56 mgrc(g_ceph_context, client_messenger.get(), &monc.monmap),
7c673cae
FG
57 log_client(g_ceph_context, client_messenger.get(), &monc.monmap, LogClient::NO_FLAGS),
58 clog(log_client.create_channel(CLOG_CHANNEL_CLUSTER)),
59 audit_clog(log_client.create_channel(CLOG_CHANNEL_AUDIT)),
11fdf7f2 60 finisher(g_ceph_context, "MgrStandby", "mgrsb-fin"),
7c673cae 61 timer(g_ceph_context, lock),
3efd9988 62 py_module_registry(clog),
31f18b77
FG
63 active_mgr(nullptr),
64 orig_argc(argc),
c07f9fc5
FG
65 orig_argv(argv),
66 available_in_map(false)
7c673cae
FG
67{
68}
69
// Defaulted destructor, defined out of line in this translation unit.
MgrStandby::~MgrStandby() = default;
71
72const char** MgrStandby::get_tracked_conf_keys() const
73{
74 static const char* KEYS[] = {
75 // clog & admin clog
76 "clog_to_monitors",
77 "clog_to_syslog",
78 "clog_to_syslog_facility",
79 "clog_to_syslog_level",
7c673cae
FG
80 "clog_to_graylog",
81 "clog_to_graylog_host",
82 "clog_to_graylog_port",
b3b6e05e 83 "mgr_standby_modules",
7c673cae
FG
84 "host",
85 "fsid",
86 NULL
87 };
88 return KEYS;
89}
90
91void MgrStandby::handle_conf_change(
11fdf7f2 92 const ConfigProxy& conf,
7c673cae
FG
93 const std::set <std::string> &changed)
94{
95 if (changed.count("clog_to_monitors") ||
96 changed.count("clog_to_syslog") ||
97 changed.count("clog_to_syslog_level") ||
98 changed.count("clog_to_syslog_facility") ||
99 changed.count("clog_to_graylog") ||
100 changed.count("clog_to_graylog_host") ||
101 changed.count("clog_to_graylog_port") ||
102 changed.count("host") ||
103 changed.count("fsid")) {
104 _update_log_config();
105 }
b3b6e05e
TL
106 if (changed.count("mgr_standby_modules") && !active_mgr) {
107 if (g_conf().get_val<bool>("mgr_standby_modules") != py_module_registry.have_standby_modules()) {
108 dout(1) << "mgr_standby_modules now "
109 << (int)g_conf().get_val<bool>("mgr_standby_modules")
110 << ", standby modules are "
111 << (py_module_registry.have_standby_modules() ? "":"not ")
112 << "active, respawning"
113 << dendl;
114 respawn();
115 }
116 }
7c673cae
FG
117}
118
119int MgrStandby::init()
120{
11fdf7f2
TL
121 init_async_signal_handler();
122 register_async_signal_handler(SIGHUP, sighup_handler);
123
b3b6e05e
TL
124 cct->_conf.add_observer(this);
125
11fdf7f2
TL
126 std::lock_guard l(lock);
127
128 // Start finisher
129 finisher.start();
7c673cae
FG
130
131 // Initialize Messenger
132 client_messenger->add_dispatcher_tail(this);
133 client_messenger->add_dispatcher_head(&objecter);
134 client_messenger->add_dispatcher_tail(&client);
135 client_messenger->start();
136
f67539c2
TL
137 poolctx.start(2);
138
7c673cae
FG
139 // Initialize MonClient
140 if (monc.build_initial_monmap() < 0) {
141 client_messenger->shutdown();
142 client_messenger->wait();
143 return -1;
144 }
145
146 monc.sub_want("mgrmap", 0, 0);
147
148 monc.set_want_keys(CEPH_ENTITY_TYPE_MON|CEPH_ENTITY_TYPE_OSD
149 |CEPH_ENTITY_TYPE_MDS|CEPH_ENTITY_TYPE_MGR);
150 monc.set_messenger(client_messenger.get());
11fdf7f2
TL
151
152 // We must register our config callback before calling init(), so
153 // that we see the initial configuration message
154 monc.register_config_callback([this](const std::string &k, const std::string &v){
adb31ebb
TL
155 // removing value to hide sensitive data going into mgr logs
156 // leaving this for debugging purposes
157 // dout(10) << "config_callback: " << k << " : " << v << dendl;
158 dout(10) << "config_callback: " << k << " : " << dendl;
11fdf7f2 159 if (k.substr(0, 4) == "mgr/") {
f67539c2 160 py_module_registry.handle_config(k, v);
11fdf7f2
TL
161 return true;
162 }
163 return false;
164 });
165 monc.register_config_notify_callback([this]() {
166 py_module_registry.handle_config_notify();
167 });
168 dout(4) << "Registered monc callback" << dendl;
169
7c673cae
FG
170 int r = monc.init();
171 if (r < 0) {
172 monc.shutdown();
173 client_messenger->shutdown();
174 client_messenger->wait();
175 return r;
176 }
11fdf7f2
TL
177 mgrc.init();
178 client_messenger->add_dispatcher_tail(&mgrc);
179
7c673cae
FG
180 r = monc.authenticate();
181 if (r < 0) {
182 derr << "Authentication failed, did you specify a mgr ID with a valid keyring?" << dendl;
183 monc.shutdown();
184 client_messenger->shutdown();
185 client_messenger->wait();
186 return r;
187 }
11fdf7f2
TL
188 // only forward monmap updates after authentication finishes, otherwise
189 // monc.authenticate() will be waiting for MgrStandy::ms_dispatch()
190 // to acquire the lock forever, as it is already locked in the beginning of
191 // this method.
192 monc.set_passthrough_monmap();
7c673cae
FG
193
194 client_t whoami = monc.get_global_id();
11fdf7f2 195 client_messenger->set_myname(entity_name_t::MGR(whoami.v));
7c673cae
FG
196 monc.set_log_client(&log_client);
197 _update_log_config();
198 objecter.set_client_incarnation(0);
199 objecter.init();
200 objecter.start();
201 client.init();
202 timer.init();
203
11fdf7f2 204 py_module_registry.init();
20effc67
TL
205 mgr_perf_start(g_ceph_context);
206
11fdf7f2 207
31f18b77 208 tick();
7c673cae
FG
209
210 dout(4) << "Complete." << dendl;
211 return 0;
212}
213
214void MgrStandby::send_beacon()
215{
9f95a23c
TL
216 ceph_assert(ceph_mutex_is_locked_by_me(lock));
217 dout(20) << state_str() << dendl;
11fdf7f2 218
9f95a23c 219 auto modules = py_module_registry.get_modules();
11fdf7f2
TL
220
221 // Construct a list of the info about each loaded module
222 // which we will transmit to the monitor.
223 std::vector<MgrMap::ModuleInfo> module_info;
224 for (const auto &module : modules) {
225 MgrMap::ModuleInfo info;
226 info.name = module->get_name();
227 info.error_string = module->get_error_string();
228 info.can_run = module->get_can_run();
229 info.module_options = module->get_options();
230 module_info.push_back(std::move(info));
231 }
c07f9fc5 232
9f95a23c
TL
233 auto clients = py_module_registry.get_clients();
234 for (const auto& client : clients) {
f67539c2 235 dout(15) << "noting RADOS client for blocklist: " << client << dendl;
9f95a23c
TL
236 }
237
c07f9fc5
FG
238 // Whether I think I am available (request MgrMonitor to set me
239 // as available in the map)
7c673cae 240 bool available = active_mgr != nullptr && active_mgr->is_initialized();
c07f9fc5 241
11fdf7f2 242 auto addrs = available ? active_mgr->get_server_addrs() : entity_addrvec_t();
3efd9988 243 dout(10) << "sending beacon as gid " << monc.get_global_id() << dendl;
224ce89b 244
c07f9fc5 245 map<string,string> metadata;
11fdf7f2
TL
246 metadata["addr"] = client_messenger->get_myaddr_legacy().ip_only_to_str();
247 metadata["addrs"] = stringify(client_messenger->get_myaddrs());
c07f9fc5
FG
248 collect_sys_info(&metadata, g_ceph_context);
249
9f95a23c 250 auto m = ceph::make_message<MMgrBeacon>(monc.get_fsid(),
7c673cae 251 monc.get_global_id(),
11fdf7f2
TL
252 g_conf()->name.get_id(),
253 addrs,
224ce89b 254 available,
11fdf7f2 255 std::move(module_info),
9f95a23c
TL
256 std::move(metadata),
257 std::move(clients),
258 CEPH_FEATURES_ALL);
c07f9fc5 259
3efd9988
FG
260 if (available) {
261 if (!available_in_map) {
262 // We are informing the mon that we are done initializing: inform
263 // it of our command set. This has to happen after init() because
264 // it needs the python modules to have loaded.
11fdf7f2
TL
265 std::vector<MonCommand> commands = mgr_commands;
266 std::vector<MonCommand> py_commands = py_module_registry.get_commands();
267 commands.insert(commands.end(), py_commands.begin(), py_commands.end());
20effc67
TL
268 if (monc.monmap.min_mon_release < ceph_release_t::quincy) {
269 dout(10) << " stripping out positional=false quincy-ism" << dendl;
270 for (auto& i : commands) {
271 boost::replace_all(i.cmdstring, ",positional=false", "");
272 }
273 }
11fdf7f2 274 m->set_command_descs(commands);
3efd9988
FG
275 dout(4) << "going active, including " << m->get_command_descs().size()
276 << " commands in beacon" << dendl;
277 }
278
279 m->set_services(active_mgr->get_services());
c07f9fc5 280 }
20effc67 281
9f95a23c 282 monc.send_mon_message(std::move(m));
31f18b77
FG
283}
284
285void MgrStandby::tick()
286{
224ce89b 287 dout(10) << __func__ << dendl;
31f18b77
FG
288 send_beacon();
289
11fdf7f2
TL
290 timer.add_event_after(
291 g_conf().get_val<std::chrono::seconds>("mgr_tick_period").count(),
9f95a23c 292 new LambdaContext([this](int r){
31f18b77 293 tick();
3efd9988 294 }
20effc67 295 ));
7c673cae
FG
296}
297
7c673cae
FG
298void MgrStandby::shutdown()
299{
9f95a23c 300 finisher.queue(new LambdaContext([&](int) {
11fdf7f2
TL
301 std::lock_guard l(lock);
302
303 dout(4) << "Shutting down" << dendl;
304
f67539c2
TL
305 py_module_registry.shutdown();
306 // stop sending beacon first, I use monc to talk with monitors
11fdf7f2
TL
307 timer.shutdown();
308 // client uses monc and objecter
309 client.shutdown();
310 mgrc.shutdown();
f67539c2
TL
311 // Stop asio threads, so leftover events won't call into shut down
312 // monclient/objecter.
313 poolctx.finish();
11fdf7f2
TL
314 // stop monc, so mon won't be able to instruct me to shutdown/activate after
315 // the active_mgr is stopped
316 monc.shutdown();
317 if (active_mgr) {
318 active_mgr->shutdown();
319 }
11fdf7f2
TL
320 // objecter is used by monc and active_mgr
321 objecter.shutdown();
322 // client_messenger is used by all of them, so stop it in the end
323 client_messenger->shutdown();
324 }));
325
326 // Then stop the finisher to ensure its enqueued contexts aren't going
327 // to touch references to the things we're about to tear down
328 finisher.wait_for_empty();
329 finisher.stop();
20effc67 330 mgr_perf_stop(g_ceph_context);
7c673cae
FG
331}
332
31f18b77
FG
333void MgrStandby::respawn()
334{
11fdf7f2
TL
335 // --- WARNING TO FUTURE COPY/PASTERS ---
336 // You must also add a call like
337 //
338 // ceph_pthread_setname(pthread_self(), "ceph-mgr");
339 //
340 // to main() so that /proc/$pid/stat field 2 contains "(ceph-mgr)"
341 // instead of "(exe)", so that killall (and log rotation) will work.
342
31f18b77
FG
343 char *new_argv[orig_argc+1];
344 dout(1) << " e: '" << orig_argv[0] << "'" << dendl;
345 for (int i=0; i<orig_argc; i++) {
346 new_argv[i] = (char *)orig_argv[i];
347 dout(1) << " " << i << ": '" << orig_argv[i] << "'" << dendl;
348 }
349 new_argv[orig_argc] = NULL;
350
351 /* Determine the path to our executable, test if Linux /proc/self/exe exists.
352 * This allows us to exec the same executable even if it has since been
353 * unlinked.
354 */
355 char exe_path[PATH_MAX] = "";
356 if (readlink(PROCPREFIX "/proc/self/exe", exe_path, PATH_MAX-1) == -1) {
357 /* Print CWD for the user's interest */
358 char buf[PATH_MAX];
359 char *cwd = getcwd(buf, sizeof(buf));
11fdf7f2 360 ceph_assert(cwd);
31f18b77
FG
361 dout(1) << " cwd " << cwd << dendl;
362
363 /* Fall back to a best-effort: just running in our CWD */
364 strncpy(exe_path, orig_argv[0], PATH_MAX-1);
365 } else {
366 dout(1) << "respawning with exe " << exe_path << dendl;
367 strcpy(exe_path, PROCPREFIX "/proc/self/exe");
368 }
369
370 dout(1) << " exe_path " << exe_path << dendl;
371
372 unblock_all_signals(NULL);
373 execv(exe_path, new_argv);
374
375 derr << "respawn execv " << orig_argv[0]
376 << " failed with " << cpp_strerror(errno) << dendl;
377 ceph_abort();
378}
379
7c673cae
FG
380void MgrStandby::_update_log_config()
381{
20effc67
TL
382 clog->parse_client_options(cct);
383 audit_clog->parse_client_options(cct);
7c673cae
FG
384}
385
9f95a23c 386void MgrStandby::handle_mgr_map(ref_t<MMgrMap> mmap)
7c673cae 387{
c07f9fc5 388 auto &map = mmap->get_map();
7c673cae
FG
389 dout(4) << "received map epoch " << map.get_epoch() << dendl;
390 const bool active_in_map = map.active_gid == monc.get_global_id();
391 dout(4) << "active in map: " << active_in_map
392 << " active is " << map.active_gid << dendl;
3efd9988 393
11fdf7f2
TL
394 // PyModuleRegistry may ask us to respawn if it sees that
395 // this MgrMap is changing its set of enabled modules
396 bool need_respawn = py_module_registry.handle_mgr_map(map);
397 if (need_respawn) {
adb31ebb 398 dout(1) << "respawning because set of enabled modules changed!" << dendl;
11fdf7f2 399 respawn();
3efd9988
FG
400 }
401
7c673cae
FG
402 if (active_in_map) {
403 if (!active_mgr) {
404 dout(1) << "Activating!" << dendl;
3efd9988
FG
405 active_mgr.reset(new Mgr(&monc, map, &py_module_registry,
406 client_messenger.get(), &objecter,
7c673cae 407 &client, clog, audit_clog));
9f95a23c 408 active_mgr->background_init(new LambdaContext(
224ce89b
WB
409 [this](int r){
410 // Advertise our active-ness ASAP instead of waiting for
411 // next tick.
11fdf7f2 412 std::lock_guard l(lock);
224ce89b
WB
413 send_beacon();
414 }));
415 dout(1) << "I am now activating" << dendl;
7c673cae
FG
416 } else {
417 dout(10) << "I was already active" << dendl;
224ce89b
WB
418 bool need_respawn = active_mgr->got_mgr_map(map);
419 if (need_respawn) {
420 respawn();
421 }
7c673cae 422 }
c07f9fc5
FG
423
424 if (!available_in_map && map.get_available()) {
425 dout(4) << "Map now says I am available" << dendl;
426 available_in_map = true;
427 }
3efd9988
FG
428 } else if (active_mgr != nullptr) {
429 derr << "I was active but no longer am" << dendl;
430 respawn();
7c673cae 431 } else {
11fdf7f2 432 if (map.active_gid != 0 && map.active_name != g_conf()->name.get_id()) {
3efd9988
FG
433 // I am the standby and someone else is active, start modules
434 // in standby mode to do redirects if needed
b3b6e05e
TL
435 if (!py_module_registry.is_standby_running() &&
436 g_conf().get_val<bool>("mgr_standby_modules")) {
11fdf7f2 437 py_module_registry.standby_start(monc, finisher);
3efd9988 438 }
7c673cae
FG
439 }
440 }
7c673cae
FG
441}
442
9f95a23c 443bool MgrStandby::ms_dispatch2(const ref_t<Message>& m)
7c673cae 444{
11fdf7f2 445 std::lock_guard l(lock);
9f95a23c 446 dout(10) << state_str() << " " << *m << dendl;
7c673cae 447
31f18b77 448 if (m->get_type() == MSG_MGR_MAP) {
9f95a23c 449 handle_mgr_map(ref_cast<MMgrMap>(m));
11fdf7f2
TL
450 }
451 bool handled = false;
452 if (active_mgr) {
31f18b77 453 auto am = active_mgr;
9f95a23c
TL
454 lock.unlock();
455 handled = am->ms_dispatch2(m);
456 lock.lock();
7c673cae 457 }
94b18763
FG
458 if (m->get_type() == MSG_MGR_MAP) {
459 // let this pass through for mgrc
460 handled = false;
461 }
462 return handled;
7c673cae
FG
463}
464
465
7c673cae
FG
/**
 * Connection-refused hook. Currently takes no action.
 *
 * @param con  the connection concerned (unused)
 * @return     always false
 */
bool MgrStandby::ms_handle_refused(Connection *con)
{
  // do nothing for now
  return false;
}
471
7c673cae
FG
/**
 * Daemon main body: wait on the client messenger, then remove the SIGHUP
 * handler and shut down async signal handling.
 *
 * @param args  not used by this function
 * @return      always 0
 */
int MgrStandby::main(vector<const char *> args)
{
  // Returns once the messenger's wait() returns; shutdown() is the place
  // that calls client_messenger->shutdown().
  client_messenger->wait();

  // Disable signal handlers
  unregister_async_signal_handler(SIGHUP, sighup_handler);
  shutdown_async_signal_handler();

  return 0;
}
482
483
484std::string MgrStandby::state_str()
485{
3efd9988
FG
486 if (active_mgr == nullptr) {
487 return "standby";
488 } else if (active_mgr->is_initialized()) {
489 return "active";
490 } else {
491 return "active (starting)";
492 }
7c673cae 493}