]> git.proxmox.com Git - ceph.git/blame - ceph/src/mgr/MgrStandby.cc
import ceph pacific 16.2.5
[ceph.git] / ceph / src / mgr / MgrStandby.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2016 John Spray <john.spray@redhat.com>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 */
13
14#include <Python.h>
15
16#include "common/errno.h"
31f18b77
FG
17#include "common/signal.h"
18#include "include/compat.h"
7c673cae
FG
19
20#include "include/stringify.h"
21#include "global/global_context.h"
22#include "global/signal_handler.h"
23
24#include "mgr/MgrContext.h"
11fdf7f2 25#include "mgr/mgr_commands.h"
7c673cae
FG
26
27#include "messages/MMgrBeacon.h"
28#include "messages/MMgrMap.h"
29#include "Mgr.h"
30
31#include "MgrStandby.h"
32
33#define dout_context g_ceph_context
34#define dout_subsys ceph_subsys_mgr
35#undef dout_prefix
36#define dout_prefix *_dout << "mgr " << __func__ << " "
37
38
31f18b77 39MgrStandby::MgrStandby(int argc, const char **argv) :
7c673cae 40 Dispatcher(g_ceph_context),
f67539c2 41 monc{g_ceph_context, poolctx},
11fdf7f2
TL
42 client_messenger(Messenger::create(
43 g_ceph_context,
44 cct->_conf.get_val<std::string>("ms_type"),
45 entity_name_t::MGR(),
46 "mgr",
f67539c2
TL
47 Messenger::get_pid_nonce())),
48 objecter{g_ceph_context, client_messenger.get(), &monc, poolctx},
7c673cae 49 client{client_messenger.get(), &monc, &objecter},
9f95a23c 50 mgrc(g_ceph_context, client_messenger.get(), &monc.monmap),
7c673cae
FG
51 log_client(g_ceph_context, client_messenger.get(), &monc.monmap, LogClient::NO_FLAGS),
52 clog(log_client.create_channel(CLOG_CHANNEL_CLUSTER)),
53 audit_clog(log_client.create_channel(CLOG_CHANNEL_AUDIT)),
11fdf7f2 54 finisher(g_ceph_context, "MgrStandby", "mgrsb-fin"),
7c673cae 55 timer(g_ceph_context, lock),
3efd9988 56 py_module_registry(clog),
31f18b77
FG
57 active_mgr(nullptr),
58 orig_argc(argc),
c07f9fc5
FG
59 orig_argv(argv),
60 available_in_map(false)
7c673cae
FG
61{
62}
63
64MgrStandby::~MgrStandby() = default;
65
66const char** MgrStandby::get_tracked_conf_keys() const
67{
68 static const char* KEYS[] = {
69 // clog & admin clog
70 "clog_to_monitors",
71 "clog_to_syslog",
72 "clog_to_syslog_facility",
73 "clog_to_syslog_level",
7c673cae
FG
74 "clog_to_graylog",
75 "clog_to_graylog_host",
76 "clog_to_graylog_port",
b3b6e05e 77 "mgr_standby_modules",
7c673cae
FG
78 "host",
79 "fsid",
80 NULL
81 };
82 return KEYS;
83}
84
85void MgrStandby::handle_conf_change(
11fdf7f2 86 const ConfigProxy& conf,
7c673cae
FG
87 const std::set <std::string> &changed)
88{
89 if (changed.count("clog_to_monitors") ||
90 changed.count("clog_to_syslog") ||
91 changed.count("clog_to_syslog_level") ||
92 changed.count("clog_to_syslog_facility") ||
93 changed.count("clog_to_graylog") ||
94 changed.count("clog_to_graylog_host") ||
95 changed.count("clog_to_graylog_port") ||
96 changed.count("host") ||
97 changed.count("fsid")) {
98 _update_log_config();
99 }
b3b6e05e
TL
100 if (changed.count("mgr_standby_modules") && !active_mgr) {
101 if (g_conf().get_val<bool>("mgr_standby_modules") != py_module_registry.have_standby_modules()) {
102 dout(1) << "mgr_standby_modules now "
103 << (int)g_conf().get_val<bool>("mgr_standby_modules")
104 << ", standby modules are "
105 << (py_module_registry.have_standby_modules() ? "":"not ")
106 << "active, respawning"
107 << dendl;
108 respawn();
109 }
110 }
7c673cae
FG
111}
112
113int MgrStandby::init()
114{
11fdf7f2
TL
115 init_async_signal_handler();
116 register_async_signal_handler(SIGHUP, sighup_handler);
117
b3b6e05e
TL
118 cct->_conf.add_observer(this);
119
11fdf7f2
TL
120 std::lock_guard l(lock);
121
122 // Start finisher
123 finisher.start();
7c673cae
FG
124
125 // Initialize Messenger
126 client_messenger->add_dispatcher_tail(this);
127 client_messenger->add_dispatcher_head(&objecter);
128 client_messenger->add_dispatcher_tail(&client);
129 client_messenger->start();
130
f67539c2
TL
131 poolctx.start(2);
132
7c673cae
FG
133 // Initialize MonClient
134 if (monc.build_initial_monmap() < 0) {
135 client_messenger->shutdown();
136 client_messenger->wait();
137 return -1;
138 }
139
140 monc.sub_want("mgrmap", 0, 0);
141
142 monc.set_want_keys(CEPH_ENTITY_TYPE_MON|CEPH_ENTITY_TYPE_OSD
143 |CEPH_ENTITY_TYPE_MDS|CEPH_ENTITY_TYPE_MGR);
144 monc.set_messenger(client_messenger.get());
11fdf7f2
TL
145
146 // We must register our config callback before calling init(), so
147 // that we see the initial configuration message
148 monc.register_config_callback([this](const std::string &k, const std::string &v){
adb31ebb
TL
149 // removing value to hide sensitive data going into mgr logs
150 // leaving this for debugging purposes
151 // dout(10) << "config_callback: " << k << " : " << v << dendl;
152 dout(10) << "config_callback: " << k << " : " << dendl;
11fdf7f2 153 if (k.substr(0, 4) == "mgr/") {
f67539c2 154 py_module_registry.handle_config(k, v);
11fdf7f2
TL
155 return true;
156 }
157 return false;
158 });
159 monc.register_config_notify_callback([this]() {
160 py_module_registry.handle_config_notify();
161 });
162 dout(4) << "Registered monc callback" << dendl;
163
7c673cae
FG
164 int r = monc.init();
165 if (r < 0) {
166 monc.shutdown();
167 client_messenger->shutdown();
168 client_messenger->wait();
169 return r;
170 }
11fdf7f2
TL
171 mgrc.init();
172 client_messenger->add_dispatcher_tail(&mgrc);
173
7c673cae
FG
174 r = monc.authenticate();
175 if (r < 0) {
176 derr << "Authentication failed, did you specify a mgr ID with a valid keyring?" << dendl;
177 monc.shutdown();
178 client_messenger->shutdown();
179 client_messenger->wait();
180 return r;
181 }
11fdf7f2
TL
182 // only forward monmap updates after authentication finishes, otherwise
183 // monc.authenticate() will be waiting for MgrStandy::ms_dispatch()
184 // to acquire the lock forever, as it is already locked in the beginning of
185 // this method.
186 monc.set_passthrough_monmap();
7c673cae
FG
187
188 client_t whoami = monc.get_global_id();
11fdf7f2 189 client_messenger->set_myname(entity_name_t::MGR(whoami.v));
7c673cae
FG
190 monc.set_log_client(&log_client);
191 _update_log_config();
192 objecter.set_client_incarnation(0);
193 objecter.init();
194 objecter.start();
195 client.init();
196 timer.init();
197
11fdf7f2
TL
198 py_module_registry.init();
199
31f18b77 200 tick();
7c673cae
FG
201
202 dout(4) << "Complete." << dendl;
203 return 0;
204}
205
206void MgrStandby::send_beacon()
207{
9f95a23c
TL
208 ceph_assert(ceph_mutex_is_locked_by_me(lock));
209 dout(20) << state_str() << dendl;
11fdf7f2 210
9f95a23c 211 auto modules = py_module_registry.get_modules();
11fdf7f2
TL
212
213 // Construct a list of the info about each loaded module
214 // which we will transmit to the monitor.
215 std::vector<MgrMap::ModuleInfo> module_info;
216 for (const auto &module : modules) {
217 MgrMap::ModuleInfo info;
218 info.name = module->get_name();
219 info.error_string = module->get_error_string();
220 info.can_run = module->get_can_run();
221 info.module_options = module->get_options();
222 module_info.push_back(std::move(info));
223 }
c07f9fc5 224
9f95a23c
TL
225 auto clients = py_module_registry.get_clients();
226 for (const auto& client : clients) {
f67539c2 227 dout(15) << "noting RADOS client for blocklist: " << client << dendl;
9f95a23c
TL
228 }
229
c07f9fc5
FG
230 // Whether I think I am available (request MgrMonitor to set me
231 // as available in the map)
7c673cae 232 bool available = active_mgr != nullptr && active_mgr->is_initialized();
c07f9fc5 233
11fdf7f2 234 auto addrs = available ? active_mgr->get_server_addrs() : entity_addrvec_t();
3efd9988 235 dout(10) << "sending beacon as gid " << monc.get_global_id() << dendl;
224ce89b 236
c07f9fc5 237 map<string,string> metadata;
11fdf7f2
TL
238 metadata["addr"] = client_messenger->get_myaddr_legacy().ip_only_to_str();
239 metadata["addrs"] = stringify(client_messenger->get_myaddrs());
c07f9fc5
FG
240 collect_sys_info(&metadata, g_ceph_context);
241
9f95a23c 242 auto m = ceph::make_message<MMgrBeacon>(monc.get_fsid(),
7c673cae 243 monc.get_global_id(),
11fdf7f2
TL
244 g_conf()->name.get_id(),
245 addrs,
224ce89b 246 available,
11fdf7f2 247 std::move(module_info),
9f95a23c
TL
248 std::move(metadata),
249 std::move(clients),
250 CEPH_FEATURES_ALL);
c07f9fc5 251
3efd9988
FG
252 if (available) {
253 if (!available_in_map) {
254 // We are informing the mon that we are done initializing: inform
255 // it of our command set. This has to happen after init() because
256 // it needs the python modules to have loaded.
11fdf7f2
TL
257 std::vector<MonCommand> commands = mgr_commands;
258 std::vector<MonCommand> py_commands = py_module_registry.get_commands();
259 commands.insert(commands.end(), py_commands.begin(), py_commands.end());
260 m->set_command_descs(commands);
3efd9988
FG
261 dout(4) << "going active, including " << m->get_command_descs().size()
262 << " commands in beacon" << dendl;
263 }
264
265 m->set_services(active_mgr->get_services());
c07f9fc5
FG
266 }
267
9f95a23c 268 monc.send_mon_message(std::move(m));
31f18b77
FG
269}
270
271void MgrStandby::tick()
272{
224ce89b 273 dout(10) << __func__ << dendl;
31f18b77
FG
274 send_beacon();
275
11fdf7f2
TL
276 timer.add_event_after(
277 g_conf().get_val<std::chrono::seconds>("mgr_tick_period").count(),
9f95a23c 278 new LambdaContext([this](int r){
31f18b77 279 tick();
3efd9988 280 }
7c673cae
FG
281 ));
282}
283
7c673cae
FG
284void MgrStandby::shutdown()
285{
9f95a23c 286 finisher.queue(new LambdaContext([&](int) {
11fdf7f2
TL
287 std::lock_guard l(lock);
288
289 dout(4) << "Shutting down" << dendl;
290
f67539c2
TL
291 py_module_registry.shutdown();
292 // stop sending beacon first, I use monc to talk with monitors
11fdf7f2
TL
293 timer.shutdown();
294 // client uses monc and objecter
295 client.shutdown();
296 mgrc.shutdown();
f67539c2
TL
297 // Stop asio threads, so leftover events won't call into shut down
298 // monclient/objecter.
299 poolctx.finish();
11fdf7f2
TL
300 // stop monc, so mon won't be able to instruct me to shutdown/activate after
301 // the active_mgr is stopped
302 monc.shutdown();
303 if (active_mgr) {
304 active_mgr->shutdown();
305 }
11fdf7f2
TL
306 // objecter is used by monc and active_mgr
307 objecter.shutdown();
308 // client_messenger is used by all of them, so stop it in the end
309 client_messenger->shutdown();
310 }));
311
312 // Then stop the finisher to ensure its enqueued contexts aren't going
313 // to touch references to the things we're about to tear down
314 finisher.wait_for_empty();
315 finisher.stop();
7c673cae
FG
316}
317
31f18b77
FG
318void MgrStandby::respawn()
319{
11fdf7f2
TL
320 // --- WARNING TO FUTURE COPY/PASTERS ---
321 // You must also add a call like
322 //
323 // ceph_pthread_setname(pthread_self(), "ceph-mgr");
324 //
325 // to main() so that /proc/$pid/stat field 2 contains "(ceph-mgr)"
326 // instead of "(exe)", so that killall (and log rotation) will work.
327
31f18b77
FG
328 char *new_argv[orig_argc+1];
329 dout(1) << " e: '" << orig_argv[0] << "'" << dendl;
330 for (int i=0; i<orig_argc; i++) {
331 new_argv[i] = (char *)orig_argv[i];
332 dout(1) << " " << i << ": '" << orig_argv[i] << "'" << dendl;
333 }
334 new_argv[orig_argc] = NULL;
335
336 /* Determine the path to our executable, test if Linux /proc/self/exe exists.
337 * This allows us to exec the same executable even if it has since been
338 * unlinked.
339 */
340 char exe_path[PATH_MAX] = "";
341 if (readlink(PROCPREFIX "/proc/self/exe", exe_path, PATH_MAX-1) == -1) {
342 /* Print CWD for the user's interest */
343 char buf[PATH_MAX];
344 char *cwd = getcwd(buf, sizeof(buf));
11fdf7f2 345 ceph_assert(cwd);
31f18b77
FG
346 dout(1) << " cwd " << cwd << dendl;
347
348 /* Fall back to a best-effort: just running in our CWD */
349 strncpy(exe_path, orig_argv[0], PATH_MAX-1);
350 } else {
351 dout(1) << "respawning with exe " << exe_path << dendl;
352 strcpy(exe_path, PROCPREFIX "/proc/self/exe");
353 }
354
355 dout(1) << " exe_path " << exe_path << dendl;
356
357 unblock_all_signals(NULL);
358 execv(exe_path, new_argv);
359
360 derr << "respawn execv " << orig_argv[0]
361 << " failed with " << cpp_strerror(errno) << dendl;
362 ceph_abort();
363}
364
7c673cae
FG
365void MgrStandby::_update_log_config()
366{
367 map<string,string> log_to_monitors;
368 map<string,string> log_to_syslog;
369 map<string,string> log_channel;
370 map<string,string> log_prio;
371 map<string,string> log_to_graylog;
372 map<string,string> log_to_graylog_host;
373 map<string,string> log_to_graylog_port;
374 uuid_d fsid;
375 string host;
376
377 if (parse_log_client_options(cct, log_to_monitors, log_to_syslog,
378 log_channel, log_prio, log_to_graylog,
379 log_to_graylog_host, log_to_graylog_port,
380 fsid, host) == 0) {
381 clog->update_config(log_to_monitors, log_to_syslog,
382 log_channel, log_prio, log_to_graylog,
383 log_to_graylog_host, log_to_graylog_port,
384 fsid, host);
385 audit_clog->update_config(log_to_monitors, log_to_syslog,
386 log_channel, log_prio, log_to_graylog,
387 log_to_graylog_host, log_to_graylog_port,
388 fsid, host);
389 }
390}
391
9f95a23c 392void MgrStandby::handle_mgr_map(ref_t<MMgrMap> mmap)
7c673cae 393{
c07f9fc5 394 auto &map = mmap->get_map();
7c673cae
FG
395 dout(4) << "received map epoch " << map.get_epoch() << dendl;
396 const bool active_in_map = map.active_gid == monc.get_global_id();
397 dout(4) << "active in map: " << active_in_map
398 << " active is " << map.active_gid << dendl;
3efd9988 399
11fdf7f2
TL
400 // PyModuleRegistry may ask us to respawn if it sees that
401 // this MgrMap is changing its set of enabled modules
402 bool need_respawn = py_module_registry.handle_mgr_map(map);
403 if (need_respawn) {
adb31ebb 404 dout(1) << "respawning because set of enabled modules changed!" << dendl;
11fdf7f2 405 respawn();
3efd9988
FG
406 }
407
7c673cae
FG
408 if (active_in_map) {
409 if (!active_mgr) {
410 dout(1) << "Activating!" << dendl;
3efd9988
FG
411 active_mgr.reset(new Mgr(&monc, map, &py_module_registry,
412 client_messenger.get(), &objecter,
7c673cae 413 &client, clog, audit_clog));
9f95a23c 414 active_mgr->background_init(new LambdaContext(
224ce89b
WB
415 [this](int r){
416 // Advertise our active-ness ASAP instead of waiting for
417 // next tick.
11fdf7f2 418 std::lock_guard l(lock);
224ce89b
WB
419 send_beacon();
420 }));
421 dout(1) << "I am now activating" << dendl;
7c673cae
FG
422 } else {
423 dout(10) << "I was already active" << dendl;
224ce89b
WB
424 bool need_respawn = active_mgr->got_mgr_map(map);
425 if (need_respawn) {
426 respawn();
427 }
7c673cae 428 }
c07f9fc5
FG
429
430 if (!available_in_map && map.get_available()) {
431 dout(4) << "Map now says I am available" << dendl;
432 available_in_map = true;
433 }
3efd9988
FG
434 } else if (active_mgr != nullptr) {
435 derr << "I was active but no longer am" << dendl;
436 respawn();
7c673cae 437 } else {
11fdf7f2 438 if (map.active_gid != 0 && map.active_name != g_conf()->name.get_id()) {
3efd9988
FG
439 // I am the standby and someone else is active, start modules
440 // in standby mode to do redirects if needed
b3b6e05e
TL
441 if (!py_module_registry.is_standby_running() &&
442 g_conf().get_val<bool>("mgr_standby_modules")) {
11fdf7f2 443 py_module_registry.standby_start(monc, finisher);
3efd9988 444 }
7c673cae
FG
445 }
446 }
7c673cae
FG
447}
448
9f95a23c 449bool MgrStandby::ms_dispatch2(const ref_t<Message>& m)
7c673cae 450{
11fdf7f2 451 std::lock_guard l(lock);
9f95a23c 452 dout(10) << state_str() << " " << *m << dendl;
7c673cae 453
31f18b77 454 if (m->get_type() == MSG_MGR_MAP) {
9f95a23c 455 handle_mgr_map(ref_cast<MMgrMap>(m));
11fdf7f2
TL
456 }
457 bool handled = false;
458 if (active_mgr) {
31f18b77 459 auto am = active_mgr;
9f95a23c
TL
460 lock.unlock();
461 handled = am->ms_dispatch2(m);
462 lock.lock();
7c673cae 463 }
94b18763
FG
464 if (m->get_type() == MSG_MGR_MAP) {
465 // let this pass through for mgrc
466 handled = false;
467 }
468 return handled;
7c673cae
FG
469}
470
471
7c673cae
FG
472bool MgrStandby::ms_handle_refused(Connection *con)
473{
474 // do nothing for now
475 return false;
476}
477
7c673cae
FG
478int MgrStandby::main(vector<const char *> args)
479{
7c673cae
FG
480 client_messenger->wait();
481
482 // Disable signal handlers
483 unregister_async_signal_handler(SIGHUP, sighup_handler);
7c673cae 484 shutdown_async_signal_handler();
7c673cae
FG
485
486 return 0;
487}
488
489
490std::string MgrStandby::state_str()
491{
3efd9988
FG
492 if (active_mgr == nullptr) {
493 return "standby";
494 } else if (active_mgr->is_initialized()) {
495 return "active";
496 } else {
497 return "active (starting)";
498 }
7c673cae
FG
499}
500