]> git.proxmox.com Git - ceph.git/blob - ceph/src/mgr/MgrStandby.cc
585837802f501f6dbe7e97831c0a5d38b7983e88
[ceph.git] / ceph / src / mgr / MgrStandby.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2016 John Spray <john.spray@redhat.com>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 */
13
14 #include <Python.h>
15
16 #include "common/errno.h"
17 #include "common/signal.h"
18 #include "include/compat.h"
19
20 #include "include/stringify.h"
21 #include "global/global_context.h"
22 #include "global/signal_handler.h"
23
24 #include "mgr/MgrContext.h"
25 #include "mgr/mgr_commands.h"
26
27 #include "messages/MMgrBeacon.h"
28 #include "messages/MMgrMap.h"
29 #include "Mgr.h"
30
31 #include "MgrStandby.h"
32
33 #define dout_context g_ceph_context
34 #define dout_subsys ceph_subsys_mgr
35 #undef dout_prefix
36 #define dout_prefix *_dout << "mgr " << __func__ << " "
37
38
39 MgrStandby::MgrStandby(int argc, const char **argv) :
40 Dispatcher(g_ceph_context),
41 monc{g_ceph_context, poolctx},
42 client_messenger(Messenger::create(
43 g_ceph_context,
44 cct->_conf.get_val<std::string>("ms_type"),
45 entity_name_t::MGR(),
46 "mgr",
47 Messenger::get_pid_nonce())),
48 objecter{g_ceph_context, client_messenger.get(), &monc, poolctx},
49 client{client_messenger.get(), &monc, &objecter},
50 mgrc(g_ceph_context, client_messenger.get(), &monc.monmap),
51 log_client(g_ceph_context, client_messenger.get(), &monc.monmap, LogClient::NO_FLAGS),
52 clog(log_client.create_channel(CLOG_CHANNEL_CLUSTER)),
53 audit_clog(log_client.create_channel(CLOG_CHANNEL_AUDIT)),
54 finisher(g_ceph_context, "MgrStandby", "mgrsb-fin"),
55 timer(g_ceph_context, lock),
56 py_module_registry(clog),
57 active_mgr(nullptr),
58 orig_argc(argc),
59 orig_argv(argv),
60 available_in_map(false)
61 {
62 }
63
64 MgrStandby::~MgrStandby() = default;
65
66 const char** MgrStandby::get_tracked_conf_keys() const
67 {
68 static const char* KEYS[] = {
69 // clog & admin clog
70 "clog_to_monitors",
71 "clog_to_syslog",
72 "clog_to_syslog_facility",
73 "clog_to_syslog_level",
74 "clog_to_graylog",
75 "clog_to_graylog_host",
76 "clog_to_graylog_port",
77 "host",
78 "fsid",
79 NULL
80 };
81 return KEYS;
82 }
83
84 void MgrStandby::handle_conf_change(
85 const ConfigProxy& conf,
86 const std::set <std::string> &changed)
87 {
88 if (changed.count("clog_to_monitors") ||
89 changed.count("clog_to_syslog") ||
90 changed.count("clog_to_syslog_level") ||
91 changed.count("clog_to_syslog_facility") ||
92 changed.count("clog_to_graylog") ||
93 changed.count("clog_to_graylog_host") ||
94 changed.count("clog_to_graylog_port") ||
95 changed.count("host") ||
96 changed.count("fsid")) {
97 _update_log_config();
98 }
99 }
100
101 int MgrStandby::init()
102 {
103 init_async_signal_handler();
104 register_async_signal_handler(SIGHUP, sighup_handler);
105
106 std::lock_guard l(lock);
107
108 // Start finisher
109 finisher.start();
110
111 // Initialize Messenger
112 client_messenger->add_dispatcher_tail(this);
113 client_messenger->add_dispatcher_head(&objecter);
114 client_messenger->add_dispatcher_tail(&client);
115 client_messenger->start();
116
117 poolctx.start(2);
118
119 // Initialize MonClient
120 if (monc.build_initial_monmap() < 0) {
121 client_messenger->shutdown();
122 client_messenger->wait();
123 return -1;
124 }
125
126 monc.sub_want("mgrmap", 0, 0);
127
128 monc.set_want_keys(CEPH_ENTITY_TYPE_MON|CEPH_ENTITY_TYPE_OSD
129 |CEPH_ENTITY_TYPE_MDS|CEPH_ENTITY_TYPE_MGR);
130 monc.set_messenger(client_messenger.get());
131
132 // We must register our config callback before calling init(), so
133 // that we see the initial configuration message
134 monc.register_config_callback([this](const std::string &k, const std::string &v){
135 // removing value to hide sensitive data going into mgr logs
136 // leaving this for debugging purposes
137 // dout(10) << "config_callback: " << k << " : " << v << dendl;
138 dout(10) << "config_callback: " << k << " : " << dendl;
139 if (k.substr(0, 4) == "mgr/") {
140 py_module_registry.handle_config(k, v);
141 return true;
142 }
143 return false;
144 });
145 monc.register_config_notify_callback([this]() {
146 py_module_registry.handle_config_notify();
147 });
148 dout(4) << "Registered monc callback" << dendl;
149
150 int r = monc.init();
151 if (r < 0) {
152 monc.shutdown();
153 client_messenger->shutdown();
154 client_messenger->wait();
155 return r;
156 }
157 mgrc.init();
158 client_messenger->add_dispatcher_tail(&mgrc);
159
160 r = monc.authenticate();
161 if (r < 0) {
162 derr << "Authentication failed, did you specify a mgr ID with a valid keyring?" << dendl;
163 monc.shutdown();
164 client_messenger->shutdown();
165 client_messenger->wait();
166 return r;
167 }
168 // only forward monmap updates after authentication finishes, otherwise
169 // monc.authenticate() will be waiting for MgrStandy::ms_dispatch()
170 // to acquire the lock forever, as it is already locked in the beginning of
171 // this method.
172 monc.set_passthrough_monmap();
173
174 client_t whoami = monc.get_global_id();
175 client_messenger->set_myname(entity_name_t::MGR(whoami.v));
176 monc.set_log_client(&log_client);
177 _update_log_config();
178 objecter.set_client_incarnation(0);
179 objecter.init();
180 objecter.start();
181 client.init();
182 timer.init();
183
184 py_module_registry.init();
185
186 tick();
187
188 dout(4) << "Complete." << dendl;
189 return 0;
190 }
191
192 void MgrStandby::send_beacon()
193 {
194 ceph_assert(ceph_mutex_is_locked_by_me(lock));
195 dout(20) << state_str() << dendl;
196
197 auto modules = py_module_registry.get_modules();
198
199 // Construct a list of the info about each loaded module
200 // which we will transmit to the monitor.
201 std::vector<MgrMap::ModuleInfo> module_info;
202 for (const auto &module : modules) {
203 MgrMap::ModuleInfo info;
204 info.name = module->get_name();
205 info.error_string = module->get_error_string();
206 info.can_run = module->get_can_run();
207 info.module_options = module->get_options();
208 module_info.push_back(std::move(info));
209 }
210
211 auto clients = py_module_registry.get_clients();
212 for (const auto& client : clients) {
213 dout(15) << "noting RADOS client for blocklist: " << client << dendl;
214 }
215
216 // Whether I think I am available (request MgrMonitor to set me
217 // as available in the map)
218 bool available = active_mgr != nullptr && active_mgr->is_initialized();
219
220 auto addrs = available ? active_mgr->get_server_addrs() : entity_addrvec_t();
221 dout(10) << "sending beacon as gid " << monc.get_global_id() << dendl;
222
223 map<string,string> metadata;
224 metadata["addr"] = client_messenger->get_myaddr_legacy().ip_only_to_str();
225 metadata["addrs"] = stringify(client_messenger->get_myaddrs());
226 collect_sys_info(&metadata, g_ceph_context);
227
228 auto m = ceph::make_message<MMgrBeacon>(monc.get_fsid(),
229 monc.get_global_id(),
230 g_conf()->name.get_id(),
231 addrs,
232 available,
233 std::move(module_info),
234 std::move(metadata),
235 std::move(clients),
236 CEPH_FEATURES_ALL);
237
238 if (available) {
239 if (!available_in_map) {
240 // We are informing the mon that we are done initializing: inform
241 // it of our command set. This has to happen after init() because
242 // it needs the python modules to have loaded.
243 std::vector<MonCommand> commands = mgr_commands;
244 std::vector<MonCommand> py_commands = py_module_registry.get_commands();
245 commands.insert(commands.end(), py_commands.begin(), py_commands.end());
246 m->set_command_descs(commands);
247 dout(4) << "going active, including " << m->get_command_descs().size()
248 << " commands in beacon" << dendl;
249 }
250
251 m->set_services(active_mgr->get_services());
252 }
253
254 monc.send_mon_message(std::move(m));
255 }
256
257 void MgrStandby::tick()
258 {
259 dout(10) << __func__ << dendl;
260 send_beacon();
261
262 timer.add_event_after(
263 g_conf().get_val<std::chrono::seconds>("mgr_tick_period").count(),
264 new LambdaContext([this](int r){
265 tick();
266 }
267 ));
268 }
269
270 void MgrStandby::shutdown()
271 {
272 finisher.queue(new LambdaContext([&](int) {
273 std::lock_guard l(lock);
274
275 dout(4) << "Shutting down" << dendl;
276
277 py_module_registry.shutdown();
278 // stop sending beacon first, I use monc to talk with monitors
279 timer.shutdown();
280 // client uses monc and objecter
281 client.shutdown();
282 mgrc.shutdown();
283 // Stop asio threads, so leftover events won't call into shut down
284 // monclient/objecter.
285 poolctx.finish();
286 // stop monc, so mon won't be able to instruct me to shutdown/activate after
287 // the active_mgr is stopped
288 monc.shutdown();
289 if (active_mgr) {
290 active_mgr->shutdown();
291 }
292 // objecter is used by monc and active_mgr
293 objecter.shutdown();
294 // client_messenger is used by all of them, so stop it in the end
295 client_messenger->shutdown();
296 }));
297
298 // Then stop the finisher to ensure its enqueued contexts aren't going
299 // to touch references to the things we're about to tear down
300 finisher.wait_for_empty();
301 finisher.stop();
302 }
303
304 void MgrStandby::respawn()
305 {
306 // --- WARNING TO FUTURE COPY/PASTERS ---
307 // You must also add a call like
308 //
309 // ceph_pthread_setname(pthread_self(), "ceph-mgr");
310 //
311 // to main() so that /proc/$pid/stat field 2 contains "(ceph-mgr)"
312 // instead of "(exe)", so that killall (and log rotation) will work.
313
314 char *new_argv[orig_argc+1];
315 dout(1) << " e: '" << orig_argv[0] << "'" << dendl;
316 for (int i=0; i<orig_argc; i++) {
317 new_argv[i] = (char *)orig_argv[i];
318 dout(1) << " " << i << ": '" << orig_argv[i] << "'" << dendl;
319 }
320 new_argv[orig_argc] = NULL;
321
322 /* Determine the path to our executable, test if Linux /proc/self/exe exists.
323 * This allows us to exec the same executable even if it has since been
324 * unlinked.
325 */
326 char exe_path[PATH_MAX] = "";
327 if (readlink(PROCPREFIX "/proc/self/exe", exe_path, PATH_MAX-1) == -1) {
328 /* Print CWD for the user's interest */
329 char buf[PATH_MAX];
330 char *cwd = getcwd(buf, sizeof(buf));
331 ceph_assert(cwd);
332 dout(1) << " cwd " << cwd << dendl;
333
334 /* Fall back to a best-effort: just running in our CWD */
335 strncpy(exe_path, orig_argv[0], PATH_MAX-1);
336 } else {
337 dout(1) << "respawning with exe " << exe_path << dendl;
338 strcpy(exe_path, PROCPREFIX "/proc/self/exe");
339 }
340
341 dout(1) << " exe_path " << exe_path << dendl;
342
343 unblock_all_signals(NULL);
344 execv(exe_path, new_argv);
345
346 derr << "respawn execv " << orig_argv[0]
347 << " failed with " << cpp_strerror(errno) << dendl;
348 ceph_abort();
349 }
350
351 void MgrStandby::_update_log_config()
352 {
353 map<string,string> log_to_monitors;
354 map<string,string> log_to_syslog;
355 map<string,string> log_channel;
356 map<string,string> log_prio;
357 map<string,string> log_to_graylog;
358 map<string,string> log_to_graylog_host;
359 map<string,string> log_to_graylog_port;
360 uuid_d fsid;
361 string host;
362
363 if (parse_log_client_options(cct, log_to_monitors, log_to_syslog,
364 log_channel, log_prio, log_to_graylog,
365 log_to_graylog_host, log_to_graylog_port,
366 fsid, host) == 0) {
367 clog->update_config(log_to_monitors, log_to_syslog,
368 log_channel, log_prio, log_to_graylog,
369 log_to_graylog_host, log_to_graylog_port,
370 fsid, host);
371 audit_clog->update_config(log_to_monitors, log_to_syslog,
372 log_channel, log_prio, log_to_graylog,
373 log_to_graylog_host, log_to_graylog_port,
374 fsid, host);
375 }
376 }
377
378 void MgrStandby::handle_mgr_map(ref_t<MMgrMap> mmap)
379 {
380 auto &map = mmap->get_map();
381 dout(4) << "received map epoch " << map.get_epoch() << dendl;
382 const bool active_in_map = map.active_gid == monc.get_global_id();
383 dout(4) << "active in map: " << active_in_map
384 << " active is " << map.active_gid << dendl;
385
386 // PyModuleRegistry may ask us to respawn if it sees that
387 // this MgrMap is changing its set of enabled modules
388 bool need_respawn = py_module_registry.handle_mgr_map(map);
389 if (need_respawn) {
390 dout(1) << "respawning because set of enabled modules changed!" << dendl;
391 respawn();
392 }
393
394 if (active_in_map) {
395 if (!active_mgr) {
396 dout(1) << "Activating!" << dendl;
397 active_mgr.reset(new Mgr(&monc, map, &py_module_registry,
398 client_messenger.get(), &objecter,
399 &client, clog, audit_clog));
400 active_mgr->background_init(new LambdaContext(
401 [this](int r){
402 // Advertise our active-ness ASAP instead of waiting for
403 // next tick.
404 std::lock_guard l(lock);
405 send_beacon();
406 }));
407 dout(1) << "I am now activating" << dendl;
408 } else {
409 dout(10) << "I was already active" << dendl;
410 bool need_respawn = active_mgr->got_mgr_map(map);
411 if (need_respawn) {
412 respawn();
413 }
414 }
415
416 if (!available_in_map && map.get_available()) {
417 dout(4) << "Map now says I am available" << dendl;
418 available_in_map = true;
419 }
420 } else if (active_mgr != nullptr) {
421 derr << "I was active but no longer am" << dendl;
422 respawn();
423 } else {
424 if (map.active_gid != 0 && map.active_name != g_conf()->name.get_id()) {
425 // I am the standby and someone else is active, start modules
426 // in standby mode to do redirects if needed
427 if (!py_module_registry.is_standby_running()) {
428 py_module_registry.standby_start(monc, finisher);
429 }
430 }
431 }
432 }
433
434 bool MgrStandby::ms_dispatch2(const ref_t<Message>& m)
435 {
436 std::lock_guard l(lock);
437 dout(10) << state_str() << " " << *m << dendl;
438
439 if (m->get_type() == MSG_MGR_MAP) {
440 handle_mgr_map(ref_cast<MMgrMap>(m));
441 }
442 bool handled = false;
443 if (active_mgr) {
444 auto am = active_mgr;
445 lock.unlock();
446 handled = am->ms_dispatch2(m);
447 lock.lock();
448 }
449 if (m->get_type() == MSG_MGR_MAP) {
450 // let this pass through for mgrc
451 handled = false;
452 }
453 return handled;
454 }
455
456
457 bool MgrStandby::ms_handle_refused(Connection *con)
458 {
459 // do nothing for now
460 return false;
461 }
462
463 int MgrStandby::main(vector<const char *> args)
464 {
465 client_messenger->wait();
466
467 // Disable signal handlers
468 unregister_async_signal_handler(SIGHUP, sighup_handler);
469 shutdown_async_signal_handler();
470
471 return 0;
472 }
473
474
475 std::string MgrStandby::state_str()
476 {
477 if (active_mgr == nullptr) {
478 return "standby";
479 } else if (active_mgr->is_initialized()) {
480 return "active";
481 } else {
482 return "active (starting)";
483 }
484 }
485