]>
git.proxmox.com Git - ceph.git/blob - ceph/src/mgr/MgrStandby.cc
585837802f501f6dbe7e97831c0a5d38b7983e88
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2016 John Spray <john.spray@redhat.com>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
16 #include "common/errno.h"
17 #include "common/signal.h"
18 #include "include/compat.h"
20 #include "include/stringify.h"
21 #include "global/global_context.h"
22 #include "global/signal_handler.h"
24 #include "mgr/MgrContext.h"
25 #include "mgr/mgr_commands.h"
27 #include "messages/MMgrBeacon.h"
28 #include "messages/MMgrMap.h"
31 #include "MgrStandby.h"
33 #define dout_context g_ceph_context
34 #define dout_subsys ceph_subsys_mgr
36 #define dout_prefix *_dout << "mgr " << __func__ << " "
// Constructor: all work visible here happens in the member-initializer list.
//  - The Dispatcher base and monc are built from g_ceph_context.
//  - client_messenger comes from Messenger::create(), using the "ms_type"
//    config value and the nonce returned by Messenger::get_pid_nonce().
//  - objecter, client, mgrc and log_client all share client_messenger and
//    monc (mgrc and log_client take the address of monc.monmap).
//  - clog / audit_clog are the cluster and audit channels of log_client.
//  - available_in_map starts out false.
// NOTE(review): several initializer lines (and the constructor body) are
// missing from this extract, e.g. how argc/argv are stored -- confirm
// against the full file before relying on this list being complete.
39 MgrStandby::MgrStandby(int argc
, const char **argv
) :
40 Dispatcher(g_ceph_context
),
41 monc
{g_ceph_context
, poolctx
},
42 client_messenger(Messenger::create(
44 cct
->_conf
.get_val
<std::string
>("ms_type"),
47 Messenger::get_pid_nonce())),
48 objecter
{g_ceph_context
, client_messenger
.get(), &monc
, poolctx
},
49 client
{client_messenger
.get(), &monc
, &objecter
},
50 mgrc(g_ceph_context
, client_messenger
.get(), &monc
.monmap
),
51 log_client(g_ceph_context
, client_messenger
.get(), &monc
.monmap
, LogClient::NO_FLAGS
),
52 clog(log_client
.create_channel(CLOG_CHANNEL_CLUSTER
)),
53 audit_clog(log_client
.create_channel(CLOG_CHANNEL_AUDIT
)),
54 finisher(g_ceph_context
, "MgrStandby", "mgrsb-fin"),
55 timer(g_ceph_context
, lock
),
56 py_module_registry(clog
),
60 available_in_map(false)
// Destructor is defaulted: no hand-written teardown happens here.
64 MgrStandby::~MgrStandby() = default;
// Exposes a function-local static array of configuration key names.
// The visible entries are clog_to_syslog_* and clog_to_graylog_* keys.
// NOTE(review): other entries, the array terminator and the return
// statement are missing from this extract; presumably the list mirrors the
// keys tested in handle_conf_change() -- confirm against the full file.
66 const char** MgrStandby::get_tracked_conf_keys() const
68 static const char* KEYS
[] = {
72 "clog_to_syslog_facility",
73 "clog_to_syslog_level",
75 "clog_to_graylog_host",
76 "clog_to_graylog_port",
// Config-change hook. `changed` holds the names of the keys that changed.
// The visible condition is true when any clog_to_* key, "host" or "fsid" is
// in that set.
// NOTE(review): the statement guarded by this condition is missing from this
// extract; presumably it refreshes the cluster-log configuration -- confirm.
84 void MgrStandby::handle_conf_change(
85 const ConfigProxy
& conf
,
86 const std::set
<std::string
> &changed
)
88 if (changed
.count("clog_to_monitors") ||
89 changed
.count("clog_to_syslog") ||
90 changed
.count("clog_to_syslog_level") ||
91 changed
.count("clog_to_syslog_facility") ||
92 changed
.count("clog_to_graylog") ||
93 changed
.count("clog_to_graylog_host") ||
94 changed
.count("clog_to_graylog_port") ||
95 changed
.count("host") ||
96 changed
.count("fsid")) {
// Bring the daemon up. Steps visible in this extract, in order:
//   1. install the async signal handler and register SIGHUP;
//   2. take `lock` for the rest of the function;
//   3. register dispatchers on client_messenger and start it;
//   4. build the initial monmap (on failure the messenger is shut down);
//   5. subscribe to "mgrmap" and request mon/osd/mds/mgr service keys;
//   6. register the config callbacks *before* the MonClient is initialised;
//   7. add mgrc as a dispatcher and authenticate;
//   8. enable monmap passthrough, name the messenger after our global id,
//      attach the log client, and initialise the Python module registry.
// On every failure path that is visible, client_messenger is shut down and
// waited on.
// NOTE(review): the error checks, return statements and a few calls (for
// example whatever assigns `r` before the second shutdown path) are missing
// from this extract -- confirm exact return codes against the full file.
101 int MgrStandby::init()
103 init_async_signal_handler();
104 register_async_signal_handler(SIGHUP
, sighup_handler
);
106 std::lock_guard
l(lock
);
111 // Initialize Messenger
// Note the ordering: objecter goes to the head of the dispatcher list,
// `this` and `client` are appended to the tail.
112 client_messenger
->add_dispatcher_tail(this);
113 client_messenger
->add_dispatcher_head(&objecter
);
114 client_messenger
->add_dispatcher_tail(&client
);
115 client_messenger
->start();
119 // Initialize MonClient
120 if (monc
.build_initial_monmap() < 0) {
121 client_messenger
->shutdown();
122 client_messenger
->wait();
// Ask the monitors for MgrMap updates.
126 monc
.sub_want("mgrmap", 0, 0);
128 monc
.set_want_keys(CEPH_ENTITY_TYPE_MON
|CEPH_ENTITY_TYPE_OSD
129 |CEPH_ENTITY_TYPE_MDS
|CEPH_ENTITY_TYPE_MGR
);
130 monc
.set_messenger(client_messenger
.get());
132 // We must register our config callback before calling init(), so
133 // that we see the initial configuration message
134 monc
.register_config_callback([this](const std::string
&k
, const std::string
&v
){
135 // removing value to hide sensitive data going into mgr logs
136 // leaving this for debugging purposes
137 // dout(10) << "config_callback: " << k << " : " << v << dendl;
138 dout(10) << "config_callback: " << k
<< " : " << dendl
;
// Only keys under the "mgr/" prefix are forwarded to the Python modules.
139 if (k
.substr(0, 4) == "mgr/") {
140 py_module_registry
.handle_config(k
, v
);
145 monc
.register_config_notify_callback([this]() {
146 py_module_registry
.handle_config_notify();
148 dout(4) << "Registered monc callback" << dendl
;
153 client_messenger
->shutdown();
154 client_messenger
->wait();
158 client_messenger
->add_dispatcher_tail(&mgrc
);
160 r
= monc
.authenticate();
162 derr
<< "Authentication failed, did you specify a mgr ID with a valid keyring?" << dendl
;
164 client_messenger
->shutdown();
165 client_messenger
->wait();
168 // only forward monmap updates after authentication finishes, otherwise
169 // monc.authenticate() will be waiting for MgrStandby::ms_dispatch()
170 // to acquire the lock forever, as it is already locked in the beginning of
172 monc
.set_passthrough_monmap();
// The messenger's entity name is derived from the global id assigned by
// the monitors.
174 client_t whoami
= monc
.get_global_id();
175 client_messenger
->set_myname(entity_name_t::MGR(whoami
.v
));
176 monc
.set_log_client(&log_client
);
177 _update_log_config();
178 objecter
.set_client_incarnation(0);
184 py_module_registry
.init();
188 dout(4) << "Complete." << dendl
;
// Build an MMgrBeacon describing this daemon and send it to the monitors.
// Precondition: the caller holds `lock` (asserted on entry).
// What the visible lines put together:
//   - one MgrMap::ModuleInfo per module known to py_module_registry
//     (name, error string, can_run flag, options), moved into the message;
//   - a log line per RADOS client reported by py_module_registry;
//   - `available`: true only when active_mgr exists and is initialized;
//   - a metadata map with "addr", "addrs" and whatever collect_sys_info()
//     adds;
//   - while available_in_map is still false, the native mgr_commands plus
//     the Python modules' commands are attached to the beacon.
// NOTE(review): several message constructor arguments and the guards around
// the command/service sections are missing from this extract. In particular
// the guard that makes the active_mgr dereference in set_services() safe is
// not visible -- confirm against the full file.
192 void MgrStandby::send_beacon()
194 ceph_assert(ceph_mutex_is_locked_by_me(lock
));
195 dout(20) << state_str() << dendl
;
197 auto modules
= py_module_registry
.get_modules();
199 // Construct a list of the info about each loaded module
200 // which we will transmit to the monitor.
201 std::vector
<MgrMap::ModuleInfo
> module_info
;
202 for (const auto &module
: modules
) {
203 MgrMap::ModuleInfo info
;
204 info
.name
= module
->get_name();
205 info
.error_string
= module
->get_error_string();
206 info
.can_run
= module
->get_can_run();
207 info
.module_options
= module
->get_options();
208 module_info
.push_back(std::move(info
));
211 auto clients
= py_module_registry
.get_clients();
212 for (const auto& client
: clients
) {
213 dout(15) << "noting RADOS client for blocklist: " << client
<< dendl
;
216 // Whether I think I am available (request MgrMonitor to set me
217 // as available in the map)
218 bool available
= active_mgr
!= nullptr && active_mgr
->is_initialized();
// When not available, an empty address vector is used instead of the
// active server's addresses.
220 auto addrs
= available
? active_mgr
->get_server_addrs() : entity_addrvec_t();
221 dout(10) << "sending beacon as gid " << monc
.get_global_id() << dendl
;
223 map
<string
,string
> metadata
;
224 metadata
["addr"] = client_messenger
->get_myaddr_legacy().ip_only_to_str();
225 metadata
["addrs"] = stringify(client_messenger
->get_myaddrs());
226 collect_sys_info(&metadata
, g_ceph_context
);
228 auto m
= ceph::make_message
<MMgrBeacon
>(monc
.get_fsid(),
229 monc
.get_global_id(),
230 g_conf()->name
.get_id(),
233 std::move(module_info
),
239 if (!available_in_map
) {
240 // We are informing the mon that we are done initializing: inform
241 // it of our command set. This has to happen after init() because
242 // it needs the python modules to have loaded.
243 std::vector
<MonCommand
> commands
= mgr_commands
;
244 std::vector
<MonCommand
> py_commands
= py_module_registry
.get_commands();
245 commands
.insert(commands
.end(), py_commands
.begin(), py_commands
.end());
246 m
->set_command_descs(commands
);
247 dout(4) << "going active, including " << m
->get_command_descs().size()
248 << " commands in beacon" << dendl
;
251 m
->set_services(active_mgr
->get_services());
254 monc
.send_mon_message(std::move(m
));
// Periodic timer callback. Logs its own name, then schedules another event
// on `timer` after "mgr_tick_period" seconds using a LambdaContext that
// captures `this`.
// NOTE(review): the lines between the log statement and the re-arm, and the
// lambda body itself, are missing from this extract; presumably a beacon is
// sent and the lambda calls tick() again -- confirm against the full file.
257 void MgrStandby::tick()
259 dout(10) << __func__
<< dendl
;
262 timer
.add_event_after(
263 g_conf().get_val
<std::chrono::seconds
>("mgr_tick_period").count(),
264 new LambdaContext([this](int r
){
// Orderly teardown. The work is queued on `finisher` as a LambdaContext
// (capturing by reference) that takes `lock` before doing anything.
// The surviving comments record the intended stop order and why each step
// sits where it does; of the actual calls only py_module_registry.shutdown(),
// active_mgr->shutdown(), client_messenger->shutdown() and
// finisher.wait_for_empty() are visible in this extract.
// NOTE(review): where the lambda ends, the null check presumably guarding
// active_mgr, and the remaining shutdown calls are missing here -- confirm
// against the full file.
270 void MgrStandby::shutdown()
272 finisher
.queue(new LambdaContext([&](int) {
273 std::lock_guard
l(lock
);
275 dout(4) << "Shutting down" << dendl
;
277 py_module_registry
.shutdown();
278 // stop sending beacon first, I use monc to talk with monitors
280 // client uses monc and objecter
283 // Stop asio threads, so leftover events won't call into shut down
284 // monclient/objecter.
286 // stop monc, so mon won't be able to instruct me to shutdown/activate after
287 // the active_mgr is stopped
290 active_mgr
->shutdown();
292 // objecter is used by monc and active_mgr
294 // client_messenger is used by all of them, so stop it in the end
295 client_messenger
->shutdown();
298 // Then stop the finisher to ensure its enqueued contexts aren't going
299 // to touch references to the things we're about to tear down
300 finisher
.wait_for_empty();
// Re-exec this daemon with its original command line.
// Sequence visible here:
//   1. copy orig_argv into a NULL-terminated new_argv, logging each entry;
//   2. choose the executable path (see below);
//   3. unblock all signals and call execv();
//   4. log an error -- only reached if execv() returns, i.e. it failed.
// Path selection: exe_path is zero-initialised and readlink() is limited to
// PATH_MAX-1 bytes, so the buffer stays NUL-terminated even though
// readlink() does not terminate its output. If readlink() fails, the cwd is
// logged and orig_argv[0] is used as a best effort.
// NOTE(review): the strcpy() of the /proc/self/exe path is presumably the
// success (else) branch, but the `else` line, the declaration of `buf` and
// the end of the block comment about the executable path are missing from
// this extract -- confirm against the full file.
304 void MgrStandby::respawn()
306 // --- WARNING TO FUTURE COPY/PASTERS ---
307 // You must also add a call like
309 // ceph_pthread_setname(pthread_self(), "ceph-mgr");
311 // to main() so that /proc/$pid/stat field 2 contains "(ceph-mgr)"
312 // instead of "(exe)", so that killall (and log rotation) will work.
314 char *new_argv
[orig_argc
+1];
315 dout(1) << " e: '" << orig_argv
[0] << "'" << dendl
;
316 for (int i
=0; i
<orig_argc
; i
++) {
317 new_argv
[i
] = (char *)orig_argv
[i
];
318 dout(1) << " " << i
<< ": '" << orig_argv
[i
] << "'" << dendl
;
320 new_argv
[orig_argc
] = NULL
;
322 /* Determine the path to our executable, test if Linux /proc/self/exe exists.
323 * This allows us to exec the same executable even if it has since been
326 char exe_path
[PATH_MAX
] = "";
327 if (readlink(PROCPREFIX
"/proc/self/exe", exe_path
, PATH_MAX
-1) == -1) {
328 /* Print CWD for the user's interest */
330 char *cwd
= getcwd(buf
, sizeof(buf
));
332 dout(1) << " cwd " << cwd
<< dendl
;
334 /* Fall back to a best-effort: just running in our CWD */
335 strncpy(exe_path
, orig_argv
[0], PATH_MAX
-1);
337 dout(1) << "respawning with exe " << exe_path
<< dendl
;
338 strcpy(exe_path
, PROCPREFIX
"/proc/self/exe");
341 dout(1) << " exe_path " << exe_path
<< dendl
;
// Signals are unblocked first so the new process image does not start
// with an inherited blocked-signal mask.
343 unblock_all_signals(NULL
);
344 execv(exe_path
, new_argv
);
346 derr
<< "respawn execv " << orig_argv
[0]
347 << " failed with " << cpp_strerror(errno
) << dendl
;
// Re-read the cluster-log options and apply them to both log channels.
// Seven string->string maps are passed to parse_log_client_options(); in the
// branch guarded by that call's result, the same maps are handed to
// clog->update_config() and audit_clog->update_config().
// NOTE(review): the trailing arguments of all three calls and the comparison
// applied to the parse result are missing from this extract -- confirm
// against the full file.
351 void MgrStandby::_update_log_config()
353 map
<string
,string
> log_to_monitors
;
354 map
<string
,string
> log_to_syslog
;
355 map
<string
,string
> log_channel
;
356 map
<string
,string
> log_prio
;
357 map
<string
,string
> log_to_graylog
;
358 map
<string
,string
> log_to_graylog_host
;
359 map
<string
,string
> log_to_graylog_port
;
363 if (parse_log_client_options(cct
, log_to_monitors
, log_to_syslog
,
364 log_channel
, log_prio
, log_to_graylog
,
365 log_to_graylog_host
, log_to_graylog_port
,
367 clog
->update_config(log_to_monitors
, log_to_syslog
,
368 log_channel
, log_prio
, log_to_graylog
,
369 log_to_graylog_host
, log_to_graylog_port
,
371 audit_clog
->update_config(log_to_monitors
, log_to_syslog
,
372 log_channel
, log_prio
, log_to_graylog
,
373 log_to_graylog_host
, log_to_graylog_port
,
// React to a new MgrMap delivered by the monitors.
// `active_in_map` is true when the map's active_gid equals our own global id.
// Behaviour visible in this extract:
//   - py_module_registry sees the map first and reports whether a respawn
//     is needed (enabled-module set changed);
//   - on becoming active, a Mgr is created and initialised in the
//     background, with a completion that takes `lock`;
//   - if a Mgr already exists, the map is passed on via got_mgr_map(),
//     which can also request a respawn;
//   - available_in_map is set to true the first time the map reports us
//     available;
//   - if we hold an active_mgr but are no longer active, that is logged as
//     an error;
//   - when the map names a different active daemon (active_gid != 0 and
//     active_name differs from ours), standby modules are started unless
//     they are already running.
// NOTE(review): most if/else lines, the respawn()/shutdown calls and the
// body of the background_init completion are missing from this extract --
// confirm the exact branch structure against the full file.
378 void MgrStandby::handle_mgr_map(ref_t
<MMgrMap
> mmap
)
380 auto &map
= mmap
->get_map();
381 dout(4) << "received map epoch " << map
.get_epoch() << dendl
;
382 const bool active_in_map
= map
.active_gid
== monc
.get_global_id();
383 dout(4) << "active in map: " << active_in_map
384 << " active is " << map
.active_gid
<< dendl
;
386 // PyModuleRegistry may ask us to respawn if it sees that
387 // this MgrMap is changing its set of enabled modules
388 bool need_respawn
= py_module_registry
.handle_mgr_map(map
);
390 dout(1) << "respawning because set of enabled modules changed!" << dendl
;
396 dout(1) << "Activating!" << dendl
;
397 active_mgr
.reset(new Mgr(&monc
, map
, &py_module_registry
,
398 client_messenger
.get(), &objecter
,
399 &client
, clog
, audit_clog
));
400 active_mgr
->background_init(new LambdaContext(
402 // Advertise our active-ness ASAP instead of waiting for
404 std::lock_guard
l(lock
);
407 dout(1) << "I am now activating" << dendl
;
409 dout(10) << "I was already active" << dendl
;
410 bool need_respawn
= active_mgr
->got_mgr_map(map
);
416 if (!available_in_map
&& map
.get_available()) {
417 dout(4) << "Map now says I am available" << dendl
;
418 available_in_map
= true;
420 } else if (active_mgr
!= nullptr) {
421 derr
<< "I was active but no longer am" << dendl
;
424 if (map
.active_gid
!= 0 && map
.active_name
!= g_conf()->name
.get_id()) {
425 // I am the standby and someone else is active, start modules
426 // in standby mode to do redirects if needed
427 if (!py_module_registry
.is_standby_running()) {
428 py_module_registry
.standby_start(monc
, finisher
);
// Messenger dispatch entry point.
// Takes `lock`, logs the current state and the message, handles MSG_MGR_MAP
// locally through handle_mgr_map(), then offers the message to a local copy
// of active_mgr (`am`) and records its verdict in `handled`.
// The second MSG_MGR_MAP check exists so the map can still reach mgrc, per
// the comment inside it.
// NOTE(review): the guard around `am`, any lock release around the nested
// dispatch, the body of the second MSG_MGR_MAP branch and the return
// statement are missing from this extract -- confirm against the full file.
434 bool MgrStandby::ms_dispatch2(const ref_t
<Message
>& m
)
436 std::lock_guard
l(lock
);
437 dout(10) << state_str() << " " << *m
<< dendl
;
439 if (m
->get_type() == MSG_MGR_MAP
) {
440 handle_mgr_map(ref_cast
<MMgrMap
>(m
));
442 bool handled
= false;
444 auto am
= active_mgr
;
446 handled
= am
->ms_dispatch2(m
);
449 if (m
->get_type() == MSG_MGR_MAP
) {
450 // let this pass through for mgrc
// Connection-refused hook; deliberately does nothing with `con` for now.
// NOTE(review): the return statement is missing from this extract.
457 bool MgrStandby::ms_handle_refused(Connection
*con
)
459 // do nothing for now
// Daemon main loop: blocks in client_messenger->wait(), then unregisters
// the SIGHUP handler and shuts the async signal handler down.
// `args` is not used in the lines visible here.
// NOTE(review): the return statement (and possibly other cleanup) is missing
// from this extract -- confirm against the full file.
463 int MgrStandby::main(vector
<const char *> args
)
465 client_messenger
->wait();
467 // Disable signal handlers
468 unregister_async_signal_handler(SIGHUP
, sighup_handler
);
469 shutdown_async_signal_handler();
// Short human-readable state label derived from active_mgr.
// Three cases are distinguished: no active_mgr, an initialized active_mgr,
// and (by elimination) an active_mgr that is not yet initialized, which
// yields "active (starting)".
// NOTE(review): the strings returned for the first two cases are missing
// from this extract -- confirm against the full file.
475 std::string
MgrStandby::state_str()
477 if (active_mgr
== nullptr) {
479 } else if (active_mgr
->is_initialized()) {
482 return "active (starting)";