]>
git.proxmox.com Git - ceph.git/blob - ceph/src/mgr/MgrStandby.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2016 John Spray <john.spray@redhat.com>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #include <boost/algorithm/string/replace.hpp>
17 #include "common/errno.h"
18 #include "common/signal.h"
19 #include "include/compat.h"
21 #include "include/stringify.h"
22 #include "global/global_context.h"
23 #include "global/signal_handler.h"
25 #include "mgr/MgrContext.h"
26 #include "mgr/mgr_commands.h"
27 #include "mgr/mgr_perf_counters.h"
29 #include "messages/MMgrBeacon.h"
30 #include "messages/MMgrMap.h"
33 #include "MgrStandby.h"
35 #define dout_context g_ceph_context
36 #define dout_subsys ceph_subsys_mgr
38 #define dout_prefix *_dout << "mgr " << __func__ << " "
// Constructor: builds the client messenger and wires the member objects
// together through the member-initializer list (monc, objecter, client,
// mgrc, log_client with its cluster and audit channels, finisher, timer,
// py_module_registry). available_in_map starts out false.
//
// argc/argv: the process command line.
// NOTE(review): the embedded source numbering skips 48, 51-52, 63-65 and
// 67-69 here, so some Messenger::create() arguments, some initializers and
// the constructor body are not visible in this listing. Presumably argc/argv
// are stored as orig_argc/orig_argv (used by respawn()) -- confirm.
44 MgrStandby::MgrStandby(int argc
, const char **argv
) :
45 Dispatcher(g_ceph_context
),
46 monc
{g_ceph_context
, poolctx
},
47 client_messenger(Messenger::create(
// Messenger type selection: use "ms_public_type" unless it is empty, in
// which case fall back to "ms_type".
49 cct
->_conf
.get_val
<std::string
>("ms_public_type").empty() ?
50 cct
->_conf
.get_val
<std::string
>("ms_type") : cct
->_conf
.get_val
<std::string
>("ms_public_type"),
53 Messenger::get_pid_nonce())),
// objecter, client, mgrc and log_client all share the one client_messenger
// and refer to monc (or its monmap).
54 objecter
{g_ceph_context
, client_messenger
.get(), &monc
, poolctx
},
55 client
{client_messenger
.get(), &monc
, &objecter
},
56 mgrc(g_ceph_context
, client_messenger
.get(), &monc
.monmap
),
57 log_client(g_ceph_context
, client_messenger
.get(), &monc
.monmap
, LogClient::NO_FLAGS
),
// Two log channels are created from log_client: cluster and audit.
58 clog(log_client
.create_channel(CLOG_CHANNEL_CLUSTER
)),
59 audit_clog(log_client
.create_channel(CLOG_CHANNEL_AUDIT
)),
60 finisher(g_ceph_context
, "MgrStandby", "mgrsb-fin"),
// The timer is constructed with this object's `lock`.
61 timer(g_ceph_context
, lock
),
62 py_module_registry(clog
),
66 available_in_map(false)
// Destructor is explicitly defaulted (compiler-generated) and defined
// out of line in this translation unit.
70 MgrStandby::~MgrStandby() = default;
// Declares the static table of configuration option names this object
// tracks. The names visible here match options tested in
// handle_conf_change().
// NOTE(review): the embedded numbering skips 75-77, 80 and 84-90, so the
// table is only partly visible and the return statement is not shown;
// presumably the function returns KEYS -- confirm against the full file.
72 const char** MgrStandby::get_tracked_conf_keys() const
74 static const char* KEYS
[] = {
78 "clog_to_syslog_facility",
79 "clog_to_syslog_level",
81 "clog_to_graylog_host",
82 "clog_to_graylog_port",
83 "mgr_standby_modules",
// Reacts to a set of changed configuration options.
//
// conf:    the configuration proxy (not referenced in the visible lines;
//          values are read through g_conf() instead).
// changed: names of the options whose values changed.
//
// Two independent reactions are visible:
//  1. If any clog_to_* option, "host" or "fsid" changed, the log channel
//     options are re-parsed via _update_log_config().
//  2. If "mgr_standby_modules" changed while there is no active_mgr, and the
//     new value disagrees with py_module_registry.have_standby_modules(),
//     a level-1 message ending in "respawning" is logged.
// NOTE(review): upstream lines 113-118 are not visible; presumably respawn()
// is called after the log message -- confirm.
91 void MgrStandby::handle_conf_change(
92 const ConfigProxy
& conf
,
93 const std::set
<std::string
> &changed
)
// (1) Any logging-related option changed: refresh the log client config.
95 if (changed
.count("clog_to_monitors") ||
96 changed
.count("clog_to_syslog") ||
97 changed
.count("clog_to_syslog_level") ||
98 changed
.count("clog_to_syslog_facility") ||
99 changed
.count("clog_to_graylog") ||
100 changed
.count("clog_to_graylog_host") ||
101 changed
.count("clog_to_graylog_port") ||
102 changed
.count("host") ||
103 changed
.count("fsid")) {
104 _update_log_config();
// (2) Only relevant while we are not the active mgr (active_mgr is null).
106 if (changed
.count("mgr_standby_modules") && !active_mgr
) {
// Act only when the configured value and the registry's current state differ.
107 if (g_conf().get_val
<bool>("mgr_standby_modules") != py_module_registry
.have_standby_modules()) {
108 dout(1) << "mgr_standby_modules now "
109 << (int)g_conf().get_val
<bool>("mgr_standby_modules")
110 << ", standby modules are "
111 << (py_module_registry
.have_standby_modules() ? "":"not ")
112 << "active, respawning"
// Brings the daemon up. Visible steps, in order:
//   - install the async signal handler and register sighup_handler for SIGHUP
//   - register this object as a config observer
//   - take `lock`
//   - add dispatchers to client_messenger and start it
//   - build the initial monmap
//   - subscribe to "mgrmap" and configure monc (wanted keys, messenger)
//   - register config / config-notify callbacks on monc
//   - add mgrc as a dispatcher and authenticate with the monitors
//   - enable monmap passthrough, name the messenger after our global id
//   - hook up log_client, refresh log config, set objecter incarnation
//   - initialize the python module registry and start mgr perf counters
//
// On the failure paths visible here the messenger is shut down and waited
// for before leaving.
// NOTE(review): the function returns int, but no return statement and few of
// the error checks are visible (upstream lines such as 143-145, 171-172,
// 181, 186, 199-203, 206-209, 211 are skipped in this listing).
119 int MgrStandby::init()
121 init_async_signal_handler();
122 register_async_signal_handler(SIGHUP
, sighup_handler
);
124 cct
->_conf
.add_observer(this);
// Held from here on; see the note before set_passthrough_monmap() below.
126 std::lock_guard
l(lock
);
131 // Initialize Messenger
// Resulting dispatcher order: objecter is placed at the head, while this
// object and client are appended at the tail.
132 client_messenger
->add_dispatcher_tail(this);
133 client_messenger
->add_dispatcher_head(&objecter
);
134 client_messenger
->add_dispatcher_tail(&client
);
135 client_messenger
->start();
139 // Initialize MonClient
// Failure path: stop the already-started messenger before bailing out.
140 if (monc
.build_initial_monmap() < 0) {
141 client_messenger
->shutdown();
142 client_messenger
->wait();
146 monc
.sub_want("mgrmap", 0, 0);
// Request keys for the mon, osd, mds and mgr entity types.
148 monc
.set_want_keys(CEPH_ENTITY_TYPE_MON
|CEPH_ENTITY_TYPE_OSD
149 |CEPH_ENTITY_TYPE_MDS
|CEPH_ENTITY_TYPE_MGR
);
150 monc
.set_messenger(client_messenger
.get());
152 // We must register our config callback before calling init(), so
153 // that we see the initial configuration message
154 monc
.register_config_callback([this](const std::string
&k
, const std::string
&v
){
155 // removing value to hide sensitive data going into mgr logs
156 // leaving this for debugging purposes
157 // dout(10) << "config_callback: " << k << " : " << v << dendl;
158 dout(10) << "config_callback: " << k
<< " : " << dendl
;
// Only keys with the "mgr/" prefix are forwarded to the module registry.
159 if (k
.substr(0, 4) == "mgr/") {
160 py_module_registry
.handle_config(k
, v
);
165 monc
.register_config_notify_callback([this]() {
166 py_module_registry
.handle_config_notify();
168 dout(4) << "Registered monc callback" << dendl
;
// Failure path (the condition is on a line not visible here): stop the
// messenger.
173 client_messenger
->shutdown();
174 client_messenger
->wait();
178 client_messenger
->add_dispatcher_tail(&mgrc
);
180 r
= monc
.authenticate();
182 derr
<< "Authentication failed, did you specify a mgr ID with a valid keyring?" << dendl
;
184 client_messenger
->shutdown();
185 client_messenger
->wait();
188 // only forward monmap updates after authentication finishes, otherwise
189 // monc.authenticate() will be waiting for MgrStandby::ms_dispatch()
190 // to acquire the lock forever, as it is already locked in the beginning of
192 monc
.set_passthrough_monmap();
// Name the messenger after the global id obtained from monc.
194 client_t whoami
= monc
.get_global_id();
195 client_messenger
->set_myname(entity_name_t::MGR(whoami
.v
));
196 monc
.set_log_client(&log_client
);
197 _update_log_config();
198 objecter
.set_client_incarnation(0);
204 py_module_registry
.init();
205 mgr_perf_start(g_ceph_context
);
210 dout(4) << "Complete." << dendl
;
// Builds an MMgrBeacon describing this daemon and sends it to the monitors
// through monc.
//
// Precondition: the caller holds `lock` (asserted on entry).
//
// The beacon is assembled from:
//   - per-module info (name, error string, can_run, options) for every
//     module known to py_module_registry
//   - whether this daemon considers itself available (an active_mgr exists
//     and reports is_initialized()) and, if so, its server addresses
//   - a metadata map with "addr", "addrs" and collected system info
//   - while available_in_map is still false: the full command description
//     list (mgr_commands plus the python modules' commands)
// NOTE(review): upstream lines 253-254, 256-260, 277-278 and others are not
// visible, so the full argument list of the beacon and some guards are
// missing from this listing.
214 void MgrStandby::send_beacon()
216 ceph_assert(ceph_mutex_is_locked_by_me(lock
));
217 dout(20) << state_str() << dendl
;
219 auto modules
= py_module_registry
.get_modules();
221 // Construct a list of the info about each loaded module
222 // which we will transmit to the monitor.
223 std::vector
<MgrMap::ModuleInfo
> module_info
;
224 for (const auto &module
: modules
) {
225 MgrMap::ModuleInfo info
;
226 info
.name
= module
->get_name();
227 info
.error_string
= module
->get_error_string();
228 info
.can_run
= module
->get_can_run();
229 info
.module_options
= module
->get_options();
230 module_info
.push_back(std::move(info
));
// The registry's RADOS clients are logged here at level 15; what else is
// done with them is not visible in this listing.
233 auto clients
= py_module_registry
.get_clients();
234 for (const auto& client
: clients
) {
235 dout(15) << "noting RADOS client for blocklist: " << client
<< dendl
;
238 // Whether I think I am available (request MgrMonitor to set me
239 // as available in the map)
240 bool available
= active_mgr
!= nullptr && active_mgr
->is_initialized();
// Only an available mgr advertises server addresses; otherwise an empty
// address vector is used.
242 auto addrs
= available
? active_mgr
->get_server_addrs() : entity_addrvec_t();
243 dout(10) << "sending beacon as gid " << monc
.get_global_id() << dendl
;
245 map
<string
,string
> metadata
;
246 metadata
["addr"] = client_messenger
->get_myaddr_legacy().ip_only_to_str();
247 metadata
["addrs"] = stringify(client_messenger
->get_myaddrs());
248 collect_sys_info(&metadata
, g_ceph_context
);
250 auto m
= ceph::make_message
<MMgrBeacon
>(monc
.get_fsid(),
251 monc
.get_global_id(),
252 g_conf()->name
.get_id(),
255 std::move(module_info
),
261 if (!available_in_map
) {
262 // We are informing the mon that we are done initializing: inform
263 // it of our command set. This has to happen after init() because
264 // it needs the python modules to have loaded.
265 std::vector
<MonCommand
> commands
= mgr_commands
;
266 std::vector
<MonCommand
> py_commands
= py_module_registry
.get_commands();
267 commands
.insert(commands
.end(), py_commands
.begin(), py_commands
.end());
// When min_mon_release is older than quincy, remove the
// ",positional=false" annotation from every command string.
268 if (monc
.monmap
.min_mon_release
< ceph_release_t::quincy
) {
269 dout(10) << " stripping out positional=false quincy-ism" << dendl
;
270 for (auto& i
: commands
) {
271 boost::replace_all(i
.cmdstring
, ",positional=false", "");
274 m
->set_command_descs(commands
);
275 dout(4) << "going active, including " << m
->get_command_descs().size()
276 << " commands in beacon" << dendl
;
// NOTE(review): active_mgr is dereferenced here; the guarding condition, if
// any, sits on lines not visible in this listing (277-278) -- confirm it
// cannot be null at this point.
279 m
->set_services(active_mgr
->get_services());
282 monc
.send_mon_message(std::move(m
));
// Timer tick: logs at level 10, then schedules a LambdaContext on `timer`
// to run after "mgr_tick_period" seconds.
// NOTE(review): upstream lines 288-289 and the lambda body (293 onwards)
// are not visible; presumably a beacon is sent and the lambda calls tick()
// again to keep the cycle going -- confirm against the full file.
285 void MgrStandby::tick()
287 dout(10) << __func__
<< dendl
;
// The delay is the configured period converted to a plain second count.
290 timer
.add_event_after(
291 g_conf().get_val
<std::chrono::seconds
>("mgr_tick_period").count(),
292 new LambdaContext([this](int r
){
// Tears the daemon down. The teardown is queued on `finisher` as a
// LambdaContext whose first action is to take `lock`; the existing comments
// below describe the intended ordering of the individual shutdown steps.
// Afterwards the finisher is drained and the mgr perf counters are stopped.
// NOTE(review): many of the calls the comments refer to (upstream lines 307,
// 309-310, 313, 316-317, 321, 324-325) are not visible in this listing, nor
// is the end of the lambda, so only the calls shown here can be confirmed.
298 void MgrStandby::shutdown()
// The lambda captures by reference ([&]).
300 finisher
.queue(new LambdaContext([&](int) {
301 std::lock_guard
l(lock
);
303 dout(4) << "Shutting down" << dendl
;
305 py_module_registry
.shutdown();
306 // stop sending beacon first, I use monc to talk with monitors
308 // client uses monc and objecter
311 // Stop asio threads, so leftover events won't call into shut down
312 // monclient/objecter.
314 // stop monc, so mon won't be able to instruct me to shutdown/activate after
315 // the active_mgr is stopped
// NOTE(review): the guard on active_mgr (if any) is on a line not visible.
318 active_mgr
->shutdown();
320 // objecter is used by monc and active_mgr
322 // client_messenger is used by all of them, so stop it in the end
323 client_messenger
->shutdown();
326 // Then stop the finisher to ensure its enqueued contexts aren't going
327 // to touch references to the things we're about to tear down
328 finisher
.wait_for_empty();
330 mgr_perf_stop(g_ceph_context
);
// Re-executes this process with its original command line.
//
// Steps visible here:
//   1. copy orig_argv into a NULL-terminated new_argv array, logging each
//      argument at level 1
//   2. work out the executable path by probing /proc/self/exe with
//      readlink(), falling back to orig_argv[0]
//   3. unblock all signals and call execv()
//   4. if control reaches the lines after execv(), log the failure with errno
// NOTE(review): upstream lines 353-354, 358, 360, 362, 365, 368-369 and
// 377-378 are not visible, so the exact if/else structure and what happens
// after the final error message cannot be confirmed from this listing.
333 void MgrStandby::respawn()
335 // --- WARNING TO FUTURE COPY/PASTERS ---
336 // You must also add a call like
338 // ceph_pthread_setname(pthread_self(), "ceph-mgr");
340 // to main() so that /proc/$pid/stat field 2 contains "(ceph-mgr)"
341 // instead of "(exe)", so that killall (and log rotation) will work.
// One extra slot for the terminating NULL that execv() requires.
343 char *new_argv
[orig_argc
+1];
344 dout(1) << " e: '" << orig_argv
[0] << "'" << dendl
;
345 for (int i
=0; i
<orig_argc
; i
++) {
346 new_argv
[i
] = (char *)orig_argv
[i
];
347 dout(1) << " " << i
<< ": '" << orig_argv
[i
] << "'" << dendl
;
349 new_argv
[orig_argc
] = NULL
;
351 /* Determine the path to our executable, test if Linux /proc/self/exe exists.
352 * This allows us to exec the same executable even if it has since been
// exe_path is zero-filled by the "" initializer and both readlink() and
// strncpy() below are limited to PATH_MAX-1 bytes, so the buffer always
// keeps a terminating NUL.
355 char exe_path
[PATH_MAX
] = "";
356 if (readlink(PROCPREFIX
"/proc/self/exe", exe_path
, PATH_MAX
-1) == -1) {
357 /* Print CWD for the user's interest */
359 char *cwd
= getcwd(buf
, sizeof(buf
));
361 dout(1) << " cwd " << cwd
<< dendl
;
363 /* Fall back to a best-effort: just running in our CWD */
364 strncpy(exe_path
, orig_argv
[0], PATH_MAX
-1);
// NOTE(review): presumably the lines below form the else branch (readlink
// succeeded); the `else` itself is on a line not visible. Here exe_path is
// overwritten with the /proc/self/exe link path itself rather than the
// resolved target.
366 dout(1) << "respawning with exe " << exe_path
<< dendl
;
367 strcpy(exe_path
, PROCPREFIX
"/proc/self/exe");
370 dout(1) << " exe_path " << exe_path
<< dendl
;
372 unblock_all_signals(NULL
);
373 execv(exe_path
, new_argv
);
// execv() only returns on failure, so reaching this point means the
// respawn did not happen.
375 derr
<< "respawn execv " << orig_argv
[0]
376 << " failed with " << cpp_strerror(errno
) << dendl
;
// Re-parses the client log options from cct for both log channels:
// clog (the cluster channel) and audit_clog (the audit channel).
// Called from init() and from handle_conf_change() when a logging-related
// option changes.
380 void MgrStandby::_update_log_config()
382 clog
->parse_client_options(cct
);
383 audit_clog
->parse_client_options(cct
);
// Processes a newly received MgrMap.
//
// mmap: the MMgrMap message carrying the map.
//
// Visible behaviour:
//   - computes active_in_map: whether the map's active_gid equals our
//     global id from monc
//   - passes the map to py_module_registry.handle_mgr_map(), which may
//     request a respawn
//   - on the activation path, creates a new Mgr in active_mgr and starts
//     its background initialization
//   - on the already-active path, forwards the map to active_mgr
//   - records available_in_map once the map reports us available
//   - logs an error when we were active but no longer are
//   - when another mgr (different name, non-zero gid) is active, starts the
//     standby python modules if they are not running and
//     "mgr_standby_modules" is enabled
// NOTE(review): most branch conditions (upstream lines 397, 399-403, 409,
// 411, 413-414, 416, 419-423, 427, 430-431, 438+) are not visible, so the
// exact nesting of the steps above cannot be confirmed from this listing.
386 void MgrStandby::handle_mgr_map(ref_t
<MMgrMap
> mmap
)
388 auto &map
= mmap
->get_map();
389 dout(4) << "received map epoch " << map
.get_epoch() << dendl
;
// We are the active mgr if the map's active gid is our own global id.
390 const bool active_in_map
= map
.active_gid
== monc
.get_global_id();
391 dout(4) << "active in map: " << active_in_map
392 << " active is " << map
.active_gid
<< dendl
;
394 // PyModuleRegistry may ask us to respawn if it sees that
395 // this MgrMap is changing its set of enabled modules
396 bool need_respawn
= py_module_registry
.handle_mgr_map(map
);
398 dout(1) << "respawning because set of enabled modules changed!" << dendl
;
404 dout(1) << "Activating!" << dendl
;
// The new Mgr is handed pointers to the shared monc, module registry,
// messenger, objecter and client, plus both log channels.
405 active_mgr
.reset(new Mgr(&monc
, map
, &py_module_registry
,
406 client_messenger
.get(), &objecter
,
407 &client
, clog
, audit_clog
));
// The completion context takes `lock` when it runs.
408 active_mgr
->background_init(new LambdaContext(
410 // Advertise our active-ness ASAP instead of waiting for
412 std::lock_guard
l(lock
);
415 dout(1) << "I am now activating" << dendl
;
417 dout(10) << "I was already active" << dendl
;
// Already active: let the running Mgr digest the new map; its result is
// stored in a second need_respawn variable.
418 bool need_respawn
= active_mgr
->got_mgr_map(map
);
424 if (!available_in_map
&& map
.get_available()) {
425 dout(4) << "Map now says I am available" << dendl
;
426 available_in_map
= true;
428 } else if (active_mgr
!= nullptr) {
429 derr
<< "I was active but no longer am" << dendl
;
432 if (map
.active_gid
!= 0 && map
.active_name
!= g_conf()->name
.get_id()) {
433 // I am the standby and someone else is active, start modules
434 // in standby mode to do redirects if needed
435 if (!py_module_registry
.is_standby_running() &&
436 g_conf().get_val
<bool>("mgr_standby_modules")) {
437 py_module_registry
.standby_start(monc
, finisher
);
// Message dispatch entry point.
//
// m: the incoming message.
//
// Takes `lock`, logs the current state and the message at level 10, and
// hands MSG_MGR_MAP messages to handle_mgr_map(). The visible lines also
// forward the message to the active mgr through a local copy (`am`) of
// active_mgr and record its result in `handled`.
// NOTE(review): the return statements and the guard around the active_mgr
// dispatch (upstream lines 450, 452, 454, 456-457, 460-464) are not visible.
// Per the existing comment, MSG_MGR_MAP is meant to "pass through for mgrc",
// which presumably means returning false for it -- confirm.
443 bool MgrStandby::ms_dispatch2(const ref_t
<Message
>& m
)
445 std::lock_guard
l(lock
);
446 dout(10) << state_str() << " " << *m
<< dendl
;
448 if (m
->get_type() == MSG_MGR_MAP
) {
449 handle_mgr_map(ref_cast
<MMgrMap
>(m
));
451 bool handled
= false;
// Dispatch goes through a local copy of active_mgr rather than the member
// itself; the reason is not visible in this listing.
453 auto am
= active_mgr
;
455 handled
= am
->ms_dispatch2(m
);
458 if (m
->get_type() == MSG_MGR_MAP
) {
459 // let this pass through for mgrc
// Hook for a refused connection; per the existing comment it intentionally
// does nothing for now.
//
// con: the connection that was refused (unused in the visible lines).
// NOTE(review): the return statement is not visible in this listing.
466 bool MgrStandby::ms_handle_refused(Connection
*con
)
468 // do nothing for now
// Main loop of the daemon: blocks in client_messenger->wait(), then
// unregisters the SIGHUP handler and shuts down async signal handling
// (the counterpart of the registration done in init()).
//
// args: command-line arguments; not referenced in the visible lines.
// NOTE(review): the return statement (and upstream lines 475, 479-482) are
// not visible in this listing.
472 int MgrStandby::main(vector
<const char *> args
)
474 client_messenger
->wait();
476 // Disable signal handlers
477 unregister_async_signal_handler(SIGHUP
, sighup_handler
);
478 shutdown_async_signal_handler();
// Returns a human-readable description of the daemon's current state,
// chosen by three cases: no active_mgr, an active_mgr that reports
// is_initialized(), and otherwise "active (starting)".
// NOTE(review): the strings returned for the first two cases (upstream
// lines 487 and 489) and the end of the function are not visible in this
// listing.
484 std::string
MgrStandby::state_str()
486 if (active_mgr
== nullptr) {
488 } else if (active_mgr
->is_initialized()) {
491 return "active (starting)";