]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2016 John Spray <john.spray@redhat.com> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | */ | |
13 | ||
14 | #include <Python.h> | |
15 | ||
16 | #include "common/errno.h" | |
31f18b77 FG |
17 | #include "common/signal.h" |
18 | #include "include/compat.h" | |
7c673cae FG |
19 | |
20 | #include "include/stringify.h" | |
21 | #include "global/global_context.h" | |
22 | #include "global/signal_handler.h" | |
23 | ||
24 | #include "mgr/MgrContext.h" | |
25 | ||
26 | #include "messages/MMgrBeacon.h" | |
27 | #include "messages/MMgrMap.h" | |
28 | #include "Mgr.h" | |
29 | ||
30 | #include "MgrStandby.h" | |
31 | ||
32 | #define dout_context g_ceph_context | |
33 | #define dout_subsys ceph_subsys_mgr | |
34 | #undef dout_prefix | |
35 | #define dout_prefix *_dout << "mgr " << __func__ << " " | |
36 | ||
37 | ||
31f18b77 | 38 | MgrStandby::MgrStandby(int argc, const char **argv) : |
7c673cae FG |
39 | Dispatcher(g_ceph_context), |
40 | monc{g_ceph_context}, | |
41 | client_messenger(Messenger::create_client_messenger(g_ceph_context, "mgr")), | |
42 | objecter{g_ceph_context, client_messenger.get(), &monc, NULL, 0, 0}, | |
43 | client{client_messenger.get(), &monc, &objecter}, | |
44 | log_client(g_ceph_context, client_messenger.get(), &monc.monmap, LogClient::NO_FLAGS), | |
45 | clog(log_client.create_channel(CLOG_CHANNEL_CLUSTER)), | |
46 | audit_clog(log_client.create_channel(CLOG_CHANNEL_AUDIT)), | |
47 | lock("MgrStandby::lock"), | |
48 | timer(g_ceph_context, lock), | |
3efd9988 | 49 | py_module_registry(clog), |
31f18b77 FG |
50 | active_mgr(nullptr), |
51 | orig_argc(argc), | |
c07f9fc5 FG |
52 | orig_argv(argv), |
53 | available_in_map(false) | |
7c673cae FG |
54 | { |
55 | } | |
56 | ||
57 | MgrStandby::~MgrStandby() = default; | |
58 | ||
59 | const char** MgrStandby::get_tracked_conf_keys() const | |
60 | { | |
61 | static const char* KEYS[] = { | |
62 | // clog & admin clog | |
63 | "clog_to_monitors", | |
64 | "clog_to_syslog", | |
65 | "clog_to_syslog_facility", | |
66 | "clog_to_syslog_level", | |
67 | "osd_objectstore_fuse", | |
68 | "clog_to_graylog", | |
69 | "clog_to_graylog_host", | |
70 | "clog_to_graylog_port", | |
71 | "host", | |
72 | "fsid", | |
73 | NULL | |
74 | }; | |
75 | return KEYS; | |
76 | } | |
77 | ||
78 | void MgrStandby::handle_conf_change( | |
79 | const struct md_config_t *conf, | |
80 | const std::set <std::string> &changed) | |
81 | { | |
82 | if (changed.count("clog_to_monitors") || | |
83 | changed.count("clog_to_syslog") || | |
84 | changed.count("clog_to_syslog_level") || | |
85 | changed.count("clog_to_syslog_facility") || | |
86 | changed.count("clog_to_graylog") || | |
87 | changed.count("clog_to_graylog_host") || | |
88 | changed.count("clog_to_graylog_port") || | |
89 | changed.count("host") || | |
90 | changed.count("fsid")) { | |
91 | _update_log_config(); | |
92 | } | |
93 | } | |
94 | ||
95 | int MgrStandby::init() | |
96 | { | |
97 | Mutex::Locker l(lock); | |
98 | ||
99 | // Initialize Messenger | |
100 | client_messenger->add_dispatcher_tail(this); | |
101 | client_messenger->add_dispatcher_head(&objecter); | |
102 | client_messenger->add_dispatcher_tail(&client); | |
103 | client_messenger->start(); | |
104 | ||
105 | // Initialize MonClient | |
106 | if (monc.build_initial_monmap() < 0) { | |
107 | client_messenger->shutdown(); | |
108 | client_messenger->wait(); | |
109 | return -1; | |
110 | } | |
111 | ||
112 | monc.sub_want("mgrmap", 0, 0); | |
113 | ||
114 | monc.set_want_keys(CEPH_ENTITY_TYPE_MON|CEPH_ENTITY_TYPE_OSD | |
115 | |CEPH_ENTITY_TYPE_MDS|CEPH_ENTITY_TYPE_MGR); | |
116 | monc.set_messenger(client_messenger.get()); | |
117 | int r = monc.init(); | |
118 | if (r < 0) { | |
119 | monc.shutdown(); | |
120 | client_messenger->shutdown(); | |
121 | client_messenger->wait(); | |
122 | return r; | |
123 | } | |
124 | r = monc.authenticate(); | |
125 | if (r < 0) { | |
126 | derr << "Authentication failed, did you specify a mgr ID with a valid keyring?" << dendl; | |
127 | monc.shutdown(); | |
128 | client_messenger->shutdown(); | |
129 | client_messenger->wait(); | |
130 | return r; | |
131 | } | |
132 | ||
133 | client_t whoami = monc.get_global_id(); | |
134 | client_messenger->set_myname(entity_name_t::CLIENT(whoami.v)); | |
135 | monc.set_log_client(&log_client); | |
136 | _update_log_config(); | |
137 | objecter.set_client_incarnation(0); | |
138 | objecter.init(); | |
139 | objecter.start(); | |
140 | client.init(); | |
141 | timer.init(); | |
142 | ||
31f18b77 | 143 | tick(); |
7c673cae FG |
144 | |
145 | dout(4) << "Complete." << dendl; | |
146 | return 0; | |
147 | } | |
148 | ||
149 | void MgrStandby::send_beacon() | |
150 | { | |
151 | assert(lock.is_locked_by_me()); | |
152 | dout(1) << state_str() << dendl; | |
7c673cae | 153 | |
224ce89b | 154 | set<string> modules; |
3efd9988 | 155 | PyModuleRegistry::list_modules(&modules); |
c07f9fc5 FG |
156 | |
157 | // Whether I think I am available (request MgrMonitor to set me | |
158 | // as available in the map) | |
7c673cae | 159 | bool available = active_mgr != nullptr && active_mgr->is_initialized(); |
c07f9fc5 | 160 | |
7c673cae | 161 | auto addr = available ? active_mgr->get_server_addr() : entity_addr_t(); |
3efd9988 | 162 | dout(10) << "sending beacon as gid " << monc.get_global_id() << dendl; |
224ce89b | 163 | |
c07f9fc5 FG |
164 | map<string,string> metadata; |
165 | collect_sys_info(&metadata, g_ceph_context); | |
166 | ||
7c673cae FG |
167 | MMgrBeacon *m = new MMgrBeacon(monc.get_fsid(), |
168 | monc.get_global_id(), | |
169 | g_conf->name.get_id(), | |
170 | addr, | |
224ce89b | 171 | available, |
c07f9fc5 FG |
172 | modules, |
173 | std::move(metadata)); | |
174 | ||
3efd9988 FG |
175 | if (available) { |
176 | if (!available_in_map) { | |
177 | // We are informing the mon that we are done initializing: inform | |
178 | // it of our command set. This has to happen after init() because | |
179 | // it needs the python modules to have loaded. | |
180 | m->set_command_descs(active_mgr->get_command_set()); | |
181 | dout(4) << "going active, including " << m->get_command_descs().size() | |
182 | << " commands in beacon" << dendl; | |
183 | } | |
184 | ||
185 | m->set_services(active_mgr->get_services()); | |
c07f9fc5 FG |
186 | } |
187 | ||
7c673cae | 188 | monc.send_mon_message(m); |
31f18b77 FG |
189 | } |
190 | ||
191 | void MgrStandby::tick() | |
192 | { | |
224ce89b | 193 | dout(10) << __func__ << dendl; |
31f18b77 FG |
194 | send_beacon(); |
195 | ||
3efd9988 | 196 | if (active_mgr && active_mgr->is_initialized()) { |
31f18b77 FG |
197 | active_mgr->tick(); |
198 | } | |
199 | ||
3efd9988 FG |
200 | timer.add_event_after(g_conf->get_val<int64_t>("mgr_tick_period"), |
201 | new FunctionContext([this](int r){ | |
31f18b77 | 202 | tick(); |
3efd9988 | 203 | } |
7c673cae FG |
204 | )); |
205 | } | |
206 | ||
207 | void MgrStandby::handle_signal(int signum) | |
208 | { | |
209 | Mutex::Locker l(lock); | |
210 | assert(signum == SIGINT || signum == SIGTERM); | |
211 | derr << "*** Got signal " << sig_str(signum) << " ***" << dendl; | |
212 | shutdown(); | |
213 | } | |
214 | ||
215 | void MgrStandby::shutdown() | |
216 | { | |
217 | // Expect already to be locked as we're called from signal handler | |
218 | assert(lock.is_locked_by_me()); | |
219 | ||
3efd9988 FG |
220 | dout(4) << "Shutting down" << dendl; |
221 | ||
7c673cae FG |
222 | // stop sending beacon first, i use monc to talk with monitors |
223 | timer.shutdown(); | |
224 | // client uses monc and objecter | |
225 | client.shutdown(); | |
226 | // stop monc, so mon won't be able to instruct me to shutdown/activate after | |
227 | // the active_mgr is stopped | |
228 | monc.shutdown(); | |
229 | if (active_mgr) { | |
230 | active_mgr->shutdown(); | |
231 | } | |
3efd9988 FG |
232 | |
233 | py_module_registry.shutdown(); | |
234 | ||
7c673cae FG |
235 | // objecter is used by monc and active_mgr |
236 | objecter.shutdown(); | |
237 | // client_messenger is used by all of them, so stop it in the end | |
238 | client_messenger->shutdown(); | |
239 | } | |
240 | ||
31f18b77 FG |
241 | void MgrStandby::respawn() |
242 | { | |
243 | char *new_argv[orig_argc+1]; | |
244 | dout(1) << " e: '" << orig_argv[0] << "'" << dendl; | |
245 | for (int i=0; i<orig_argc; i++) { | |
246 | new_argv[i] = (char *)orig_argv[i]; | |
247 | dout(1) << " " << i << ": '" << orig_argv[i] << "'" << dendl; | |
248 | } | |
249 | new_argv[orig_argc] = NULL; | |
250 | ||
251 | /* Determine the path to our executable, test if Linux /proc/self/exe exists. | |
252 | * This allows us to exec the same executable even if it has since been | |
253 | * unlinked. | |
254 | */ | |
255 | char exe_path[PATH_MAX] = ""; | |
256 | if (readlink(PROCPREFIX "/proc/self/exe", exe_path, PATH_MAX-1) == -1) { | |
257 | /* Print CWD for the user's interest */ | |
258 | char buf[PATH_MAX]; | |
259 | char *cwd = getcwd(buf, sizeof(buf)); | |
260 | assert(cwd); | |
261 | dout(1) << " cwd " << cwd << dendl; | |
262 | ||
263 | /* Fall back to a best-effort: just running in our CWD */ | |
264 | strncpy(exe_path, orig_argv[0], PATH_MAX-1); | |
265 | } else { | |
266 | dout(1) << "respawning with exe " << exe_path << dendl; | |
267 | strcpy(exe_path, PROCPREFIX "/proc/self/exe"); | |
268 | } | |
269 | ||
270 | dout(1) << " exe_path " << exe_path << dendl; | |
271 | ||
272 | unblock_all_signals(NULL); | |
273 | execv(exe_path, new_argv); | |
274 | ||
275 | derr << "respawn execv " << orig_argv[0] | |
276 | << " failed with " << cpp_strerror(errno) << dendl; | |
277 | ceph_abort(); | |
278 | } | |
279 | ||
7c673cae FG |
280 | void MgrStandby::_update_log_config() |
281 | { | |
282 | map<string,string> log_to_monitors; | |
283 | map<string,string> log_to_syslog; | |
284 | map<string,string> log_channel; | |
285 | map<string,string> log_prio; | |
286 | map<string,string> log_to_graylog; | |
287 | map<string,string> log_to_graylog_host; | |
288 | map<string,string> log_to_graylog_port; | |
289 | uuid_d fsid; | |
290 | string host; | |
291 | ||
292 | if (parse_log_client_options(cct, log_to_monitors, log_to_syslog, | |
293 | log_channel, log_prio, log_to_graylog, | |
294 | log_to_graylog_host, log_to_graylog_port, | |
295 | fsid, host) == 0) { | |
296 | clog->update_config(log_to_monitors, log_to_syslog, | |
297 | log_channel, log_prio, log_to_graylog, | |
298 | log_to_graylog_host, log_to_graylog_port, | |
299 | fsid, host); | |
300 | audit_clog->update_config(log_to_monitors, log_to_syslog, | |
301 | log_channel, log_prio, log_to_graylog, | |
302 | log_to_graylog_host, log_to_graylog_port, | |
303 | fsid, host); | |
304 | } | |
305 | } | |
306 | ||
307 | void MgrStandby::handle_mgr_map(MMgrMap* mmap) | |
308 | { | |
c07f9fc5 | 309 | auto &map = mmap->get_map(); |
7c673cae FG |
310 | dout(4) << "received map epoch " << map.get_epoch() << dendl; |
311 | const bool active_in_map = map.active_gid == monc.get_global_id(); | |
312 | dout(4) << "active in map: " << active_in_map | |
313 | << " active is " << map.active_gid << dendl; | |
3efd9988 FG |
314 | |
315 | if (!py_module_registry.is_initialized()) { | |
316 | int r = py_module_registry.init(map); | |
317 | ||
318 | // FIXME: error handling | |
319 | assert(r == 0); | |
320 | } else { | |
321 | bool need_respawn = py_module_registry.handle_mgr_map(map); | |
322 | if (need_respawn) { | |
323 | respawn(); | |
324 | } | |
325 | } | |
326 | ||
7c673cae FG |
327 | if (active_in_map) { |
328 | if (!active_mgr) { | |
329 | dout(1) << "Activating!" << dendl; | |
3efd9988 FG |
330 | active_mgr.reset(new Mgr(&monc, map, &py_module_registry, |
331 | client_messenger.get(), &objecter, | |
7c673cae | 332 | &client, clog, audit_clog)); |
224ce89b WB |
333 | active_mgr->background_init(new FunctionContext( |
334 | [this](int r){ | |
335 | // Advertise our active-ness ASAP instead of waiting for | |
336 | // next tick. | |
337 | Mutex::Locker l(lock); | |
338 | send_beacon(); | |
339 | })); | |
340 | dout(1) << "I am now activating" << dendl; | |
7c673cae FG |
341 | } else { |
342 | dout(10) << "I was already active" << dendl; | |
224ce89b WB |
343 | bool need_respawn = active_mgr->got_mgr_map(map); |
344 | if (need_respawn) { | |
345 | respawn(); | |
346 | } | |
7c673cae | 347 | } |
c07f9fc5 FG |
348 | |
349 | if (!available_in_map && map.get_available()) { | |
350 | dout(4) << "Map now says I am available" << dendl; | |
351 | available_in_map = true; | |
352 | } | |
3efd9988 FG |
353 | } else if (active_mgr != nullptr) { |
354 | derr << "I was active but no longer am" << dendl; | |
355 | respawn(); | |
7c673cae | 356 | } else { |
3efd9988 FG |
357 | if (map.active_gid != 0 && map.active_name != g_conf->name.get_id()) { |
358 | // I am the standby and someone else is active, start modules | |
359 | // in standby mode to do redirects if needed | |
360 | if (!py_module_registry.is_standby_running()) { | |
361 | py_module_registry.standby_start(&monc); | |
362 | } | |
7c673cae FG |
363 | } |
364 | } | |
365 | ||
366 | mmap->put(); | |
367 | } | |
368 | ||
369 | bool MgrStandby::ms_dispatch(Message *m) | |
370 | { | |
371 | Mutex::Locker l(lock); | |
94b18763 | 372 | bool handled = false; |
7c673cae FG |
373 | dout(4) << state_str() << " " << *m << dendl; |
374 | ||
31f18b77 FG |
375 | if (m->get_type() == MSG_MGR_MAP) { |
376 | handle_mgr_map(static_cast<MMgrMap*>(m)); | |
377 | return true; | |
378 | } else if (active_mgr) { | |
379 | auto am = active_mgr; | |
380 | lock.Unlock(); | |
94b18763 | 381 | handled = am->ms_dispatch(m); |
31f18b77 FG |
382 | lock.Lock(); |
383 | return handled; | |
384 | } else { | |
385 | return false; | |
7c673cae | 386 | } |
94b18763 FG |
387 | if (m->get_type() == MSG_MGR_MAP) { |
388 | // let this pass through for mgrc | |
389 | handled = false; | |
390 | } | |
391 | return handled; | |
7c673cae FG |
392 | } |
393 | ||
394 | ||
395 | bool MgrStandby::ms_get_authorizer(int dest_type, AuthAuthorizer **authorizer, | |
396 | bool force_new) | |
397 | { | |
398 | if (dest_type == CEPH_ENTITY_TYPE_MON) | |
399 | return true; | |
400 | ||
401 | if (force_new) { | |
402 | if (monc.wait_auth_rotating(10) < 0) | |
403 | return false; | |
404 | } | |
405 | ||
406 | *authorizer = monc.build_authorizer(dest_type); | |
407 | return *authorizer != NULL; | |
408 | } | |
409 | ||
410 | bool MgrStandby::ms_handle_refused(Connection *con) | |
411 | { | |
412 | // do nothing for now | |
413 | return false; | |
414 | } | |
415 | ||
416 | // A reference for use by the signal handler | |
417 | static MgrStandby *signal_mgr = nullptr; | |
418 | ||
419 | static void handle_mgr_signal(int signum) | |
420 | { | |
421 | if (signal_mgr) { | |
422 | signal_mgr->handle_signal(signum); | |
423 | } | |
424 | } | |
425 | ||
426 | int MgrStandby::main(vector<const char *> args) | |
427 | { | |
428 | // Enable signal handlers | |
429 | signal_mgr = this; | |
430 | init_async_signal_handler(); | |
431 | register_async_signal_handler(SIGHUP, sighup_handler); | |
432 | register_async_signal_handler_oneshot(SIGINT, handle_mgr_signal); | |
433 | register_async_signal_handler_oneshot(SIGTERM, handle_mgr_signal); | |
434 | ||
435 | client_messenger->wait(); | |
436 | ||
437 | // Disable signal handlers | |
438 | unregister_async_signal_handler(SIGHUP, sighup_handler); | |
439 | unregister_async_signal_handler(SIGINT, handle_mgr_signal); | |
440 | unregister_async_signal_handler(SIGTERM, handle_mgr_signal); | |
441 | shutdown_async_signal_handler(); | |
442 | signal_mgr = nullptr; | |
443 | ||
444 | return 0; | |
445 | } | |
446 | ||
447 | ||
448 | std::string MgrStandby::state_str() | |
449 | { | |
3efd9988 FG |
450 | if (active_mgr == nullptr) { |
451 | return "standby"; | |
452 | } else if (active_mgr->is_initialized()) { | |
453 | return "active"; | |
454 | } else { | |
455 | return "active (starting)"; | |
456 | } | |
7c673cae FG |
457 | } |
458 |