]> git.proxmox.com Git - ceph.git/blame - ceph/src/mgr/PyModuleRegistry.cc
import ceph 14.2.5
[ceph.git] / ceph / src / mgr / PyModuleRegistry.cc
CommitLineData
3efd9988
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2017 John Spray <john.spray@redhat.com>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 */
13
14
15#include "include/stringify.h"
16#include "common/errno.h"
3efd9988
FG
17
18#include "BaseMgrModule.h"
19#include "PyOSDMap.h"
20#include "BaseMgrStandbyModule.h"
21#include "Gil.h"
11fdf7f2
TL
22#include "MgrContext.h"
23#include "mgr/mgr_commands.h"
3efd9988
FG
24
25#include "ActivePyModules.h"
26
27#include "PyModuleRegistry.h"
28
3efd9988
FG
29#define dout_context g_ceph_context
30#define dout_subsys ceph_subsys_mgr
31
32#undef dout_prefix
33#define dout_prefix *_dout << "mgr[py] "
34
3efd9988
FG
35
36
11fdf7f2 37void PyModuleRegistry::init()
3efd9988 38{
11fdf7f2 39 std::lock_guard locker(lock);
3efd9988
FG
40
41 // Set up global python interpreter
11fdf7f2
TL
42#if PY_MAJOR_VERSION >= 3
43#define WCHAR(s) L ## #s
eafe8130 44 Py_SetProgramName(const_cast<wchar_t*>(WCHAR(MGR_PYTHON_EXECUTABLE)));
11fdf7f2
TL
45#undef WCHAR
46#else
eafe8130 47 Py_SetProgramName(const_cast<char*>(MGR_PYTHON_EXECUTABLE));
11fdf7f2
TL
48#endif
49 // Add more modules
50 if (g_conf().get_val<bool>("daemonize")) {
51 PyImport_AppendInittab("ceph_logger", PyModule::init_ceph_logger);
52 }
53 PyImport_AppendInittab("ceph_module", PyModule::init_ceph_module);
3efd9988
FG
54 Py_InitializeEx(0);
55
56 // Let CPython know that we will be calling it back from other
57 // threads in future.
58 if (! PyEval_ThreadsInitialized()) {
59 PyEval_InitThreads();
60 }
61
62 // Drop the GIL and remember the main thread state (current
63 // thread state becomes NULL)
64 pMainThreadState = PyEval_SaveThread();
11fdf7f2 65 ceph_assert(pMainThreadState != nullptr);
3efd9988
FG
66
67 std::list<std::string> failed_modules;
68
81eedcae
TL
69 const std::string module_path = g_conf().get_val<std::string>("mgr_module_path");
70 std::set<std::string> module_names = probe_modules(module_path);
3efd9988 71 // Load python code
11fdf7f2 72 for (const auto& module_name : module_names) {
3efd9988 73 dout(1) << "Loading python module '" << module_name << "'" << dendl;
11fdf7f2
TL
74
75 // Everything starts disabled, set enabled flag on module
76 // when we see first MgrMap
77 auto mod = std::make_shared<PyModule>(module_name);
3efd9988
FG
78 int r = mod->load(pMainThreadState);
79 if (r != 0) {
80 // Don't use handle_pyerror() here; we don't have the GIL
81 // or the right thread state (this is deliberate).
82 derr << "Error loading module '" << module_name << "': "
83 << cpp_strerror(r) << dendl;
84 failed_modules.push_back(module_name);
85 // Don't drop out here, load the other modules
3efd9988 86 }
11fdf7f2
TL
87
88 // Record the module even if the load failed, so that we can
89 // report its loading error
90 modules[module_name] = std::move(mod);
3efd9988 91 }
81eedcae
TL
92 if (module_names.empty()) {
93 clog->error() << "No ceph-mgr modules found in " << module_path;
94 }
3efd9988
FG
95 if (!failed_modules.empty()) {
96 clog->error() << "Failed to load ceph-mgr modules: " << joinify(
97 failed_modules.begin(), failed_modules.end(), std::string(", "));
98 }
3efd9988
FG
99}
100
11fdf7f2 101bool PyModuleRegistry::handle_mgr_map(const MgrMap &mgr_map_)
3efd9988 102{
11fdf7f2
TL
103 std::lock_guard l(lock);
104
105 if (mgr_map.epoch == 0) {
106 mgr_map = mgr_map_;
107
108 // First time we see MgrMap, set the enabled flags on modules
109 // This should always happen before someone calls standby_start
110 // or active_start
111 for (const auto &[module_name, module] : modules) {
112 const bool enabled = (mgr_map.modules.count(module_name) > 0);
113 module->set_enabled(enabled);
114 const bool always_on = (mgr_map.get_always_on_modules().count(module_name) > 0);
115 module->set_always_on(always_on);
3efd9988
FG
116 }
117
11fdf7f2
TL
118 return false;
119 } else {
120 bool modules_changed = mgr_map_.modules != mgr_map.modules ||
121 mgr_map_.always_on_modules != mgr_map.always_on_modules;
122 mgr_map = mgr_map_;
3efd9988 123
11fdf7f2
TL
124 if (standby_modules != nullptr) {
125 standby_modules->handle_mgr_map(mgr_map_);
3efd9988
FG
126 }
127
11fdf7f2 128 return modules_changed;
3efd9988 129 }
11fdf7f2 130}
3efd9988 131
3efd9988 132
3efd9988 133
11fdf7f2 134void PyModuleRegistry::standby_start(MonClient &mc, Finisher &f)
3efd9988 135{
11fdf7f2
TL
136 std::lock_guard l(lock);
137 ceph_assert(active_modules == nullptr);
138 ceph_assert(standby_modules == nullptr);
139
140 // Must have seen a MgrMap by this point, in order to know
141 // which modules should be enabled
142 ceph_assert(mgr_map.epoch > 0);
3efd9988
FG
143
144 dout(4) << "Starting modules in standby mode" << dendl;
145
11fdf7f2
TL
146 standby_modules.reset(new StandbyPyModules(
147 mgr_map, module_config, clog, mc, f));
3efd9988
FG
148
149 std::set<std::string> failed_modules;
150 for (const auto &i : modules) {
11fdf7f2
TL
151 if (!(i.second->is_enabled() && i.second->get_can_run())) {
152 // report always_on modules with a standby mode that won't run
153 if (i.second->is_always_on() && i.second->pStandbyClass) {
3efd9988 154 failed_modules.insert(i.second->get_name());
3efd9988 155 }
11fdf7f2
TL
156 continue;
157 }
158
159 if (i.second->pStandbyClass) {
160 dout(4) << "starting module " << i.second->get_name() << dendl;
161 standby_modules->start_one(i.second);
3efd9988
FG
162 } else {
163 dout(4) << "skipping module '" << i.second->get_name() << "' because "
164 "it does not implement a standby mode" << dendl;
165 }
166 }
167
168 if (!failed_modules.empty()) {
169 clog->error() << "Failed to execute ceph-mgr module(s) in standby mode: "
170 << joinify(failed_modules.begin(), failed_modules.end(),
171 std::string(", "));
172 }
173}
174
175void PyModuleRegistry::active_start(
11fdf7f2
TL
176 DaemonStateIndex &ds, ClusterState &cs,
177 const std::map<std::string, std::string> &kv_store,
178 MonClient &mc, LogChannelRef clog_, LogChannelRef audit_clog_,
179 Objecter &objecter_, Client &client_, Finisher &f,
180 DaemonServer &server)
3efd9988 181{
11fdf7f2 182 std::lock_guard locker(lock);
3efd9988
FG
183
184 dout(4) << "Starting modules in active mode" << dendl;
185
11fdf7f2
TL
186 ceph_assert(active_modules == nullptr);
187
188 // Must have seen a MgrMap by this point, in order to know
189 // which modules should be enabled
190 ceph_assert(mgr_map.epoch > 0);
3efd9988
FG
191
192 if (standby_modules != nullptr) {
193 standby_modules->shutdown();
194 standby_modules.reset();
195 }
196
197 active_modules.reset(new ActivePyModules(
11fdf7f2
TL
198 module_config, kv_store, ds, cs, mc,
199 clog_, audit_clog_, objecter_, client_, f, server,
200 *this));
3efd9988
FG
201
202 for (const auto &i : modules) {
11fdf7f2
TL
203 // Anything we're skipping because of !can_run will be flagged
204 // to the user separately via get_health_checks
205 if (!(i.second->is_enabled() && i.second->is_loaded())) {
206 continue;
3efd9988 207 }
11fdf7f2
TL
208
209 dout(4) << "Starting " << i.first << dendl;
210 active_modules->start_one(i.second);
3efd9988
FG
211 }
212}
213
214void PyModuleRegistry::active_shutdown()
215{
11fdf7f2 216 std::lock_guard locker(lock);
3efd9988
FG
217
218 if (active_modules != nullptr) {
219 active_modules->shutdown();
220 active_modules.reset();
221 }
222}
223
224void PyModuleRegistry::shutdown()
225{
11fdf7f2 226 std::lock_guard locker(lock);
3efd9988
FG
227
228 if (standby_modules != nullptr) {
229 standby_modules->shutdown();
230 standby_modules.reset();
231 }
232
233 // Ideally, now, we'd be able to do this for all modules:
234 //
235 // Py_EndInterpreter(pMyThreadState);
236 // PyThreadState_Swap(pMainThreadState);
237 //
238 // Unfortunately, if the module has any other *python* threads active
239 // at this point, Py_EndInterpreter() will abort with:
240 //
241 // Fatal Python error: Py_EndInterpreter: not the last thread
242 //
243 // This can happen when using CherryPy in a module, becuase CherryPy
244 // runs an extra thread as a timeout monitor, which spends most of its
245 // life inside a time.sleep(60). Unless you are very, very lucky with
246 // the timing calling this destructor, that thread will still be stuck
247 // in a sleep, and Py_EndInterpreter() will abort.
248 //
249 // This could of course also happen with a poorly written module which
250 // made no attempt to clean up any additional threads it created.
251 //
252 // The safest thing to do is just not call Py_EndInterpreter(), and
253 // let Py_Finalize() kill everything after all modules are shut down.
254
255 modules.clear();
256
257 PyEval_RestoreThread(pMainThreadState);
258 Py_Finalize();
259}
260
81eedcae 261std::set<std::string> PyModuleRegistry::probe_modules(const std::string &path) const
3efd9988
FG
262{
263 DIR *dir = opendir(path.c_str());
264 if (!dir) {
11fdf7f2 265 return {};
3efd9988 266 }
11fdf7f2
TL
267
268 std::set<std::string> modules_out;
3efd9988
FG
269 struct dirent *entry = NULL;
270 while ((entry = readdir(dir)) != NULL) {
271 string n(entry->d_name);
272 string fn = path + "/" + n;
273 struct stat st;
274 int r = ::stat(fn.c_str(), &st);
275 if (r == 0 && S_ISDIR(st.st_mode)) {
276 string initfn = fn + "/module.py";
277 r = ::stat(initfn.c_str(), &st);
278 if (r == 0) {
11fdf7f2 279 modules_out.insert(n);
3efd9988
FG
280 }
281 }
282 }
283 closedir(dir);
11fdf7f2
TL
284
285 return modules_out;
286}
287
288int PyModuleRegistry::handle_command(
289 std::string const &module_name,
290 const cmdmap_t &cmdmap,
291 const bufferlist &inbuf,
292 std::stringstream *ds,
293 std::stringstream *ss)
294{
295 if (active_modules) {
296 return active_modules->handle_command(module_name, cmdmap, inbuf, ds, ss);
297 } else {
298 // We do not expect to be called before active modules is up, but
299 // it's straightfoward to handle this case so let's do it.
300 return -EAGAIN;
301 }
302}
303
304std::vector<ModuleCommand> PyModuleRegistry::get_py_commands() const
305{
306 std::lock_guard l(lock);
307
308 std::vector<ModuleCommand> result;
309 for (const auto& i : modules) {
310 i.second->get_commands(&result);
311 }
312
313 return result;
314}
315
316std::vector<MonCommand> PyModuleRegistry::get_commands() const
317{
318 std::vector<ModuleCommand> commands = get_py_commands();
319 std::vector<MonCommand> result;
320 for (auto &pyc: commands) {
321 uint64_t flags = MonCommand::FLAG_MGR;
322 if (pyc.polling) {
323 flags |= MonCommand::FLAG_POLL;
324 }
325 result.push_back({pyc.cmdstring, pyc.helpstring, "mgr",
326 pyc.perm, flags});
327 }
328 return result;
329}
330
331void PyModuleRegistry::get_health_checks(health_check_map_t *checks)
332{
333 std::lock_guard l(lock);
334
335 // Only the active mgr reports module issues
336 if (active_modules) {
337 active_modules->get_health_checks(checks);
338
339 std::map<std::string, std::string> dependency_modules;
340 std::map<std::string, std::string> failed_modules;
341
342 /*
343 * Break up broken modules into two categories:
344 * - can_run=false: the module is working fine but explicitly
345 * telling you that a dependency is missing. Advise the user to
346 * read the message from the module and install what's missing.
347 * - failed=true or loaded=false: something unexpected is broken,
348 * either at runtime (from serve()) or at load time. This indicates
349 * a bug and the user should be guided to inspect the mgr log
350 * to investigate and gather evidence.
351 */
352
353 for (const auto &i : modules) {
354 auto module = i.second;
355 if (module->is_enabled() && !module->get_can_run()) {
356 dependency_modules[module->get_name()] = module->get_error_string();
357 } else if ((module->is_enabled() && !module->is_loaded())
358 || (module->is_failed() && module->get_can_run())) {
359 // - Unloadable modules are only reported if they're enabled,
360 // to avoid spamming users about modules they don't have the
361 // dependencies installed for because they don't use it.
362 // - Failed modules are only reported if they passed the can_run
363 // checks (to avoid outputting two health messages about a
364 // module that said can_run=false but we tried running it anyway)
365 failed_modules[module->get_name()] = module->get_error_string();
366 }
367 }
368
369 // report failed always_on modules as health errors
370 for (const auto& name : mgr_map.get_always_on_modules()) {
371 if (!active_modules->module_exists(name)) {
372 if (failed_modules.find(name) == failed_modules.end() &&
373 dependency_modules.find(name) == dependency_modules.end()) {
374 failed_modules[name] = "Not found or unloadable";
375 }
376 }
377 }
378
379 if (!dependency_modules.empty()) {
380 std::ostringstream ss;
381 if (dependency_modules.size() == 1) {
382 auto iter = dependency_modules.begin();
383 ss << "Module '" << iter->first << "' has failed dependency: "
384 << iter->second;
385 } else if (dependency_modules.size() > 1) {
386 ss << dependency_modules.size()
387 << " mgr modules have failed dependencies";
388 }
389 auto& d = checks->add("MGR_MODULE_DEPENDENCY", HEALTH_WARN, ss.str());
390 for (auto& i : dependency_modules) {
391 std::ostringstream ss;
392 ss << "Module '" << i.first << "' has failed dependency: " << i.second;
393 d.detail.push_back(ss.str());
394 }
395 }
396
397 if (!failed_modules.empty()) {
398 std::ostringstream ss;
399 if (failed_modules.size() == 1) {
400 auto iter = failed_modules.begin();
401 ss << "Module '" << iter->first << "' has failed: " << iter->second;
402 } else if (failed_modules.size() > 1) {
403 ss << failed_modules.size() << " mgr modules have failed";
404 }
405 auto& d = checks->add("MGR_MODULE_ERROR", HEALTH_ERR, ss.str());
406 for (auto& i : failed_modules) {
407 std::ostringstream ss;
408 ss << "Module '" << i.first << "' has failed: " << i.second;
409 d.detail.push_back(ss.str());
410 }
411 }
412 }
413}
414
415void PyModuleRegistry::handle_config(const std::string &k, const std::string &v)
416{
417 std::lock_guard l(module_config.lock);
418
419 if (!v.empty()) {
420 dout(4) << "Loaded module_config entry " << k << ":" << v << dendl;
421 module_config.config[k] = v;
422 } else {
423 module_config.config.erase(k);
424 }
425}
426
427void PyModuleRegistry::handle_config_notify()
428{
429 std::lock_guard l(lock);
430 if (active_modules) {
431 active_modules->config_notify();
432 }
3efd9988
FG
433}
434
11fdf7f2
TL
435void PyModuleRegistry::upgrade_config(
436 MonClient *monc,
437 const std::map<std::string, std::string> &old_config)
3efd9988 438{
11fdf7f2
TL
439 // Only bother doing anything if we didn't already have
440 // some new-style config.
441 if (module_config.config.empty()) {
442 dout(1) << "Upgrading module configuration for Mimic" << dendl;
443 // Upgrade luminous->mimic: migrate config-key configuration
444 // into main configuration store
445 for (auto &i : old_config) {
446 auto last_slash = i.first.rfind('/');
447 const std::string module_name = i.first.substr(4, i.first.substr(4).find('/'));
448 const std::string key = i.first.substr(last_slash + 1);
449
450 const auto &value = i.second;
451
452 // Heuristic to skip things that look more like stores
453 // than configs.
454 bool is_config = true;
455 for (const auto &c : value) {
456 if (c == '\n' || c == '\r' || c < 0x20) {
457 is_config = false;
458 break;
459 }
460 }
461
462 if (value.size() > 256) {
463 is_config = false;
464 }
465
466 if (!is_config) {
467 dout(1) << "Not migrating config module:key "
468 << module_name << " : " << key << dendl;
469 continue;
470 }
471
472 // Check that the named module exists
473 auto module_iter = modules.find(module_name);
474 if (module_iter == modules.end()) {
475 dout(1) << "KV store contains data for unknown module '"
476 << module_name << "'" << dendl;
477 continue;
478 }
479 PyModuleRef module = module_iter->second;
480
481 // Parse option name out of key
482 std::string option_name;
483 auto slash_loc = key.find("/");
484 if (slash_loc != std::string::npos) {
485 if (key.size() > slash_loc + 1) {
486 // Localized option
487 option_name = key.substr(slash_loc + 1);
488 } else {
489 // Trailing slash: garbage.
490 derr << "Invalid mgr store key: '" << key << "'" << dendl;
491 continue;
492 }
493 } else {
494 option_name = key;
495 }
496
497 // Consult module schema to see if this is really
498 // a configuration value
499 if (!option_name.empty() && module->is_option(option_name)) {
500 module_config.set_config(monc, module_name, key, i.second);
501 dout(4) << "Rewrote configuration module:key "
502 << module_name << ":" << key << dendl;
503 } else {
504 dout(4) << "Leaving store module:key " << module_name
505 << ":" << key << " in store, not config" << dendl;
506 }
507 }
508 } else {
509 dout(10) << "Module configuration contains "
510 << module_config.config.size() << " keys" << dendl;
511 }
3efd9988
FG
512}
513