1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2017 John Spray <john.spray@redhat.com>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #include "include/stringify.h"
16 #include "common/errno.h"
18 #include "BaseMgrModule.h"
20 #include "BaseMgrStandbyModule.h"
22 #include "MgrContext.h"
23 #include "mgr/mgr_commands.h"
25 #include "ActivePyModules.h"
27 #include "PyModuleRegistry.h"
29 #define dout_context g_ceph_context
30 #define dout_subsys ceph_subsys_mgr
33 #define dout_prefix *_dout << "mgr[py] "
// Set up the embedded Python interpreter and load every module discovered
// under the "mgr_module_path" config option.
//
// Visible steps: take `lock`, set the Python program name from
// MGR_PYTHON_EXECUTABLE, register the built-in "ceph_logger" and
// "ceph_module" modules, save the main thread state, then probe and load
// each module.  Modules that fail to load are still recorded in `modules`
// so their error can be reported later.
//
// NOTE(review): this listing appears to omit brace-only and keyword-only
// lines (e.g. the #else/#endif pair and the `if` guarding the load-error
// path); confirm exact control flow against the original file.
37 void PyModuleRegistry::init()
39 std::lock_guard
locker(lock
);
41 // Set up global python interpreter
42 #if PY_MAJOR_VERSION >= 3
// NOTE(review): `#s` stringifies the macro argument *without* expanding it,
// so if MGR_PYTHON_EXECUTABLE is itself a macro this produces the wide
// literal L"MGR_PYTHON_EXECUTABLE" rather than its value -- confirm intent.
43 #define WCHAR(s) L ## #s
44 Py_SetProgramName(const_cast<wchar_t*>(WCHAR(MGR_PYTHON_EXECUTABLE
)));
// Narrow-string variant (presumably the pre-Python-3 branch of the #if).
47 Py_SetProgramName(const_cast<char*>(MGR_PYTHON_EXECUTABLE
));
// Register built-in extension modules before the interpreter starts; the
// "daemonize" option check directly precedes the ceph_logger registration.
50 if (g_conf().get_val
<bool>("daemonize")) {
51 PyImport_AppendInittab("ceph_logger", PyModule::init_ceph_logger
);
53 PyImport_AppendInittab("ceph_module", PyModule::init_ceph_module
);
56 // Let CPython know that we will be calling it back from other
58 if (! PyEval_ThreadsInitialized()) {
62 // Drop the GIL and remember the main thread state (current
63 // thread state becomes NULL)
64 pMainThreadState
= PyEval_SaveThread();
65 ceph_assert(pMainThreadState
!= nullptr);
// Names of modules whose load() returned an error; reported via clog below.
67 std::list
<std::string
> failed_modules
;
69 const std::string module_path
= g_conf().get_val
<std::string
>("mgr_module_path");
70 std::set
<std::string
> module_names
= probe_modules(module_path
);
72 for (const auto& module_name
: module_names
) {
73 dout(1) << "Loading python module '" << module_name
<< "'" << dendl
;
75 // Everything starts disabled, set enabled flag on module
76 // when we see first MgrMap
77 auto mod
= std::make_shared
<PyModule
>(module_name
);
// `r` is passed to cpp_strerror() below, so it is treated as an errno-style
// code (the test that selects the error path is not visible here).
78 int r
= mod
->load(pMainThreadState
);
80 // Don't use handle_pyerror() here; we don't have the GIL
81 // or the right thread state (this is deliberate).
82 derr
<< "Error loading module '" << module_name
<< "': "
83 << cpp_strerror(r
) << dendl
;
84 failed_modules
.push_back(module_name
);
85 // Don't drop out here, load the other modules
88 // Record the module even if the load failed, so that we can
89 // report its loading error
90 modules
[module_name
] = std::move(mod
);
// Surface "nothing found" and "some failed" conditions on the cluster log.
92 if (module_names
.empty()) {
93 clog
->error() << "No ceph-mgr modules found in " << module_path
;
95 if (!failed_modules
.empty()) {
96 clog
->error() << "Failed to load ceph-mgr modules: " << joinify(
97 failed_modules
.begin(), failed_modules
.end(), std::string(", "));
// Process a newly received MgrMap (`mgr_map_`) while holding `lock`.
//
// When the stored `mgr_map` still has epoch 0 (no map seen yet), each loaded
// module gets its enabled / always-on flags set from the map.  Otherwise the
// incoming map's `modules` and `always_on_modules` are compared with the
// stored map's, the new map is forwarded to `standby_modules` if that is
// set, and the comparison result is returned.
//
// NOTE(review): the statement that copies `mgr_map_` into `mgr_map`, and the
// return value of the first-map path, are not visible in this listing --
// confirm against the original file.
101 bool PyModuleRegistry::handle_mgr_map(const MgrMap
&mgr_map_
)
103 std::lock_guard
l(lock
);
// Epoch 0 on the stored map is used as the "never received a map" marker.
105 if (mgr_map
.epoch
== 0) {
108 // First time we see MgrMap, set the enabled flags on modules
109 // This should always happen before someone calls standby_start
111 for (const auto &[module_name
, module
] : modules
) {
// A module is enabled when its name appears in the map's `modules`.
112 const bool enabled
= (mgr_map
.modules
.count(module_name
) > 0);
113 module
->set_enabled(enabled
);
// Likewise always-on when the name is in get_always_on_modules().
114 const bool always_on
= (mgr_map
.get_always_on_modules().count(module_name
) > 0);
115 module
->set_always_on(always_on
);
// True when either module set differs between the incoming and stored maps.
120 bool modules_changed
= mgr_map_
.modules
!= mgr_map
.modules
||
121 mgr_map_
.always_on_modules
!= mgr_map
.always_on_modules
;
// Standby modules, when running, are handed the new map as well.
124 if (standby_modules
!= nullptr) {
125 standby_modules
->handle_mgr_map(mgr_map_
);
128 return modules_changed
;
// Create the StandbyPyModules container and start the modules that provide
// a standby class.
//
// Asserts that neither active nor standby modules exist yet and that a
// MgrMap has already been seen (stored epoch > 0).  Always-on modules that
// have a standby class but are not both enabled and runnable are collected
// and reported on the cluster log.
//
// @param mc  monitor client handed to StandbyPyModules
// @param f   finisher handed to StandbyPyModules
134 void PyModuleRegistry::standby_start(MonClient
&mc
, Finisher
&f
)
136 std::lock_guard
l(lock
);
137 ceph_assert(active_modules
== nullptr);
138 ceph_assert(standby_modules
== nullptr);
140 // Must have seen a MgrMap by this point, in order to know
141 // which modules should be enabled
142 ceph_assert(mgr_map
.epoch
> 0);
144 dout(4) << "Starting modules in standby mode" << dendl
;
146 standby_modules
.reset(new StandbyPyModules(
147 mgr_map
, module_config
, clog
, mc
, f
));
// Names of always-on modules that should run in standby mode but cannot.
149 std::set
<std::string
> failed_modules
;
150 for (const auto &i
: modules
) {
// Modules that are not (enabled AND runnable) take this path; presumably
// they are skipped afterwards (the `continue` is not visible here).
151 if (!(i
.second
->is_enabled() && i
.second
->get_can_run())) {
152 // report always_on modules with a standby mode that won't run
153 if (i
.second
->is_always_on() && i
.second
->pStandbyClass
) {
154 failed_modules
.insert(i
.second
->get_name());
// Only modules exposing a standby class are started in standby mode.
159 if (i
.second
->pStandbyClass
) {
160 dout(4) << "starting module " << i
.second
->get_name() << dendl
;
161 standby_modules
->start_one(i
.second
);
163 dout(4) << "skipping module '" << i
.second
->get_name() << "' because "
164 "it does not implement a standby mode" << dendl
;
168 if (!failed_modules
.empty()) {
169 clog
->error() << "Failed to execute ceph-mgr module(s) in standby mode: "
170 << joinify(failed_modules
.begin(), failed_modules
.end(),
// Switch to active mode: tear down any standby modules, create the
// ActivePyModules container and start the eligible modules.
//
// Asserts that active modules do not exist yet and that a MgrMap has been
// seen (stored epoch > 0).  All reference / channel arguments are forwarded
// to the ActivePyModules constructor; the tail of that constructor call is
// not visible in this listing.
175 void PyModuleRegistry::active_start(
176 DaemonStateIndex
&ds
, ClusterState
&cs
,
177 const std::map
<std::string
, std::string
> &kv_store
,
178 MonClient
&mc
, LogChannelRef clog_
, LogChannelRef audit_clog_
,
179 Objecter
&objecter_
, Client
&client_
, Finisher
&f
,
180 DaemonServer
&server
)
182 std::lock_guard
locker(lock
);
184 dout(4) << "Starting modules in active mode" << dendl
;
186 ceph_assert(active_modules
== nullptr);
188 // Must have seen a MgrMap by this point, in order to know
189 // which modules should be enabled
190 ceph_assert(mgr_map
.epoch
> 0);
// Standby instances are shut down and released before going active.
192 if (standby_modules
!= nullptr) {
193 standby_modules
->shutdown();
194 standby_modules
.reset();
197 active_modules
.reset(new ActivePyModules(
198 module_config
, kv_store
, ds
, cs
, mc
,
199 clog_
, audit_clog_
, objecter_
, client_
, f
, server
,
202 for (const auto &i
: modules
) {
203 // Anything we're skipping because of !can_run will be flagged
204 // to the user separately via get_health_checks
// Modules that are not (enabled AND loaded) take this path; presumably they
// are skipped (the `continue` is not visible here).
205 if (!(i
.second
->is_enabled() && i
.second
->is_loaded())) {
209 dout(4) << "Starting " << i
.first
<< dendl
;
210 active_modules
->start_one(i
.second
);
// Shut down and release the active-mode module container, if one exists.
// Runs under `lock`; does nothing when `active_modules` is null.
214 void PyModuleRegistry::active_shutdown()
216 std::lock_guard
locker(lock
);
218 if (active_modules
!= nullptr) {
219 active_modules
->shutdown();
220 active_modules
.reset();
// Final registry shutdown: stop and release any standby modules under
// `lock`, then re-acquire the interpreter's main thread state that init()
// saved in `pMainThreadState`.
//
// NOTE(review): the comment below refers to Py_Finalize(); that call is not
// visible in this listing -- confirm it follows PyEval_RestoreThread().
224 void PyModuleRegistry::shutdown()
226 std::lock_guard
locker(lock
);
228 if (standby_modules
!= nullptr) {
229 standby_modules
->shutdown();
230 standby_modules
.reset();
233 // Ideally, now, we'd be able to do this for all modules:
235 // Py_EndInterpreter(pMyThreadState);
236 // PyThreadState_Swap(pMainThreadState);
238 // Unfortunately, if the module has any other *python* threads active
239 // at this point, Py_EndInterpreter() will abort with:
241 // Fatal Python error: Py_EndInterpreter: not the last thread
243 // This can happen when using CherryPy in a module, because CherryPy
244 // runs an extra thread as a timeout monitor, which spends most of its
245 // life inside a time.sleep(60). Unless you are very, very lucky with
246 // the timing calling this destructor, that thread will still be stuck
247 // in a sleep, and Py_EndInterpreter() will abort.
249 // This could of course also happen with a poorly written module which
250 // made no attempt to clean up any additional threads it created.
252 // The safest thing to do is just not call Py_EndInterpreter(), and
253 // let Py_Finalize() kill everything after all modules are shut down.
// Counterpart of the PyEval_SaveThread() call made in init().
257 PyEval_RestoreThread(pMainThreadState
);
// Scan directory `path` for candidate modules and return their names.
//
// Each directory entry is stat()ed; for entries that are directories, a
// second stat() is made on "<entry>/module.py", and the entry name is added
// to the result set.
//
// NOTE(review): the handling of a failed opendir(), the test on the second
// stat() result, the declaration of `st`, and the closedir()/return are not
// visible in this listing -- confirm against the original file.
//
// @param path  directory to scan
// @return      set of sub-directory names recognised as modules
261 std::set
<std::string
> PyModuleRegistry::probe_modules(const std::string
&path
) const
263 DIR *dir
= opendir(path
.c_str());
268 std::set
<std::string
> modules_out
;
269 struct dirent
*entry
= NULL
;
// readdir() returns NULL once the directory has been fully read.
270 while ((entry
= readdir(dir
)) != NULL
) {
271 string
n(entry
->d_name
);
// Full path of the entry being examined.
272 string fn
= path
+ "/" + n
;
274 int r
= ::stat(fn
.c_str(), &st
);
// Only sub-directories are considered as module candidates.
275 if (r
== 0 && S_ISDIR(st
.st_mode
)) {
// Probe for the module's entry-point file inside the sub-directory.
276 string initfn
= fn
+ "/module.py";
277 r
= ::stat(initfn
.c_str(), &st
);
279 modules_out
.insert(n
);
// Forward a module command to the active module container.
//
// When `active_modules` is set, the call is delegated to its
// handle_command() and that result is returned.  The fallback path for the
// "not active yet" case is not visible in this listing.
//
// NOTE(review): no lock_guard on `lock` appears in the visible lines of this
// function, unlike most other methods here -- confirm that is intentional.
288 int PyModuleRegistry::handle_command(
289 const ModuleCommand
& module_command
,
290 const MgrSession
& session
,
291 const cmdmap_t
&cmdmap
,
292 const bufferlist
&inbuf
,
293 std::stringstream
*ds
,
294 std::stringstream
*ss
)
296 if (active_modules
) {
297 return active_modules
->handle_command(module_command
, session
, cmdmap
,
300 // We do not expect to be called before active modules is up, but
301 // it's straightforward to handle this case so let's do it.
// Collect the commands of every registered module into one vector.
// Runs under `lock`; each module appends to `result` via get_commands().
// (The return statement is not visible in this listing.)
306 std::vector
<ModuleCommand
> PyModuleRegistry::get_py_commands() const
308 std::lock_guard
l(lock
);
310 std::vector
<ModuleCommand
> result
;
311 for (const auto& i
: modules
) {
312 i
.second
->get_commands(&result
);
// Convert the module commands from get_py_commands() into MonCommand
// entries.
//
// Every entry starts with FLAG_MGR; FLAG_POLL is OR-ed in under a condition
// that is not visible in this listing.  The visible fields of each entry are
// the command string, the help string and the literal "mgr".
318 std::vector
<MonCommand
> PyModuleRegistry::get_commands() const
320 std::vector
<ModuleCommand
> commands
= get_py_commands();
321 std::vector
<MonCommand
> result
;
322 for (auto &pyc
: commands
) {
323 uint64_t flags
= MonCommand::FLAG_MGR
;
325 flags
|= MonCommand::FLAG_POLL
;
327 result
.push_back({pyc
.cmdstring
, pyc
.helpstring
, "mgr",
// Populate `checks` with health information about mgr modules.
//
// Runs under `lock`.  When `active_modules` is set, it first lets the active
// modules add their own checks, then classifies broken modules into two
// maps (module name -> error string):
//   - dependency_modules: enabled but get_can_run() is false
//     -> reported as MGR_MODULE_DEPENDENCY at HEALTH_WARN
//   - failed_modules: enabled but not loaded, or failed while runnable
//     -> reported as MGR_MODULE_ERROR at HEALTH_ERR
// Always-on modules listed in the MgrMap that do not exist in
// `active_modules` are added to failed_modules unless already classified.
//
// @param checks  health check map that receives the results
333 void PyModuleRegistry::get_health_checks(health_check_map_t
*checks
)
335 std::lock_guard
l(lock
);
337 // Only the active mgr reports module issues
338 if (active_modules
) {
339 active_modules
->get_health_checks(checks
);
341 std::map
<std::string
, std::string
> dependency_modules
;
342 std::map
<std::string
, std::string
> failed_modules
;
345 * Break up broken modules into two categories:
346 * - can_run=false: the module is working fine but explicitly
347 * telling you that a dependency is missing. Advise the user to
348 * read the message from the module and install what's missing.
349 * - failed=true or loaded=false: something unexpected is broken,
350 * either at runtime (from serve()) or at load time. This indicates
351 * a bug and the user should be guided to inspect the mgr log
352 * to investigate and gather evidence.
355 for (const auto &i
: modules
) {
356 auto module
= i
.second
;
357 if (module
->is_enabled() && !module
->get_can_run()) {
358 dependency_modules
[module
->get_name()] = module
->get_error_string();
359 } else if ((module
->is_enabled() && !module
->is_loaded())
360 || (module
->is_failed() && module
->get_can_run())) {
361 // - Unloadable modules are only reported if they're enabled,
362 // to avoid spamming users about modules they don't have the
363 // dependencies installed for because they don't use it.
364 // - Failed modules are only reported if they passed the can_run
365 // checks (to avoid outputting two health messages about a
366 // module that said can_run=false but we tried running it anyway)
367 failed_modules
[module
->get_name()] = module
->get_error_string();
371 // report failed always_on modules as health errors
372 for (const auto& name
: mgr_map
.get_always_on_modules()) {
// Pending modules get separate treatment (presumably skipped; the statement
// inside this `if` is not visible here).
373 if (active_modules
->is_pending(name
)) {
376 if (!active_modules
->module_exists(name
)) {
// Do not overwrite a more specific error recorded by the loop above.
377 if (failed_modules
.find(name
) == failed_modules
.end() &&
378 dependency_modules
.find(name
) == dependency_modules
.end()) {
379 failed_modules
[name
] = "Not found or unloadable";
// Summary line: names the module when there is exactly one, otherwise
// gives a count; per-module messages go into the check's detail list.
384 if (!dependency_modules
.empty()) {
385 std::ostringstream ss
;
386 if (dependency_modules
.size() == 1) {
387 auto iter
= dependency_modules
.begin();
388 ss
<< "Module '" << iter
->first
<< "' has failed dependency: "
390 } else if (dependency_modules
.size() > 1) {
391 ss
<< dependency_modules
.size()
392 << " mgr modules have failed dependencies";
394 auto& d
= checks
->add("MGR_MODULE_DEPENDENCY", HEALTH_WARN
, ss
.str(),
395 dependency_modules
.size());
396 for (auto& i
: dependency_modules
) {
397 std::ostringstream ss
;
398 ss
<< "Module '" << i
.first
<< "' has failed dependency: " << i
.second
;
399 d
.detail
.push_back(ss
.str());
// Same reporting shape as above, for outright failures at HEALTH_ERR.
403 if (!failed_modules
.empty()) {
404 std::ostringstream ss
;
405 if (failed_modules
.size() == 1) {
406 auto iter
= failed_modules
.begin();
407 ss
<< "Module '" << iter
->first
<< "' has failed: " << iter
->second
;
408 } else if (failed_modules
.size() > 1) {
409 ss
<< failed_modules
.size() << " mgr modules have failed";
411 auto& d
= checks
->add("MGR_MODULE_ERROR", HEALTH_ERR
, ss
.str(),
412 failed_modules
.size());
413 for (auto& i
: failed_modules
) {
414 std::ostringstream ss
;
415 ss
<< "Module '" << i
.first
<< "' has failed: " << i
.second
;
416 d
.detail
.push_back(ss
.str());
// Apply one configuration entry to `module_config.config`.
//
// Takes `module_config.lock` (not the registry's own `lock`).  The visible
// lines either store k -> v (with a debug log) or erase k; the condition
// that chooses between the two is not visible in this listing.
//
// @param k  configuration key
// @param v  configuration value
422 void PyModuleRegistry::handle_config(const std::string
&k
, const std::string
&v
)
424 std::lock_guard
l(module_config
.lock
);
427 dout(10) << "Loaded module_config entry " << k
<< ":" << v
<< dendl
;
428 module_config
.config
[k
] = v
;
430 module_config
.config
.erase(k
);
// Tell the active modules that configuration has changed.
// Runs under `lock`; does nothing when `active_modules` is not set.
434 void PyModuleRegistry::handle_config_notify()
436 std::lock_guard
l(lock
);
437 if (active_modules
) {
438 active_modules
->config_notify();
// Migrate old key/value-store entries into `module_config`.
//
// Only acts when `module_config.config` is empty.  For every entry in
// `old_config` it derives a module name and key from the entry name, applies
// a heuristic on the value (control characters, length > 256) to tell config
// from bulk store data, looks the module up in `modules`, and calls
// module_config.set_config() when the module reports the option name via
// is_option().  Entries that do not qualify are left where they are.
//
// NOTE(review): the first parameter (presumably the `monc` used below) and
// several `continue` / `else` lines are not visible in this listing --
// confirm control flow against the original file.
442 void PyModuleRegistry::upgrade_config(
444 const std::map
<std::string
, std::string
> &old_config
)
446 // Only bother doing anything if we didn't already have
447 // some new-style config.
448 if (module_config
.config
.empty()) {
449 dout(1) << "Upgrading module configuration for Mimic" << dendl
;
450 // Upgrade luminous->mimic: migrate config-key configuration
451 // into main configuration store
452 for (auto &i
: old_config
) {
453 auto last_slash
= i
.first
.rfind('/');
// Skips the first 4 characters of the entry name and takes the text up to
// the next '/'.  NOTE(review): presumably a fixed 4-character prefix such as
// "mgr/" -- verify; substr(4) throws std::out_of_range for shorter names.
454 const std::string module_name
= i
.first
.substr(4, i
.first
.substr(4).find('/'));
// Everything after the last '/' in the entry name.
455 const std::string key
= i
.first
.substr(last_slash
+ 1);
457 const auto &value
= i
.second
;
459 // Heuristic to skip things that look more like stores
461 bool is_config
= true;
462 for (const auto &c
: value
) {
463 if (c
== '\n' || c
== '\r' || c
< 0x20) {
469 if (value
.size() > 256) {
474 dout(1) << "Not migrating config module:key "
475 << module_name
<< " : " << key
<< dendl
;
479 // Check that the named module exists
480 auto module_iter
= modules
.find(module_name
);
481 if (module_iter
== modules
.end()) {
482 dout(1) << "KV store contains data for unknown module '"
483 << module_name
<< "'" << dendl
;
486 PyModuleRef module
= module_iter
->second
;
488 // Parse option name out of key
489 std::string option_name
;
// NOTE(review): `key` is the text after the last '/' of the entry name, so
// it presumably cannot contain '/' itself -- verify whether this branch is
// reachable.
490 auto slash_loc
= key
.find("/");
491 if (slash_loc
!= std::string::npos
) {
492 if (key
.size() > slash_loc
+ 1) {
494 option_name
= key
.substr(slash_loc
+ 1);
496 // Trailing slash: garbage.
497 derr
<< "Invalid mgr store key: '" << key
<< "'" << dendl
;
504 // Consult module schema to see if this is really
505 // a configuration value
506 if (!option_name
.empty() && module
->is_option(option_name
)) {
507 module_config
.set_config(monc
, module_name
, key
, i
.second
);
508 dout(4) << "Rewrote configuration module:key "
509 << module_name
<< ":" << key
<< dendl
;
511 dout(4) << "Leaving store module:key " << module_name
512 << ":" << key
<< " in store, not config" << dendl
;
516 dout(10) << "Module configuration contains "
517 << module_config
.config
.size() << " keys" << dendl
;