]> git.proxmox.com Git - ceph.git/blob - ceph/src/mgr/PyModuleRegistry.cc
bump version to 15.2.4-pve1
[ceph.git] / ceph / src / mgr / PyModuleRegistry.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2017 John Spray <john.spray@redhat.com>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 */
13
14
15 #include "include/stringify.h"
16 #include "common/errno.h"
17
18 #include "BaseMgrModule.h"
19 #include "PyOSDMap.h"
20 #include "BaseMgrStandbyModule.h"
21 #include "Gil.h"
22 #include "MgrContext.h"
23 #include "mgr/mgr_commands.h"
24
25 #include "ActivePyModules.h"
26
27 #include "PyModuleRegistry.h"
28
29 #define dout_context g_ceph_context
30 #define dout_subsys ceph_subsys_mgr
31
32 #undef dout_prefix
33 #define dout_prefix *_dout << "mgr[py] "
34
35
36
37 void PyModuleRegistry::init()
38 {
39 std::lock_guard locker(lock);
40
41 // Set up global python interpreter
42 #if PY_MAJOR_VERSION >= 3
43 #define WCHAR(s) L ## #s
44 Py_SetProgramName(const_cast<wchar_t*>(WCHAR(MGR_PYTHON_EXECUTABLE)));
45 #undef WCHAR
46 #else
47 Py_SetProgramName(const_cast<char*>(MGR_PYTHON_EXECUTABLE));
48 #endif
49 // Add more modules
50 if (g_conf().get_val<bool>("daemonize")) {
51 PyImport_AppendInittab("ceph_logger", PyModule::init_ceph_logger);
52 }
53 PyImport_AppendInittab("ceph_module", PyModule::init_ceph_module);
54 Py_InitializeEx(0);
55
56 // Let CPython know that we will be calling it back from other
57 // threads in future.
58 if (! PyEval_ThreadsInitialized()) {
59 PyEval_InitThreads();
60 }
61
62 // Drop the GIL and remember the main thread state (current
63 // thread state becomes NULL)
64 pMainThreadState = PyEval_SaveThread();
65 ceph_assert(pMainThreadState != nullptr);
66
67 std::list<std::string> failed_modules;
68
69 const std::string module_path = g_conf().get_val<std::string>("mgr_module_path");
70 std::set<std::string> module_names = probe_modules(module_path);
71 // Load python code
72 for (const auto& module_name : module_names) {
73 dout(1) << "Loading python module '" << module_name << "'" << dendl;
74
75 // Everything starts disabled, set enabled flag on module
76 // when we see first MgrMap
77 auto mod = std::make_shared<PyModule>(module_name);
78 int r = mod->load(pMainThreadState);
79 if (r != 0) {
80 // Don't use handle_pyerror() here; we don't have the GIL
81 // or the right thread state (this is deliberate).
82 derr << "Error loading module '" << module_name << "': "
83 << cpp_strerror(r) << dendl;
84 failed_modules.push_back(module_name);
85 // Don't drop out here, load the other modules
86 }
87
88 // Record the module even if the load failed, so that we can
89 // report its loading error
90 modules[module_name] = std::move(mod);
91 }
92 if (module_names.empty()) {
93 clog->error() << "No ceph-mgr modules found in " << module_path;
94 }
95 if (!failed_modules.empty()) {
96 clog->error() << "Failed to load ceph-mgr modules: " << joinify(
97 failed_modules.begin(), failed_modules.end(), std::string(", "));
98 }
99 }
100
101 bool PyModuleRegistry::handle_mgr_map(const MgrMap &mgr_map_)
102 {
103 std::lock_guard l(lock);
104
105 if (mgr_map.epoch == 0) {
106 mgr_map = mgr_map_;
107
108 // First time we see MgrMap, set the enabled flags on modules
109 // This should always happen before someone calls standby_start
110 // or active_start
111 for (const auto &[module_name, module] : modules) {
112 const bool enabled = (mgr_map.modules.count(module_name) > 0);
113 module->set_enabled(enabled);
114 const bool always_on = (mgr_map.get_always_on_modules().count(module_name) > 0);
115 module->set_always_on(always_on);
116 }
117
118 return false;
119 } else {
120 bool modules_changed = mgr_map_.modules != mgr_map.modules ||
121 mgr_map_.always_on_modules != mgr_map.always_on_modules;
122 mgr_map = mgr_map_;
123
124 if (standby_modules != nullptr) {
125 standby_modules->handle_mgr_map(mgr_map_);
126 }
127
128 return modules_changed;
129 }
130 }
131
132
133
134 void PyModuleRegistry::standby_start(MonClient &mc, Finisher &f)
135 {
136 std::lock_guard l(lock);
137 ceph_assert(active_modules == nullptr);
138 ceph_assert(standby_modules == nullptr);
139
140 // Must have seen a MgrMap by this point, in order to know
141 // which modules should be enabled
142 ceph_assert(mgr_map.epoch > 0);
143
144 dout(4) << "Starting modules in standby mode" << dendl;
145
146 standby_modules.reset(new StandbyPyModules(
147 mgr_map, module_config, clog, mc, f));
148
149 std::set<std::string> failed_modules;
150 for (const auto &i : modules) {
151 if (!(i.second->is_enabled() && i.second->get_can_run())) {
152 // report always_on modules with a standby mode that won't run
153 if (i.second->is_always_on() && i.second->pStandbyClass) {
154 failed_modules.insert(i.second->get_name());
155 }
156 continue;
157 }
158
159 if (i.second->pStandbyClass) {
160 dout(4) << "starting module " << i.second->get_name() << dendl;
161 standby_modules->start_one(i.second);
162 } else {
163 dout(4) << "skipping module '" << i.second->get_name() << "' because "
164 "it does not implement a standby mode" << dendl;
165 }
166 }
167
168 if (!failed_modules.empty()) {
169 clog->error() << "Failed to execute ceph-mgr module(s) in standby mode: "
170 << joinify(failed_modules.begin(), failed_modules.end(),
171 std::string(", "));
172 }
173 }
174
175 void PyModuleRegistry::active_start(
176 DaemonStateIndex &ds, ClusterState &cs,
177 const std::map<std::string, std::string> &kv_store,
178 MonClient &mc, LogChannelRef clog_, LogChannelRef audit_clog_,
179 Objecter &objecter_, Client &client_, Finisher &f,
180 DaemonServer &server)
181 {
182 std::lock_guard locker(lock);
183
184 dout(4) << "Starting modules in active mode" << dendl;
185
186 ceph_assert(active_modules == nullptr);
187
188 // Must have seen a MgrMap by this point, in order to know
189 // which modules should be enabled
190 ceph_assert(mgr_map.epoch > 0);
191
192 if (standby_modules != nullptr) {
193 standby_modules->shutdown();
194 standby_modules.reset();
195 }
196
197 active_modules.reset(new ActivePyModules(
198 module_config, kv_store, ds, cs, mc,
199 clog_, audit_clog_, objecter_, client_, f, server,
200 *this));
201
202 for (const auto &i : modules) {
203 // Anything we're skipping because of !can_run will be flagged
204 // to the user separately via get_health_checks
205 if (!(i.second->is_enabled() && i.second->is_loaded())) {
206 continue;
207 }
208
209 dout(4) << "Starting " << i.first << dendl;
210 active_modules->start_one(i.second);
211 }
212 }
213
214 void PyModuleRegistry::active_shutdown()
215 {
216 std::lock_guard locker(lock);
217
218 if (active_modules != nullptr) {
219 active_modules->shutdown();
220 active_modules.reset();
221 }
222 }
223
224 void PyModuleRegistry::shutdown()
225 {
226 std::lock_guard locker(lock);
227
228 if (standby_modules != nullptr) {
229 standby_modules->shutdown();
230 standby_modules.reset();
231 }
232
233 // Ideally, now, we'd be able to do this for all modules:
234 //
235 // Py_EndInterpreter(pMyThreadState);
236 // PyThreadState_Swap(pMainThreadState);
237 //
238 // Unfortunately, if the module has any other *python* threads active
239 // at this point, Py_EndInterpreter() will abort with:
240 //
241 // Fatal Python error: Py_EndInterpreter: not the last thread
242 //
243 // This can happen when using CherryPy in a module, becuase CherryPy
244 // runs an extra thread as a timeout monitor, which spends most of its
245 // life inside a time.sleep(60). Unless you are very, very lucky with
246 // the timing calling this destructor, that thread will still be stuck
247 // in a sleep, and Py_EndInterpreter() will abort.
248 //
249 // This could of course also happen with a poorly written module which
250 // made no attempt to clean up any additional threads it created.
251 //
252 // The safest thing to do is just not call Py_EndInterpreter(), and
253 // let Py_Finalize() kill everything after all modules are shut down.
254
255 modules.clear();
256
257 PyEval_RestoreThread(pMainThreadState);
258 Py_Finalize();
259 }
260
261 std::set<std::string> PyModuleRegistry::probe_modules(const std::string &path) const
262 {
263 DIR *dir = opendir(path.c_str());
264 if (!dir) {
265 return {};
266 }
267
268 std::set<std::string> modules_out;
269 struct dirent *entry = NULL;
270 while ((entry = readdir(dir)) != NULL) {
271 string n(entry->d_name);
272 string fn = path + "/" + n;
273 struct stat st;
274 int r = ::stat(fn.c_str(), &st);
275 if (r == 0 && S_ISDIR(st.st_mode)) {
276 string initfn = fn + "/module.py";
277 r = ::stat(initfn.c_str(), &st);
278 if (r == 0) {
279 modules_out.insert(n);
280 }
281 }
282 }
283 closedir(dir);
284
285 return modules_out;
286 }
287
288 int PyModuleRegistry::handle_command(
289 const ModuleCommand& module_command,
290 const MgrSession& session,
291 const cmdmap_t &cmdmap,
292 const bufferlist &inbuf,
293 std::stringstream *ds,
294 std::stringstream *ss)
295 {
296 if (active_modules) {
297 return active_modules->handle_command(module_command, session, cmdmap,
298 inbuf, ds, ss);
299 } else {
300 // We do not expect to be called before active modules is up, but
301 // it's straightfoward to handle this case so let's do it.
302 return -EAGAIN;
303 }
304 }
305
306 std::vector<ModuleCommand> PyModuleRegistry::get_py_commands() const
307 {
308 std::lock_guard l(lock);
309
310 std::vector<ModuleCommand> result;
311 for (const auto& i : modules) {
312 i.second->get_commands(&result);
313 }
314
315 return result;
316 }
317
318 std::vector<MonCommand> PyModuleRegistry::get_commands() const
319 {
320 std::vector<ModuleCommand> commands = get_py_commands();
321 std::vector<MonCommand> result;
322 for (auto &pyc: commands) {
323 uint64_t flags = MonCommand::FLAG_MGR;
324 if (pyc.polling) {
325 flags |= MonCommand::FLAG_POLL;
326 }
327 result.push_back({pyc.cmdstring, pyc.helpstring, "mgr",
328 pyc.perm, flags});
329 }
330 return result;
331 }
332
333 void PyModuleRegistry::get_health_checks(health_check_map_t *checks)
334 {
335 std::lock_guard l(lock);
336
337 // Only the active mgr reports module issues
338 if (active_modules) {
339 active_modules->get_health_checks(checks);
340
341 std::map<std::string, std::string> dependency_modules;
342 std::map<std::string, std::string> failed_modules;
343
344 /*
345 * Break up broken modules into two categories:
346 * - can_run=false: the module is working fine but explicitly
347 * telling you that a dependency is missing. Advise the user to
348 * read the message from the module and install what's missing.
349 * - failed=true or loaded=false: something unexpected is broken,
350 * either at runtime (from serve()) or at load time. This indicates
351 * a bug and the user should be guided to inspect the mgr log
352 * to investigate and gather evidence.
353 */
354
355 for (const auto &i : modules) {
356 auto module = i.second;
357 if (module->is_enabled() && !module->get_can_run()) {
358 dependency_modules[module->get_name()] = module->get_error_string();
359 } else if ((module->is_enabled() && !module->is_loaded())
360 || (module->is_failed() && module->get_can_run())) {
361 // - Unloadable modules are only reported if they're enabled,
362 // to avoid spamming users about modules they don't have the
363 // dependencies installed for because they don't use it.
364 // - Failed modules are only reported if they passed the can_run
365 // checks (to avoid outputting two health messages about a
366 // module that said can_run=false but we tried running it anyway)
367 failed_modules[module->get_name()] = module->get_error_string();
368 }
369 }
370
371 // report failed always_on modules as health errors
372 for (const auto& name : mgr_map.get_always_on_modules()) {
373 if (!active_modules->module_exists(name)) {
374 if (failed_modules.find(name) == failed_modules.end() &&
375 dependency_modules.find(name) == dependency_modules.end()) {
376 failed_modules[name] = "Not found or unloadable";
377 }
378 }
379 }
380
381 if (!dependency_modules.empty()) {
382 std::ostringstream ss;
383 if (dependency_modules.size() == 1) {
384 auto iter = dependency_modules.begin();
385 ss << "Module '" << iter->first << "' has failed dependency: "
386 << iter->second;
387 } else if (dependency_modules.size() > 1) {
388 ss << dependency_modules.size()
389 << " mgr modules have failed dependencies";
390 }
391 auto& d = checks->add("MGR_MODULE_DEPENDENCY", HEALTH_WARN, ss.str(),
392 dependency_modules.size());
393 for (auto& i : dependency_modules) {
394 std::ostringstream ss;
395 ss << "Module '" << i.first << "' has failed dependency: " << i.second;
396 d.detail.push_back(ss.str());
397 }
398 }
399
400 if (!failed_modules.empty()) {
401 std::ostringstream ss;
402 if (failed_modules.size() == 1) {
403 auto iter = failed_modules.begin();
404 ss << "Module '" << iter->first << "' has failed: " << iter->second;
405 } else if (failed_modules.size() > 1) {
406 ss << failed_modules.size() << " mgr modules have failed";
407 }
408 auto& d = checks->add("MGR_MODULE_ERROR", HEALTH_ERR, ss.str(),
409 failed_modules.size());
410 for (auto& i : failed_modules) {
411 std::ostringstream ss;
412 ss << "Module '" << i.first << "' has failed: " << i.second;
413 d.detail.push_back(ss.str());
414 }
415 }
416 }
417 }
418
419 void PyModuleRegistry::handle_config(const std::string &k, const std::string &v)
420 {
421 std::lock_guard l(module_config.lock);
422
423 if (!v.empty()) {
424 dout(10) << "Loaded module_config entry " << k << ":" << v << dendl;
425 module_config.config[k] = v;
426 } else {
427 module_config.config.erase(k);
428 }
429 }
430
431 void PyModuleRegistry::handle_config_notify()
432 {
433 std::lock_guard l(lock);
434 if (active_modules) {
435 active_modules->config_notify();
436 }
437 }
438
439 void PyModuleRegistry::upgrade_config(
440 MonClient *monc,
441 const std::map<std::string, std::string> &old_config)
442 {
443 // Only bother doing anything if we didn't already have
444 // some new-style config.
445 if (module_config.config.empty()) {
446 dout(1) << "Upgrading module configuration for Mimic" << dendl;
447 // Upgrade luminous->mimic: migrate config-key configuration
448 // into main configuration store
449 for (auto &i : old_config) {
450 auto last_slash = i.first.rfind('/');
451 const std::string module_name = i.first.substr(4, i.first.substr(4).find('/'));
452 const std::string key = i.first.substr(last_slash + 1);
453
454 const auto &value = i.second;
455
456 // Heuristic to skip things that look more like stores
457 // than configs.
458 bool is_config = true;
459 for (const auto &c : value) {
460 if (c == '\n' || c == '\r' || c < 0x20) {
461 is_config = false;
462 break;
463 }
464 }
465
466 if (value.size() > 256) {
467 is_config = false;
468 }
469
470 if (!is_config) {
471 dout(1) << "Not migrating config module:key "
472 << module_name << " : " << key << dendl;
473 continue;
474 }
475
476 // Check that the named module exists
477 auto module_iter = modules.find(module_name);
478 if (module_iter == modules.end()) {
479 dout(1) << "KV store contains data for unknown module '"
480 << module_name << "'" << dendl;
481 continue;
482 }
483 PyModuleRef module = module_iter->second;
484
485 // Parse option name out of key
486 std::string option_name;
487 auto slash_loc = key.find("/");
488 if (slash_loc != std::string::npos) {
489 if (key.size() > slash_loc + 1) {
490 // Localized option
491 option_name = key.substr(slash_loc + 1);
492 } else {
493 // Trailing slash: garbage.
494 derr << "Invalid mgr store key: '" << key << "'" << dendl;
495 continue;
496 }
497 } else {
498 option_name = key;
499 }
500
501 // Consult module schema to see if this is really
502 // a configuration value
503 if (!option_name.empty() && module->is_option(option_name)) {
504 module_config.set_config(monc, module_name, key, i.second);
505 dout(4) << "Rewrote configuration module:key "
506 << module_name << ":" << key << dendl;
507 } else {
508 dout(4) << "Leaving store module:key " << module_name
509 << ":" << key << " in store, not config" << dendl;
510 }
511 }
512 } else {
513 dout(10) << "Module configuration contains "
514 << module_config.config.size() << " keys" << dendl;
515 }
516 }
517