]>
Commit | Line | Data |
---|---|---|
3efd9988 FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2017 John Spray <john.spray@redhat.com> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | */ | |
13 | ||
14 | ||
15 | #include "include/stringify.h" | |
16 | #include "common/errno.h" | |
3efd9988 FG |
17 | |
18 | #include "BaseMgrModule.h" | |
19 | #include "PyOSDMap.h" | |
20 | #include "BaseMgrStandbyModule.h" | |
21 | #include "Gil.h" | |
11fdf7f2 TL |
22 | #include "MgrContext.h" |
23 | #include "mgr/mgr_commands.h" | |
3efd9988 FG |
24 | |
25 | #include "ActivePyModules.h" | |
26 | ||
27 | #include "PyModuleRegistry.h" | |
28 | ||
3efd9988 FG |
29 | #define dout_context g_ceph_context |
30 | #define dout_subsys ceph_subsys_mgr | |
31 | ||
32 | #undef dout_prefix | |
33 | #define dout_prefix *_dout << "mgr[py] " | |
34 | ||
3efd9988 FG |
35 | |
36 | ||
11fdf7f2 | 37 | void PyModuleRegistry::init() |
3efd9988 | 38 | { |
11fdf7f2 | 39 | std::lock_guard locker(lock); |
3efd9988 FG |
40 | |
41 | // Set up global python interpreter | |
11fdf7f2 TL |
42 | #if PY_MAJOR_VERSION >= 3 |
43 | #define WCHAR(s) L ## #s | |
eafe8130 | 44 | Py_SetProgramName(const_cast<wchar_t*>(WCHAR(MGR_PYTHON_EXECUTABLE))); |
11fdf7f2 TL |
45 | #undef WCHAR |
46 | #else | |
eafe8130 | 47 | Py_SetProgramName(const_cast<char*>(MGR_PYTHON_EXECUTABLE)); |
11fdf7f2 TL |
48 | #endif |
49 | // Add more modules | |
50 | if (g_conf().get_val<bool>("daemonize")) { | |
51 | PyImport_AppendInittab("ceph_logger", PyModule::init_ceph_logger); | |
52 | } | |
53 | PyImport_AppendInittab("ceph_module", PyModule::init_ceph_module); | |
3efd9988 FG |
54 | Py_InitializeEx(0); |
55 | ||
56 | // Let CPython know that we will be calling it back from other | |
57 | // threads in future. | |
58 | if (! PyEval_ThreadsInitialized()) { | |
59 | PyEval_InitThreads(); | |
60 | } | |
61 | ||
62 | // Drop the GIL and remember the main thread state (current | |
63 | // thread state becomes NULL) | |
64 | pMainThreadState = PyEval_SaveThread(); | |
11fdf7f2 | 65 | ceph_assert(pMainThreadState != nullptr); |
3efd9988 FG |
66 | |
67 | std::list<std::string> failed_modules; | |
68 | ||
81eedcae TL |
69 | const std::string module_path = g_conf().get_val<std::string>("mgr_module_path"); |
70 | std::set<std::string> module_names = probe_modules(module_path); | |
3efd9988 | 71 | // Load python code |
11fdf7f2 | 72 | for (const auto& module_name : module_names) { |
3efd9988 | 73 | dout(1) << "Loading python module '" << module_name << "'" << dendl; |
11fdf7f2 TL |
74 | |
75 | // Everything starts disabled, set enabled flag on module | |
76 | // when we see first MgrMap | |
77 | auto mod = std::make_shared<PyModule>(module_name); | |
3efd9988 FG |
78 | int r = mod->load(pMainThreadState); |
79 | if (r != 0) { | |
80 | // Don't use handle_pyerror() here; we don't have the GIL | |
81 | // or the right thread state (this is deliberate). | |
82 | derr << "Error loading module '" << module_name << "': " | |
83 | << cpp_strerror(r) << dendl; | |
84 | failed_modules.push_back(module_name); | |
85 | // Don't drop out here, load the other modules | |
3efd9988 | 86 | } |
11fdf7f2 TL |
87 | |
88 | // Record the module even if the load failed, so that we can | |
89 | // report its loading error | |
90 | modules[module_name] = std::move(mod); | |
3efd9988 | 91 | } |
81eedcae TL |
92 | if (module_names.empty()) { |
93 | clog->error() << "No ceph-mgr modules found in " << module_path; | |
94 | } | |
3efd9988 FG |
95 | if (!failed_modules.empty()) { |
96 | clog->error() << "Failed to load ceph-mgr modules: " << joinify( | |
97 | failed_modules.begin(), failed_modules.end(), std::string(", ")); | |
98 | } | |
3efd9988 FG |
99 | } |
100 | ||
11fdf7f2 | 101 | bool PyModuleRegistry::handle_mgr_map(const MgrMap &mgr_map_) |
3efd9988 | 102 | { |
11fdf7f2 TL |
103 | std::lock_guard l(lock); |
104 | ||
105 | if (mgr_map.epoch == 0) { | |
106 | mgr_map = mgr_map_; | |
107 | ||
108 | // First time we see MgrMap, set the enabled flags on modules | |
109 | // This should always happen before someone calls standby_start | |
110 | // or active_start | |
111 | for (const auto &[module_name, module] : modules) { | |
112 | const bool enabled = (mgr_map.modules.count(module_name) > 0); | |
113 | module->set_enabled(enabled); | |
114 | const bool always_on = (mgr_map.get_always_on_modules().count(module_name) > 0); | |
115 | module->set_always_on(always_on); | |
3efd9988 FG |
116 | } |
117 | ||
11fdf7f2 TL |
118 | return false; |
119 | } else { | |
120 | bool modules_changed = mgr_map_.modules != mgr_map.modules || | |
121 | mgr_map_.always_on_modules != mgr_map.always_on_modules; | |
122 | mgr_map = mgr_map_; | |
3efd9988 | 123 | |
11fdf7f2 TL |
124 | if (standby_modules != nullptr) { |
125 | standby_modules->handle_mgr_map(mgr_map_); | |
3efd9988 FG |
126 | } |
127 | ||
11fdf7f2 | 128 | return modules_changed; |
3efd9988 | 129 | } |
11fdf7f2 | 130 | } |
3efd9988 | 131 | |
3efd9988 | 132 | |
3efd9988 | 133 | |
11fdf7f2 | 134 | void PyModuleRegistry::standby_start(MonClient &mc, Finisher &f) |
3efd9988 | 135 | { |
11fdf7f2 TL |
136 | std::lock_guard l(lock); |
137 | ceph_assert(active_modules == nullptr); | |
138 | ceph_assert(standby_modules == nullptr); | |
139 | ||
140 | // Must have seen a MgrMap by this point, in order to know | |
141 | // which modules should be enabled | |
142 | ceph_assert(mgr_map.epoch > 0); | |
3efd9988 FG |
143 | |
144 | dout(4) << "Starting modules in standby mode" << dendl; | |
145 | ||
11fdf7f2 TL |
146 | standby_modules.reset(new StandbyPyModules( |
147 | mgr_map, module_config, clog, mc, f)); | |
3efd9988 FG |
148 | |
149 | std::set<std::string> failed_modules; | |
150 | for (const auto &i : modules) { | |
11fdf7f2 TL |
151 | if (!(i.second->is_enabled() && i.second->get_can_run())) { |
152 | // report always_on modules with a standby mode that won't run | |
153 | if (i.second->is_always_on() && i.second->pStandbyClass) { | |
3efd9988 | 154 | failed_modules.insert(i.second->get_name()); |
3efd9988 | 155 | } |
11fdf7f2 TL |
156 | continue; |
157 | } | |
158 | ||
159 | if (i.second->pStandbyClass) { | |
160 | dout(4) << "starting module " << i.second->get_name() << dendl; | |
161 | standby_modules->start_one(i.second); | |
3efd9988 FG |
162 | } else { |
163 | dout(4) << "skipping module '" << i.second->get_name() << "' because " | |
164 | "it does not implement a standby mode" << dendl; | |
165 | } | |
166 | } | |
167 | ||
168 | if (!failed_modules.empty()) { | |
169 | clog->error() << "Failed to execute ceph-mgr module(s) in standby mode: " | |
170 | << joinify(failed_modules.begin(), failed_modules.end(), | |
171 | std::string(", ")); | |
172 | } | |
173 | } | |
174 | ||
175 | void PyModuleRegistry::active_start( | |
11fdf7f2 TL |
176 | DaemonStateIndex &ds, ClusterState &cs, |
177 | const std::map<std::string, std::string> &kv_store, | |
178 | MonClient &mc, LogChannelRef clog_, LogChannelRef audit_clog_, | |
179 | Objecter &objecter_, Client &client_, Finisher &f, | |
180 | DaemonServer &server) | |
3efd9988 | 181 | { |
11fdf7f2 | 182 | std::lock_guard locker(lock); |
3efd9988 FG |
183 | |
184 | dout(4) << "Starting modules in active mode" << dendl; | |
185 | ||
11fdf7f2 TL |
186 | ceph_assert(active_modules == nullptr); |
187 | ||
188 | // Must have seen a MgrMap by this point, in order to know | |
189 | // which modules should be enabled | |
190 | ceph_assert(mgr_map.epoch > 0); | |
3efd9988 FG |
191 | |
192 | if (standby_modules != nullptr) { | |
193 | standby_modules->shutdown(); | |
194 | standby_modules.reset(); | |
195 | } | |
196 | ||
197 | active_modules.reset(new ActivePyModules( | |
11fdf7f2 TL |
198 | module_config, kv_store, ds, cs, mc, |
199 | clog_, audit_clog_, objecter_, client_, f, server, | |
200 | *this)); | |
3efd9988 FG |
201 | |
202 | for (const auto &i : modules) { | |
11fdf7f2 TL |
203 | // Anything we're skipping because of !can_run will be flagged |
204 | // to the user separately via get_health_checks | |
205 | if (!(i.second->is_enabled() && i.second->is_loaded())) { | |
206 | continue; | |
3efd9988 | 207 | } |
11fdf7f2 TL |
208 | |
209 | dout(4) << "Starting " << i.first << dendl; | |
210 | active_modules->start_one(i.second); | |
3efd9988 FG |
211 | } |
212 | } | |
213 | ||
214 | void PyModuleRegistry::active_shutdown() | |
215 | { | |
11fdf7f2 | 216 | std::lock_guard locker(lock); |
3efd9988 FG |
217 | |
218 | if (active_modules != nullptr) { | |
219 | active_modules->shutdown(); | |
220 | active_modules.reset(); | |
221 | } | |
222 | } | |
223 | ||
224 | void PyModuleRegistry::shutdown() | |
225 | { | |
11fdf7f2 | 226 | std::lock_guard locker(lock); |
3efd9988 FG |
227 | |
228 | if (standby_modules != nullptr) { | |
229 | standby_modules->shutdown(); | |
230 | standby_modules.reset(); | |
231 | } | |
232 | ||
233 | // Ideally, now, we'd be able to do this for all modules: | |
234 | // | |
235 | // Py_EndInterpreter(pMyThreadState); | |
236 | // PyThreadState_Swap(pMainThreadState); | |
237 | // | |
238 | // Unfortunately, if the module has any other *python* threads active | |
239 | // at this point, Py_EndInterpreter() will abort with: | |
240 | // | |
241 | // Fatal Python error: Py_EndInterpreter: not the last thread | |
242 | // | |
243 | // This can happen when using CherryPy in a module, becuase CherryPy | |
244 | // runs an extra thread as a timeout monitor, which spends most of its | |
245 | // life inside a time.sleep(60). Unless you are very, very lucky with | |
246 | // the timing calling this destructor, that thread will still be stuck | |
247 | // in a sleep, and Py_EndInterpreter() will abort. | |
248 | // | |
249 | // This could of course also happen with a poorly written module which | |
250 | // made no attempt to clean up any additional threads it created. | |
251 | // | |
252 | // The safest thing to do is just not call Py_EndInterpreter(), and | |
253 | // let Py_Finalize() kill everything after all modules are shut down. | |
254 | ||
255 | modules.clear(); | |
256 | ||
257 | PyEval_RestoreThread(pMainThreadState); | |
258 | Py_Finalize(); | |
259 | } | |
260 | ||
81eedcae | 261 | std::set<std::string> PyModuleRegistry::probe_modules(const std::string &path) const |
3efd9988 FG |
262 | { |
263 | DIR *dir = opendir(path.c_str()); | |
264 | if (!dir) { | |
11fdf7f2 | 265 | return {}; |
3efd9988 | 266 | } |
11fdf7f2 TL |
267 | |
268 | std::set<std::string> modules_out; | |
3efd9988 FG |
269 | struct dirent *entry = NULL; |
270 | while ((entry = readdir(dir)) != NULL) { | |
271 | string n(entry->d_name); | |
272 | string fn = path + "/" + n; | |
273 | struct stat st; | |
274 | int r = ::stat(fn.c_str(), &st); | |
275 | if (r == 0 && S_ISDIR(st.st_mode)) { | |
276 | string initfn = fn + "/module.py"; | |
277 | r = ::stat(initfn.c_str(), &st); | |
278 | if (r == 0) { | |
11fdf7f2 | 279 | modules_out.insert(n); |
3efd9988 FG |
280 | } |
281 | } | |
282 | } | |
283 | closedir(dir); | |
11fdf7f2 TL |
284 | |
285 | return modules_out; | |
286 | } | |
287 | ||
288 | int PyModuleRegistry::handle_command( | |
92f5a8d4 TL |
289 | const ModuleCommand& module_command, |
290 | const MgrSession& session, | |
11fdf7f2 TL |
291 | const cmdmap_t &cmdmap, |
292 | const bufferlist &inbuf, | |
293 | std::stringstream *ds, | |
294 | std::stringstream *ss) | |
295 | { | |
296 | if (active_modules) { | |
92f5a8d4 TL |
297 | return active_modules->handle_command(module_command, session, cmdmap, |
298 | inbuf, ds, ss); | |
11fdf7f2 TL |
299 | } else { |
300 | // We do not expect to be called before active modules is up, but | |
301 | // it's straightfoward to handle this case so let's do it. | |
302 | return -EAGAIN; | |
303 | } | |
304 | } | |
305 | ||
306 | std::vector<ModuleCommand> PyModuleRegistry::get_py_commands() const | |
307 | { | |
308 | std::lock_guard l(lock); | |
309 | ||
310 | std::vector<ModuleCommand> result; | |
311 | for (const auto& i : modules) { | |
312 | i.second->get_commands(&result); | |
313 | } | |
314 | ||
315 | return result; | |
316 | } | |
317 | ||
318 | std::vector<MonCommand> PyModuleRegistry::get_commands() const | |
319 | { | |
320 | std::vector<ModuleCommand> commands = get_py_commands(); | |
321 | std::vector<MonCommand> result; | |
322 | for (auto &pyc: commands) { | |
323 | uint64_t flags = MonCommand::FLAG_MGR; | |
324 | if (pyc.polling) { | |
325 | flags |= MonCommand::FLAG_POLL; | |
326 | } | |
327 | result.push_back({pyc.cmdstring, pyc.helpstring, "mgr", | |
328 | pyc.perm, flags}); | |
329 | } | |
330 | return result; | |
331 | } | |
332 | ||
333 | void PyModuleRegistry::get_health_checks(health_check_map_t *checks) | |
334 | { | |
335 | std::lock_guard l(lock); | |
336 | ||
337 | // Only the active mgr reports module issues | |
338 | if (active_modules) { | |
339 | active_modules->get_health_checks(checks); | |
340 | ||
341 | std::map<std::string, std::string> dependency_modules; | |
342 | std::map<std::string, std::string> failed_modules; | |
343 | ||
344 | /* | |
345 | * Break up broken modules into two categories: | |
346 | * - can_run=false: the module is working fine but explicitly | |
347 | * telling you that a dependency is missing. Advise the user to | |
348 | * read the message from the module and install what's missing. | |
349 | * - failed=true or loaded=false: something unexpected is broken, | |
350 | * either at runtime (from serve()) or at load time. This indicates | |
351 | * a bug and the user should be guided to inspect the mgr log | |
352 | * to investigate and gather evidence. | |
353 | */ | |
354 | ||
355 | for (const auto &i : modules) { | |
356 | auto module = i.second; | |
357 | if (module->is_enabled() && !module->get_can_run()) { | |
358 | dependency_modules[module->get_name()] = module->get_error_string(); | |
359 | } else if ((module->is_enabled() && !module->is_loaded()) | |
360 | || (module->is_failed() && module->get_can_run())) { | |
361 | // - Unloadable modules are only reported if they're enabled, | |
362 | // to avoid spamming users about modules they don't have the | |
363 | // dependencies installed for because they don't use it. | |
364 | // - Failed modules are only reported if they passed the can_run | |
365 | // checks (to avoid outputting two health messages about a | |
366 | // module that said can_run=false but we tried running it anyway) | |
367 | failed_modules[module->get_name()] = module->get_error_string(); | |
368 | } | |
369 | } | |
370 | ||
371 | // report failed always_on modules as health errors | |
372 | for (const auto& name : mgr_map.get_always_on_modules()) { | |
373 | if (!active_modules->module_exists(name)) { | |
374 | if (failed_modules.find(name) == failed_modules.end() && | |
375 | dependency_modules.find(name) == dependency_modules.end()) { | |
376 | failed_modules[name] = "Not found or unloadable"; | |
377 | } | |
378 | } | |
379 | } | |
380 | ||
381 | if (!dependency_modules.empty()) { | |
382 | std::ostringstream ss; | |
383 | if (dependency_modules.size() == 1) { | |
384 | auto iter = dependency_modules.begin(); | |
385 | ss << "Module '" << iter->first << "' has failed dependency: " | |
386 | << iter->second; | |
387 | } else if (dependency_modules.size() > 1) { | |
388 | ss << dependency_modules.size() | |
389 | << " mgr modules have failed dependencies"; | |
390 | } | |
391 | auto& d = checks->add("MGR_MODULE_DEPENDENCY", HEALTH_WARN, ss.str()); | |
392 | for (auto& i : dependency_modules) { | |
393 | std::ostringstream ss; | |
394 | ss << "Module '" << i.first << "' has failed dependency: " << i.second; | |
395 | d.detail.push_back(ss.str()); | |
396 | } | |
397 | } | |
398 | ||
399 | if (!failed_modules.empty()) { | |
400 | std::ostringstream ss; | |
401 | if (failed_modules.size() == 1) { | |
402 | auto iter = failed_modules.begin(); | |
403 | ss << "Module '" << iter->first << "' has failed: " << iter->second; | |
404 | } else if (failed_modules.size() > 1) { | |
405 | ss << failed_modules.size() << " mgr modules have failed"; | |
406 | } | |
407 | auto& d = checks->add("MGR_MODULE_ERROR", HEALTH_ERR, ss.str()); | |
408 | for (auto& i : failed_modules) { | |
409 | std::ostringstream ss; | |
410 | ss << "Module '" << i.first << "' has failed: " << i.second; | |
411 | d.detail.push_back(ss.str()); | |
412 | } | |
413 | } | |
414 | } | |
415 | } | |
416 | ||
417 | void PyModuleRegistry::handle_config(const std::string &k, const std::string &v) | |
418 | { | |
419 | std::lock_guard l(module_config.lock); | |
420 | ||
421 | if (!v.empty()) { | |
422 | dout(4) << "Loaded module_config entry " << k << ":" << v << dendl; | |
423 | module_config.config[k] = v; | |
424 | } else { | |
425 | module_config.config.erase(k); | |
426 | } | |
427 | } | |
428 | ||
429 | void PyModuleRegistry::handle_config_notify() | |
430 | { | |
431 | std::lock_guard l(lock); | |
432 | if (active_modules) { | |
433 | active_modules->config_notify(); | |
434 | } | |
3efd9988 FG |
435 | } |
436 | ||
11fdf7f2 TL |
437 | void PyModuleRegistry::upgrade_config( |
438 | MonClient *monc, | |
439 | const std::map<std::string, std::string> &old_config) | |
3efd9988 | 440 | { |
11fdf7f2 TL |
441 | // Only bother doing anything if we didn't already have |
442 | // some new-style config. | |
443 | if (module_config.config.empty()) { | |
444 | dout(1) << "Upgrading module configuration for Mimic" << dendl; | |
445 | // Upgrade luminous->mimic: migrate config-key configuration | |
446 | // into main configuration store | |
447 | for (auto &i : old_config) { | |
448 | auto last_slash = i.first.rfind('/'); | |
449 | const std::string module_name = i.first.substr(4, i.first.substr(4).find('/')); | |
450 | const std::string key = i.first.substr(last_slash + 1); | |
451 | ||
452 | const auto &value = i.second; | |
453 | ||
454 | // Heuristic to skip things that look more like stores | |
455 | // than configs. | |
456 | bool is_config = true; | |
457 | for (const auto &c : value) { | |
458 | if (c == '\n' || c == '\r' || c < 0x20) { | |
459 | is_config = false; | |
460 | break; | |
461 | } | |
462 | } | |
463 | ||
464 | if (value.size() > 256) { | |
465 | is_config = false; | |
466 | } | |
467 | ||
468 | if (!is_config) { | |
469 | dout(1) << "Not migrating config module:key " | |
470 | << module_name << " : " << key << dendl; | |
471 | continue; | |
472 | } | |
473 | ||
474 | // Check that the named module exists | |
475 | auto module_iter = modules.find(module_name); | |
476 | if (module_iter == modules.end()) { | |
477 | dout(1) << "KV store contains data for unknown module '" | |
478 | << module_name << "'" << dendl; | |
479 | continue; | |
480 | } | |
481 | PyModuleRef module = module_iter->second; | |
482 | ||
483 | // Parse option name out of key | |
484 | std::string option_name; | |
485 | auto slash_loc = key.find("/"); | |
486 | if (slash_loc != std::string::npos) { | |
487 | if (key.size() > slash_loc + 1) { | |
488 | // Localized option | |
489 | option_name = key.substr(slash_loc + 1); | |
490 | } else { | |
491 | // Trailing slash: garbage. | |
492 | derr << "Invalid mgr store key: '" << key << "'" << dendl; | |
493 | continue; | |
494 | } | |
495 | } else { | |
496 | option_name = key; | |
497 | } | |
498 | ||
499 | // Consult module schema to see if this is really | |
500 | // a configuration value | |
501 | if (!option_name.empty() && module->is_option(option_name)) { | |
502 | module_config.set_config(monc, module_name, key, i.second); | |
503 | dout(4) << "Rewrote configuration module:key " | |
504 | << module_name << ":" << key << dendl; | |
505 | } else { | |
506 | dout(4) << "Leaving store module:key " << module_name | |
507 | << ":" << key << " in store, not config" << dendl; | |
508 | } | |
509 | } | |
510 | } else { | |
511 | dout(10) << "Module configuration contains " | |
512 | << module_config.config.size() << " keys" << dendl; | |
513 | } | |
3efd9988 FG |
514 | } |
515 |