1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2014 John Spray <john.spray@inktank.com>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
14 // Include this first to get python headers earlier
17 #include "common/errno.h"
18 #include "include/stringify.h"
20 #include "PyFormatter.h"
22 #include "osd/OSDMap.h"
23 #include "mon/MonMap.h"
25 #include "mgr/MgrContext.h"
27 // For ::mgr_store_prefix
29 #include "PyModuleRegistry.h"
32 #include "ActivePyModules.h"
33 #include "DaemonKey.h"
34 #include "DaemonServer.h"
36 #define dout_context g_ceph_context
37 #define dout_subsys ceph_subsys_mgr
39 #define dout_prefix *_dout << "mgr " << __func__ << " "
41 ActivePyModules::ActivePyModules(
42 PyModuleConfig
&module_config_
,
43 std::map
<std::string
, std::string
> store_data
,
44 bool mon_provides_kv_sub
,
45 DaemonStateIndex
&ds
, ClusterState
&cs
,
46 MonClient
&mc
, LogChannelRef clog_
,
47 LogChannelRef audit_clog_
, Objecter
&objecter_
,
48 Client
&client_
, Finisher
&f
, DaemonServer
&server
,
49 PyModuleRegistry
&pmr
)
50 : module_config(module_config_
), daemon_state(ds
), cluster_state(cs
),
51 monc(mc
), clog(clog_
), audit_clog(audit_clog_
), objecter(objecter_
),
52 client(client_
), finisher(f
),
53 cmd_finisher(g_ceph_context
, "cmd_finisher", "cmdfin"),
54 server(server
), py_module_registry(pmr
)
56 store_cache
= std::move(store_data
);
57 // we can only trust our ConfigMap if the mon cluster has provided
58 // kv sub since our startup.
59 have_local_config_map
= mon_provides_kv_sub
;
60 _refresh_config_map();
64 ActivePyModules::~ActivePyModules() = default;
66 void ActivePyModules::dump_server(const std::string
&hostname
,
67 const DaemonStateCollection
&dmc
,
70 f
->dump_string("hostname", hostname
);
71 f
->open_array_section("services");
72 std::string ceph_version
;
74 for (const auto &[key
, state
] : dmc
) {
75 without_gil([&ceph_version
, state
=state
] {
76 std::lock_guard
l(state
->lock
);
77 // TODO: pick the highest version, and make sure that
78 // somewhere else (during health reporting?) we are
79 // indicating to the user if we see mixed versions
80 auto ver_iter
= state
->metadata
.find("ceph_version");
81 if (ver_iter
!= state
->metadata
.end()) {
82 ceph_version
= state
->metadata
.at("ceph_version");
85 f
->open_object_section("service");
86 f
->dump_string("type", key
.type
);
87 f
->dump_string("id", key
.name
);
92 f
->dump_string("ceph_version", ceph_version
);
95 PyObject
*ActivePyModules::get_server_python(const std::string
&hostname
)
97 const auto dmc
= without_gil([&]{
98 std::lock_guard
l(lock
);
99 dout(10) << " (" << hostname
<< ")" << dendl
;
100 return daemon_state
.get_by_server(hostname
);
103 dump_server(hostname
, dmc
, &f
);
108 PyObject
*ActivePyModules::list_servers_python()
110 dout(10) << " >" << dendl
;
112 without_gil_t no_gil
;
113 return daemon_state
.with_daemons_by_server([this, &no_gil
]
114 (const std::map
<std::string
, DaemonStateCollection
> &all
) {
115 with_gil_t with_gil
{no_gil
};
116 PyFormatter
f(false, true);
117 for (const auto &[hostname
, daemon_state
] : all
) {
118 f
.open_object_section("server");
119 dump_server(hostname
, daemon_state
, &f
);
126 PyObject
*ActivePyModules::get_metadata_python(
127 const std::string
&svc_type
,
128 const std::string
&svc_id
)
130 auto metadata
= daemon_state
.get(DaemonKey
{svc_type
, svc_id
});
131 if (metadata
== nullptr) {
132 derr
<< "Requested missing service " << svc_type
<< "." << svc_id
<< dendl
;
135 auto l
= without_gil([&] {
136 return std::lock_guard(lock
);
139 f
.dump_string("hostname", metadata
->hostname
);
140 for (const auto &[key
, val
] : metadata
->metadata
) {
141 f
.dump_string(key
, val
);
147 PyObject
*ActivePyModules::get_daemon_status_python(
148 const std::string
&svc_type
,
149 const std::string
&svc_id
)
151 auto metadata
= daemon_state
.get(DaemonKey
{svc_type
, svc_id
});
152 if (metadata
== nullptr) {
153 derr
<< "Requested missing service " << svc_type
<< "." << svc_id
<< dendl
;
156 auto l
= without_gil([&] {
157 return std::lock_guard(lock
);
160 for (const auto &[daemon
, status
] : metadata
->service_status
) {
161 f
.dump_string(daemon
, status
);
166 PyObject
*ActivePyModules::get_python(const std::string
&what
)
170 // Drop the GIL, as most of the following blocks will block on
171 // a mutex -- they are all responsible for re-taking the GIL before
172 // touching the PyFormatter instance or returning from the function.
173 without_gil_t no_gil
;
175 if (what
== "fs_map") {
176 return cluster_state
.with_fsmap([&](const FSMap
&fsmap
) {
177 with_gil_t with_gil
{no_gil
};
181 } else if (what
== "osdmap_crush_map_text") {
183 cluster_state
.with_osdmap([&](const OSDMap
&osd_map
){
184 osd_map
.crush
->encode(rdata
, CEPH_FEATURES_SUPPORTED_DEFAULT
);
186 std::string crush_text
= rdata
.to_str();
187 with_gil_t with_gil
{no_gil
};
188 return PyUnicode_FromString(crush_text
.c_str());
189 } else if (what
.substr(0, 7) == "osd_map") {
190 return cluster_state
.with_osdmap([&](const OSDMap
&osd_map
){
191 with_gil_t with_gil
{no_gil
};
192 if (what
== "osd_map") {
194 } else if (what
== "osd_map_tree") {
195 osd_map
.print_tree(&f
, nullptr);
196 } else if (what
== "osd_map_crush") {
197 osd_map
.crush
->dump(&f
);
201 } else if (what
== "modified_config_options") {
202 auto all_daemons
= daemon_state
.get_all();
204 for (auto& [key
, daemon
] : all_daemons
) {
205 std::lock_guard
l(daemon
->lock
);
206 for (auto& [name
, valmap
] : daemon
->config
) {
210 with_gil_t with_gil
{no_gil
};
211 f
.open_array_section("options");
212 for (auto& name
: names
) {
213 f
.dump_string("name", name
);
217 } else if (what
.substr(0, 6) == "config") {
218 with_gil_t with_gil
{no_gil
};
219 if (what
== "config_options") {
220 g_conf().config_options(&f
);
221 } else if (what
== "config") {
222 g_conf().show_config(&f
);
225 } else if (what
== "mon_map") {
226 return cluster_state
.with_monmap([&](const MonMap
&monmap
) {
227 with_gil_t with_gil
{no_gil
};
231 } else if (what
== "service_map") {
232 return cluster_state
.with_servicemap([&](const ServiceMap
&service_map
) {
233 with_gil_t with_gil
{no_gil
};
234 service_map
.dump(&f
);
237 } else if (what
== "osd_metadata") {
238 auto dmc
= daemon_state
.get_by_service("osd");
239 for (const auto &[key
, state
] : dmc
) {
240 std::lock_guard
l(state
->lock
);
241 with_gil(no_gil
, [&f
, &name
=key
.name
, state
=state
] {
242 f
.open_object_section(name
.c_str());
243 f
.dump_string("hostname", state
->hostname
);
244 for (const auto &[name
, val
] : state
->metadata
) {
245 f
.dump_string(name
.c_str(), val
);
250 return with_gil(no_gil
, [&] { return f
.get(); });
251 } else if (what
== "mds_metadata") {
252 auto dmc
= daemon_state
.get_by_service("mds");
253 for (const auto &[key
, state
] : dmc
) {
254 std::lock_guard
l(state
->lock
);
255 with_gil(no_gil
, [&f
, &name
=key
.name
, state
=state
] {
256 f
.open_object_section(name
.c_str());
257 f
.dump_string("hostname", state
->hostname
);
258 for (const auto &[name
, val
] : state
->metadata
) {
259 f
.dump_string(name
.c_str(), val
);
264 return with_gil(no_gil
, [&] { return f
.get(); });
265 } else if (what
== "pg_summary") {
266 return cluster_state
.with_pgmap(
267 [&f
, &no_gil
](const PGMap
&pg_map
) {
268 std::map
<std::string
, std::map
<std::string
, uint32_t> > osds
;
269 std::map
<std::string
, std::map
<std::string
, uint32_t> > pools
;
270 std::map
<std::string
, uint32_t> all
;
271 for (const auto &i
: pg_map
.pg_stat
) {
272 const auto pool
= i
.first
.m_pool
;
273 const std::string state
= pg_state_string(i
.second
.state
);
274 // Insert to per-pool map
275 pools
[stringify(pool
)][state
]++;
276 for (const auto &osd_id
: i
.second
.acting
) {
277 osds
[stringify(osd_id
)][state
]++;
281 with_gil_t with_gil
{no_gil
};
282 f
.open_object_section("by_osd");
283 for (const auto &i
: osds
) {
284 f
.open_object_section(i
.first
.c_str());
285 for (const auto &j
: i
.second
) {
286 f
.dump_int(j
.first
.c_str(), j
.second
);
291 f
.open_object_section("by_pool");
292 for (const auto &i
: pools
) {
293 f
.open_object_section(i
.first
.c_str());
294 for (const auto &j
: i
.second
) {
295 f
.dump_int(j
.first
.c_str(), j
.second
);
300 f
.open_object_section("all");
301 for (const auto &i
: all
) {
302 f
.dump_int(i
.first
.c_str(), i
.second
);
305 f
.open_object_section("pg_stats_sum");
306 pg_map
.pg_sum
.dump(&f
);
311 } else if (what
== "pg_status") {
312 return cluster_state
.with_pgmap(
313 [&](const PGMap
&pg_map
) {
314 with_gil_t with_gil
{no_gil
};
315 pg_map
.print_summary(&f
, nullptr);
319 } else if (what
== "pg_dump") {
320 return cluster_state
.with_pgmap(
321 [&](const PGMap
&pg_map
) {
322 with_gil_t with_gil
{no_gil
};
323 pg_map
.dump(&f
, false);
327 } else if (what
== "devices") {
328 daemon_state
.with_devices2(
330 with_gil(no_gil
, [&] { f
.open_array_section("devices"); });
332 [&](const DeviceState
&dev
) {
333 with_gil(no_gil
, [&] { f
.dump_object("device", dev
); });
335 return with_gil(no_gil
, [&] {
339 } else if (what
.size() > 7 &&
340 what
.substr(0, 7) == "device ") {
341 string devid
= what
.substr(7);
342 if (!daemon_state
.with_device(devid
,
343 [&] (const DeviceState
& dev
) {
344 with_gil_t with_gil
{no_gil
};
345 f
.dump_object("device", dev
);
349 return with_gil(no_gil
, [&] { return f
.get(); });
350 } else if (what
== "io_rate") {
351 return cluster_state
.with_pgmap(
352 [&](const PGMap
&pg_map
) {
353 with_gil_t with_gil
{no_gil
};
354 pg_map
.dump_delta(&f
);
358 } else if (what
== "df") {
359 return cluster_state
.with_osdmap_and_pgmap(
361 const OSDMap
& osd_map
,
362 const PGMap
&pg_map
) {
363 with_gil_t with_gil
{no_gil
};
364 pg_map
.dump_cluster_stats(nullptr, &f
, true);
365 pg_map
.dump_pool_stats_full(osd_map
, nullptr, &f
, true);
368 } else if (what
== "pg_stats") {
369 return cluster_state
.with_pgmap([&](const PGMap
&pg_map
) {
370 with_gil_t with_gil
{no_gil
};
371 pg_map
.dump_pg_stats(&f
, false);
374 } else if (what
== "pool_stats") {
375 return cluster_state
.with_pgmap([&](const PGMap
&pg_map
) {
376 with_gil_t with_gil
{no_gil
};
377 pg_map
.dump_pool_stats(&f
);
380 } else if (what
== "pg_ready") {
381 with_gil_t with_gil
{no_gil
};
382 server
.dump_pg_ready(&f
);
384 } else if (what
== "osd_stats") {
385 return cluster_state
.with_pgmap([&](const PGMap
&pg_map
) {
386 with_gil_t with_gil
{no_gil
};
387 pg_map
.dump_osd_stats(&f
, false);
390 } else if (what
== "osd_ping_times") {
391 return cluster_state
.with_pgmap([&](const PGMap
&pg_map
) {
392 with_gil_t with_gil
{no_gil
};
393 pg_map
.dump_osd_ping_times(&f
);
396 } else if (what
== "osd_pool_stats") {
397 int64_t poolid
= -ENOENT
;
398 return cluster_state
.with_osdmap_and_pgmap([&](const OSDMap
& osdmap
,
399 const PGMap
& pg_map
) {
400 with_gil_t with_gil
{no_gil
};
401 f
.open_array_section("pool_stats");
402 for (auto &p
: osdmap
.get_pools()) {
404 pg_map
.dump_pool_stats_and_io_rate(poolid
, osdmap
, &f
, nullptr);
409 } else if (what
== "health") {
410 return cluster_state
.with_health([&](const ceph::bufferlist
&health_json
) {
411 with_gil_t with_gil
{no_gil
};
412 f
.dump_string("json", health_json
.to_str());
415 } else if (what
== "mon_status") {
416 return cluster_state
.with_mon_status(
417 [&](const ceph::bufferlist
&mon_status_json
) {
418 with_gil_t with_gil
{no_gil
};
419 f
.dump_string("json", mon_status_json
.to_str());
422 } else if (what
== "mgr_map") {
423 return cluster_state
.with_mgrmap([&](const MgrMap
&mgr_map
) {
424 with_gil_t with_gil
{no_gil
};
428 } else if (what
== "mgr_ips") {
429 entity_addrvec_t myaddrs
= server
.get_myaddrs();
430 with_gil_t with_gil
{no_gil
};
431 f
.open_array_section("ips");
432 std::set
<std::string
> did
;
433 for (auto& i
: myaddrs
.v
) {
434 std::string ip
= i
.ip_only_to_str();
435 if (auto [where
, inserted
] = did
.insert(ip
); inserted
) {
436 f
.dump_string("ip", ip
);
441 } else if (what
== "have_local_config_map") {
442 with_gil_t with_gil
{no_gil
};
443 f
.dump_bool("have_local_config_map", have_local_config_map
);
446 derr
<< "Python module requested unknown data '" << what
<< "'" << dendl
;
447 with_gil_t with_gil
{no_gil
};
452 void ActivePyModules::start_one(PyModuleRef py_module
)
454 std::lock_guard
l(lock
);
456 const auto name
= py_module
->get_name();
457 auto active_module
= std::make_shared
<ActivePyModule
>(py_module
, clog
);
459 pending_modules
.insert(name
);
460 // Send all python calls down a Finisher to avoid blocking
461 // C++ code, and avoid any potential lock cycles.
462 finisher
.queue(new LambdaContext([this, active_module
, name
](int) {
463 int r
= active_module
->load(this);
464 std::lock_guard
l(lock
);
465 pending_modules
.erase(name
);
467 derr
<< "Failed to run module in active mode ('" << name
<< "')"
470 auto em
= modules
.emplace(name
, active_module
);
471 ceph_assert(em
.second
); // actually inserted
473 dout(4) << "Starting thread for " << name
<< dendl
;
474 active_module
->thread
.create(active_module
->get_thread_name());
479 void ActivePyModules::shutdown()
481 std::lock_guard
locker(lock
);
483 // Signal modules to drop out of serve() and/or tear down resources
484 for (auto& [name
, module
] : modules
) {
486 dout(10) << "calling module " << name
<< " shutdown()" << dendl
;
488 dout(10) << "module " << name
<< " shutdown() returned" << dendl
;
492 // For modules implementing serve(), finish the threads where we
493 // were running that.
494 for (auto& [name
, module
] : modules
) {
496 dout(10) << "joining module " << name
<< dendl
;
497 module
->thread
.join();
498 dout(10) << "joined module " << name
<< dendl
;
502 cmd_finisher
.wait_for_empty();
508 void ActivePyModules::notify_all(const std::string
¬ify_type
,
509 const std::string
¬ify_id
)
511 std::lock_guard
l(lock
);
513 dout(10) << __func__
<< ": notify_all " << notify_type
<< dendl
;
514 for (auto& [name
, module
] : modules
) {
515 // Send all python calls down a Finisher to avoid blocking
516 // C++ code, and avoid any potential lock cycles.
517 dout(15) << "queuing notify to " << name
<< dendl
;
518 // workaround for https://bugs.llvm.org/show_bug.cgi?id=35984
519 finisher
.queue(new LambdaContext([module
=module
, notify_type
, notify_id
]
521 module
->notify(notify_type
, notify_id
);
526 void ActivePyModules::notify_all(const LogEntry
&log_entry
)
528 std::lock_guard
l(lock
);
530 dout(10) << __func__
<< ": notify_all (clog)" << dendl
;
531 for (auto& [name
, module
] : modules
) {
532 // Send all python calls down a Finisher to avoid blocking
533 // C++ code, and avoid any potential lock cycles.
535 // Note intentional use of non-reference lambda binding on
536 // log_entry: we take a copy because caller's instance is
537 // probably ephemeral.
538 dout(15) << "queuing notify (clog) to " << name
<< dendl
;
539 // workaround for https://bugs.llvm.org/show_bug.cgi?id=35984
540 finisher
.queue(new LambdaContext([module
=module
, log_entry
](int r
){
541 module
->notify_clog(log_entry
);
546 bool ActivePyModules::get_store(const std::string
&module_name
,
547 const std::string
&key
, std::string
*val
) const
549 without_gil_t no_gil
;
550 std::lock_guard
l(lock
);
552 const std::string global_key
= PyModule::mgr_store_prefix
553 + module_name
+ "/" + key
;
555 dout(4) << __func__
<< " key: " << global_key
<< dendl
;
557 auto i
= store_cache
.find(global_key
);
558 if (i
!= store_cache
.end()) {
566 PyObject
*ActivePyModules::dispatch_remote(
567 const std::string
&other_module
,
568 const std::string
&method
,
573 auto mod_iter
= modules
.find(other_module
);
574 ceph_assert(mod_iter
!= modules
.end());
576 return mod_iter
->second
->dispatch_remote(method
, args
, kwargs
, err
);
579 bool ActivePyModules::get_config(const std::string
&module_name
,
580 const std::string
&key
, std::string
*val
) const
582 const std::string global_key
= "mgr/" + module_name
+ "/" + key
;
584 dout(20) << " key: " << global_key
<< dendl
;
586 std::lock_guard
lock(module_config
.lock
);
588 auto i
= module_config
.config
.find(global_key
);
589 if (i
!= module_config
.config
.end()) {
597 PyObject
*ActivePyModules::get_typed_config(
598 const std::string
&module_name
,
599 const std::string
&key
,
600 const std::string
&prefix
) const
602 without_gil_t no_gil
;
604 std::string final_key
;
607 final_key
= prefix
+ "/" + key
;
608 found
= get_config(module_name
, final_key
, &value
);
612 found
= get_config(module_name
, final_key
, &value
);
615 PyModuleRef module
= py_module_registry
.get_module(module_name
);
616 with_gil_t with_gil
{no_gil
};
618 derr
<< "Module '" << module_name
<< "' is not available" << dendl
;
621 // removing value to hide sensitive data going into mgr logs
622 // leaving this for debugging purposes
623 // dout(10) << __func__ << " " << final_key << " found: " << value << dendl;
624 dout(10) << __func__
<< " " << final_key
<< " found" << dendl
;
625 return module
->get_typed_option_value(key
, value
);
628 dout(10) << " [" << prefix
<< "/]" << key
<< " not found "
631 dout(10) << " " << key
<< " not found " << dendl
;
633 with_gil_t with_gil
{no_gil
};
637 PyObject
*ActivePyModules::get_store_prefix(const std::string
&module_name
,
638 const std::string
&prefix
) const
640 without_gil_t no_gil
;
641 std::lock_guard
l(lock
);
642 std::lock_guard
lock(module_config
.lock
);
644 const std::string base_prefix
= PyModule::mgr_store_prefix
646 const std::string global_prefix
= base_prefix
+ prefix
;
647 dout(4) << __func__
<< " prefix: " << global_prefix
<< dendl
;
649 return with_gil(no_gil
, [&] {
651 for (auto p
= store_cache
.lower_bound(global_prefix
);
652 p
!= store_cache
.end() && p
->first
.find(global_prefix
) == 0; ++p
) {
653 f
.dump_string(p
->first
.c_str() + base_prefix
.size(), p
->second
);
659 void ActivePyModules::set_store(const std::string
&module_name
,
660 const std::string
&key
, const boost::optional
<std::string
>& val
)
662 const std::string global_key
= PyModule::mgr_store_prefix
663 + module_name
+ "/" + key
;
667 std::lock_guard
l(lock
);
669 // NOTE: this isn't strictly necessary since we'll also get an MKVData
670 // update from the mon due to our subscription *before* our command is acked.
672 store_cache
[global_key
] = *val
;
674 store_cache
.erase(global_key
);
677 std::ostringstream cmd_json
;
679 jf
.open_object_section("cmd");
681 jf
.dump_string("prefix", "config-key set");
682 jf
.dump_string("key", global_key
);
683 jf
.dump_string("val", *val
);
685 jf
.dump_string("prefix", "config-key del");
686 jf
.dump_string("key", global_key
);
690 set_cmd
.run(&monc
, cmd_json
.str());
694 if (set_cmd
.r
!= 0) {
695 // config-key set will fail if mgr's auth key has insufficient
696 // permission to set config keys
697 // FIXME: should this somehow raise an exception back into Python land?
698 dout(0) << "`config-key set " << global_key
<< " " << val
<< "` failed: "
699 << cpp_strerror(set_cmd
.r
) << dendl
;
700 dout(0) << "mon returned " << set_cmd
.r
<< ": " << set_cmd
.outs
<< dendl
;
704 void ActivePyModules::set_config(const std::string
&module_name
,
705 const std::string
&key
, const boost::optional
<std::string
>& val
)
707 module_config
.set_config(&monc
, module_name
, key
, val
);
710 std::map
<std::string
, std::string
> ActivePyModules::get_services() const
712 std::map
<std::string
, std::string
> result
;
713 std::lock_guard
l(lock
);
714 for (const auto& [name
, module
] : modules
) {
715 std::string svc_str
= module
->get_uri();
716 if (!svc_str
.empty()) {
717 result
[name
] = svc_str
;
724 void ActivePyModules::update_kv_data(
725 const std::string prefix
,
727 const map
<std::string
, boost::optional
<bufferlist
>, std::less
<>>& data
)
729 std::lock_guard
l(lock
);
730 bool do_config
= false;
732 dout(10) << "full update on " << prefix
<< dendl
;
733 auto p
= store_cache
.lower_bound(prefix
);
734 while (p
!= store_cache
.end() && p
->first
.find(prefix
) == 0) {
735 dout(20) << " rm prior " << p
->first
<< dendl
;
736 p
= store_cache
.erase(p
);
739 dout(10) << "incremental update on " << prefix
<< dendl
;
741 for (auto& i
: data
) {
743 dout(20) << " set " << i
.first
<< " = " << i
.second
->to_str() << dendl
;
744 store_cache
[i
.first
] = i
.second
->to_str();
746 dout(20) << " rm " << i
.first
<< dendl
;
747 store_cache
.erase(i
.first
);
749 if (i
.first
.find("config/") == 0) {
754 _refresh_config_map();
758 void ActivePyModules::_refresh_config_map()
762 for (auto p
= store_cache
.lower_bound("config/");
763 p
!= store_cache
.end() && p
->first
.find("config/") == 0;
765 string key
= p
->first
.substr(7);
766 if (key
.find("mgr/") == 0) {
767 // NOTE: for now, we ignore module options. see also ceph_foreign_option_get().
770 string value
= p
->second
;
773 config_map
.parse_key(key
, &name
, &who
);
775 const Option
*opt
= g_conf().find_option(name
);
777 config_map
.stray_options
.push_back(
778 std::unique_ptr
<Option
>(
779 new Option(name
, Option::TYPE_STR
, Option::LEVEL_UNKNOWN
)));
780 opt
= config_map
.stray_options
.back().get();
784 int r
= opt
->pre_validate(&value
, &err
);
786 dout(10) << __func__
<< " pre-validate failed on '" << name
<< "' = '"
787 << value
<< "' for " << name
<< dendl
;
790 MaskedOption
mopt(opt
);
791 mopt
.raw_value
= value
;
794 !ConfigMap::parse_mask(who
, §ion_name
, &mopt
.mask
)) {
795 derr
<< __func__
<< " invalid mask for key " << key
<< dendl
;
796 } else if (opt
->has_flag(Option::FLAG_NO_MON_UPDATE
)) {
797 dout(10) << __func__
<< " NO_MON_UPDATE option '"
798 << name
<< "' = '" << value
<< "' for " << name
801 Section
*section
= &config_map
.global
;;
802 if (section_name
.size() && section_name
!= "global") {
803 if (section_name
.find('.') != std::string::npos
) {
804 section
= &config_map
.by_id
[section_name
];
806 section
= &config_map
.by_type
[section_name
];
809 section
->options
.insert(make_pair(name
, std::move(mopt
)));
814 PyObject
* ActivePyModules::with_perf_counters(
815 std::function
<void(PerfCounterInstance
& counter_instance
, PerfCounterType
& counter_type
, PyFormatter
& f
)> fct
,
816 const std::string
&svc_name
,
817 const std::string
&svc_id
,
818 const std::string
&path
) const
821 f
.open_array_section(path
);
823 without_gil_t no_gil
;
824 std::lock_guard
l(lock
);
825 auto metadata
= daemon_state
.get(DaemonKey
{svc_name
, svc_id
});
827 std::lock_guard
l2(metadata
->lock
);
828 if (metadata
->perf_counters
.instances
.count(path
)) {
829 auto counter_instance
= metadata
->perf_counters
.instances
.at(path
);
830 auto counter_type
= metadata
->perf_counters
.types
.at(path
);
831 with_gil(no_gil
, [&] {
832 fct(counter_instance
, counter_type
, f
);
835 dout(4) << "Missing counter: '" << path
<< "' ("
836 << svc_name
<< "." << svc_id
<< ")" << dendl
;
837 dout(20) << "Paths are:" << dendl
;
838 for (const auto &i
: metadata
->perf_counters
.instances
) {
839 dout(20) << i
.first
<< dendl
;
843 dout(4) << "No daemon state for " << svc_name
<< "." << svc_id
<< ")"
851 PyObject
* ActivePyModules::get_counter_python(
852 const std::string
&svc_name
,
853 const std::string
&svc_id
,
854 const std::string
&path
)
856 auto extract_counters
= [](
857 PerfCounterInstance
& counter_instance
,
858 PerfCounterType
& counter_type
,
861 if (counter_type
.type
& PERFCOUNTER_LONGRUNAVG
) {
862 const auto &avg_data
= counter_instance
.get_data_avg();
863 for (const auto &datapoint
: avg_data
) {
864 f
.open_array_section("datapoint");
865 f
.dump_float("t", datapoint
.t
);
866 f
.dump_unsigned("s", datapoint
.s
);
867 f
.dump_unsigned("c", datapoint
.c
);
871 const auto &data
= counter_instance
.get_data();
872 for (const auto &datapoint
: data
) {
873 f
.open_array_section("datapoint");
874 f
.dump_float("t", datapoint
.t
);
875 f
.dump_unsigned("v", datapoint
.v
);
880 return with_perf_counters(extract_counters
, svc_name
, svc_id
, path
);
883 PyObject
* ActivePyModules::get_latest_counter_python(
884 const std::string
&svc_name
,
885 const std::string
&svc_id
,
886 const std::string
&path
)
888 auto extract_latest_counters
= [](
889 PerfCounterInstance
& counter_instance
,
890 PerfCounterType
& counter_type
,
893 if (counter_type
.type
& PERFCOUNTER_LONGRUNAVG
) {
894 const auto &datapoint
= counter_instance
.get_latest_data_avg();
895 f
.dump_float("t", datapoint
.t
);
896 f
.dump_unsigned("s", datapoint
.s
);
897 f
.dump_unsigned("c", datapoint
.c
);
899 const auto &datapoint
= counter_instance
.get_latest_data();
900 f
.dump_float("t", datapoint
.t
);
901 f
.dump_unsigned("v", datapoint
.v
);
904 return with_perf_counters(extract_latest_counters
, svc_name
, svc_id
, path
);
907 PyObject
* ActivePyModules::get_perf_schema_python(
908 const std::string
&svc_type
,
909 const std::string
&svc_id
)
911 without_gil_t no_gil
;
912 std::lock_guard
l(lock
);
914 DaemonStateCollection daemons
;
916 if (svc_type
== "") {
917 daemons
= daemon_state
.get_all();
918 } else if (svc_id
.empty()) {
919 daemons
= daemon_state
.get_by_service(svc_type
);
921 auto key
= DaemonKey
{svc_type
, svc_id
};
922 // so that the below can be a loop in all cases
923 auto got
= daemon_state
.get(key
);
924 if (got
!= nullptr) {
929 auto f
= with_gil(no_gil
, [&] {
930 return PyFormatter();
932 if (!daemons
.empty()) {
933 for (auto& [key
, state
] : daemons
) {
934 std::lock_guard
l(state
->lock
);
935 with_gil(no_gil
, [&, key
=ceph::to_string(key
), state
=state
] {
936 f
.open_object_section(key
.c_str());
937 for (auto ctr_inst_iter
: state
->perf_counters
.instances
) {
938 const auto &counter_name
= ctr_inst_iter
.first
;
939 f
.open_object_section(counter_name
.c_str());
940 auto type
= state
->perf_counters
.types
[counter_name
];
941 f
.dump_string("description", type
.description
);
942 if (!type
.nick
.empty()) {
943 f
.dump_string("nick", type
.nick
);
945 f
.dump_unsigned("type", type
.type
);
946 f
.dump_unsigned("priority", type
.priority
);
947 f
.dump_unsigned("units", type
.unit
);
954 dout(4) << __func__
<< ": No daemon state found for "
955 << svc_type
<< "." << svc_id
<< ")" << dendl
;
960 PyObject
*ActivePyModules::get_context()
962 auto l
= without_gil([&] {
963 return std::lock_guard(lock
);
965 // Construct a capsule containing ceph context.
966 // Not incrementing/decrementing ref count on the context because
967 // it's the global one and it has process lifetime.
968 auto capsule
= PyCapsule_New(g_ceph_context
, nullptr, nullptr);
973 * Helper for our wrapped types that take a capsule in their constructor.
975 PyObject
*construct_with_capsule(
976 const std::string
&module_name
,
977 const std::string
&clsname
,
980 // Look up the OSDMap type which we will construct
981 PyObject
*module
= PyImport_ImportModule(module_name
.c_str());
983 derr
<< "Failed to import python module:" << dendl
;
984 derr
<< handle_pyerror() << dendl
;
988 PyObject
*wrapper_type
= PyObject_GetAttrString(
989 module
, (const char*)clsname
.c_str());
991 derr
<< "Failed to get python type:" << dendl
;
992 derr
<< handle_pyerror() << dendl
;
994 ceph_assert(wrapper_type
);
996 // Construct a capsule containing an OSDMap.
997 auto wrapped_capsule
= PyCapsule_New(wrapped
, nullptr, nullptr);
998 ceph_assert(wrapped_capsule
);
1000 // Construct the python OSDMap
1001 auto pArgs
= PyTuple_Pack(1, wrapped_capsule
);
1002 auto wrapper_instance
= PyObject_CallObject(wrapper_type
, pArgs
);
1003 if (wrapper_instance
== nullptr) {
1004 derr
<< "Failed to construct python OSDMap:" << dendl
;
1005 derr
<< handle_pyerror() << dendl
;
1007 ceph_assert(wrapper_instance
!= nullptr);
1009 Py_DECREF(wrapped_capsule
);
1011 Py_DECREF(wrapper_type
);
1014 return wrapper_instance
;
1017 PyObject
*ActivePyModules::get_osdmap()
1019 auto newmap
= without_gil([&] {
1020 OSDMap
*newmap
= new OSDMap
;
1021 cluster_state
.with_osdmap([&](const OSDMap
& o
) {
1022 newmap
->deepish_copy_from(o
);
1026 return construct_with_capsule("mgr_module", "OSDMap", (void*)newmap
);
1029 PyObject
*ActivePyModules::get_foreign_config(
1030 const std::string
& who
,
1031 const std::string
& name
)
1033 dout(10) << "ceph_foreign_option_get " << who
<< " " << name
<< dendl
;
1035 // NOTE: for now this will only work with build-in options, not module options.
1036 const Option
*opt
= g_conf().find_option(name
);
1038 dout(4) << "ceph_foreign_option_get " << name
<< " not found " << dendl
;
1039 PyErr_Format(PyExc_KeyError
, "option not found: %s", name
.c_str());
1043 // If the monitors are not yet running pacific, we cannot rely on our local
1045 if (!have_local_config_map
) {
1046 dout(20) << "mon cluster wasn't pacific when we started: falling back to 'config get'"
1048 without_gil_t no_gil
;
1051 std::lock_guard
l(lock
);
1054 "{\"prefix\": \"config get\","s
+
1055 "\"who\": \""s
+ who
+ "\","s
+
1056 "\"key\": \""s
+ name
+ "\"}");
1059 dout(10) << "ceph_foreign_option_get (mon command) " << who
<< " " << name
<< " = "
1060 << cmd
.outbl
.to_str() << dendl
;
1061 with_gil_t
gil(no_gil
);
1062 return get_python_typed_option_value(opt
->type
, cmd
.outbl
.to_str());
1065 // mimic the behavor of mon/ConfigMonitor's 'config get' command
1067 if (!entity
.from_str(who
) &&
1068 !entity
.from_str(who
+ ".")) {
1069 dout(5) << "unrecognized entity '" << who
<< "'" << dendl
;
1070 PyErr_Format(PyExc_KeyError
, "invalid entity: %s", who
.c_str());
1074 without_gil_t no_gil
;
1077 // FIXME: this is super inefficient, since we generate the entire daemon
1078 // config just to extract one value from it!
1080 std::map
<std::string
,std::string
,std::less
<>> config
;
1081 cluster_state
.with_osdmap([&](const OSDMap
&osdmap
) {
1082 map
<string
,string
> crush_location
;
1083 string device_class
;
1084 if (entity
.is_osd()) {
1085 osdmap
.crush
->get_full_location(who
, &crush_location
);
1086 int id
= atoi(entity
.get_id().c_str());
1087 const char *c
= osdmap
.crush
->get_item_class(id
);
1091 dout(10) << __func__
<< " crush_location " << crush_location
1092 << " class " << device_class
<< dendl
;
1095 std::map
<std::string
,pair
<std::string
,const MaskedOption
*>> src
;
1096 config
= config_map
.generate_entity_map(
1104 // get a single value
1106 auto p
= config
.find(name
);
1107 if (p
!= config
.end()) {
1110 if (!entity
.is_client() &&
1111 !boost::get
<boost::blank
>(&opt
->daemon_value
)) {
1112 value
= Option::to_str(opt
->daemon_value
);
1114 value
= Option::to_str(opt
->value
);
1118 dout(10) << "ceph_foreign_option_get (configmap) " << who
<< " " << name
<< " = "
1121 with_gil_t
with_gil(no_gil
);
1122 return get_python_typed_option_value(opt
->type
, value
);
1125 void ActivePyModules::set_health_checks(const std::string
& module_name
,
1126 health_check_map_t
&& checks
)
1128 bool changed
= false;
1131 auto p
= modules
.find(module_name
);
1132 if (p
!= modules
.end()) {
1133 changed
= p
->second
->set_health_checks(std::move(checks
));
1137 // immediately schedule a report to be sent to the monitors with the new
1138 // health checks that have changed. This is done asynchronusly to avoid
1139 // blocking python land. ActivePyModules::lock needs to be dropped to make
1142 // send_report callers: DaemonServer::lock -> PyModuleRegistery::lock
1143 // active_start: PyModuleRegistry::lock -> ActivePyModules::lock
1145 // if we don't release this->lock before calling schedule_tick a cycle is
1146 // formed with the addition of ActivePyModules::lock -> DaemonServer::lock.
1147 // This is still correct as send_report is run asynchronously under
1148 // DaemonServer::lock.
1150 server
.schedule_tick(0);
// Dispatch a mgr command to the python module that registered it.
// Returns the module's result code; writes command output to *ds and
// error text to *ss.
// NOTE(review): truncated extract — upstream lines 1160-1161 (brace /
// lock), 1165-1169 (the error branch's logging + early return and its
// closing brace) and the tail of the final call (the `inbuf, ds, ss);`
// argument lines after 1170) are not visible here.
1153 int ActivePyModules::handle_command(
1154 const ModuleCommand
& module_command
,
1155 const MgrSession
& session
,
1156 const cmdmap_t
&cmdmap
,
1157 const bufferlist
&inbuf
,
1158 std::stringstream
*ds
,
1159 std::stringstream
*ss
)
// Find the module that owns this command; commands are routed by the
// registering module's name.
1162 auto mod_iter
= modules
.find(module_command
.module_name
);
1163 if (mod_iter
== modules
.end()) {
// Module not active (failed to load, or not enabled): report via *ss.
1164 *ss
<< "Module '" << module_command
.module_name
<< "' is not available";
// Forward everything to the module's own handler.
1170 return mod_iter
->second
->handle_command(module_command
, session
, cmdmap
,
1174 void ActivePyModules::get_health_checks(health_check_map_t
*checks
)
1176 std::lock_guard
l(lock
);
1177 for (auto& [name
, module
] : modules
) {
1178 dout(15) << "getting health checks for " << name
<< dendl
;
1179 module
->get_health_checks(checks
);
1183 void ActivePyModules::update_progress_event(
1184 const std::string
& evid
,
1185 const std::string
& desc
,
1189 std::lock_guard
l(lock
);
1190 auto& pe
= progress_events
[evid
];
1192 pe
.progress
= progress
;
1193 pe
.add_to_ceph_s
= add_to_ceph_s
;
1196 void ActivePyModules::complete_progress_event(const std::string
& evid
)
1198 std::lock_guard
l(lock
);
1199 progress_events
.erase(evid
);
1202 void ActivePyModules::clear_all_progress_events()
1204 std::lock_guard
l(lock
);
1205 progress_events
.clear();
1208 void ActivePyModules::get_progress_events(std::map
<std::string
,ProgressEvent
> *events
)
1210 std::lock_guard
l(lock
);
1211 *events
= progress_events
;
1214 void ActivePyModules::config_notify()
1216 std::lock_guard
l(lock
);
1217 for (auto& [name
, module
] : modules
) {
1218 // Send all python calls down a Finisher to avoid blocking
1219 // C++ code, and avoid any potential lock cycles.
1220 dout(15) << "notify (config) " << name
<< dendl
;
1221 // workaround for https://bugs.llvm.org/show_bug.cgi?id=35984
1222 finisher
.queue(new LambdaContext([module
=module
](int r
){
1223 module
->config_notify();
1228 void ActivePyModules::set_uri(const std::string
& module_name
,
1229 const std::string
&uri
)
1231 std::lock_guard
l(lock
);
1233 dout(4) << " module " << module_name
<< " set URI '" << uri
<< "'" << dendl
;
1235 modules
.at(module_name
)->set_uri(uri
);
// Record a device's wear level in the mgr's in-memory DeviceState, then
// persist the device metadata to the monitors via a "config-key set"
// command (see the JSON prefix below).
// NOTE(review): heavily truncated extract — upstream lines 1239-1241
// (the wear_level parameter and opening brace), 1244 (the `devid`
// argument to with_device), 1248-1250, 1254-1255 (bufferlist decl),
// 1257-1258 and 1261-1263 (command-string assembly / Command decl), and
// the trailing wait are not visible.  Verify against upstream before
// changing.
1238 void ActivePyModules::set_device_wear_level(const std::string
& devid
,
// Snapshot of the device metadata, filled in by the lambda below while
// daemon_state holds its own locking.
1242 map
<string
,string
> meta
;
1243 daemon_state
.with_device(
1245 [wear_level
, &meta
] (DeviceState
& dev
) {
1246 dev
.set_wear_level(wear_level
);
1247 meta
= dev
.metadata
;
// Serialize the (updated) metadata as JSON for the mon command payload.
1251 json_spirit::Object json_object
;
1252 for (auto& i
: meta
) {
1253 json_spirit::Config::add(json_object
, i
.first
, i
.second
);
1256 json
.append(json_spirit::write(json_object
));
// Store under the per-device config-key: "device/<devid>".
1259 "\"prefix\": \"config-key set\", "
1260 "\"key\": \"device/" + devid
+ "\""
// Fire the command at the monitors.
1264 set_cmd
.run(&monc
, cmd
, json
);
1268 MetricQueryID
ActivePyModules::add_osd_perf_query(
1269 const OSDPerfMetricQuery
&query
,
1270 const std::optional
<OSDPerfMetricLimit
> &limit
)
1272 return server
.add_osd_perf_query(query
, limit
);
1275 void ActivePyModules::remove_osd_perf_query(MetricQueryID query_id
)
1277 int r
= server
.remove_osd_perf_query(query_id
);
1279 dout(0) << "remove_osd_perf_query for query_id=" << query_id
<< " failed: "
1280 << cpp_strerror(r
) << dendl
;
1284 PyObject
*ActivePyModules::get_osd_perf_counters(MetricQueryID query_id
)
1286 OSDPerfCollector
collector(query_id
);
1287 int r
= server
.get_osd_perf_counters(&collector
);
1289 dout(0) << "get_osd_perf_counters for query_id=" << query_id
<< " failed: "
1290 << cpp_strerror(r
) << dendl
;
1295 const std::map
<OSDPerfMetricKey
, PerformanceCounters
> &counters
= collector
.counters
;
1297 f
.open_array_section("counters");
1298 for (auto &[key
, instance_counters
] : counters
) {
1299 f
.open_object_section("i");
1300 f
.open_array_section("k");
1301 for (auto &sub_key
: key
) {
1302 f
.open_array_section("s");
1303 for (size_t i
= 0; i
< sub_key
.size(); i
++) {
1304 f
.dump_string(stringify(i
).c_str(), sub_key
[i
]);
1306 f
.close_section(); // s
1308 f
.close_section(); // k
1309 f
.open_array_section("c");
1310 for (auto &c
: instance_counters
) {
1311 f
.open_array_section("p");
1312 f
.dump_unsigned("0", c
.first
);
1313 f
.dump_unsigned("1", c
.second
);
1314 f
.close_section(); // p
1316 f
.close_section(); // c
1317 f
.close_section(); // i
1319 f
.close_section(); // counters
1324 MetricQueryID
ActivePyModules::add_mds_perf_query(
1325 const MDSPerfMetricQuery
&query
,
1326 const std::optional
<MDSPerfMetricLimit
> &limit
)
1328 return server
.add_mds_perf_query(query
, limit
);
1331 void ActivePyModules::remove_mds_perf_query(MetricQueryID query_id
)
1333 int r
= server
.remove_mds_perf_query(query_id
);
1335 dout(0) << "remove_mds_perf_query for query_id=" << query_id
<< " failed: "
1336 << cpp_strerror(r
) << dendl
;
1340 PyObject
*ActivePyModules::get_mds_perf_counters(MetricQueryID query_id
)
1342 MDSPerfCollector
collector(query_id
);
1343 int r
= server
.get_mds_perf_counters(&collector
);
1345 dout(0) << "get_mds_perf_counters for query_id=" << query_id
<< " failed: "
1346 << cpp_strerror(r
) << dendl
;
1351 const std::map
<MDSPerfMetricKey
, PerformanceCounters
> &counters
= collector
.counters
;
1353 f
.open_array_section("metrics");
1355 f
.open_array_section("delayed_ranks");
1356 f
.dump_string("ranks", stringify(collector
.delayed_ranks
).c_str());
1357 f
.close_section(); // delayed_ranks
1359 f
.open_array_section("counters");
1360 for (auto &[key
, instance_counters
] : counters
) {
1361 f
.open_object_section("i");
1362 f
.open_array_section("k");
1363 for (auto &sub_key
: key
) {
1364 f
.open_array_section("s");
1365 for (size_t i
= 0; i
< sub_key
.size(); i
++) {
1366 f
.dump_string(stringify(i
).c_str(), sub_key
[i
]);
1368 f
.close_section(); // s
1370 f
.close_section(); // k
1371 f
.open_array_section("c");
1372 for (auto &c
: instance_counters
) {
1373 f
.open_array_section("p");
1374 f
.dump_unsigned("0", c
.first
);
1375 f
.dump_unsigned("1", c
.second
);
1376 f
.close_section(); // p
1378 f
.close_section(); // c
1379 f
.close_section(); // i
1381 f
.close_section(); // counters
1382 f
.close_section(); // metrics
// Emit `message` at priority `prio` on the named cluster log channel.
// A LogChannel is created per call and configured from the parsed
// log-client options before logging.
// NOTE(review): truncated extract — upstream lines 1400-1401 (local
// declarations), 1405 (the tail of the parse_log_client_options argument
// list and the `== 0` condition close) and 1409 (the tail of the
// update_config argument list) are not visible here.  Verify against
// upstream before editing.
1387 void ActivePyModules::cluster_log(const std::string
&channel
, clog_type prio
,
1388 const std::string
&message
)
1390 std::lock_guard
l(lock
);
// Channel handle for this log line; cheap to create per call.
1392 auto cl
= monc
.get_log_client()->create_channel(channel
);
// Per-channel option maps filled in by parse_log_client_options below.
1393 map
<string
,string
> log_to_monitors
;
1394 map
<string
,string
> log_to_syslog
;
1395 map
<string
,string
> log_channel
;
1396 map
<string
,string
> log_prio
;
1397 map
<string
,string
> log_to_graylog
;
1398 map
<string
,string
> log_to_graylog_host
;
1399 map
<string
,string
> log_to_graylog_port
;
// Only push the parsed configuration into the channel if parsing
// succeeded; otherwise log with the channel's defaults.
1402 if (parse_log_client_options(g_ceph_context
, log_to_monitors
, log_to_syslog
,
1403 log_channel
, log_prio
, log_to_graylog
,
1404 log_to_graylog_host
, log_to_graylog_port
,
1406 cl
->update_config(log_to_monitors
, log_to_syslog
,
1407 log_channel
, log_prio
, log_to_graylog
,
1408 log_to_graylog_host
, log_to_graylog_port
,
1410 cl
->do_log(prio
, message
);
1413 void ActivePyModules::register_client(std::string_view name
, std::string addrs
)
1415 std::lock_guard
l(lock
);
1417 entity_addrvec_t addrv
;
1418 addrv
.parse(addrs
.data());
1420 dout(7) << "registering msgr client handle " << addrv
<< dendl
;
1421 py_module_registry
.register_client(name
, std::move(addrv
));
1424 void ActivePyModules::unregister_client(std::string_view name
, std::string addrs
)
1426 std::lock_guard
l(lock
);
1428 entity_addrvec_t addrv
;
1429 addrv
.parse(addrs
.data());
1431 dout(7) << "unregistering msgr client handle " << addrv
<< dendl
;
1432 py_module_registry
.unregister_client(name
, addrv
);