]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2016 John Spray <john.spray@redhat.com> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | */ | |
13 | ||
14 | #include "DaemonServer.h" | |
f67539c2 | 15 | #include <boost/algorithm/string.hpp> |
b32b8144 | 16 | #include "mgr/Mgr.h" |
7c673cae | 17 | |
b32b8144 | 18 | #include "include/stringify.h" |
31f18b77 | 19 | #include "include/str_list.h" |
7c673cae | 20 | #include "auth/RotatingKeyRing.h" |
7c673cae FG |
21 | #include "json_spirit/json_spirit_writer.h" |
22 | ||
c07f9fc5 | 23 | #include "mgr/mgr_commands.h" |
11fdf7f2 TL |
24 | #include "mgr/DaemonHealthMetricCollector.h" |
25 | #include "mgr/OSDPerfMetricCollector.h" | |
f67539c2 | 26 | #include "mgr/MDSPerfMetricCollector.h" |
c07f9fc5 FG |
27 | #include "mon/MonCommand.h" |
28 | ||
7c673cae | 29 | #include "messages/MMgrOpen.h" |
11fdf7f2 | 30 | #include "messages/MMgrClose.h" |
7c673cae | 31 | #include "messages/MMgrConfigure.h" |
31f18b77 | 32 | #include "messages/MMonMgrReport.h" |
7c673cae FG |
33 | #include "messages/MCommand.h" |
34 | #include "messages/MCommandReply.h" | |
9f95a23c TL |
35 | #include "messages/MMgrCommand.h" |
36 | #include "messages/MMgrCommandReply.h" | |
7c673cae FG |
37 | #include "messages/MPGStats.h" |
38 | #include "messages/MOSDScrub.h" | |
11fdf7f2 | 39 | #include "messages/MOSDScrub2.h" |
c07f9fc5 | 40 | #include "messages/MOSDForceRecovery.h" |
224ce89b | 41 | #include "common/errno.h" |
11fdf7f2 | 42 | #include "common/pick_address.h" |
7c673cae FG |
43 | |
44 | #define dout_context g_ceph_context | |
45 | #define dout_subsys ceph_subsys_mgr | |
46 | #undef dout_prefix | |
47 | #define dout_prefix *_dout << "mgr.server " << __func__ << " " | |
20effc67 | 48 | |
9f95a23c | 49 | using namespace TOPNSPC::common; |
20effc67 TL |
50 | |
51 | using std::list; | |
52 | using std::ostringstream; | |
53 | using std::string; | |
54 | using std::stringstream; | |
55 | using std::vector; | |
56 | using std::unique_ptr; | |
57 | ||
9f95a23c TL |
namespace {
  /**
   * Element-wise equality of two map-like containers.
   *
   * Returns true when both containers hold the same number of entries and
   * every entry of @p lhs equals the entry at the same iteration position
   * of @p rhs (both key and mapped value).
   *
   * The entries are taken by const reference so that no key/value pair is
   * copied during the comparison; this also lets the helper work with
   * mapped types that cannot be copied.
   */
  template <typename Map>
  bool map_compare(Map const &lhs, Map const &rhs) {
    return lhs.size() == rhs.size()
      && std::equal(lhs.begin(), lhs.end(), rhs.begin(),
                    [] (const auto &a, const auto &b) {
                      return a.first == b.first && a.second == b.second;
                    });
  }
}
c07f9fc5 | 66 | |
7c673cae FG |
67 | DaemonServer::DaemonServer(MonClient *monc_, |
68 | Finisher &finisher_, | |
69 | DaemonStateIndex &daemon_state_, | |
70 | ClusterState &cluster_state_, | |
3efd9988 | 71 | PyModuleRegistry &py_modules_, |
7c673cae FG |
72 | LogChannelRef clog_, |
73 | LogChannelRef audit_clog_) | |
74 | : Dispatcher(g_ceph_context), | |
75 | client_byte_throttler(new Throttle(g_ceph_context, "mgr_client_bytes", | |
11fdf7f2 | 76 | g_conf().get_val<Option::size_t>("mgr_client_bytes"))), |
7c673cae | 77 | client_msg_throttler(new Throttle(g_ceph_context, "mgr_client_messages", |
11fdf7f2 | 78 | g_conf().get_val<uint64_t>("mgr_client_messages"))), |
7c673cae | 79 | osd_byte_throttler(new Throttle(g_ceph_context, "mgr_osd_bytes", |
11fdf7f2 | 80 | g_conf().get_val<Option::size_t>("mgr_osd_bytes"))), |
7c673cae | 81 | osd_msg_throttler(new Throttle(g_ceph_context, "mgr_osd_messsages", |
11fdf7f2 | 82 | g_conf().get_val<uint64_t>("mgr_osd_messages"))), |
7c673cae | 83 | mds_byte_throttler(new Throttle(g_ceph_context, "mgr_mds_bytes", |
11fdf7f2 | 84 | g_conf().get_val<Option::size_t>("mgr_mds_bytes"))), |
7c673cae | 85 | mds_msg_throttler(new Throttle(g_ceph_context, "mgr_mds_messsages", |
11fdf7f2 | 86 | g_conf().get_val<uint64_t>("mgr_mds_messages"))), |
7c673cae | 87 | mon_byte_throttler(new Throttle(g_ceph_context, "mgr_mon_bytes", |
11fdf7f2 | 88 | g_conf().get_val<Option::size_t>("mgr_mon_bytes"))), |
7c673cae | 89 | mon_msg_throttler(new Throttle(g_ceph_context, "mgr_mon_messsages", |
11fdf7f2 | 90 | g_conf().get_val<uint64_t>("mgr_mon_messages"))), |
7c673cae FG |
91 | msgr(nullptr), |
92 | monc(monc_), | |
93 | finisher(finisher_), | |
94 | daemon_state(daemon_state_), | |
95 | cluster_state(cluster_state_), | |
96 | py_modules(py_modules_), | |
97 | clog(clog_), | |
98 | audit_clog(audit_clog_), | |
11fdf7f2 TL |
99 | pgmap_ready(false), |
100 | timer(g_ceph_context, lock), | |
101 | shutting_down(false), | |
102 | tick_event(nullptr), | |
103 | osd_perf_metric_collector_listener(this), | |
f67539c2 TL |
104 | osd_perf_metric_collector(osd_perf_metric_collector_listener), |
105 | mds_perf_metric_collector_listener(this), | |
106 | mds_perf_metric_collector(mds_perf_metric_collector_listener) | |
3efd9988 | 107 | { |
11fdf7f2 | 108 | g_conf().add_observer(this); |
3efd9988 | 109 | } |
7c673cae FG |
110 | |
111 | DaemonServer::~DaemonServer() { | |
112 | delete msgr; | |
11fdf7f2 | 113 | g_conf().remove_observer(this); |
7c673cae FG |
114 | } |
115 | ||
11fdf7f2 | 116 | int DaemonServer::init(uint64_t gid, entity_addrvec_t client_addrs) |
7c673cae FG |
117 | { |
118 | // Initialize Messenger | |
11fdf7f2 TL |
119 | std::string public_msgr_type = g_conf()->ms_public_type.empty() ? |
120 | g_conf().get_val<std::string>("ms_type") : g_conf()->ms_public_type; | |
7c673cae FG |
121 | msgr = Messenger::create(g_ceph_context, public_msgr_type, |
122 | entity_name_t::MGR(gid), | |
123 | "mgr", | |
f67539c2 | 124 | Messenger::get_pid_nonce()); |
7c673cae FG |
125 | msgr->set_default_policy(Messenger::Policy::stateless_server(0)); |
126 | ||
11fdf7f2 TL |
127 | msgr->set_auth_client(monc); |
128 | ||
7c673cae FG |
129 | // throttle clients |
130 | msgr->set_policy_throttlers(entity_name_t::TYPE_CLIENT, | |
131 | client_byte_throttler.get(), | |
132 | client_msg_throttler.get()); | |
133 | ||
134 | // servers | |
135 | msgr->set_policy_throttlers(entity_name_t::TYPE_OSD, | |
136 | osd_byte_throttler.get(), | |
137 | osd_msg_throttler.get()); | |
138 | msgr->set_policy_throttlers(entity_name_t::TYPE_MDS, | |
139 | mds_byte_throttler.get(), | |
140 | mds_msg_throttler.get()); | |
141 | msgr->set_policy_throttlers(entity_name_t::TYPE_MON, | |
142 | mon_byte_throttler.get(), | |
143 | mon_msg_throttler.get()); | |
144 | ||
11fdf7f2 TL |
145 | entity_addrvec_t addrs; |
146 | int r = pick_addresses(cct, CEPH_PICK_ADDRESS_PUBLIC, &addrs); | |
147 | if (r < 0) { | |
148 | return r; | |
149 | } | |
150 | dout(20) << __func__ << " will bind to " << addrs << dendl; | |
151 | r = msgr->bindv(addrs); | |
7c673cae | 152 | if (r < 0) { |
11fdf7f2 | 153 | derr << "unable to bind mgr to " << addrs << dendl; |
7c673cae FG |
154 | return r; |
155 | } | |
156 | ||
157 | msgr->set_myname(entity_name_t::MGR(gid)); | |
11fdf7f2 | 158 | msgr->set_addr_unknowns(client_addrs); |
7c673cae FG |
159 | |
160 | msgr->start(); | |
161 | msgr->add_dispatcher_tail(this); | |
162 | ||
11fdf7f2 TL |
163 | msgr->set_auth_server(monc); |
164 | monc->set_handle_authentication_dispatcher(this); | |
165 | ||
224ce89b WB |
166 | started_at = ceph_clock_now(); |
167 | ||
11fdf7f2 TL |
168 | std::lock_guard l(lock); |
169 | timer.init(); | |
170 | ||
171 | schedule_tick_locked( | |
172 | g_conf().get_val<std::chrono::seconds>("mgr_tick_period").count()); | |
173 | ||
7c673cae FG |
174 | return 0; |
175 | } | |
176 | ||
11fdf7f2 | 177 | entity_addrvec_t DaemonServer::get_myaddrs() const |
7c673cae | 178 | { |
11fdf7f2 | 179 | return msgr->get_myaddrs(); |
7c673cae FG |
180 | } |
181 | ||
11fdf7f2 TL |
182 | int DaemonServer::ms_handle_authentication(Connection *con) |
183 | { | |
9f95a23c | 184 | auto s = ceph::make_ref<MgrSession>(cct); |
11fdf7f2 | 185 | con->set_priv(s); |
7c673cae | 186 | s->inst.addr = con->get_peer_addr(); |
11fdf7f2 TL |
187 | s->entity_name = con->peer_name; |
188 | dout(10) << __func__ << " new session " << s << " con " << con | |
189 | << " entity " << con->peer_name | |
190 | << " addr " << con->get_peer_addrs() | |
191 | << dendl; | |
192 | ||
193 | AuthCapsInfo &caps_info = con->get_peer_caps_info(); | |
194 | if (caps_info.allow_all) { | |
195 | dout(10) << " session " << s << " " << s->entity_name | |
196 | << " allow_all" << dendl; | |
197 | s->caps.set_allow_all(); | |
9f95a23c | 198 | } else if (caps_info.caps.length() > 0) { |
11fdf7f2 TL |
199 | auto p = caps_info.caps.cbegin(); |
200 | string str; | |
201 | try { | |
202 | decode(str, p); | |
203 | } | |
204 | catch (buffer::error& e) { | |
7c673cae | 205 | dout(10) << " session " << s << " " << s->entity_name |
9f95a23c TL |
206 | << " failed to decode caps" << dendl; |
207 | return -EACCES; | |
208 | } | |
209 | if (!s->caps.parse(str)) { | |
11fdf7f2 TL |
210 | dout(10) << " session " << s << " " << s->entity_name |
211 | << " failed to parse caps '" << str << "'" << dendl; | |
9f95a23c | 212 | return -EACCES; |
7c673cae | 213 | } |
9f95a23c TL |
214 | dout(10) << " session " << s << " " << s->entity_name |
215 | << " has caps " << s->caps << " '" << str << "'" << dendl; | |
11fdf7f2 | 216 | } |
31f18b77 | 217 | |
11fdf7f2 TL |
218 | if (con->get_peer_type() == CEPH_ENTITY_TYPE_OSD) { |
219 | std::lock_guard l(lock); | |
220 | s->osd_id = atoi(s->entity_name.get_id().c_str()); | |
221 | dout(10) << "registering osd." << s->osd_id << " session " | |
222 | << s << " con " << con << dendl; | |
223 | osd_cons[s->osd_id].insert(con); | |
7c673cae FG |
224 | } |
225 | ||
9f95a23c | 226 | return 1; |
7c673cae FG |
227 | } |
228 | ||
31f18b77 FG |
229 | bool DaemonServer::ms_handle_reset(Connection *con) |
230 | { | |
231 | if (con->get_peer_type() == CEPH_ENTITY_TYPE_OSD) { | |
11fdf7f2 TL |
232 | auto priv = con->get_priv(); |
233 | auto session = static_cast<MgrSession*>(priv.get()); | |
31f18b77 FG |
234 | if (!session) { |
235 | return false; | |
236 | } | |
11fdf7f2 | 237 | std::lock_guard l(lock); |
224ce89b | 238 | dout(10) << "unregistering osd." << session->osd_id |
31f18b77 FG |
239 | << " session " << session << " con " << con << dendl; |
240 | osd_cons[session->osd_id].erase(con); | |
3efd9988 FG |
241 | |
242 | auto iter = daemon_connections.find(con); | |
243 | if (iter != daemon_connections.end()) { | |
244 | daemon_connections.erase(iter); | |
245 | } | |
31f18b77 FG |
246 | } |
247 | return false; | |
248 | } | |
249 | ||
7c673cae FG |
250 | bool DaemonServer::ms_handle_refused(Connection *con) |
251 | { | |
252 | // do nothing for now | |
253 | return false; | |
254 | } | |
255 | ||
9f95a23c | 256 | bool DaemonServer::ms_dispatch2(const ref_t<Message>& m) |
7c673cae | 257 | { |
3efd9988 FG |
258 | // Note that we do *not* take ::lock here, in order to avoid |
259 | // serializing all message handling. It's up to each handler | |
260 | // to take whatever locks it needs. | |
7c673cae FG |
261 | switch (m->get_type()) { |
262 | case MSG_PGSTATS: | |
9f95a23c | 263 | cluster_state.ingest_pgstats(ref_cast<MPGStats>(m)); |
224ce89b | 264 | maybe_ready(m->get_source().num()); |
7c673cae FG |
265 | return true; |
266 | case MSG_MGR_REPORT: | |
9f95a23c | 267 | return handle_report(ref_cast<MMgrReport>(m)); |
7c673cae | 268 | case MSG_MGR_OPEN: |
9f95a23c | 269 | return handle_open(ref_cast<MMgrOpen>(m)); |
11fdf7f2 | 270 | case MSG_MGR_CLOSE: |
9f95a23c | 271 | return handle_close(ref_cast<MMgrClose>(m)); |
7c673cae | 272 | case MSG_COMMAND: |
9f95a23c TL |
273 | return handle_command(ref_cast<MCommand>(m)); |
274 | case MSG_MGR_COMMAND: | |
275 | return handle_command(ref_cast<MMgrCommand>(m)); | |
7c673cae FG |
276 | default: |
277 | dout(1) << "Unhandled message type " << m->get_type() << dendl; | |
278 | return false; | |
279 | }; | |
280 | } | |
281 | ||
9f95a23c TL |
282 | void DaemonServer::dump_pg_ready(ceph::Formatter *f) |
283 | { | |
284 | f->dump_bool("pg_ready", pgmap_ready.load()); | |
285 | } | |
286 | ||
224ce89b WB |
287 | void DaemonServer::maybe_ready(int32_t osd_id) |
288 | { | |
3efd9988 FG |
289 | if (pgmap_ready.load()) { |
290 | // Fast path: we don't need to take lock because pgmap_ready | |
291 | // is already set | |
292 | } else { | |
11fdf7f2 | 293 | std::lock_guard l(lock); |
224ce89b | 294 | |
3efd9988 FG |
295 | if (reported_osds.find(osd_id) == reported_osds.end()) { |
296 | dout(4) << "initial report from osd " << osd_id << dendl; | |
297 | reported_osds.insert(osd_id); | |
298 | std::set<int32_t> up_osds; | |
224ce89b | 299 | |
3efd9988 FG |
300 | cluster_state.with_osdmap([&](const OSDMap& osdmap) { |
301 | osdmap.get_up_osds(up_osds); | |
302 | }); | |
224ce89b | 303 | |
3efd9988 FG |
304 | std::set<int32_t> unreported_osds; |
305 | std::set_difference(up_osds.begin(), up_osds.end(), | |
306 | reported_osds.begin(), reported_osds.end(), | |
307 | std::inserter(unreported_osds, unreported_osds.begin())); | |
308 | ||
309 | if (unreported_osds.size() == 0) { | |
310 | dout(4) << "all osds have reported, sending PG state to mon" << dendl; | |
311 | pgmap_ready = true; | |
312 | reported_osds.clear(); | |
313 | // Avoid waiting for next tick | |
314 | send_report(); | |
315 | } else { | |
316 | dout(4) << "still waiting for " << unreported_osds.size() << " osds" | |
317 | " to report in before PGMap is ready" << dendl; | |
318 | } | |
224ce89b WB |
319 | } |
320 | } | |
321 | } | |
322 | ||
11fdf7f2 TL |
323 | void DaemonServer::tick() |
324 | { | |
325 | dout(10) << dendl; | |
326 | send_report(); | |
327 | adjust_pgs(); | |
328 | ||
329 | schedule_tick_locked( | |
330 | g_conf().get_val<std::chrono::seconds>("mgr_tick_period").count()); | |
331 | } | |
332 | ||
333 | // Currently modules do not set health checks in response to events delivered to | |
334 | // all modules (e.g. notify) so we do not risk a thundering hurd situation here. | |
335 | // if this pattern emerges in the future, this scheduler could be modified to | |
336 | // fire after all modules have had a chance to set their health checks. | |
337 | void DaemonServer::schedule_tick_locked(double delay_sec) | |
338 | { | |
9f95a23c | 339 | ceph_assert(ceph_mutex_is_locked_by_me(lock)); |
11fdf7f2 TL |
340 | |
341 | if (tick_event) { | |
342 | timer.cancel_event(tick_event); | |
343 | tick_event = nullptr; | |
344 | } | |
345 | ||
346 | // on shutdown start rejecting explicit requests to send reports that may | |
347 | // originate from python land which may still be running. | |
348 | if (shutting_down) | |
349 | return; | |
350 | ||
351 | tick_event = timer.add_event_after(delay_sec, | |
9f95a23c | 352 | new LambdaContext([this](int r) { |
11fdf7f2 TL |
353 | tick(); |
354 | })); | |
355 | } | |
356 | ||
357 | void DaemonServer::schedule_tick(double delay_sec) | |
358 | { | |
359 | std::lock_guard l(lock); | |
360 | schedule_tick_locked(delay_sec); | |
361 | } | |
362 | ||
363 | void DaemonServer::handle_osd_perf_metric_query_updated() | |
364 | { | |
365 | dout(10) << dendl; | |
366 | ||
367 | // Send a fresh MMgrConfigure to all clients, so that they can follow | |
368 | // the new policy for transmitting stats | |
9f95a23c | 369 | finisher.queue(new LambdaContext([this](int r) { |
11fdf7f2 TL |
370 | std::lock_guard l(lock); |
371 | for (auto &c : daemon_connections) { | |
372 | if (c->peer_is_osd()) { | |
373 | _send_configure(c); | |
374 | } | |
375 | } | |
376 | })); | |
377 | } | |
378 | ||
f67539c2 TL |
379 | void DaemonServer::handle_mds_perf_metric_query_updated() |
380 | { | |
381 | dout(10) << dendl; | |
382 | ||
383 | // Send a fresh MMgrConfigure to all clients, so that they can follow | |
384 | // the new policy for transmitting stats | |
385 | finisher.queue(new LambdaContext([this](int r) { | |
386 | std::lock_guard l(lock); | |
387 | for (auto &c : daemon_connections) { | |
388 | if (c->peer_is_mds()) { | |
389 | _send_configure(c); | |
390 | } | |
391 | } | |
392 | })); | |
393 | } | |
394 | ||
7c673cae FG |
395 | void DaemonServer::shutdown() |
396 | { | |
224ce89b | 397 | dout(10) << "begin" << dendl; |
7c673cae FG |
398 | msgr->shutdown(); |
399 | msgr->wait(); | |
eafe8130 | 400 | cluster_state.shutdown(); |
224ce89b | 401 | dout(10) << "done" << dendl; |
11fdf7f2 TL |
402 | |
403 | std::lock_guard l(lock); | |
404 | shutting_down = true; | |
405 | timer.shutdown(); | |
7c673cae FG |
406 | } |
407 | ||
11fdf7f2 TL |
408 | static DaemonKey key_from_service( |
409 | const std::string& service_name, | |
410 | int peer_type, | |
411 | const std::string& daemon_name) | |
412 | { | |
413 | if (!service_name.empty()) { | |
9f95a23c | 414 | return DaemonKey{service_name, daemon_name}; |
11fdf7f2 | 415 | } else { |
9f95a23c | 416 | return DaemonKey{ceph_entity_type_name(peer_type), daemon_name}; |
11fdf7f2 TL |
417 | } |
418 | } | |
7c673cae | 419 | |
1911f103 TL |
420 | void DaemonServer::fetch_missing_metadata(const DaemonKey& key, |
421 | const entity_addr_t& addr) | |
422 | { | |
423 | if (!daemon_state.is_updating(key) && | |
424 | (key.type == "osd" || key.type == "mds" || key.type == "mon")) { | |
425 | std::ostringstream oss; | |
426 | auto c = new MetadataUpdate(daemon_state, key); | |
427 | if (key.type == "osd") { | |
428 | oss << "{\"prefix\": \"osd metadata\", \"id\": " | |
429 | << key.name<< "}"; | |
430 | } else if (key.type == "mds") { | |
431 | c->set_default("addr", stringify(addr)); | |
432 | oss << "{\"prefix\": \"mds metadata\", \"who\": \"" | |
433 | << key.name << "\"}"; | |
434 | } else if (key.type == "mon") { | |
435 | oss << "{\"prefix\": \"mon metadata\", \"id\": \"" | |
436 | << key.name << "\"}"; | |
f67539c2 TL |
437 | } else { |
438 | ceph_abort(); | |
1911f103 TL |
439 | } |
440 | monc->start_mon_command({oss.str()}, {}, &c->outbl, &c->outs, c); | |
441 | } | |
442 | } | |
443 | ||
9f95a23c | 444 | bool DaemonServer::handle_open(const ref_t<MMgrOpen>& m) |
7c673cae | 445 | { |
1911f103 | 446 | std::unique_lock l(lock); |
3efd9988 | 447 | |
11fdf7f2 TL |
448 | DaemonKey key = key_from_service(m->service_name, |
449 | m->get_connection()->get_peer_type(), | |
450 | m->daemon_name); | |
7c673cae | 451 | |
92f5a8d4 | 452 | auto con = m->get_connection(); |
9f95a23c | 453 | dout(10) << "from " << key << " " << con->get_peer_addr() << dendl; |
7c673cae | 454 | |
92f5a8d4 | 455 | _send_configure(con); |
7c673cae | 456 | |
c07f9fc5 | 457 | DaemonStatePtr daemon; |
7c673cae | 458 | if (daemon_state.exists(key)) { |
92f5a8d4 | 459 | dout(20) << "updating existing DaemonState for " << key << dendl; |
c07f9fc5 FG |
460 | daemon = daemon_state.get(key); |
461 | } | |
92f5a8d4 TL |
462 | if (!daemon) { |
463 | if (m->service_daemon) { | |
464 | dout(4) << "constructing new DaemonState for " << key << dendl; | |
465 | daemon = std::make_shared<DaemonState>(daemon_state.types); | |
466 | daemon->key = key; | |
467 | daemon->service_daemon = true; | |
468 | daemon_state.insert(daemon); | |
469 | } else { | |
470 | /* A normal Ceph daemon has connected but we are or should be waiting on | |
471 | * metadata for it. Close the session so that it tries to reconnect. | |
472 | */ | |
473 | dout(2) << "ignoring open from " << key << " " << con->get_peer_addr() | |
474 | << "; not ready for session (expect reconnect)" << dendl; | |
475 | con->mark_down(); | |
1911f103 TL |
476 | l.unlock(); |
477 | fetch_missing_metadata(key, m->get_source_addr()); | |
92f5a8d4 TL |
478 | return true; |
479 | } | |
11fdf7f2 | 480 | } |
c07f9fc5 | 481 | if (daemon) { |
92f5a8d4 TL |
482 | if (m->service_daemon) { |
483 | // update the metadata through the daemon state index to | |
484 | // ensure it's kept up-to-date | |
485 | daemon_state.update_metadata(daemon, m->daemon_metadata); | |
486 | } | |
487 | ||
11fdf7f2 | 488 | std::lock_guard l(daemon->lock); |
3efd9988 | 489 | daemon->perf_counters.clear(); |
7c673cae | 490 | |
9f95a23c | 491 | daemon->service_daemon = m->service_daemon; |
11fdf7f2 | 492 | if (m->service_daemon) { |
11fdf7f2 TL |
493 | daemon->service_status = m->daemon_status; |
494 | ||
495 | utime_t now = ceph_clock_now(); | |
9f95a23c TL |
496 | auto [d, added] = pending_service_map.get_daemon(m->service_name, |
497 | m->daemon_name); | |
498 | if (added || d->gid != (uint64_t)m->get_source().num()) { | |
11fdf7f2 TL |
499 | dout(10) << "registering " << key << " in pending_service_map" << dendl; |
500 | d->gid = m->get_source().num(); | |
501 | d->addr = m->get_source_addr(); | |
502 | d->start_epoch = pending_service_map.epoch; | |
503 | d->start_stamp = now; | |
504 | d->metadata = m->daemon_metadata; | |
505 | pending_service_map_dirty = pending_service_map.epoch; | |
224ce89b | 506 | } |
224ce89b | 507 | } |
11fdf7f2 TL |
508 | |
509 | auto p = m->config_bl.cbegin(); | |
510 | if (p != m->config_bl.end()) { | |
511 | decode(daemon->config, p); | |
512 | decode(daemon->ignored_mon_config, p); | |
513 | dout(20) << " got config " << daemon->config | |
514 | << " ignored " << daemon->ignored_mon_config << dendl; | |
224ce89b | 515 | } |
11fdf7f2 TL |
516 | daemon->config_defaults_bl = m->config_defaults_bl; |
517 | daemon->config_defaults.clear(); | |
518 | dout(20) << " got config_defaults_bl " << daemon->config_defaults_bl.length() | |
519 | << " bytes" << dendl; | |
224ce89b WB |
520 | } |
521 | ||
92f5a8d4 | 522 | if (con->get_peer_type() != entity_name_t::TYPE_CLIENT && |
3efd9988 FG |
523 | m->service_name.empty()) |
524 | { | |
525 | // Store in set of the daemon/service connections, i.e. those | |
526 | // connections that require an update in the event of stats | |
527 | // configuration changes. | |
92f5a8d4 | 528 | daemon_connections.insert(con); |
3efd9988 FG |
529 | } |
530 | ||
7c673cae FG |
531 | return true; |
532 | } | |
533 | ||
9f95a23c | 534 | bool DaemonServer::handle_close(const ref_t<MMgrClose>& m) |
11fdf7f2 TL |
535 | { |
536 | std::lock_guard l(lock); | |
537 | ||
538 | DaemonKey key = key_from_service(m->service_name, | |
539 | m->get_connection()->get_peer_type(), | |
540 | m->daemon_name); | |
541 | dout(4) << "from " << m->get_connection() << " " << key << dendl; | |
542 | ||
543 | if (daemon_state.exists(key)) { | |
544 | DaemonStatePtr daemon = daemon_state.get(key); | |
545 | daemon_state.rm(key); | |
546 | { | |
547 | std::lock_guard l(daemon->lock); | |
548 | if (daemon->service_daemon) { | |
549 | pending_service_map.rm_daemon(m->service_name, m->daemon_name); | |
550 | pending_service_map_dirty = pending_service_map.epoch; | |
551 | } | |
552 | } | |
553 | } | |
554 | ||
555 | // send same message back as a reply | |
9f95a23c | 556 | m->get_connection()->send_message2(m); |
11fdf7f2 TL |
557 | return true; |
558 | } | |
559 | ||
f91f0fd5 TL |
560 | void DaemonServer::update_task_status( |
561 | DaemonKey key, | |
562 | const std::map<std::string,std::string>& task_status) | |
563 | { | |
9f95a23c TL |
564 | dout(10) << "got task status from " << key << dendl; |
565 | ||
f91f0fd5 TL |
566 | [[maybe_unused]] auto [daemon, added] = |
567 | pending_service_map.get_daemon(key.type, key.name); | |
568 | if (daemon->task_status != task_status) { | |
569 | daemon->task_status = task_status; | |
9f95a23c TL |
570 | pending_service_map_dirty = pending_service_map.epoch; |
571 | } | |
572 | } | |
573 | ||
574 | bool DaemonServer::handle_report(const ref_t<MMgrReport>& m) | |
7c673cae | 575 | { |
224ce89b WB |
576 | DaemonKey key; |
577 | if (!m->service_name.empty()) { | |
9f95a23c | 578 | key.type = m->service_name; |
224ce89b | 579 | } else { |
9f95a23c | 580 | key.type = ceph_entity_type_name(m->get_connection()->get_peer_type()); |
224ce89b | 581 | } |
9f95a23c | 582 | key.name = m->daemon_name; |
7c673cae | 583 | |
9f95a23c | 584 | dout(10) << "from " << m->get_connection() << " " << key << dendl; |
31f18b77 | 585 | |
224ce89b WB |
586 | if (m->get_connection()->get_peer_type() == entity_name_t::TYPE_CLIENT && |
587 | m->service_name.empty()) { | |
588 | // Clients should not be sending us stats unless they are declaring | |
589 | // themselves to be a daemon for some service. | |
9f95a23c TL |
590 | dout(10) << "rejecting report from non-daemon client " << m->daemon_name |
591 | << dendl; | |
592 | clog->warn() << "rejecting report from non-daemon client " << m->daemon_name | |
593 | << " at " << m->get_connection()->get_peer_addrs(); | |
b32b8144 | 594 | m->get_connection()->mark_down(); |
31f18b77 FG |
595 | return true; |
596 | } | |
7c673cae | 597 | |
9f95a23c TL |
598 | |
599 | { | |
600 | std::unique_lock locker(lock); | |
601 | ||
602 | DaemonStatePtr daemon; | |
603 | // Look up the DaemonState | |
604 | if (daemon_state.exists(key)) { | |
605 | dout(20) << "updating existing DaemonState for " << key << dendl; | |
606 | daemon = daemon_state.get(key); | |
607 | } else { | |
608 | locker.unlock(); | |
609 | ||
610 | // we don't know the hostname at this stage, reject MMgrReport here. | |
611 | dout(5) << "rejecting report from " << key << ", since we do not have its metadata now." | |
612 | << dendl; | |
613 | // issue metadata request in background | |
1911f103 | 614 | fetch_missing_metadata(key, m->get_source_addr()); |
b32b8144 | 615 | |
9f95a23c TL |
616 | locker.lock(); |
617 | ||
b32b8144 | 618 | // kill session |
11fdf7f2 TL |
619 | auto priv = m->get_connection()->get_priv(); |
620 | auto session = static_cast<MgrSession*>(priv.get()); | |
b32b8144 | 621 | if (!session) { |
9f95a23c | 622 | return false; |
b32b8144 FG |
623 | } |
624 | m->get_connection()->mark_down(); | |
b32b8144 FG |
625 | |
626 | dout(10) << "unregistering osd." << session->osd_id | |
9f95a23c | 627 | << " session " << session << " con " << m->get_connection() << dendl; |
b32b8144 FG |
628 | |
629 | if (osd_cons.find(session->osd_id) != osd_cons.end()) { | |
9f95a23c TL |
630 | osd_cons[session->osd_id].erase(m->get_connection()); |
631 | } | |
b32b8144 FG |
632 | |
633 | auto iter = daemon_connections.find(m->get_connection()); | |
634 | if (iter != daemon_connections.end()) { | |
9f95a23c | 635 | daemon_connections.erase(iter); |
b32b8144 | 636 | } |
3efd9988 | 637 | |
9f95a23c | 638 | return false; |
11fdf7f2 TL |
639 | } |
640 | ||
9f95a23c TL |
641 | // Update the DaemonState |
642 | ceph_assert(daemon != nullptr); | |
643 | { | |
644 | std::lock_guard l(daemon->lock); | |
645 | auto &daemon_counters = daemon->perf_counters; | |
646 | daemon_counters.update(*m.get()); | |
647 | ||
648 | auto p = m->config_bl.cbegin(); | |
649 | if (p != m->config_bl.end()) { | |
650 | decode(daemon->config, p); | |
651 | decode(daemon->ignored_mon_config, p); | |
652 | dout(20) << " got config " << daemon->config | |
653 | << " ignored " << daemon->ignored_mon_config << dendl; | |
654 | } | |
655 | ||
3efd9988 | 656 | utime_t now = ceph_clock_now(); |
9f95a23c TL |
657 | if (daemon->service_daemon) { |
658 | if (m->daemon_status) { | |
659 | daemon->service_status_stamp = now; | |
660 | daemon->service_status = *m->daemon_status; | |
661 | } | |
662 | daemon->last_service_beacon = now; | |
663 | } else if (m->daemon_status) { | |
664 | derr << "got status from non-daemon " << key << dendl; | |
665 | } | |
666 | // update task status | |
667 | if (m->task_status) { | |
f91f0fd5 | 668 | update_task_status(key, *m->task_status); |
9f95a23c TL |
669 | daemon->last_service_beacon = now; |
670 | } | |
671 | if (m->get_connection()->peer_is_osd() || m->get_connection()->peer_is_mon()) { | |
672 | // only OSD and MON send health_checks to me now | |
673 | daemon->daemon_health_metrics = std::move(m->daemon_health_metrics); | |
674 | dout(10) << "daemon_health_metrics " << daemon->daemon_health_metrics | |
675 | << dendl; | |
3efd9988 | 676 | } |
b32b8144 | 677 | } |
c07f9fc5 | 678 | } |
3efd9988 | 679 | |
c07f9fc5 | 680 | // if there are any schema updates, notify the python modules |
20effc67 | 681 | /* no users currently |
c07f9fc5 | 682 | if (!m->declare_types.empty() || !m->undeclare_types.empty()) { |
9f95a23c | 683 | py_modules.notify_all("perf_schema_update", ceph::to_string(key)); |
c07f9fc5 | 684 | } |
20effc67 | 685 | */ |
224ce89b | 686 | |
11fdf7f2 TL |
687 | if (m->get_connection()->peer_is_osd()) { |
688 | osd_perf_metric_collector.process_reports(m->osd_perf_metric_reports); | |
689 | } | |
690 | ||
9f95a23c TL |
691 | if (m->metric_report_message) { |
692 | const MetricReportMessage &message = *m->metric_report_message; | |
693 | boost::apply_visitor(HandlePayloadVisitor(this), message.payload); | |
694 | } | |
695 | ||
7c673cae FG |
696 | return true; |
697 | } | |
698 | ||
7c673cae FG |
699 | |
700 | void DaemonServer::_generate_command_map( | |
11fdf7f2 | 701 | cmdmap_t& cmdmap, |
7c673cae FG |
702 | map<string,string> ¶m_str_map) |
703 | { | |
11fdf7f2 | 704 | for (auto p = cmdmap.begin(); |
7c673cae FG |
705 | p != cmdmap.end(); ++p) { |
706 | if (p->first == "prefix") | |
707 | continue; | |
708 | if (p->first == "caps") { | |
709 | vector<string> cv; | |
9f95a23c | 710 | if (cmd_getval(cmdmap, "caps", cv) && |
7c673cae FG |
711 | cv.size() % 2 == 0) { |
712 | for (unsigned i = 0; i < cv.size(); i += 2) { | |
713 | string k = string("caps_") + cv[i]; | |
714 | param_str_map[k] = cv[i + 1]; | |
715 | } | |
716 | continue; | |
717 | } | |
718 | } | |
719 | param_str_map[p->first] = cmd_vartype_stringify(p->second); | |
720 | } | |
721 | } | |
722 | ||
c07f9fc5 | 723 | const MonCommand *DaemonServer::_get_mgrcommand( |
7c673cae | 724 | const string &cmd_prefix, |
c07f9fc5 | 725 | const std::vector<MonCommand> &cmds) |
7c673cae | 726 | { |
c07f9fc5 FG |
727 | const MonCommand *this_cmd = nullptr; |
728 | for (const auto &cmd : cmds) { | |
729 | if (cmd.cmdstring.compare(0, cmd_prefix.size(), cmd_prefix) == 0) { | |
730 | this_cmd = &cmd; | |
7c673cae FG |
731 | break; |
732 | } | |
733 | } | |
734 | return this_cmd; | |
735 | } | |
736 | ||
737 | bool DaemonServer::_allowed_command( | |
738 | MgrSession *s, | |
92f5a8d4 | 739 | const string &service, |
7c673cae FG |
740 | const string &module, |
741 | const string &prefix, | |
11fdf7f2 | 742 | const cmdmap_t& cmdmap, |
7c673cae | 743 | const map<string,string>& param_str_map, |
c07f9fc5 | 744 | const MonCommand *this_cmd) { |
7c673cae FG |
745 | |
746 | if (s->entity_name.is_mon()) { | |
747 | // mon is all-powerful. even when it is forwarding commands on behalf of | |
748 | // old clients; we expect the mon is validating commands before proxying! | |
749 | return true; | |
750 | } | |
751 | ||
752 | bool cmd_r = this_cmd->requires_perm('r'); | |
753 | bool cmd_w = this_cmd->requires_perm('w'); | |
754 | bool cmd_x = this_cmd->requires_perm('x'); | |
755 | ||
756 | bool capable = s->caps.is_capable( | |
757 | g_ceph_context, | |
7c673cae | 758 | s->entity_name, |
92f5a8d4 | 759 | service, module, prefix, param_str_map, |
11fdf7f2 TL |
760 | cmd_r, cmd_w, cmd_x, |
761 | s->get_peer_addr()); | |
7c673cae FG |
762 | |
763 | dout(10) << " " << s->entity_name << " " | |
764 | << (capable ? "" : "not ") << "capable" << dendl; | |
765 | return capable; | |
766 | } | |
767 | ||
11fdf7f2 TL |
768 | /** |
769 | * The working data for processing an MCommand. This lives in | |
770 | * a class to enable passing it into other threads for processing | |
771 | * outside of the thread/locks that called handle_command. | |
772 | */ | |
773 | class CommandContext { | |
774 | public: | |
9f95a23c TL |
775 | ceph::ref_t<MCommand> m_tell; |
776 | ceph::ref_t<MMgrCommand> m_mgr; | |
777 | const std::vector<std::string>& cmd; ///< ref into m_tell or m_mgr | |
778 | const bufferlist& data; ///< ref into m_tell or m_mgr | |
11fdf7f2 TL |
779 | bufferlist odata; |
780 | cmdmap_t cmdmap; | |
781 | ||
9f95a23c TL |
782 | explicit CommandContext(ceph::ref_t<MCommand> m) |
783 | : m_tell{std::move(m)}, | |
784 | cmd(m_tell->cmd), | |
785 | data(m_tell->get_data()) { | |
11fdf7f2 | 786 | } |
9f95a23c TL |
787 | explicit CommandContext(ceph::ref_t<MMgrCommand> m) |
788 | : m_mgr{std::move(m)}, | |
789 | cmd(m_mgr->cmd), | |
790 | data(m_mgr->get_data()) { | |
11fdf7f2 | 791 | } |
7c673cae | 792 | |
11fdf7f2 TL |
793 | void reply(int r, const std::stringstream &ss) { |
794 | reply(r, ss.str()); | |
795 | } | |
7c673cae | 796 | |
11fdf7f2 TL |
797 | void reply(int r, const std::string &rs) { |
798 | // Let the connection drop as soon as we've sent our response | |
9f95a23c TL |
799 | ConnectionRef con = m_tell ? m_tell->get_connection() |
800 | : m_mgr->get_connection(); | |
11fdf7f2 TL |
801 | if (con) { |
802 | con->mark_disposable(); | |
7c673cae FG |
803 | } |
804 | ||
11fdf7f2 | 805 | if (r == 0) { |
9f95a23c | 806 | dout(20) << "success" << dendl; |
11fdf7f2 TL |
807 | } else { |
808 | derr << __func__ << " " << cpp_strerror(r) << " " << rs << dendl; | |
7c673cae | 809 | } |
11fdf7f2 | 810 | if (con) { |
9f95a23c TL |
811 | if (m_tell) { |
812 | MCommandReply *reply = new MCommandReply(r, rs); | |
813 | reply->set_tid(m_tell->get_tid()); | |
814 | reply->set_data(odata); | |
815 | con->send_message(reply); | |
816 | } else { | |
817 | MMgrCommandReply *reply = new MMgrCommandReply(r, rs); | |
818 | reply->set_tid(m_mgr->get_tid()); | |
819 | reply->set_data(odata); | |
820 | con->send_message(reply); | |
821 | } | |
7c673cae | 822 | } |
11fdf7f2 TL |
823 | } |
824 | }; | |
7c673cae | 825 | |
11fdf7f2 TL |
826 | /** |
827 | * A context for receiving a bufferlist/error string from a background | |
828 | * function and then calling back to a CommandContext when it's done | |
829 | */ | |
830 | class ReplyOnFinish : public Context { | |
831 | std::shared_ptr<CommandContext> cmdctx; | |
7c673cae | 832 | |
11fdf7f2 TL |
833 | public: |
834 | bufferlist from_mon; | |
835 | string outs; | |
7c673cae | 836 | |
11fdf7f2 TL |
837 | explicit ReplyOnFinish(const std::shared_ptr<CommandContext> &cmdctx_) |
838 | : cmdctx(cmdctx_) | |
7c673cae | 839 | {} |
11fdf7f2 TL |
840 | void finish(int r) override { |
841 | cmdctx->odata.claim_append(from_mon); | |
842 | cmdctx->reply(r, outs); | |
843 | } | |
844 | }; | |
7c673cae | 845 | |
9f95a23c | 846 | bool DaemonServer::handle_command(const ref_t<MCommand>& m) |
11fdf7f2 TL |
847 | { |
848 | std::lock_guard l(lock); | |
1911f103 TL |
849 | auto cmdctx = std::make_shared<CommandContext>(m); |
850 | try { | |
851 | return _handle_command(cmdctx); | |
852 | } catch (const bad_cmd_get& e) { | |
853 | cmdctx->reply(-EINVAL, e.what()); | |
9f95a23c | 854 | return true; |
9f95a23c TL |
855 | } |
856 | } | |
857 | ||
858 | bool DaemonServer::handle_command(const ref_t<MMgrCommand>& m) | |
859 | { | |
860 | std::lock_guard l(lock); | |
861 | auto cmdctx = std::make_shared<CommandContext>(m); | |
11fdf7f2 | 862 | try { |
9f95a23c | 863 | return _handle_command(cmdctx); |
11fdf7f2 TL |
864 | } catch (const bad_cmd_get& e) { |
865 | cmdctx->reply(-EINVAL, e.what()); | |
866 | return true; | |
867 | } | |
868 | } | |
7c673cae | 869 | |
92f5a8d4 TL |
870 | void DaemonServer::log_access_denied( |
871 | std::shared_ptr<CommandContext>& cmdctx, | |
872 | MgrSession* session, std::stringstream& ss) { | |
873 | dout(1) << " access denied" << dendl; | |
874 | audit_clog->info() << "from='" << session->inst << "' " | |
875 | << "entity='" << session->entity_name << "' " | |
9f95a23c | 876 | << "cmd=" << cmdctx->cmd << ": access denied"; |
92f5a8d4 | 877 | ss << "access denied: does your client key have mgr caps? " |
f67539c2 | 878 | "See http://docs.ceph.com/en/latest/mgr/administrator/" |
92f5a8d4 TL |
879 | "#client-authentication"; |
880 | } | |
881 | ||
f67539c2 TL |
882 | void DaemonServer::_check_offlines_pgs( |
883 | const set<int>& osds, | |
884 | const OSDMap& osdmap, | |
885 | const PGMap& pgmap, | |
886 | offline_pg_report *report) | |
887 | { | |
888 | // reset output | |
889 | *report = offline_pg_report(); | |
890 | report->osds = osds; | |
891 | ||
892 | for (const auto& q : pgmap.pg_stat) { | |
893 | set<int32_t> pg_acting; // net acting sets (with no missing if degraded) | |
894 | bool found = false; | |
895 | if (q.second.state == 0) { | |
896 | report->unknown.insert(q.first); | |
897 | continue; | |
898 | } | |
899 | if (q.second.state & PG_STATE_DEGRADED) { | |
900 | for (auto& anm : q.second.avail_no_missing) { | |
901 | if (osds.count(anm.osd)) { | |
902 | found = true; | |
903 | continue; | |
904 | } | |
905 | if (anm.osd != CRUSH_ITEM_NONE) { | |
906 | pg_acting.insert(anm.osd); | |
907 | } | |
908 | } | |
909 | } else { | |
910 | for (auto& a : q.second.acting) { | |
911 | if (osds.count(a)) { | |
912 | found = true; | |
913 | continue; | |
914 | } | |
915 | if (a != CRUSH_ITEM_NONE) { | |
916 | pg_acting.insert(a); | |
917 | } | |
918 | } | |
919 | } | |
920 | if (!found) { | |
921 | continue; | |
922 | } | |
923 | const pg_pool_t *pi = osdmap.get_pg_pool(q.first.pool()); | |
924 | bool dangerous = false; | |
925 | if (!pi) { | |
926 | report->bad_no_pool.insert(q.first); // pool is creating or deleting | |
927 | dangerous = true; | |
928 | } | |
929 | if (!(q.second.state & PG_STATE_ACTIVE)) { | |
930 | report->bad_already_inactive.insert(q.first); | |
931 | dangerous = true; | |
932 | } | |
933 | if (pg_acting.size() < pi->min_size) { | |
934 | report->bad_become_inactive.insert(q.first); | |
935 | dangerous = true; | |
936 | } | |
937 | if (dangerous) { | |
938 | report->not_ok.insert(q.first); | |
939 | } else { | |
940 | report->ok.insert(q.first); | |
941 | if (q.second.state & PG_STATE_DEGRADED) { | |
942 | report->ok_become_more_degraded.insert(q.first); | |
943 | } else { | |
944 | report->ok_become_degraded.insert(q.first); | |
945 | } | |
946 | } | |
947 | } | |
948 | dout(20) << osds << " -> " << report->ok.size() << " ok, " | |
949 | << report->not_ok.size() << " not ok, " | |
950 | << report->unknown.size() << " unknown" | |
951 | << dendl; | |
952 | } | |
953 | ||
954 | void DaemonServer::_maximize_ok_to_stop_set( | |
955 | const set<int>& orig_osds, | |
956 | unsigned max, | |
957 | const OSDMap& osdmap, | |
958 | const PGMap& pgmap, | |
959 | offline_pg_report *out_report) | |
960 | { | |
961 | dout(20) << "orig_osds " << orig_osds << " max " << max << dendl; | |
962 | _check_offlines_pgs(orig_osds, osdmap, pgmap, out_report); | |
963 | if (!out_report->ok_to_stop()) { | |
964 | return; | |
965 | } | |
966 | if (orig_osds.size() >= max) { | |
967 | // already at max | |
968 | return; | |
969 | } | |
970 | ||
971 | // semi-arbitrarily start with the first osd in the set | |
972 | offline_pg_report report; | |
973 | set<int> osds = orig_osds; | |
974 | int parent = *osds.begin(); | |
975 | set<int> children; | |
976 | ||
977 | while (true) { | |
978 | // identify the next parent | |
979 | int r = osdmap.crush->get_immediate_parent_id(parent, &parent); | |
980 | if (r < 0) { | |
981 | return; // just go with what we have so far! | |
982 | } | |
983 | ||
984 | // get candidate additions that are beneath this point in the tree | |
985 | children.clear(); | |
986 | r = osdmap.crush->get_all_children(parent, &children); | |
987 | if (r < 0) { | |
988 | return; // just go with what we have so far! | |
989 | } | |
990 | dout(20) << " parent " << parent << " children " << children << dendl; | |
991 | ||
992 | // try adding in more osds | |
993 | int failed = 0; // how many children we failed to add to our set | |
994 | for (auto o : children) { | |
995 | if (o >= 0 && osdmap.is_up(o) && osds.count(o) == 0) { | |
996 | osds.insert(o); | |
997 | _check_offlines_pgs(osds, osdmap, pgmap, &report); | |
998 | if (!report.ok_to_stop()) { | |
999 | osds.erase(o); | |
1000 | ++failed; | |
1001 | continue; | |
1002 | } | |
1003 | *out_report = report; | |
1004 | if (osds.size() == max) { | |
1005 | dout(20) << " hit max" << dendl; | |
1006 | return; // yay, we hit the max | |
1007 | } | |
1008 | } | |
1009 | } | |
1010 | ||
1011 | if (failed) { | |
1012 | // we hit some failures; go with what we have | |
1013 | dout(20) << " hit some peer failures" << dendl; | |
1014 | return; | |
1015 | } | |
1016 | } | |
1017 | } | |
1018 | ||
11fdf7f2 | 1019 | bool DaemonServer::_handle_command( |
11fdf7f2 TL |
1020 | std::shared_ptr<CommandContext>& cmdctx) |
1021 | { | |
9f95a23c | 1022 | MessageRef m; |
1911f103 | 1023 | bool admin_socket_cmd = false; |
9f95a23c TL |
1024 | if (cmdctx->m_tell) { |
1025 | m = cmdctx->m_tell; | |
1911f103 TL |
1026 | // a blank fsid in MCommand signals a legacy client sending a "mon-mgr" CLI |
1027 | // command. | |
1028 | admin_socket_cmd = (cmdctx->m_tell->fsid != uuid_d()); | |
9f95a23c TL |
1029 | } else { |
1030 | m = cmdctx->m_mgr; | |
1031 | } | |
11fdf7f2 TL |
1032 | auto priv = m->get_connection()->get_priv(); |
1033 | auto session = static_cast<MgrSession*>(priv.get()); | |
7c673cae FG |
1034 | if (!session) { |
1035 | return true; | |
1036 | } | |
9f95a23c | 1037 | if (session->inst.name == entity_name_t()) { |
7c673cae | 1038 | session->inst.name = m->get_source(); |
9f95a23c | 1039 | } |
7c673cae | 1040 | |
7c673cae | 1041 | map<string,string> param_str_map; |
11fdf7f2 TL |
1042 | std::stringstream ss; |
1043 | int r = 0; | |
7c673cae | 1044 | |
9f95a23c | 1045 | if (!cmdmap_from_json(cmdctx->cmd, &(cmdctx->cmdmap), ss)) { |
7c673cae FG |
1046 | cmdctx->reply(-EINVAL, ss); |
1047 | return true; | |
1048 | } | |
1049 | ||
11fdf7f2 | 1050 | string prefix; |
9f95a23c | 1051 | cmd_getval(cmdctx->cmdmap, "prefix", prefix); |
f67539c2 | 1052 | dout(10) << "decoded-size=" << cmdctx->cmdmap.size() << " prefix=" << prefix << dendl; |
7c673cae | 1053 | |
f67539c2 TL |
1054 | boost::scoped_ptr<Formatter> f; |
1055 | { | |
1056 | std::string format; | |
1057 | if (boost::algorithm::ends_with(prefix, "_json")) { | |
1058 | format = "json"; | |
1059 | } else { | |
20effc67 | 1060 | format = cmd_getval_or<string>(cmdctx->cmdmap, "format", "plain"); |
f67539c2 TL |
1061 | } |
1062 | f.reset(Formatter::create(format)); | |
1063 | } | |
7c673cae | 1064 | |
1911f103 TL |
1065 | // this is just for mgr commands - admin socket commands will fall |
1066 | // through and use the admin socket version of | |
1067 | // get_command_descriptions | |
1068 | if (prefix == "get_command_descriptions" && !admin_socket_cmd) { | |
7c673cae | 1069 | dout(10) << "reading commands from python modules" << dendl; |
c07f9fc5 | 1070 | const auto py_commands = py_modules.get_commands(); |
7c673cae | 1071 | |
c07f9fc5 | 1072 | int cmdnum = 0; |
7c673cae FG |
1073 | JSONFormatter f; |
1074 | f.open_object_section("command_descriptions"); | |
c07f9fc5 | 1075 | |
11fdf7f2 | 1076 | auto dump_cmd = [&cmdnum, &f, m](const MonCommand &mc){ |
7c673cae | 1077 | ostringstream secname; |
20effc67 | 1078 | secname << "cmd" << std::setfill('0') << std::setw(3) << cmdnum; |
11fdf7f2 TL |
1079 | dump_cmddesc_to_json(&f, m->get_connection()->get_features(), |
1080 | secname.str(), mc.cmdstring, mc.helpstring, | |
1081 | mc.module, mc.req_perms, 0); | |
7c673cae | 1082 | cmdnum++; |
c07f9fc5 FG |
1083 | }; |
1084 | ||
1085 | for (const auto &pyc : py_commands) { | |
1086 | dump_cmd(pyc); | |
7c673cae | 1087 | } |
7c673cae | 1088 | |
c07f9fc5 FG |
1089 | for (const auto &mgr_cmd : mgr_commands) { |
1090 | dump_cmd(mgr_cmd); | |
7c673cae | 1091 | } |
c07f9fc5 | 1092 | |
7c673cae FG |
1093 | f.close_section(); // command_descriptions |
1094 | f.flush(cmdctx->odata); | |
1095 | cmdctx->reply(0, ss); | |
1096 | return true; | |
1097 | } | |
1098 | ||
1099 | // lookup command | |
c07f9fc5 | 1100 | const MonCommand *mgr_cmd = _get_mgrcommand(prefix, mgr_commands); |
7c673cae | 1101 | _generate_command_map(cmdctx->cmdmap, param_str_map); |
11fdf7f2 | 1102 | |
92f5a8d4 TL |
1103 | bool is_allowed = false; |
1104 | ModuleCommand py_command; | |
1911f103 TL |
1105 | if (admin_socket_cmd) { |
1106 | // admin socket commands require all capabilities | |
1107 | is_allowed = session->caps.is_allow_all(); | |
1108 | } else if (!mgr_cmd) { | |
92f5a8d4 TL |
1109 | // Resolve the command to the name of the module that will |
1110 | // handle it (if the command exists) | |
1111 | auto py_commands = py_modules.get_py_commands(); | |
1112 | for (const auto &pyc : py_commands) { | |
1113 | auto pyc_prefix = cmddesc_get_prefix(pyc.cmdstring); | |
1114 | if (pyc_prefix == prefix) { | |
1115 | py_command = pyc; | |
1116 | break; | |
1117 | } | |
1118 | } | |
1119 | ||
1120 | MonCommand pyc = {"", "", "py", py_command.perm}; | |
1121 | is_allowed = _allowed_command(session, "py", py_command.module_name, | |
1122 | prefix, cmdctx->cmdmap, param_str_map, | |
1123 | &pyc); | |
7c673cae FG |
1124 | } else { |
1125 | // validate user's permissions for requested command | |
92f5a8d4 | 1126 | is_allowed = _allowed_command(session, mgr_cmd->module, "", |
11fdf7f2 TL |
1127 | prefix, cmdctx->cmdmap, param_str_map, mgr_cmd); |
1128 | } | |
92f5a8d4 | 1129 | |
11fdf7f2 | 1130 | if (!is_allowed) { |
92f5a8d4 | 1131 | log_access_denied(cmdctx, session, ss); |
7c673cae FG |
1132 | cmdctx->reply(-EACCES, ss); |
1133 | return true; | |
7c673cae FG |
1134 | } |
1135 | ||
1136 | audit_clog->debug() | |
1137 | << "from='" << session->inst << "' " | |
1138 | << "entity='" << session->entity_name << "' " | |
9f95a23c | 1139 | << "cmd=" << cmdctx->cmd << ": dispatch"; |
7c673cae | 1140 | |
1911f103 TL |
1141 | if (admin_socket_cmd) { |
1142 | cct->get_admin_socket()->queue_tell_command(cmdctx->m_tell); | |
1143 | return true; | |
1144 | } | |
1145 | ||
224ce89b WB |
1146 | // ---------------- |
1147 | // service map commands | |
1148 | if (prefix == "service dump") { | |
1149 | if (!f) | |
1150 | f.reset(Formatter::create("json-pretty")); | |
1151 | cluster_state.with_servicemap([&](const ServiceMap &service_map) { | |
1152 | f->dump_object("service_map", service_map); | |
1153 | }); | |
1154 | f->flush(cmdctx->odata); | |
1155 | cmdctx->reply(0, ss); | |
1156 | return true; | |
1157 | } | |
1158 | if (prefix == "service status") { | |
1159 | if (!f) | |
1160 | f.reset(Formatter::create("json-pretty")); | |
1161 | // only include state from services that are in the persisted service map | |
1162 | f->open_object_section("service_status"); | |
9f95a23c TL |
1163 | for (auto& [type, service] : pending_service_map.services) { |
1164 | if (ServiceMap::is_normal_ceph_entity(type)) { | |
1165 | continue; | |
1166 | } | |
1167 | ||
1168 | f->open_object_section(type.c_str()); | |
1169 | for (auto& q : service.daemons) { | |
224ce89b | 1170 | f->open_object_section(q.first.c_str()); |
9f95a23c | 1171 | DaemonKey key{type, q.first}; |
11fdf7f2 | 1172 | ceph_assert(daemon_state.exists(key)); |
224ce89b | 1173 | auto daemon = daemon_state.get(key); |
11fdf7f2 | 1174 | std::lock_guard l(daemon->lock); |
224ce89b WB |
1175 | f->dump_stream("status_stamp") << daemon->service_status_stamp; |
1176 | f->dump_stream("last_beacon") << daemon->last_service_beacon; | |
1177 | f->open_object_section("status"); | |
1178 | for (auto& r : daemon->service_status) { | |
1179 | f->dump_string(r.first.c_str(), r.second); | |
1180 | } | |
1181 | f->close_section(); | |
1182 | f->close_section(); | |
1183 | } | |
1184 | f->close_section(); | |
1185 | } | |
1186 | f->close_section(); | |
1187 | f->flush(cmdctx->odata); | |
1188 | cmdctx->reply(0, ss); | |
1189 | return true; | |
1190 | } | |
1191 | ||
3efd9988 FG |
1192 | if (prefix == "config set") { |
1193 | std::string key; | |
1194 | std::string val; | |
9f95a23c TL |
1195 | cmd_getval(cmdctx->cmdmap, "key", key); |
1196 | cmd_getval(cmdctx->cmdmap, "value", val); | |
11fdf7f2 | 1197 | r = cct->_conf.set_val(key, val, &ss); |
3efd9988 | 1198 | if (r == 0) { |
11fdf7f2 | 1199 | cct->_conf.apply_changes(nullptr); |
3efd9988 FG |
1200 | } |
1201 | cmdctx->reply(0, ss); | |
1202 | return true; | |
1203 | } | |
1204 | ||
7c673cae FG |
1205 | // ----------- |
1206 | // PG commands | |
1207 | ||
1208 | if (prefix == "pg scrub" || | |
1209 | prefix == "pg repair" || | |
1210 | prefix == "pg deep-scrub") { | |
1211 | string scrubop = prefix.substr(3, string::npos); | |
1212 | pg_t pgid; | |
11fdf7f2 | 1213 | spg_t spgid; |
7c673cae | 1214 | string pgidstr; |
9f95a23c | 1215 | cmd_getval(cmdctx->cmdmap, "pgid", pgidstr); |
7c673cae FG |
1216 | if (!pgid.parse(pgidstr.c_str())) { |
1217 | ss << "invalid pgid '" << pgidstr << "'"; | |
1218 | cmdctx->reply(-EINVAL, ss); | |
1219 | return true; | |
1220 | } | |
1221 | bool pg_exists = false; | |
1222 | cluster_state.with_osdmap([&](const OSDMap& osdmap) { | |
1223 | pg_exists = osdmap.pg_exists(pgid); | |
1224 | }); | |
1225 | if (!pg_exists) { | |
11fdf7f2 | 1226 | ss << "pg " << pgid << " does not exist"; |
7c673cae FG |
1227 | cmdctx->reply(-ENOENT, ss); |
1228 | return true; | |
1229 | } | |
1230 | int acting_primary = -1; | |
11fdf7f2 | 1231 | epoch_t epoch; |
7c673cae | 1232 | cluster_state.with_osdmap([&](const OSDMap& osdmap) { |
11fdf7f2 TL |
1233 | epoch = osdmap.get_epoch(); |
1234 | osdmap.get_primary_shard(pgid, &acting_primary, &spgid); | |
7c673cae FG |
1235 | }); |
1236 | if (acting_primary == -1) { | |
1237 | ss << "pg " << pgid << " has no primary osd"; | |
1238 | cmdctx->reply(-EAGAIN, ss); | |
1239 | return true; | |
1240 | } | |
31f18b77 FG |
1241 | auto p = osd_cons.find(acting_primary); |
1242 | if (p == osd_cons.end()) { | |
1243 | ss << "pg " << pgid << " primary osd." << acting_primary | |
1244 | << " is not currently connected"; | |
1245 | cmdctx->reply(-EAGAIN, ss); | |
f64942e4 | 1246 | return true; |
31f18b77 | 1247 | } |
31f18b77 | 1248 | for (auto& con : p->second) { |
20effc67 TL |
1249 | assert(HAVE_FEATURE(con->get_features(), SERVER_OCTOPUS)); |
1250 | vector<spg_t> pgs = { spgid }; | |
1251 | con->send_message(new MOSDScrub2(monc->get_fsid(), | |
1252 | epoch, | |
1253 | pgs, | |
1254 | scrubop == "repair", | |
1255 | scrubop == "deep-scrub")); | |
31f18b77 | 1256 | } |
11fdf7f2 | 1257 | ss << "instructing pg " << spgid << " on osd." << acting_primary |
31f18b77 FG |
1258 | << " to " << scrubop; |
1259 | cmdctx->reply(0, ss); | |
1260 | return true; | |
1261 | } else if (prefix == "osd scrub" || | |
1262 | prefix == "osd deep-scrub" || | |
1263 | prefix == "osd repair") { | |
1264 | string whostr; | |
9f95a23c | 1265 | cmd_getval(cmdctx->cmdmap, "who", whostr); |
31f18b77 FG |
1266 | vector<string> pvec; |
1267 | get_str_vec(prefix, pvec); | |
1268 | ||
1269 | set<int> osds; | |
224ce89b | 1270 | if (whostr == "*" || whostr == "all" || whostr == "any") { |
31f18b77 FG |
1271 | cluster_state.with_osdmap([&](const OSDMap& osdmap) { |
1272 | for (int i = 0; i < osdmap.get_max_osd(); i++) | |
1273 | if (osdmap.is_up(i)) { | |
1274 | osds.insert(i); | |
1275 | } | |
1276 | }); | |
1277 | } else { | |
1278 | long osd = parse_osd_id(whostr.c_str(), &ss); | |
1279 | if (osd < 0) { | |
1280 | ss << "invalid osd '" << whostr << "'"; | |
1281 | cmdctx->reply(-EINVAL, ss); | |
1282 | return true; | |
1283 | } | |
1284 | cluster_state.with_osdmap([&](const OSDMap& osdmap) { | |
1285 | if (osdmap.is_up(osd)) { | |
1286 | osds.insert(osd); | |
1287 | } | |
1288 | }); | |
1289 | if (osds.empty()) { | |
1290 | ss << "osd." << osd << " is not up"; | |
1291 | cmdctx->reply(-EAGAIN, ss); | |
1292 | return true; | |
1293 | } | |
1294 | } | |
1295 | set<int> sent_osds, failed_osds; | |
1296 | for (auto osd : osds) { | |
11fdf7f2 TL |
1297 | vector<spg_t> spgs; |
1298 | epoch_t epoch; | |
1299 | cluster_state.with_osdmap_and_pgmap([&](const OSDMap& osdmap, const PGMap& pgmap) { | |
1300 | epoch = osdmap.get_epoch(); | |
1301 | auto p = pgmap.pg_by_osd.find(osd); | |
1302 | if (p != pgmap.pg_by_osd.end()) { | |
1303 | for (auto pgid : p->second) { | |
1304 | int primary; | |
1305 | spg_t spg; | |
1306 | osdmap.get_primary_shard(pgid, &primary, &spg); | |
1307 | if (primary == osd) { | |
1308 | spgs.push_back(spg); | |
1309 | } | |
1310 | } | |
1311 | } | |
1312 | }); | |
31f18b77 FG |
1313 | auto p = osd_cons.find(osd); |
1314 | if (p == osd_cons.end()) { | |
1315 | failed_osds.insert(osd); | |
1316 | } else { | |
1317 | sent_osds.insert(osd); | |
1318 | for (auto& con : p->second) { | |
11fdf7f2 TL |
1319 | if (HAVE_FEATURE(con->get_features(), SERVER_MIMIC)) { |
1320 | con->send_message(new MOSDScrub2(monc->get_fsid(), | |
1321 | epoch, | |
1322 | spgs, | |
1323 | pvec.back() == "repair", | |
1324 | pvec.back() == "deep-scrub")); | |
1325 | } else { | |
1326 | con->send_message(new MOSDScrub(monc->get_fsid(), | |
1327 | pvec.back() == "repair", | |
1328 | pvec.back() == "deep-scrub")); | |
1329 | } | |
31f18b77 FG |
1330 | } |
1331 | } | |
1332 | } | |
1333 | if (failed_osds.size() == osds.size()) { | |
1334 | ss << "failed to instruct osd(s) " << osds << " to " << pvec.back() | |
1335 | << " (not connected)"; | |
1336 | r = -EAGAIN; | |
1337 | } else { | |
1338 | ss << "instructed osd(s) " << sent_osds << " to " << pvec.back(); | |
1339 | if (!failed_osds.empty()) { | |
1340 | ss << "; osd(s) " << failed_osds << " were not connected"; | |
1341 | } | |
1342 | r = 0; | |
1343 | } | |
7c673cae FG |
1344 | cmdctx->reply(0, ss); |
1345 | return true; | |
11fdf7f2 TL |
1346 | } else if (prefix == "osd pool scrub" || |
1347 | prefix == "osd pool deep-scrub" || | |
1348 | prefix == "osd pool repair") { | |
1349 | vector<string> pool_names; | |
9f95a23c | 1350 | cmd_getval(cmdctx->cmdmap, "who", pool_names); |
11fdf7f2 TL |
1351 | if (pool_names.empty()) { |
1352 | ss << "must specify one or more pool names"; | |
1353 | cmdctx->reply(-EINVAL, ss); | |
1354 | return true; | |
1355 | } | |
1356 | epoch_t epoch; | |
1357 | map<int32_t, vector<pg_t>> pgs_by_primary; // legacy | |
1358 | map<int32_t, vector<spg_t>> spgs_by_primary; | |
1359 | cluster_state.with_osdmap([&](const OSDMap& osdmap) { | |
1360 | epoch = osdmap.get_epoch(); | |
1361 | for (auto& pool_name : pool_names) { | |
1362 | auto pool_id = osdmap.lookup_pg_pool_name(pool_name); | |
1363 | if (pool_id < 0) { | |
1364 | ss << "unrecognized pool '" << pool_name << "'"; | |
1365 | r = -ENOENT; | |
1366 | return; | |
1367 | } | |
1368 | auto pool_pg_num = osdmap.get_pg_num(pool_id); | |
1369 | for (int i = 0; i < pool_pg_num; i++) { | |
1370 | pg_t pg(i, pool_id); | |
1371 | int primary; | |
1372 | spg_t spg; | |
1373 | auto got = osdmap.get_primary_shard(pg, &primary, &spg); | |
1374 | if (!got) | |
1375 | continue; | |
1376 | pgs_by_primary[primary].push_back(pg); | |
1377 | spgs_by_primary[primary].push_back(spg); | |
1378 | } | |
1379 | } | |
1380 | }); | |
1381 | if (r < 0) { | |
1382 | cmdctx->reply(r, ss); | |
1383 | return true; | |
1384 | } | |
1385 | for (auto& it : spgs_by_primary) { | |
1386 | auto primary = it.first; | |
1387 | auto p = osd_cons.find(primary); | |
1388 | if (p == osd_cons.end()) { | |
1389 | ss << "osd." << primary << " is not currently connected"; | |
1390 | cmdctx->reply(-EAGAIN, ss); | |
1391 | return true; | |
1392 | } | |
1393 | for (auto& con : p->second) { | |
1394 | if (HAVE_FEATURE(con->get_features(), SERVER_MIMIC)) { | |
1395 | con->send_message(new MOSDScrub2(monc->get_fsid(), | |
1396 | epoch, | |
1397 | it.second, | |
1398 | prefix == "osd pool repair", | |
1399 | prefix == "osd pool deep-scrub")); | |
1400 | } else { | |
1401 | // legacy | |
1402 | auto q = pgs_by_primary.find(primary); | |
1403 | ceph_assert(q != pgs_by_primary.end()); | |
1404 | con->send_message(new MOSDScrub(monc->get_fsid(), | |
1405 | q->second, | |
1406 | prefix == "osd pool repair", | |
1407 | prefix == "osd pool deep-scrub")); | |
1408 | } | |
1409 | } | |
1410 | } | |
1411 | cmdctx->reply(0, ""); | |
1412 | return true; | |
7c673cae FG |
1413 | } else if (prefix == "osd reweight-by-pg" || |
1414 | prefix == "osd reweight-by-utilization" || | |
1415 | prefix == "osd test-reweight-by-pg" || | |
1416 | prefix == "osd test-reweight-by-utilization") { | |
1417 | bool by_pg = | |
1418 | prefix == "osd reweight-by-pg" || prefix == "osd test-reweight-by-pg"; | |
1419 | bool dry_run = | |
1420 | prefix == "osd test-reweight-by-pg" || | |
1421 | prefix == "osd test-reweight-by-utilization"; | |
20effc67 | 1422 | int64_t oload = cmd_getval_or<int64_t>(cmdctx->cmdmap, "oload", 120); |
7c673cae FG |
1423 | set<int64_t> pools; |
1424 | vector<string> poolnames; | |
9f95a23c | 1425 | cmd_getval(cmdctx->cmdmap, "pools", poolnames); |
7c673cae FG |
1426 | cluster_state.with_osdmap([&](const OSDMap& osdmap) { |
1427 | for (const auto& poolname : poolnames) { | |
1428 | int64_t pool = osdmap.lookup_pg_pool_name(poolname); | |
1429 | if (pool < 0) { | |
1430 | ss << "pool '" << poolname << "' does not exist"; | |
1431 | r = -ENOENT; | |
1432 | } | |
1433 | pools.insert(pool); | |
1434 | } | |
1435 | }); | |
1436 | if (r) { | |
1437 | cmdctx->reply(r, ss); | |
1438 | return true; | |
1439 | } | |
11fdf7f2 TL |
1440 | |
1441 | double max_change = g_conf().get_val<double>("mon_reweight_max_change"); | |
9f95a23c | 1442 | cmd_getval(cmdctx->cmdmap, "max_change", max_change); |
7c673cae FG |
1443 | if (max_change <= 0.0) { |
1444 | ss << "max_change " << max_change << " must be positive"; | |
1445 | cmdctx->reply(-EINVAL, ss); | |
1446 | return true; | |
1447 | } | |
11fdf7f2 | 1448 | int64_t max_osds = g_conf().get_val<int64_t>("mon_reweight_max_osds"); |
9f95a23c | 1449 | cmd_getval(cmdctx->cmdmap, "max_osds", max_osds); |
7c673cae FG |
1450 | if (max_osds <= 0) { |
1451 | ss << "max_osds " << max_osds << " must be positive"; | |
1452 | cmdctx->reply(-EINVAL, ss); | |
1453 | return true; | |
1454 | } | |
11fdf7f2 | 1455 | bool no_increasing = false; |
20effc67 | 1456 | cmd_getval_compat_cephbool(cmdctx->cmdmap, "no_increasing", no_increasing); |
7c673cae FG |
1457 | string out_str; |
1458 | mempool::osdmap::map<int32_t, uint32_t> new_weights; | |
11fdf7f2 TL |
1459 | r = cluster_state.with_osdmap_and_pgmap([&](const OSDMap &osdmap, const PGMap& pgmap) { |
1460 | return reweight::by_utilization(osdmap, pgmap, | |
1461 | oload, | |
1462 | max_change, | |
1463 | max_osds, | |
1464 | by_pg, | |
1465 | pools.empty() ? NULL : &pools, | |
1466 | no_increasing, | |
1467 | &new_weights, | |
1468 | &ss, &out_str, f.get()); | |
7c673cae FG |
1469 | }); |
1470 | if (r >= 0) { | |
1471 | dout(10) << "reweight::by_utilization: finished with " << out_str << dendl; | |
1472 | } | |
1473 | if (f) { | |
1474 | f->flush(cmdctx->odata); | |
1475 | } else { | |
1476 | cmdctx->odata.append(out_str); | |
1477 | } | |
1478 | if (r < 0) { | |
1479 | ss << "FAILED reweight-by-pg"; | |
1480 | cmdctx->reply(r, ss); | |
1481 | return true; | |
1482 | } else if (r == 0 || dry_run) { | |
1483 | ss << "no change"; | |
1484 | cmdctx->reply(r, ss); | |
1485 | return true; | |
1486 | } else { | |
1487 | json_spirit::Object json_object; | |
1488 | for (const auto& osd_weight : new_weights) { | |
1489 | json_spirit::Config::add(json_object, | |
1490 | std::to_string(osd_weight.first), | |
1491 | std::to_string(osd_weight.second)); | |
1492 | } | |
1493 | string s = json_spirit::write(json_object); | |
1494 | std::replace(begin(s), end(s), '\"', '\''); | |
1495 | const string cmd = | |
1496 | "{" | |
1497 | "\"prefix\": \"osd reweightn\", " | |
1498 | "\"weights\": \"" + s + "\"" | |
1499 | "}"; | |
1500 | auto on_finish = new ReplyOnFinish(cmdctx); | |
1501 | monc->start_mon_command({cmd}, {}, | |
1502 | &on_finish->from_mon, &on_finish->outs, on_finish); | |
1503 | return true; | |
1504 | } | |
31f18b77 | 1505 | } else if (prefix == "osd df") { |
9f95a23c TL |
1506 | string method, filter; |
1507 | cmd_getval(cmdctx->cmdmap, "output_method", method); | |
1508 | cmd_getval(cmdctx->cmdmap, "filter", filter); | |
11fdf7f2 TL |
1509 | stringstream rs; |
1510 | r = cluster_state.with_osdmap_and_pgmap([&](const OSDMap& osdmap, const PGMap& pgmap) { | |
11fdf7f2 | 1511 | // sanity check filter(s) |
9f95a23c TL |
1512 | if (!filter.empty() && |
1513 | osdmap.lookup_pg_pool_name(filter) < 0 && | |
1514 | !osdmap.crush->class_exists(filter) && | |
1515 | !osdmap.crush->name_exists(filter)) { | |
1516 | rs << "'" << filter << "' not a pool, crush node or device class name"; | |
1517 | return -EINVAL; | |
11fdf7f2 TL |
1518 | } |
1519 | print_osd_utilization(osdmap, pgmap, ss, | |
9f95a23c | 1520 | f.get(), method == "tree", filter); |
11fdf7f2 TL |
1521 | cmdctx->odata.append(ss); |
1522 | return 0; | |
31f18b77 | 1523 | }); |
11fdf7f2 | 1524 | cmdctx->reply(r, rs); |
31f18b77 | 1525 | return true; |
11fdf7f2 TL |
1526 | } else if (prefix == "osd pool stats") { |
1527 | string pool_name; | |
9f95a23c | 1528 | cmd_getval(cmdctx->cmdmap, "pool_name", pool_name); |
11fdf7f2 TL |
1529 | int64_t poolid = -ENOENT; |
1530 | bool one_pool = false; | |
1531 | r = cluster_state.with_osdmap_and_pgmap([&](const OSDMap& osdmap, const PGMap& pg_map) { | |
1532 | if (!pool_name.empty()) { | |
1533 | poolid = osdmap.lookup_pg_pool_name(pool_name); | |
1534 | if (poolid < 0) { | |
1535 | ceph_assert(poolid == -ENOENT); | |
1536 | ss << "unrecognized pool '" << pool_name << "'"; | |
1537 | return -ENOENT; | |
1538 | } | |
1539 | one_pool = true; | |
1540 | } | |
1541 | stringstream rs; | |
1542 | if (f) | |
1543 | f->open_array_section("pool_stats"); | |
1544 | else { | |
1545 | if (osdmap.get_pools().empty()) { | |
1546 | ss << "there are no pools!"; | |
1547 | goto stats_out; | |
1548 | } | |
1549 | } | |
1550 | for (auto &p : osdmap.get_pools()) { | |
1551 | if (!one_pool) { | |
1552 | poolid = p.first; | |
1553 | } | |
1554 | pg_map.dump_pool_stats_and_io_rate(poolid, osdmap, f.get(), &rs); | |
1555 | if (one_pool) { | |
1556 | break; | |
1557 | } | |
1558 | } | |
1559 | stats_out: | |
1560 | if (f) { | |
1561 | f->close_section(); | |
1562 | f->flush(cmdctx->odata); | |
1563 | } else { | |
1564 | cmdctx->odata.append(rs.str()); | |
1565 | } | |
1566 | return 0; | |
35e4c445 | 1567 | }); |
11fdf7f2 TL |
1568 | if (r != -EOPNOTSUPP) { |
1569 | cmdctx->reply(r, ss); | |
1570 | return true; | |
1571 | } | |
1572 | } else if (prefix == "osd safe-to-destroy" || | |
1573 | prefix == "osd destroy" || | |
1574 | prefix == "osd purge") { | |
1575 | set<int> osds; | |
1576 | int r = 0; | |
1577 | if (prefix == "osd safe-to-destroy") { | |
1578 | vector<string> ids; | |
9f95a23c | 1579 | cmd_getval(cmdctx->cmdmap, "ids", ids); |
11fdf7f2 TL |
1580 | cluster_state.with_osdmap([&](const OSDMap& osdmap) { |
1581 | r = osdmap.parse_osd_id_list(ids, &osds, &ss); | |
1582 | }); | |
1583 | if (!r && osds.empty()) { | |
1584 | ss << "must specify one or more OSDs"; | |
1585 | r = -EINVAL; | |
1586 | } | |
1587 | } else { | |
1588 | int64_t id; | |
9f95a23c | 1589 | if (!cmd_getval(cmdctx->cmdmap, "id", id)) { |
11fdf7f2 TL |
1590 | r = -EINVAL; |
1591 | ss << "must specify OSD id"; | |
1592 | } else { | |
1593 | osds.insert(id); | |
1594 | } | |
35e4c445 FG |
1595 | } |
1596 | if (r < 0) { | |
1597 | cmdctx->reply(r, ss); | |
1598 | return true; | |
1599 | } | |
11fdf7f2 | 1600 | set<int> active_osds, missing_stats, stored_pgs, safe_to_destroy; |
35e4c445 | 1601 | int affected_pgs = 0; |
11fdf7f2 | 1602 | cluster_state.with_osdmap_and_pgmap([&](const OSDMap& osdmap, const PGMap& pg_map) { |
35e4c445 FG |
1603 | if (pg_map.num_pg_unknown > 0) { |
1604 | ss << pg_map.num_pg_unknown << " pgs have unknown state; cannot draw" | |
1605 | << " any conclusions"; | |
1606 | r = -EAGAIN; | |
1607 | return; | |
1608 | } | |
1609 | int num_active_clean = 0; | |
1610 | for (auto& p : pg_map.num_pg_by_state) { | |
1611 | unsigned want = PG_STATE_ACTIVE|PG_STATE_CLEAN; | |
1612 | if ((p.first & want) == want) { | |
1613 | num_active_clean += p.second; | |
1614 | } | |
1615 | } | |
11fdf7f2 TL |
1616 | for (auto osd : osds) { |
1617 | if (!osdmap.exists(osd)) { | |
1618 | safe_to_destroy.insert(osd); | |
1619 | continue; // clearly safe to destroy | |
1620 | } | |
1621 | auto q = pg_map.num_pg_by_osd.find(osd); | |
1622 | if (q != pg_map.num_pg_by_osd.end()) { | |
81eedcae | 1623 | if (q->second.acting > 0 || q->second.up_not_acting > 0) { |
11fdf7f2 | 1624 | active_osds.insert(osd); |
81eedcae TL |
1625 | // XXX: For overlapping PGs, this counts them again |
1626 | affected_pgs += q->second.acting + q->second.up_not_acting; | |
11fdf7f2 | 1627 | continue; |
35e4c445 | 1628 | } |
11fdf7f2 TL |
1629 | } |
1630 | if (num_active_clean < pg_map.num_pg) { | |
1631 | // all pgs aren't active+clean; we need to be careful. | |
1632 | auto p = pg_map.osd_stat.find(osd); | |
81eedcae | 1633 | if (p == pg_map.osd_stat.end() || !osdmap.is_up(osd)) { |
11fdf7f2 TL |
1634 | missing_stats.insert(osd); |
1635 | continue; | |
1636 | } else if (p->second.num_pgs > 0) { | |
1637 | stored_pgs.insert(osd); | |
1638 | continue; | |
1639 | } | |
1640 | } | |
1641 | safe_to_destroy.insert(osd); | |
1642 | } | |
35e4c445 | 1643 | }); |
11fdf7f2 TL |
1644 | if (r && prefix == "osd safe-to-destroy") { |
1645 | cmdctx->reply(r, ss); // regardless of formatter | |
1646 | return true; | |
1647 | } | |
1648 | if (!r && (!active_osds.empty() || | |
1649 | !missing_stats.empty() || !stored_pgs.empty())) { | |
1650 | if (!safe_to_destroy.empty()) { | |
1651 | ss << "OSD(s) " << safe_to_destroy | |
1652 | << " are safe to destroy without reducing data durability. "; | |
1653 | } | |
1654 | if (!active_osds.empty()) { | |
1655 | ss << "OSD(s) " << active_osds << " have " << affected_pgs | |
1656 | << " pgs currently mapped to them. "; | |
1657 | } | |
1658 | if (!missing_stats.empty()) { | |
1659 | ss << "OSD(s) " << missing_stats << " have no reported stats, and not all" | |
1660 | << " PGs are active+clean; we cannot draw any conclusions. "; | |
1661 | } | |
1662 | if (!stored_pgs.empty()) { | |
1663 | ss << "OSD(s) " << stored_pgs << " last reported they still store some PG" | |
1664 | << " data, and not all PGs are active+clean; we cannot be sure they" | |
1665 | << " aren't still needed."; | |
1666 | } | |
1667 | if (!active_osds.empty() || !stored_pgs.empty()) { | |
1668 | r = -EBUSY; | |
1669 | } else { | |
1670 | r = -EAGAIN; | |
1671 | } | |
1672 | } | |
1673 | ||
1674 | if (prefix == "osd safe-to-destroy") { | |
1675 | if (!r) { | |
1676 | ss << "OSD(s) " << osds << " are safe to destroy without reducing data" | |
1677 | << " durability."; | |
11fdf7f2 TL |
1678 | } |
1679 | if (f) { | |
1680 | f->open_object_section("osd_status"); | |
1681 | f->open_array_section("safe_to_destroy"); | |
1682 | for (auto i : safe_to_destroy) | |
1683 | f->dump_int("osd", i); | |
1684 | f->close_section(); | |
1685 | f->open_array_section("active"); | |
1686 | for (auto i : active_osds) | |
1687 | f->dump_int("osd", i); | |
1688 | f->close_section(); | |
1689 | f->open_array_section("missing_stats"); | |
1690 | for (auto i : missing_stats) | |
1691 | f->dump_int("osd", i); | |
1692 | f->close_section(); | |
1693 | f->open_array_section("stored_pgs"); | |
1694 | for (auto i : stored_pgs) | |
1695 | f->dump_int("osd", i); | |
1696 | f->close_section(); | |
1697 | f->close_section(); // osd_status | |
1698 | f->flush(cmdctx->odata); | |
1699 | r = 0; | |
1700 | std::stringstream().swap(ss); | |
1701 | } | |
1702 | cmdctx->reply(r, ss); | |
1703 | return true; | |
1704 | } | |
1705 | ||
1706 | if (r) { | |
1707 | bool force = false; | |
9f95a23c | 1708 | cmd_getval(cmdctx->cmdmap, "force", force); |
11fdf7f2 TL |
1709 | if (!force) { |
1710 | // Backward compat | |
9f95a23c | 1711 | cmd_getval(cmdctx->cmdmap, "yes_i_really_mean_it", force); |
11fdf7f2 TL |
1712 | } |
1713 | if (!force) { | |
1714 | ss << "\nYou can proceed by passing --force, but be warned that" | |
1715 | " this will likely mean real, permanent data loss."; | |
1716 | } else { | |
1717 | r = 0; | |
1718 | } | |
35e4c445 FG |
1719 | } |
1720 | if (r) { | |
1721 | cmdctx->reply(r, ss); | |
1722 | return true; | |
1723 | } | |
11fdf7f2 TL |
1724 | const string cmd = |
1725 | "{" | |
1726 | "\"prefix\": \"" + prefix + "-actual\", " | |
1727 | "\"id\": " + stringify(osds) + ", " | |
1728 | "\"yes_i_really_mean_it\": true" | |
1729 | "}"; | |
1730 | auto on_finish = new ReplyOnFinish(cmdctx); | |
1731 | monc->start_mon_command({cmd}, {}, nullptr, &on_finish->outs, on_finish); | |
35e4c445 FG |
1732 | return true; |
1733 | } else if (prefix == "osd ok-to-stop") { | |
1734 | vector<string> ids; | |
9f95a23c | 1735 | cmd_getval(cmdctx->cmdmap, "ids", ids); |
35e4c445 | 1736 | set<int> osds; |
f67539c2 TL |
1737 | int64_t max = 1; |
1738 | cmd_getval(cmdctx->cmdmap, "max", max); | |
35e4c445 FG |
1739 | int r; |
1740 | cluster_state.with_osdmap([&](const OSDMap& osdmap) { | |
1741 | r = osdmap.parse_osd_id_list(ids, &osds, &ss); | |
1742 | }); | |
1743 | if (!r && osds.empty()) { | |
1744 | ss << "must specify one or more OSDs"; | |
1745 | r = -EINVAL; | |
1746 | } | |
f67539c2 TL |
1747 | if (max < (int)osds.size()) { |
1748 | max = osds.size(); | |
1749 | } | |
35e4c445 FG |
1750 | if (r < 0) { |
1751 | cmdctx->reply(r, ss); | |
1752 | return true; | |
1753 | } | |
f67539c2 | 1754 | offline_pg_report out_report; |
11fdf7f2 | 1755 | cluster_state.with_osdmap_and_pgmap([&](const OSDMap& osdmap, const PGMap& pg_map) { |
f67539c2 TL |
1756 | _maximize_ok_to_stop_set( |
1757 | osds, max, osdmap, pg_map, | |
1758 | &out_report); | |
35e4c445 | 1759 | }); |
f67539c2 TL |
1760 | if (!f) { |
1761 | f.reset(Formatter::create("json")); | |
35e4c445 | 1762 | } |
f67539c2 TL |
1763 | f->dump_object("ok_to_stop", out_report); |
1764 | f->flush(cmdctx->odata); | |
1765 | cmdctx->odata.append("\n"); | |
1766 | if (!out_report.unknown.empty()) { | |
1767 | ss << out_report.unknown.size() << " pgs have unknown state; " | |
1768 | << "cannot draw any conclusions"; | |
1769 | cmdctx->reply(-EAGAIN, ss); | |
1770 | } | |
1771 | if (!out_report.ok_to_stop()) { | |
1772 | ss << "unsafe to stop osd(s) at this time (" << out_report.not_ok.size() << " PGs are or would become offline)"; | |
35e4c445 | 1773 | cmdctx->reply(-EBUSY, ss); |
f67539c2 TL |
1774 | } else { |
1775 | cmdctx->reply(0, ss); | |
35e4c445 | 1776 | } |
35e4c445 | 1777 | return true; |
c07f9fc5 | 1778 | } else if (prefix == "pg force-recovery" || |
11fdf7f2 TL |
1779 | prefix == "pg force-backfill" || |
1780 | prefix == "pg cancel-force-recovery" || | |
1781 | prefix == "pg cancel-force-backfill" || | |
1782 | prefix == "osd pool force-recovery" || | |
1783 | prefix == "osd pool force-backfill" || | |
1784 | prefix == "osd pool cancel-force-recovery" || | |
1785 | prefix == "osd pool cancel-force-backfill") { | |
1786 | vector<string> vs; | |
1787 | get_str_vec(prefix, vs); | |
1788 | auto& granularity = vs.front(); | |
1789 | auto& forceop = vs.back(); | |
1790 | vector<pg_t> pgs; | |
c07f9fc5 FG |
1791 | |
1792 | // figure out actual op just once | |
1793 | int actual_op = 0; | |
1794 | if (forceop == "force-recovery") { | |
1795 | actual_op = OFR_RECOVERY; | |
1796 | } else if (forceop == "force-backfill") { | |
1797 | actual_op = OFR_BACKFILL; | |
1798 | } else if (forceop == "cancel-force-backfill") { | |
1799 | actual_op = OFR_BACKFILL | OFR_CANCEL; | |
1800 | } else if (forceop == "cancel-force-recovery") { | |
1801 | actual_op = OFR_RECOVERY | OFR_CANCEL; | |
1802 | } | |
1803 | ||
11fdf7f2 TL |
1804 | set<pg_t> candidates; // deduped |
1805 | if (granularity == "pg") { | |
1806 | // convert pg names to pgs, discard any invalid ones while at it | |
1807 | vector<string> pgids; | |
9f95a23c | 1808 | cmd_getval(cmdctx->cmdmap, "pgid", pgids); |
11fdf7f2 TL |
1809 | for (auto& i : pgids) { |
1810 | pg_t pgid; | |
1811 | if (!pgid.parse(i.c_str())) { | |
1812 | ss << "invlaid pgid '" << i << "'; "; | |
1813 | r = -EINVAL; | |
1814 | continue; | |
1815 | } | |
1816 | candidates.insert(pgid); | |
c07f9fc5 | 1817 | } |
11fdf7f2 TL |
1818 | } else { |
1819 | // per pool | |
1820 | vector<string> pool_names; | |
9f95a23c | 1821 | cmd_getval(cmdctx->cmdmap, "who", pool_names); |
11fdf7f2 TL |
1822 | if (pool_names.empty()) { |
1823 | ss << "must specify one or more pool names"; | |
1824 | cmdctx->reply(-EINVAL, ss); | |
1825 | return true; | |
1826 | } | |
1827 | cluster_state.with_osdmap([&](const OSDMap& osdmap) { | |
1828 | for (auto& pool_name : pool_names) { | |
1829 | auto pool_id = osdmap.lookup_pg_pool_name(pool_name); | |
1830 | if (pool_id < 0) { | |
1831 | ss << "unrecognized pool '" << pool_name << "'"; | |
1832 | r = -ENOENT; | |
1833 | return; | |
1834 | } | |
1835 | auto pool_pg_num = osdmap.get_pg_num(pool_id); | |
1836 | for (int i = 0; i < pool_pg_num; i++) | |
1837 | candidates.insert({(unsigned int)i, (uint64_t)pool_id}); | |
1838 | } | |
c07f9fc5 | 1839 | }); |
11fdf7f2 TL |
1840 | if (r < 0) { |
1841 | cmdctx->reply(r, ss); | |
1842 | return true; | |
1843 | } | |
c07f9fc5 FG |
1844 | } |
1845 | ||
11fdf7f2 TL |
1846 | cluster_state.with_pgmap([&](const PGMap& pg_map) { |
1847 | for (auto& i : candidates) { | |
1848 | auto it = pg_map.pg_stat.find(i); | |
1849 | if (it == pg_map.pg_stat.end()) { | |
1850 | ss << "pg " << i << " does not exist; "; | |
1851 | r = -ENOENT; | |
1852 | continue; | |
1853 | } | |
1854 | auto state = it->second.state; | |
1855 | // discard pgs for which user requests are pointless | |
1856 | switch (actual_op) { | |
1857 | case OFR_RECOVERY: | |
1858 | if ((state & (PG_STATE_DEGRADED | | |
1859 | PG_STATE_RECOVERY_WAIT | | |
1860 | PG_STATE_RECOVERING)) == 0) { | |
1861 | // don't return error, user script may be racing with cluster. | |
1862 | // not fatal. | |
1863 | ss << "pg " << i << " doesn't require recovery; "; | |
1864 | continue; | |
1865 | } else if (state & PG_STATE_FORCED_RECOVERY) { | |
1866 | ss << "pg " << i << " recovery already forced; "; | |
1867 | // return error, as it may be a bug in user script | |
1868 | r = -EINVAL; | |
1869 | continue; | |
1870 | } | |
1871 | break; | |
1872 | case OFR_BACKFILL: | |
1873 | if ((state & (PG_STATE_DEGRADED | | |
1874 | PG_STATE_BACKFILL_WAIT | | |
1875 | PG_STATE_BACKFILLING)) == 0) { | |
1876 | ss << "pg " << i << " doesn't require backfilling; "; | |
1877 | continue; | |
1878 | } else if (state & PG_STATE_FORCED_BACKFILL) { | |
1879 | ss << "pg " << i << " backfill already forced; "; | |
1880 | r = -EINVAL; | |
1881 | continue; | |
1882 | } | |
1883 | break; | |
1884 | case OFR_BACKFILL | OFR_CANCEL: | |
1885 | if ((state & PG_STATE_FORCED_BACKFILL) == 0) { | |
1886 | ss << "pg " << i << " backfill not forced; "; | |
1887 | continue; | |
1888 | } | |
1889 | break; | |
1890 | case OFR_RECOVERY | OFR_CANCEL: | |
1891 | if ((state & PG_STATE_FORCED_RECOVERY) == 0) { | |
1892 | ss << "pg " << i << " recovery not forced; "; | |
1893 | continue; | |
1894 | } | |
1895 | break; | |
1896 | default: | |
1897 | ceph_abort_msg("actual_op value is not supported"); | |
1898 | } | |
1899 | pgs.push_back(i); | |
1900 | } // for | |
1901 | }); | |
1902 | ||
c07f9fc5 FG |
1903 | // respond with error only when no pgs are correct |
1904 | // yes, in case of mixed errors, only the last one will be emitted, | |
1905 | // but the message presented will be fine | |
11fdf7f2 | 1906 | if (pgs.size() != 0) { |
c07f9fc5 FG |
1907 | // clear error to not confuse users/scripts |
1908 | r = 0; | |
1909 | } | |
1910 | ||
11fdf7f2 TL |
1911 | // optimize the command -> messages conversion, use only one |
1912 | // message per distinct OSD | |
c07f9fc5 | 1913 | cluster_state.with_osdmap([&](const OSDMap& osdmap) { |
11fdf7f2 TL |
1914 | // group pgs to process by osd |
1915 | map<int, vector<spg_t>> osdpgs; | |
1916 | for (auto& pgid : pgs) { | |
1917 | int primary; | |
1918 | spg_t spg; | |
1919 | if (osdmap.get_primary_shard(pgid, &primary, &spg)) { | |
1920 | osdpgs[primary].push_back(spg); | |
1921 | } | |
1922 | } | |
1923 | for (auto& i : osdpgs) { | |
1924 | if (osdmap.is_up(i.first)) { | |
1925 | auto p = osd_cons.find(i.first); | |
1926 | if (p == osd_cons.end()) { | |
1927 | ss << "osd." << i.first << " is not currently connected"; | |
1928 | r = -EAGAIN; | |
1929 | continue; | |
1930 | } | |
1931 | for (auto& con : p->second) { | |
1932 | con->send_message( | |
1933 | new MOSDForceRecovery(monc->get_fsid(), i.second, actual_op)); | |
1934 | } | |
1935 | ss << "instructing pg(s) " << i.second << " on osd." << i.first | |
1936 | << " to " << forceop << "; "; | |
1937 | } | |
1938 | } | |
1939 | }); | |
1940 | ss << std::endl; | |
1941 | cmdctx->reply(r, ss); | |
1942 | return true; | |
1943 | } else if (prefix == "config show" || | |
1944 | prefix == "config show-with-defaults") { | |
1945 | string who; | |
9f95a23c TL |
1946 | cmd_getval(cmdctx->cmdmap, "who", who); |
1947 | auto [key, valid] = DaemonKey::parse(who); | |
1948 | if (!valid) { | |
1949 | ss << "invalid daemon name: use <type>.<id>"; | |
1950 | cmdctx->reply(-EINVAL, ss); | |
1951 | return true; | |
1952 | } | |
11fdf7f2 | 1953 | DaemonStatePtr daemon = daemon_state.get(key); |
11fdf7f2 TL |
1954 | if (!daemon) { |
1955 | ss << "no config state for daemon " << who; | |
1956 | cmdctx->reply(-ENOENT, ss); | |
1957 | return true; | |
1958 | } | |
1959 | ||
1960 | std::lock_guard l(daemon->lock); | |
1961 | ||
9f95a23c TL |
1962 | int r = 0; |
1963 | string name; | |
1964 | if (cmd_getval(cmdctx->cmdmap, "key", name)) { | |
f6b5b4d7 TL |
1965 | // handle special options |
1966 | if (name == "fsid") { | |
1967 | cmdctx->odata.append(stringify(monc->get_fsid()) + "\n"); | |
1968 | cmdctx->reply(r, ss); | |
1969 | return true; | |
1970 | } | |
11fdf7f2 TL |
1971 | auto p = daemon->config.find(name); |
1972 | if (p != daemon->config.end() && | |
1973 | !p->second.empty()) { | |
1974 | cmdctx->odata.append(p->second.rbegin()->second + "\n"); | |
1975 | } else { | |
1976 | auto& defaults = daemon->_get_config_defaults(); | |
1977 | auto q = defaults.find(name); | |
1978 | if (q != defaults.end()) { | |
1979 | cmdctx->odata.append(q->second + "\n"); | |
1980 | } else { | |
1981 | r = -ENOENT; | |
1982 | } | |
1983 | } | |
1984 | } else if (daemon->config_defaults_bl.length() > 0) { | |
1985 | TextTable tbl; | |
1986 | if (f) { | |
1987 | f->open_array_section("config"); | |
1988 | } else { | |
1989 | tbl.define_column("NAME", TextTable::LEFT, TextTable::LEFT); | |
1990 | tbl.define_column("VALUE", TextTable::LEFT, TextTable::LEFT); | |
1991 | tbl.define_column("SOURCE", TextTable::LEFT, TextTable::LEFT); | |
1992 | tbl.define_column("OVERRIDES", TextTable::LEFT, TextTable::LEFT); | |
1993 | tbl.define_column("IGNORES", TextTable::LEFT, TextTable::LEFT); | |
1994 | } | |
1995 | if (prefix == "config show") { | |
1996 | // show | |
1997 | for (auto& i : daemon->config) { | |
1998 | dout(20) << " " << i.first << " -> " << i.second << dendl; | |
1999 | if (i.second.empty()) { | |
c07f9fc5 FG |
2000 | continue; |
2001 | } | |
11fdf7f2 TL |
2002 | if (f) { |
2003 | f->open_object_section("value"); | |
2004 | f->dump_string("name", i.first); | |
2005 | f->dump_string("value", i.second.rbegin()->second); | |
2006 | f->dump_string("source", ceph_conf_level_name( | |
2007 | i.second.rbegin()->first)); | |
2008 | if (i.second.size() > 1) { | |
2009 | f->open_array_section("overrides"); | |
2010 | auto j = i.second.rend(); | |
2011 | for (--j; j != i.second.rbegin(); --j) { | |
2012 | f->open_object_section("value"); | |
2013 | f->dump_string("source", ceph_conf_level_name(j->first)); | |
2014 | f->dump_string("value", j->second); | |
2015 | f->close_section(); | |
2016 | } | |
2017 | f->close_section(); | |
2018 | } | |
2019 | if (daemon->ignored_mon_config.count(i.first)) { | |
2020 | f->dump_string("ignores", "mon"); | |
2021 | } | |
2022 | f->close_section(); | |
2023 | } else { | |
2024 | tbl << i.first; | |
2025 | tbl << i.second.rbegin()->second; | |
2026 | tbl << ceph_conf_level_name(i.second.rbegin()->first); | |
2027 | if (i.second.size() > 1) { | |
2028 | list<string> ov; | |
2029 | auto j = i.second.rend(); | |
2030 | for (--j; j != i.second.rbegin(); --j) { | |
2031 | if (j->second == i.second.rbegin()->second) { | |
2032 | ov.push_front(string("(") + ceph_conf_level_name(j->first) + | |
2033 | string("[") + j->second + string("]") + | |
2034 | string(")")); | |
2035 | } else { | |
2036 | ov.push_front(ceph_conf_level_name(j->first) + | |
2037 | string("[") + j->second + string("]")); | |
2038 | ||
2039 | } | |
2040 | } | |
2041 | tbl << ov; | |
2042 | } else { | |
2043 | tbl << ""; | |
2044 | } | |
2045 | tbl << (daemon->ignored_mon_config.count(i.first) ? "mon" : ""); | |
2046 | tbl << TextTable::endrow; | |
2047 | } | |
2048 | } | |
2049 | } else { | |
2050 | // show-with-defaults | |
2051 | auto& defaults = daemon->_get_config_defaults(); | |
2052 | for (auto& i : defaults) { | |
2053 | if (f) { | |
2054 | f->open_object_section("value"); | |
2055 | f->dump_string("name", i.first); | |
2056 | } else { | |
2057 | tbl << i.first; | |
2058 | } | |
2059 | auto j = daemon->config.find(i.first); | |
2060 | if (j != daemon->config.end() && !j->second.empty()) { | |
2061 | // have config | |
2062 | if (f) { | |
2063 | f->dump_string("value", j->second.rbegin()->second); | |
2064 | f->dump_string("source", ceph_conf_level_name( | |
2065 | j->second.rbegin()->first)); | |
2066 | if (j->second.size() > 1) { | |
2067 | f->open_array_section("overrides"); | |
2068 | auto k = j->second.rend(); | |
2069 | for (--k; k != j->second.rbegin(); --k) { | |
2070 | f->open_object_section("value"); | |
2071 | f->dump_string("source", ceph_conf_level_name(k->first)); | |
2072 | f->dump_string("value", k->second); | |
2073 | f->close_section(); | |
2074 | } | |
2075 | f->close_section(); | |
2076 | } | |
2077 | if (daemon->ignored_mon_config.count(i.first)) { | |
2078 | f->dump_string("ignores", "mon"); | |
2079 | } | |
2080 | f->close_section(); | |
2081 | } else { | |
2082 | tbl << j->second.rbegin()->second; | |
2083 | tbl << ceph_conf_level_name(j->second.rbegin()->first); | |
2084 | if (j->second.size() > 1) { | |
2085 | list<string> ov; | |
2086 | auto k = j->second.rend(); | |
2087 | for (--k; k != j->second.rbegin(); --k) { | |
2088 | if (k->second == j->second.rbegin()->second) { | |
2089 | ov.push_front(string("(") + ceph_conf_level_name(k->first) + | |
2090 | string("[") + k->second + string("]") + | |
2091 | string(")")); | |
2092 | } else { | |
2093 | ov.push_front(ceph_conf_level_name(k->first) + | |
2094 | string("[") + k->second + string("]")); | |
2095 | } | |
2096 | } | |
2097 | tbl << ov; | |
2098 | } else { | |
2099 | tbl << ""; | |
2100 | } | |
2101 | tbl << (daemon->ignored_mon_config.count(i.first) ? "mon" : ""); | |
2102 | tbl << TextTable::endrow; | |
2103 | } | |
2104 | } else { | |
2105 | // only have default | |
2106 | if (f) { | |
2107 | f->dump_string("value", i.second); | |
2108 | f->dump_string("source", ceph_conf_level_name(CONF_DEFAULT)); | |
2109 | f->close_section(); | |
2110 | } else { | |
2111 | tbl << i.second; | |
2112 | tbl << ceph_conf_level_name(CONF_DEFAULT); | |
2113 | tbl << ""; | |
2114 | tbl << ""; | |
2115 | tbl << TextTable::endrow; | |
2116 | } | |
c07f9fc5 | 2117 | } |
c07f9fc5 FG |
2118 | } |
2119 | } | |
11fdf7f2 TL |
2120 | if (f) { |
2121 | f->close_section(); | |
2122 | f->flush(cmdctx->odata); | |
2123 | } else { | |
2124 | cmdctx->odata.append(stringify(tbl)); | |
2125 | } | |
2126 | } | |
c07f9fc5 FG |
2127 | cmdctx->reply(r, ss); |
2128 | return true; | |
11fdf7f2 TL |
2129 | } else if (prefix == "device ls") { |
2130 | set<string> devids; | |
2131 | TextTable tbl; | |
2132 | if (f) { | |
2133 | f->open_array_section("devices"); | |
2134 | daemon_state.with_devices([&f](const DeviceState& dev) { | |
2135 | f->dump_object("device", dev); | |
2136 | }); | |
2137 | f->close_section(); | |
2138 | f->flush(cmdctx->odata); | |
2139 | } else { | |
2140 | tbl.define_column("DEVICE", TextTable::LEFT, TextTable::LEFT); | |
2141 | tbl.define_column("HOST:DEV", TextTable::LEFT, TextTable::LEFT); | |
2142 | tbl.define_column("DAEMONS", TextTable::LEFT, TextTable::LEFT); | |
f67539c2 | 2143 | tbl.define_column("WEAR", TextTable::RIGHT, TextTable::RIGHT); |
11fdf7f2 TL |
2144 | tbl.define_column("LIFE EXPECTANCY", TextTable::LEFT, TextTable::LEFT); |
2145 | auto now = ceph_clock_now(); | |
2146 | daemon_state.with_devices([&tbl, now](const DeviceState& dev) { | |
2147 | string h; | |
9f95a23c | 2148 | for (auto& i : dev.attachments) { |
11fdf7f2 TL |
2149 | if (h.size()) { |
2150 | h += " "; | |
2151 | } | |
9f95a23c | 2152 | h += std::get<0>(i) + ":" + std::get<1>(i); |
11fdf7f2 TL |
2153 | } |
2154 | string d; | |
2155 | for (auto& i : dev.daemons) { | |
2156 | if (d.size()) { | |
2157 | d += " "; | |
2158 | } | |
2159 | d += to_string(i); | |
2160 | } | |
f67539c2 TL |
2161 | char wear_level_str[16] = {0}; |
2162 | if (dev.wear_level >= 0) { | |
2163 | snprintf(wear_level_str, sizeof(wear_level_str)-1, "%d%%", | |
2164 | (int)(100.1 * dev.wear_level)); | |
2165 | } | |
11fdf7f2 TL |
2166 | tbl << dev.devid |
2167 | << h | |
2168 | << d | |
f67539c2 | 2169 | << wear_level_str |
11fdf7f2 TL |
2170 | << dev.get_life_expectancy_str(now) |
2171 | << TextTable::endrow; | |
2172 | }); | |
2173 | cmdctx->odata.append(stringify(tbl)); | |
2174 | } | |
2175 | cmdctx->reply(0, ss); | |
2176 | return true; | |
2177 | } else if (prefix == "device ls-by-daemon") { | |
2178 | string who; | |
9f95a23c TL |
2179 | cmd_getval(cmdctx->cmdmap, "who", who); |
2180 | if (auto [k, valid] = DaemonKey::parse(who); !valid) { | |
11fdf7f2 TL |
2181 | ss << who << " is not a valid daemon name"; |
2182 | r = -EINVAL; | |
2183 | } else { | |
2184 | auto dm = daemon_state.get(k); | |
2185 | if (dm) { | |
2186 | if (f) { | |
2187 | f->open_array_section("devices"); | |
2188 | for (auto& i : dm->devices) { | |
2189 | daemon_state.with_device(i.first, [&f] (const DeviceState& dev) { | |
2190 | f->dump_object("device", dev); | |
2191 | }); | |
2192 | } | |
2193 | f->close_section(); | |
2194 | f->flush(cmdctx->odata); | |
2195 | } else { | |
2196 | TextTable tbl; | |
2197 | tbl.define_column("DEVICE", TextTable::LEFT, TextTable::LEFT); | |
2198 | tbl.define_column("HOST:DEV", TextTable::LEFT, TextTable::LEFT); | |
2199 | tbl.define_column("EXPECTED FAILURE", TextTable::LEFT, | |
2200 | TextTable::LEFT); | |
2201 | auto now = ceph_clock_now(); | |
2202 | for (auto& i : dm->devices) { | |
2203 | daemon_state.with_device( | |
2204 | i.first, [&tbl, now] (const DeviceState& dev) { | |
2205 | string h; | |
9f95a23c | 2206 | for (auto& i : dev.attachments) { |
11fdf7f2 TL |
2207 | if (h.size()) { |
2208 | h += " "; | |
2209 | } | |
9f95a23c | 2210 | h += std::get<0>(i) + ":" + std::get<1>(i); |
11fdf7f2 TL |
2211 | } |
2212 | tbl << dev.devid | |
2213 | << h | |
2214 | << dev.get_life_expectancy_str(now) | |
2215 | << TextTable::endrow; | |
2216 | }); | |
2217 | } | |
2218 | cmdctx->odata.append(stringify(tbl)); | |
2219 | } | |
2220 | } else { | |
2221 | r = -ENOENT; | |
2222 | ss << "daemon " << who << " not found"; | |
2223 | } | |
2224 | cmdctx->reply(r, ss); | |
2225 | } | |
2226 | } else if (prefix == "device ls-by-host") { | |
2227 | string host; | |
9f95a23c | 2228 | cmd_getval(cmdctx->cmdmap, "host", host); |
11fdf7f2 TL |
2229 | set<string> devids; |
2230 | daemon_state.list_devids_by_server(host, &devids); | |
2231 | if (f) { | |
2232 | f->open_array_section("devices"); | |
2233 | for (auto& devid : devids) { | |
2234 | daemon_state.with_device( | |
2235 | devid, [&f] (const DeviceState& dev) { | |
2236 | f->dump_object("device", dev); | |
7c673cae | 2237 | }); |
11fdf7f2 TL |
2238 | } |
2239 | f->close_section(); | |
2240 | f->flush(cmdctx->odata); | |
2241 | } else { | |
2242 | TextTable tbl; | |
2243 | tbl.define_column("DEVICE", TextTable::LEFT, TextTable::LEFT); | |
2244 | tbl.define_column("DEV", TextTable::LEFT, TextTable::LEFT); | |
2245 | tbl.define_column("DAEMONS", TextTable::LEFT, TextTable::LEFT); | |
2246 | tbl.define_column("EXPECTED FAILURE", TextTable::LEFT, TextTable::LEFT); | |
2247 | auto now = ceph_clock_now(); | |
2248 | for (auto& devid : devids) { | |
2249 | daemon_state.with_device( | |
2250 | devid, [&tbl, &host, now] (const DeviceState& dev) { | |
2251 | string n; | |
9f95a23c TL |
2252 | for (auto& j : dev.attachments) { |
2253 | if (std::get<0>(j) == host) { | |
11fdf7f2 TL |
2254 | if (n.size()) { |
2255 | n += " "; | |
2256 | } | |
9f95a23c | 2257 | n += std::get<1>(j); |
11fdf7f2 TL |
2258 | } |
2259 | } | |
2260 | string d; | |
2261 | for (auto& i : dev.daemons) { | |
2262 | if (d.size()) { | |
2263 | d += " "; | |
2264 | } | |
2265 | d += to_string(i); | |
2266 | } | |
2267 | tbl << dev.devid | |
2268 | << n | |
2269 | << d | |
2270 | << dev.get_life_expectancy_str(now) | |
2271 | << TextTable::endrow; | |
2272 | }); | |
2273 | } | |
2274 | cmdctx->odata.append(stringify(tbl)); | |
2275 | } | |
2276 | cmdctx->reply(0, ss); | |
2277 | return true; | |
2278 | } else if (prefix == "device info") { | |
2279 | string devid; | |
9f95a23c | 2280 | cmd_getval(cmdctx->cmdmap, "devid", devid); |
11fdf7f2 TL |
2281 | int r = 0; |
2282 | ostringstream rs; | |
2283 | if (!daemon_state.with_device(devid, | |
2284 | [&f, &rs] (const DeviceState& dev) { | |
2285 | if (f) { | |
2286 | f->dump_object("device", dev); | |
2287 | } else { | |
2288 | dev.print(rs); | |
2289 | } | |
2290 | })) { | |
2291 | ss << "device " << devid << " not found"; | |
2292 | r = -ENOENT; | |
2293 | } else { | |
2294 | if (f) { | |
2295 | f->flush(cmdctx->odata); | |
2296 | } else { | |
2297 | cmdctx->odata.append(rs.str()); | |
2298 | } | |
2299 | } | |
2300 | cmdctx->reply(r, ss); | |
2301 | return true; | |
2302 | } else if (prefix == "device set-life-expectancy") { | |
2303 | string devid; | |
9f95a23c | 2304 | cmd_getval(cmdctx->cmdmap, "devid", devid); |
11fdf7f2 | 2305 | string from_str, to_str; |
9f95a23c TL |
2306 | cmd_getval(cmdctx->cmdmap, "from", from_str); |
2307 | cmd_getval(cmdctx->cmdmap, "to", to_str); | |
11fdf7f2 TL |
2308 | utime_t from, to; |
2309 | if (!from.parse(from_str)) { | |
2310 | ss << "unable to parse datetime '" << from_str << "'"; | |
2311 | r = -EINVAL; | |
2312 | cmdctx->reply(r, ss); | |
2313 | } else if (to_str.size() && !to.parse(to_str)) { | |
2314 | ss << "unable to parse datetime '" << to_str << "'"; | |
2315 | r = -EINVAL; | |
2316 | cmdctx->reply(r, ss); | |
2317 | } else { | |
2318 | map<string,string> meta; | |
2319 | daemon_state.with_device_create( | |
2320 | devid, | |
2321 | [from, to, &meta] (DeviceState& dev) { | |
2322 | dev.set_life_expectancy(from, to, ceph_clock_now()); | |
2323 | meta = dev.metadata; | |
2324 | }); | |
2325 | json_spirit::Object json_object; | |
2326 | for (auto& i : meta) { | |
2327 | json_spirit::Config::add(json_object, i.first, i.second); | |
2328 | } | |
2329 | bufferlist json; | |
2330 | json.append(json_spirit::write(json_object)); | |
2331 | const string cmd = | |
2332 | "{" | |
2333 | "\"prefix\": \"config-key set\", " | |
2334 | "\"key\": \"device/" + devid + "\"" | |
2335 | "}"; | |
2336 | auto on_finish = new ReplyOnFinish(cmdctx); | |
2337 | monc->start_mon_command({cmd}, json, nullptr, nullptr, on_finish); | |
2338 | } | |
2339 | return true; | |
2340 | } else if (prefix == "device rm-life-expectancy") { | |
2341 | string devid; | |
9f95a23c | 2342 | cmd_getval(cmdctx->cmdmap, "devid", devid); |
11fdf7f2 TL |
2343 | map<string,string> meta; |
2344 | if (daemon_state.with_device_write(devid, [&meta] (DeviceState& dev) { | |
2345 | dev.rm_life_expectancy(); | |
2346 | meta = dev.metadata; | |
2347 | })) { | |
2348 | string cmd; | |
2349 | bufferlist json; | |
2350 | if (meta.empty()) { | |
2351 | cmd = | |
2352 | "{" | |
2353 | "\"prefix\": \"config-key rm\", " | |
2354 | "\"key\": \"device/" + devid + "\"" | |
2355 | "}"; | |
2356 | } else { | |
2357 | json_spirit::Object json_object; | |
2358 | for (auto& i : meta) { | |
2359 | json_spirit::Config::add(json_object, i.first, i.second); | |
2360 | } | |
2361 | json.append(json_spirit::write(json_object)); | |
2362 | cmd = | |
2363 | "{" | |
2364 | "\"prefix\": \"config-key set\", " | |
2365 | "\"key\": \"device/" + devid + "\"" | |
2366 | "}"; | |
2367 | } | |
2368 | auto on_finish = new ReplyOnFinish(cmdctx); | |
2369 | monc->start_mon_command({cmd}, json, nullptr, nullptr, on_finish); | |
2370 | } else { | |
2371 | cmdctx->reply(0, ss); | |
2372 | } | |
2373 | return true; | |
2374 | } else { | |
2375 | if (!pgmap_ready) { | |
2376 | ss << "Warning: due to ceph-mgr restart, some PG states may not be up to date\n"; | |
2377 | } | |
2378 | if (f) { | |
2379 | f->open_object_section("pg_info"); | |
2380 | f->dump_bool("pg_ready", pgmap_ready); | |
2381 | } | |
2382 | ||
2383 | // fall back to feeding command to PGMap | |
2384 | r = cluster_state.with_osdmap_and_pgmap([&](const OSDMap& osdmap, const PGMap& pg_map) { | |
2385 | return process_pg_map_command(prefix, cmdctx->cmdmap, pg_map, osdmap, | |
2386 | f.get(), &ss, &cmdctx->odata); | |
7c673cae FG |
2387 | }); |
2388 | ||
11fdf7f2 TL |
2389 | if (f) { |
2390 | f->close_section(); | |
2391 | } | |
7c673cae | 2392 | if (r != -EOPNOTSUPP) { |
11fdf7f2 TL |
2393 | if (f) { |
2394 | f->flush(cmdctx->odata); | |
2395 | } | |
7c673cae FG |
2396 | cmdctx->reply(r, ss); |
2397 | return true; | |
2398 | } | |
2399 | } | |
2400 | ||
11fdf7f2 | 2401 | // Was the command unfound? |
92f5a8d4 | 2402 | if (py_command.cmdstring.empty()) { |
7c673cae FG |
2403 | ss << "No handler found for '" << prefix << "'"; |
2404 | dout(4) << "No handler found for '" << prefix << "'" << dendl; | |
2405 | cmdctx->reply(-EINVAL, ss); | |
2406 | return true; | |
7c673cae | 2407 | } |
11fdf7f2 | 2408 | |
f67539c2 | 2409 | dout(10) << "passing through command '" << prefix << "' size " << cmdctx->cmdmap.size() << dendl; |
9f95a23c TL |
2410 | finisher.queue(new LambdaContext([this, cmdctx, session, py_command, prefix] |
2411 | (int r_) mutable { | |
11fdf7f2 TL |
2412 | std::stringstream ss; |
2413 | ||
f67539c2 TL |
2414 | dout(10) << "dispatching command '" << prefix << "' size " << cmdctx->cmdmap.size() << dendl; |
2415 | ||
11fdf7f2 | 2416 | // Validate that the module is enabled |
92f5a8d4 TL |
2417 | auto& py_handler_name = py_command.module_name; |
2418 | PyModuleRef module = py_modules.get_module(py_handler_name); | |
11fdf7f2 TL |
2419 | ceph_assert(module); |
2420 | if (!module->is_enabled()) { | |
92f5a8d4 | 2421 | ss << "Module '" << py_handler_name << "' is not enabled (required by " |
11fdf7f2 | 2422 | "command '" << prefix << "'): use `ceph mgr module enable " |
92f5a8d4 | 2423 | << py_handler_name << "` to enable it"; |
11fdf7f2 TL |
2424 | dout(4) << ss.str() << dendl; |
2425 | cmdctx->reply(-EOPNOTSUPP, ss); | |
2426 | return; | |
2427 | } | |
2428 | ||
2429 | // Hack: allow the self-test method to run on unhealthy modules. | |
2430 | // Fix this in future by creating a special path for self test rather | |
2431 | // than having the hook be a normal module command. | |
92f5a8d4 | 2432 | std::string self_test_prefix = py_handler_name + " " + "self-test"; |
11fdf7f2 TL |
2433 | |
2434 | // Validate that the module is healthy | |
2435 | bool accept_command; | |
2436 | if (module->is_loaded()) { | |
2437 | if (module->get_can_run() && !module->is_failed()) { | |
2438 | // Healthy module | |
2439 | accept_command = true; | |
2440 | } else if (self_test_prefix == prefix) { | |
2441 | // Unhealthy, but allow because it's a self test command | |
2442 | accept_command = true; | |
2443 | } else { | |
2444 | accept_command = false; | |
92f5a8d4 | 2445 | ss << "Module '" << py_handler_name << "' has experienced an error and " |
11fdf7f2 TL |
2446 | "cannot handle commands: " << module->get_error_string(); |
2447 | } | |
2448 | } else { | |
2449 | // Module not loaded | |
2450 | accept_command = false; | |
92f5a8d4 | 2451 | ss << "Module '" << py_handler_name << "' failed to load and " |
11fdf7f2 TL |
2452 | "cannot handle commands: " << module->get_error_string(); |
2453 | } | |
2454 | ||
2455 | if (!accept_command) { | |
2456 | dout(4) << ss.str() << dendl; | |
2457 | cmdctx->reply(-EIO, ss); | |
2458 | return; | |
2459 | } | |
2460 | ||
2461 | std::stringstream ds; | |
9f95a23c | 2462 | bufferlist inbl = cmdctx->data; |
92f5a8d4 TL |
2463 | int r = py_modules.handle_command(py_command, *session, cmdctx->cmdmap, |
2464 | inbl, &ds, &ss); | |
2465 | if (r == -EACCES) { | |
2466 | log_access_denied(cmdctx, session, ss); | |
2467 | } | |
2468 | ||
11fdf7f2 TL |
2469 | cmdctx->odata.append(ds); |
2470 | cmdctx->reply(r, ss); | |
f67539c2 | 2471 | dout(10) << " command returned " << r << dendl; |
11fdf7f2 TL |
2472 | })); |
2473 | return true; | |
7c673cae | 2474 | } |
31f18b77 | 2475 | |
224ce89b WB |
2476 | void DaemonServer::_prune_pending_service_map() |
2477 | { | |
2478 | utime_t cutoff = ceph_clock_now(); | |
11fdf7f2 | 2479 | cutoff -= g_conf().get_val<double>("mgr_service_beacon_grace"); |
224ce89b WB |
2480 | auto p = pending_service_map.services.begin(); |
2481 | while (p != pending_service_map.services.end()) { | |
2482 | auto q = p->second.daemons.begin(); | |
2483 | while (q != p->second.daemons.end()) { | |
9f95a23c | 2484 | DaemonKey key{p->first, q->first}; |
224ce89b | 2485 | if (!daemon_state.exists(key)) { |
1911f103 TL |
2486 | if (ServiceMap::is_normal_ceph_entity(p->first)) { |
2487 | dout(10) << "daemon " << key << " in service map but not in daemon state " | |
2488 | << "index -- force pruning" << dendl; | |
2489 | q = p->second.daemons.erase(q); | |
2490 | pending_service_map_dirty = pending_service_map.epoch; | |
2491 | } else { | |
2492 | derr << "missing key " << key << dendl; | |
2493 | ++q; | |
2494 | } | |
2495 | ||
2496 | continue; | |
224ce89b | 2497 | } |
1911f103 | 2498 | |
224ce89b | 2499 | auto daemon = daemon_state.get(key); |
11fdf7f2 | 2500 | std::lock_guard l(daemon->lock); |
224ce89b WB |
2501 | if (daemon->last_service_beacon == utime_t()) { |
2502 | // we must have just restarted; assume they are alive now. | |
2503 | daemon->last_service_beacon = ceph_clock_now(); | |
2504 | ++q; | |
2505 | continue; | |
2506 | } | |
2507 | if (daemon->last_service_beacon < cutoff) { | |
2508 | dout(10) << "pruning stale " << p->first << "." << q->first | |
2509 | << " last_beacon " << daemon->last_service_beacon << dendl; | |
2510 | q = p->second.daemons.erase(q); | |
2511 | pending_service_map_dirty = pending_service_map.epoch; | |
2512 | } else { | |
2513 | ++q; | |
2514 | } | |
2515 | } | |
2516 | if (p->second.daemons.empty()) { | |
2517 | p = pending_service_map.services.erase(p); | |
2518 | pending_service_map_dirty = pending_service_map.epoch; | |
2519 | } else { | |
2520 | ++p; | |
2521 | } | |
2522 | } | |
2523 | } | |
2524 | ||
31f18b77 FG |
2525 | void DaemonServer::send_report() |
2526 | { | |
224ce89b | 2527 | if (!pgmap_ready) { |
11fdf7f2 | 2528 | if (ceph_clock_now() - started_at > g_conf().get_val<int64_t>("mgr_stats_period") * 4.0) { |
224ce89b WB |
2529 | pgmap_ready = true; |
2530 | reported_osds.clear(); | |
2531 | dout(1) << "Giving up on OSDs that haven't reported yet, sending " | |
2532 | << "potentially incomplete PG state to mon" << dendl; | |
2533 | } else { | |
2534 | dout(1) << "Not sending PG status to monitor yet, waiting for OSDs" | |
2535 | << dendl; | |
2536 | return; | |
2537 | } | |
2538 | } | |
2539 | ||
9f95a23c | 2540 | auto m = ceph::make_message<MMonMgrReport>(); |
f67539c2 | 2541 | m->gid = monc->get_global_id(); |
c07f9fc5 | 2542 | py_modules.get_health_checks(&m->health_checks); |
11fdf7f2 | 2543 | py_modules.get_progress_events(&m->progress_events); |
c07f9fc5 | 2544 | |
11fdf7f2 | 2545 | cluster_state.with_mutable_pgmap([&](PGMap& pg_map) { |
31f18b77 FG |
2546 | cluster_state.update_delta_stats(); |
2547 | ||
224ce89b WB |
2548 | if (pending_service_map.epoch) { |
2549 | _prune_pending_service_map(); | |
2550 | if (pending_service_map_dirty >= pending_service_map.epoch) { | |
2551 | pending_service_map.modified = ceph_clock_now(); | |
11fdf7f2 | 2552 | encode(pending_service_map, m->service_map_bl, CEPH_FEATURES_ALL); |
224ce89b WB |
2553 | dout(10) << "sending service_map e" << pending_service_map.epoch |
2554 | << dendl; | |
2555 | pending_service_map.epoch++; | |
2556 | } | |
2557 | } | |
2558 | ||
31f18b77 FG |
2559 | cluster_state.with_osdmap([&](const OSDMap& osdmap) { |
2560 | // FIXME: no easy way to get mon features here. this will do for | |
2561 | // now, though, as long as we don't make a backward-incompat change. | |
2562 | pg_map.encode_digest(osdmap, m->get_data(), CEPH_FEATURES_ALL); | |
2563 | dout(10) << pg_map << dendl; | |
224ce89b WB |
2564 | |
2565 | pg_map.get_health_checks(g_ceph_context, osdmap, | |
2566 | &m->health_checks); | |
c07f9fc5 | 2567 | |
224ce89b WB |
2568 | dout(10) << m->health_checks.checks.size() << " health checks" |
2569 | << dendl; | |
2570 | dout(20) << "health checks:\n"; | |
2571 | JSONFormatter jf(true); | |
2572 | jf.dump_object("health_checks", m->health_checks); | |
2573 | jf.flush(*_dout); | |
2574 | *_dout << dendl; | |
9f95a23c | 2575 | if (osdmap.require_osd_release >= ceph_release_t::luminous) { |
a8e16298 TL |
2576 | clog->debug() << "pgmap v" << pg_map.version << ": " << pg_map; |
2577 | } | |
31f18b77 FG |
2578 | }); |
2579 | }); | |
b32b8144 | 2580 | |
11fdf7f2 TL |
2581 | map<daemon_metric, unique_ptr<DaemonHealthMetricCollector>> accumulated; |
2582 | for (auto service : {"osd", "mon"} ) { | |
2583 | auto daemons = daemon_state.get_by_service(service); | |
2584 | for (const auto& [key,state] : daemons) { | |
2585 | std::lock_guard l{state->lock}; | |
2586 | for (const auto& metric : state->daemon_health_metrics) { | |
2587 | auto acc = accumulated.find(metric.get_type()); | |
2588 | if (acc == accumulated.end()) { | |
2589 | auto collector = DaemonHealthMetricCollector::create(metric.get_type()); | |
2590 | if (!collector) { | |
9f95a23c | 2591 | derr << __func__ << " " << key |
11fdf7f2 TL |
2592 | << " sent me an unknown health metric: " |
2593 | << std::hex << static_cast<uint8_t>(metric.get_type()) | |
2594 | << std::dec << dendl; | |
2595 | continue; | |
2596 | } | |
2597 | dout(20) << " + " << state->key << " " | |
2598 | << metric << dendl; | |
2599 | tie(acc, std::ignore) = accumulated.emplace(metric.get_type(), | |
2600 | std::move(collector)); | |
2601 | } | |
2602 | acc->second->update(key, metric); | |
b32b8144 | 2603 | } |
b32b8144 FG |
2604 | } |
2605 | } | |
2606 | for (const auto& acc : accumulated) { | |
2607 | acc.second->summarize(m->health_checks); | |
2608 | } | |
31f18b77 FG |
2609 | // TODO? We currently do not notify the PyModules |
2610 | // TODO: respect needs_send, so we send the report only if we are asked to do | |
2611 | // so, or the state is updated. | |
9f95a23c | 2612 | monc->send_mon_message(std::move(m)); |
31f18b77 | 2613 | } |
224ce89b | 2614 | |
11fdf7f2 TL |
2615 | void DaemonServer::adjust_pgs() |
2616 | { | |
2617 | dout(20) << dendl; | |
2618 | unsigned max = std::max<int64_t>(1, g_conf()->mon_osd_max_creating_pgs); | |
2619 | double max_misplaced = g_conf().get_val<double>("target_max_misplaced_ratio"); | |
2620 | bool aggro = g_conf().get_val<bool>("mgr_debug_aggressive_pg_num_changes"); | |
2621 | ||
2622 | map<string,unsigned> pg_num_to_set; | |
2623 | map<string,unsigned> pgp_num_to_set; | |
2624 | set<pg_t> upmaps_to_clear; | |
2625 | cluster_state.with_osdmap_and_pgmap([&](const OSDMap& osdmap, const PGMap& pg_map) { | |
2626 | unsigned creating_or_unknown = 0; | |
2627 | for (auto& i : pg_map.num_pg_by_state) { | |
2628 | if ((i.first & (PG_STATE_CREATING)) || | |
2629 | i.first == 0) { | |
2630 | creating_or_unknown += i.second; | |
2631 | } | |
2632 | } | |
2633 | unsigned left = max; | |
2634 | if (creating_or_unknown >= max) { | |
2635 | return; | |
2636 | } | |
2637 | left -= creating_or_unknown; | |
2638 | dout(10) << "creating_or_unknown " << creating_or_unknown | |
2639 | << " max_creating " << max | |
2640 | << " left " << left | |
2641 | << dendl; | |
2642 | ||
2643 | // FIXME: These checks are fundamentally racy given that adjust_pgs() | |
2644 | // can run more frequently than we get updated pg stats from OSDs. We | |
2645 | // may make multiple adjustments with stale informaiton. | |
2646 | double misplaced_ratio, degraded_ratio; | |
2647 | double inactive_pgs_ratio, unknown_pgs_ratio; | |
2648 | pg_map.get_recovery_stats(&misplaced_ratio, °raded_ratio, | |
2649 | &inactive_pgs_ratio, &unknown_pgs_ratio); | |
2650 | dout(20) << "misplaced_ratio " << misplaced_ratio | |
2651 | << " degraded_ratio " << degraded_ratio | |
2652 | << " inactive_pgs_ratio " << inactive_pgs_ratio | |
2653 | << " unknown_pgs_ratio " << unknown_pgs_ratio | |
2654 | << "; target_max_misplaced_ratio " << max_misplaced | |
2655 | << dendl; | |
2656 | ||
2657 | for (auto& i : osdmap.get_pools()) { | |
2658 | const pg_pool_t& p = i.second; | |
2659 | ||
2660 | // adjust pg_num? | |
2661 | if (p.get_pg_num_target() != p.get_pg_num()) { | |
2662 | dout(20) << "pool " << i.first | |
2663 | << " pg_num " << p.get_pg_num() | |
2664 | << " target " << p.get_pg_num_target() | |
2665 | << dendl; | |
2666 | if (p.has_flag(pg_pool_t::FLAG_CREATING)) { | |
2667 | dout(10) << "pool " << i.first | |
2668 | << " pg_num_target " << p.get_pg_num_target() | |
2669 | << " pg_num " << p.get_pg_num() | |
2670 | << " - still creating initial pgs" | |
2671 | << dendl; | |
2672 | } else if (p.get_pg_num_target() < p.get_pg_num()) { | |
2673 | // pg_num decrease (merge) | |
2674 | pg_t merge_source(p.get_pg_num() - 1, i.first); | |
2675 | pg_t merge_target = merge_source.get_parent(); | |
2676 | bool ok = true; | |
2677 | ||
2678 | if (p.get_pg_num() != p.get_pg_num_pending()) { | |
2679 | dout(10) << "pool " << i.first | |
2680 | << " pg_num_target " << p.get_pg_num_target() | |
2681 | << " pg_num " << p.get_pg_num() | |
2682 | << " - decrease and pg_num_pending != pg_num, waiting" | |
2683 | << dendl; | |
2684 | ok = false; | |
2685 | } else if (p.get_pg_num() == p.get_pgp_num()) { | |
2686 | dout(10) << "pool " << i.first | |
2687 | << " pg_num_target " << p.get_pg_num_target() | |
2688 | << " pg_num " << p.get_pg_num() | |
2689 | << " - decrease blocked by pgp_num " | |
2690 | << p.get_pgp_num() | |
2691 | << dendl; | |
2692 | ok = false; | |
2693 | } | |
9f95a23c | 2694 | vector<int32_t> source_acting; |
11fdf7f2 TL |
2695 | for (auto &merge_participant : {merge_source, merge_target}) { |
2696 | bool is_merge_source = merge_participant == merge_source; | |
2697 | if (osdmap.have_pg_upmaps(merge_participant)) { | |
2698 | dout(10) << "pool " << i.first | |
2699 | << " pg_num_target " << p.get_pg_num_target() | |
2700 | << " pg_num " << p.get_pg_num() | |
2701 | << (is_merge_source ? " - merge source " : " - merge target ") | |
2702 | << merge_participant | |
2703 | << " has upmap" << dendl; | |
2704 | upmaps_to_clear.insert(merge_participant); | |
2705 | ok = false; | |
2706 | } | |
2707 | auto q = pg_map.pg_stat.find(merge_participant); | |
2708 | if (q == pg_map.pg_stat.end()) { | |
2709 | dout(10) << "pool " << i.first | |
2710 | << " pg_num_target " << p.get_pg_num_target() | |
2711 | << " pg_num " << p.get_pg_num() | |
2712 | << " - no state for " << merge_participant | |
2713 | << (is_merge_source ? " (merge source)" : " (merge target)") | |
2714 | << dendl; | |
2715 | ok = false; | |
2716 | } else if ((q->second.state & (PG_STATE_ACTIVE | PG_STATE_CLEAN)) != | |
2717 | (PG_STATE_ACTIVE | PG_STATE_CLEAN)) { | |
2718 | dout(10) << "pool " << i.first | |
2719 | << " pg_num_target " << p.get_pg_num_target() | |
2720 | << " pg_num " << p.get_pg_num() | |
2721 | << (is_merge_source ? " - merge source " : " - merge target ") | |
2722 | << merge_participant | |
2723 | << " not clean (" << pg_state_string(q->second.state) | |
2724 | << ")" << dendl; | |
2725 | ok = false; | |
2726 | } | |
9f95a23c TL |
2727 | if (is_merge_source) { |
2728 | source_acting = q->second.acting; | |
2729 | } else if (ok && q->second.acting != source_acting) { | |
2730 | dout(10) << "pool " << i.first | |
2731 | << " pg_num_target " << p.get_pg_num_target() | |
2732 | << " pg_num " << p.get_pg_num() | |
2733 | << (is_merge_source ? " - merge source " : " - merge target ") | |
2734 | << merge_participant | |
2735 | << " acting does not match (source " << source_acting | |
2736 | << " != target " << q->second.acting | |
2737 | << ")" << dendl; | |
2738 | ok = false; | |
2739 | } | |
11fdf7f2 TL |
2740 | } |
2741 | ||
2742 | if (ok) { | |
2743 | unsigned target = p.get_pg_num() - 1; | |
2744 | dout(10) << "pool " << i.first | |
2745 | << " pg_num_target " << p.get_pg_num_target() | |
2746 | << " pg_num " << p.get_pg_num() | |
2747 | << " -> " << target | |
2748 | << " (merging " << merge_source | |
2749 | << " and " << merge_target | |
2750 | << ")" << dendl; | |
2751 | pg_num_to_set[osdmap.get_pool_name(i.first)] = target; | |
9f95a23c | 2752 | continue; |
11fdf7f2 TL |
2753 | } |
2754 | } else if (p.get_pg_num_target() > p.get_pg_num()) { | |
2755 | // pg_num increase (split) | |
2756 | bool active = true; | |
2757 | auto q = pg_map.num_pg_by_pool_state.find(i.first); | |
2758 | if (q != pg_map.num_pg_by_pool_state.end()) { | |
2759 | for (auto& j : q->second) { | |
2760 | if ((j.first & (PG_STATE_ACTIVE|PG_STATE_PEERED)) == 0) { | |
2761 | dout(20) << "pool " << i.first << " has " << j.second | |
2762 | << " pgs in " << pg_state_string(j.first) | |
2763 | << dendl; | |
2764 | active = false; | |
2765 | break; | |
2766 | } | |
2767 | } | |
2768 | } else { | |
2769 | active = false; | |
2770 | } | |
20effc67 TL |
2771 | unsigned pg_gap = p.get_pg_num() - p.get_pgp_num(); |
2772 | unsigned max_jump = cct->_conf->mgr_max_pg_num_change; | |
11fdf7f2 TL |
2773 | if (!active) { |
2774 | dout(10) << "pool " << i.first | |
2775 | << " pg_num_target " << p.get_pg_num_target() | |
2776 | << " pg_num " << p.get_pg_num() | |
2777 | << " - not all pgs active" | |
2778 | << dendl; | |
20effc67 TL |
2779 | } else if (pg_gap >= max_jump) { |
2780 | dout(10) << "pool " << i.first | |
2781 | << " pg_num " << p.get_pg_num() | |
2782 | << " - pgp_num " << p.get_pgp_num() | |
2783 | << " gap > max_pg_num_change " << max_jump | |
2784 | << " - must scale pgp_num first" | |
2785 | << dendl; | |
11fdf7f2 TL |
2786 | } else { |
2787 | unsigned add = std::min( | |
20effc67 | 2788 | std::min(left, max_jump - pg_gap), |
11fdf7f2 TL |
2789 | p.get_pg_num_target() - p.get_pg_num()); |
2790 | unsigned target = p.get_pg_num() + add; | |
2791 | left -= add; | |
2792 | dout(10) << "pool " << i.first | |
2793 | << " pg_num_target " << p.get_pg_num_target() | |
2794 | << " pg_num " << p.get_pg_num() | |
2795 | << " -> " << target << dendl; | |
2796 | pg_num_to_set[osdmap.get_pool_name(i.first)] = target; | |
2797 | } | |
2798 | } | |
2799 | } | |
2800 | ||
2801 | // adjust pgp_num? | |
2802 | unsigned target = std::min(p.get_pg_num_pending(), | |
2803 | p.get_pgp_num_target()); | |
2804 | if (target != p.get_pgp_num()) { | |
2805 | dout(20) << "pool " << i.first | |
2806 | << " pgp_num_target " << p.get_pgp_num_target() | |
2807 | << " pgp_num " << p.get_pgp_num() | |
2808 | << " -> " << target << dendl; | |
2809 | if (target > p.get_pgp_num() && | |
2810 | p.get_pgp_num() == p.get_pg_num()) { | |
2811 | dout(10) << "pool " << i.first | |
2812 | << " pgp_num_target " << p.get_pgp_num_target() | |
2813 | << " pgp_num " << p.get_pgp_num() | |
2814 | << " - increase blocked by pg_num " << p.get_pg_num() | |
2815 | << dendl; | |
2816 | } else if (!aggro && (inactive_pgs_ratio > 0 || | |
2817 | degraded_ratio > 0 || | |
2818 | unknown_pgs_ratio > 0)) { | |
2819 | dout(10) << "pool " << i.first | |
2820 | << " pgp_num_target " << p.get_pgp_num_target() | |
2821 | << " pgp_num " << p.get_pgp_num() | |
2822 | << " - inactive|degraded|unknown pgs, deferring pgp_num" | |
2823 | << " update" << dendl; | |
2824 | } else if (!aggro && (misplaced_ratio > max_misplaced)) { | |
2825 | dout(10) << "pool " << i.first | |
2826 | << " pgp_num_target " << p.get_pgp_num_target() | |
2827 | << " pgp_num " << p.get_pgp_num() | |
2828 | << " - misplaced_ratio " << misplaced_ratio | |
2829 | << " > max " << max_misplaced | |
2830 | << ", deferring pgp_num update" << dendl; | |
2831 | } else { | |
2832 | // NOTE: this calculation assumes objects are | |
2833 | // basically uniformly distributed across all PGs | |
2834 | // (regardless of pool), which is probably not | |
2835 | // perfectly correct, but it's a start. make no | |
2836 | // single adjustment that's more than half of the | |
2837 | // max_misplaced, to somewhat limit the magnitude of | |
2838 | // our potential error here. | |
20effc67 | 2839 | unsigned next; |
9f95a23c | 2840 | static constexpr unsigned MAX_NUM_OBJECTS_PER_PG_FOR_LEAP = 1; |
81eedcae TL |
2841 | pool_stat_t s = pg_map.get_pg_pool_sum_stat(i.first); |
2842 | if (aggro || | |
2843 | // pool is (virtually) empty; just jump to final pgp_num? | |
2844 | (p.get_pgp_num_target() > p.get_pgp_num() && | |
9f95a23c TL |
2845 | s.stats.sum.num_objects <= (MAX_NUM_OBJECTS_PER_PG_FOR_LEAP * |
2846 | p.get_pgp_num_target()))) { | |
11fdf7f2 TL |
2847 | next = target; |
2848 | } else { | |
2849 | double room = | |
2850 | std::min<double>(max_misplaced - misplaced_ratio, | |
81eedcae | 2851 | max_misplaced / 2.0); |
11fdf7f2 TL |
2852 | unsigned estmax = std::max<unsigned>( |
2853 | (double)p.get_pg_num() * room, 1u); | |
b3b6e05e TL |
2854 | unsigned next_min = 0; |
2855 | if (p.get_pgp_num() > estmax) { | |
2856 | next_min = p.get_pgp_num() - estmax; | |
2857 | } | |
9f95a23c | 2858 | next = std::clamp(target, |
b3b6e05e | 2859 | next_min, |
9f95a23c | 2860 | p.get_pgp_num() + estmax); |
11fdf7f2 | 2861 | dout(20) << " room " << room << " estmax " << estmax |
9f95a23c TL |
2862 | << " delta " << (target-p.get_pgp_num()) |
2863 | << " next " << next << dendl; | |
81eedcae TL |
2864 | if (p.get_pgp_num_target() == p.get_pg_num_target() && |
2865 | p.get_pgp_num_target() < p.get_pg_num()) { | |
11fdf7f2 TL |
2866 | // since pgp_num is tracking pg_num, ceph is handling |
2867 | // pgp_num. so, be responsible: don't let pgp_num get | |
2868 | // too far out ahead of merges (if we are merging). | |
2869 | // this avoids moving lots of unmerged pgs onto a | |
2870 | // small number of OSDs where we might blow out the | |
2871 | // per-osd pg max. | |
2872 | unsigned max_outpace_merges = | |
2873 | std::max<unsigned>(8, p.get_pg_num() * max_misplaced); | |
2874 | if (next + max_outpace_merges < p.get_pg_num()) { | |
2875 | next = p.get_pg_num() - max_outpace_merges; | |
2876 | dout(10) << " using next " << next | |
2877 | << " to avoid outpacing merges (max_outpace_merges " | |
2878 | << max_outpace_merges << ")" << dendl; | |
2879 | } | |
2880 | } | |
2881 | } | |
522d829b TL |
2882 | if (next != p.get_pgp_num()) { |
2883 | dout(10) << "pool " << i.first | |
2884 | << " pgp_num_target " << p.get_pgp_num_target() | |
2885 | << " pgp_num " << p.get_pgp_num() | |
2886 | << " -> " << next << dendl; | |
2887 | pgp_num_to_set[osdmap.get_pool_name(i.first)] = next; | |
2888 | } | |
11fdf7f2 TL |
2889 | } |
2890 | } | |
2891 | if (left == 0) { | |
2892 | return; | |
2893 | } | |
2894 | } | |
2895 | }); | |
2896 | for (auto i : pg_num_to_set) { | |
2897 | const string cmd = | |
2898 | "{" | |
2899 | "\"prefix\": \"osd pool set\", " | |
2900 | "\"pool\": \"" + i.first + "\", " | |
2901 | "\"var\": \"pg_num_actual\", " | |
2902 | "\"val\": \"" + stringify(i.second) + "\"" | |
2903 | "}"; | |
2904 | monc->start_mon_command({cmd}, {}, nullptr, nullptr, nullptr); | |
2905 | } | |
2906 | for (auto i : pgp_num_to_set) { | |
2907 | const string cmd = | |
2908 | "{" | |
2909 | "\"prefix\": \"osd pool set\", " | |
2910 | "\"pool\": \"" + i.first + "\", " | |
2911 | "\"var\": \"pgp_num_actual\", " | |
2912 | "\"val\": \"" + stringify(i.second) + "\"" | |
2913 | "}"; | |
2914 | monc->start_mon_command({cmd}, {}, nullptr, nullptr, nullptr); | |
2915 | } | |
2916 | for (auto pg : upmaps_to_clear) { | |
2917 | const string cmd = | |
2918 | "{" | |
2919 | "\"prefix\": \"osd rm-pg-upmap\", " | |
2920 | "\"pgid\": \"" + stringify(pg) + "\"" | |
2921 | "}"; | |
2922 | monc->start_mon_command({cmd}, {}, nullptr, nullptr, nullptr); | |
2923 | const string cmd2 = | |
2924 | "{" | |
2925 | "\"prefix\": \"osd rm-pg-upmap-items\", " | |
2926 | "\"pgid\": \"" + stringify(pg) + "\"" + | |
2927 | "}"; | |
2928 | monc->start_mon_command({cmd2}, {}, nullptr, nullptr, nullptr); | |
2929 | } | |
2930 | } | |
2931 | ||
224ce89b WB |
2932 | void DaemonServer::got_service_map() |
2933 | { | |
11fdf7f2 | 2934 | std::lock_guard l(lock); |
224ce89b WB |
2935 | |
2936 | cluster_state.with_servicemap([&](const ServiceMap& service_map) { | |
2937 | if (pending_service_map.epoch == 0) { | |
2938 | // we just started up | |
2939 | dout(10) << "got initial map e" << service_map.epoch << dendl; | |
2940 | pending_service_map = service_map; | |
f91f0fd5 | 2941 | pending_service_map.epoch = service_map.epoch + 1; |
224ce89b WB |
2942 | } else { |
2943 | // we we already active and therefore must have persisted it, | |
2944 | // which means ours is the same or newer. | |
2945 | dout(10) << "got updated map e" << service_map.epoch << dendl; | |
f91f0fd5 | 2946 | ceph_assert(pending_service_map.epoch > service_map.epoch); |
224ce89b | 2947 | } |
224ce89b WB |
2948 | }); |
2949 | ||
2950 | // cull missing daemons, populate new ones | |
92f5a8d4 | 2951 | std::set<std::string> types; |
9f95a23c TL |
2952 | for (auto& [type, service] : pending_service_map.services) { |
2953 | if (ServiceMap::is_normal_ceph_entity(type)) { | |
2954 | continue; | |
2955 | } | |
2956 | ||
2957 | types.insert(type); | |
92f5a8d4 | 2958 | |
224ce89b | 2959 | std::set<std::string> names; |
9f95a23c | 2960 | for (auto& q : service.daemons) { |
224ce89b | 2961 | names.insert(q.first); |
9f95a23c | 2962 | DaemonKey key{type, q.first}; |
224ce89b WB |
2963 | if (!daemon_state.exists(key)) { |
2964 | auto daemon = std::make_shared<DaemonState>(daemon_state.types); | |
2965 | daemon->key = key; | |
11fdf7f2 | 2966 | daemon->set_metadata(q.second.metadata); |
224ce89b WB |
2967 | daemon->service_daemon = true; |
2968 | daemon_state.insert(daemon); | |
2969 | dout(10) << "added missing " << key << dendl; | |
2970 | } | |
2971 | } | |
9f95a23c | 2972 | daemon_state.cull(type, names); |
224ce89b | 2973 | } |
92f5a8d4 | 2974 | daemon_state.cull_services(types); |
224ce89b | 2975 | } |
3efd9988 | 2976 | |
11fdf7f2 TL |
2977 | void DaemonServer::got_mgr_map() |
2978 | { | |
2979 | std::lock_guard l(lock); | |
2980 | set<std::string> have; | |
2981 | cluster_state.with_mgrmap([&](const MgrMap& mgrmap) { | |
2982 | auto md_update = [&] (DaemonKey key) { | |
2983 | std::ostringstream oss; | |
2984 | auto c = new MetadataUpdate(daemon_state, key); | |
2985 | // FIXME remove post-nautilus: include 'id' for luminous mons | |
2986 | oss << "{\"prefix\": \"mgr metadata\", \"who\": \"" | |
9f95a23c | 2987 | << key.name << "\", \"id\": \"" << key.name << "\"}"; |
11fdf7f2 TL |
2988 | monc->start_mon_command({oss.str()}, {}, &c->outbl, &c->outs, c); |
2989 | }; | |
2990 | if (mgrmap.active_name.size()) { | |
9f95a23c | 2991 | DaemonKey key{"mgr", mgrmap.active_name}; |
11fdf7f2 TL |
2992 | have.insert(mgrmap.active_name); |
2993 | if (!daemon_state.exists(key) && !daemon_state.is_updating(key)) { | |
2994 | md_update(key); | |
2995 | dout(10) << "triggered addition of " << key << " via metadata update" << dendl; | |
2996 | } | |
2997 | } | |
2998 | for (auto& i : mgrmap.standbys) { | |
9f95a23c | 2999 | DaemonKey key{"mgr", i.second.name}; |
11fdf7f2 TL |
3000 | have.insert(i.second.name); |
3001 | if (!daemon_state.exists(key) && !daemon_state.is_updating(key)) { | |
3002 | md_update(key); | |
3003 | dout(10) << "triggered addition of " << key << " via metadata update" << dendl; | |
3004 | } | |
3005 | } | |
3006 | }); | |
3007 | daemon_state.cull("mgr", have); | |
3008 | } | |
3efd9988 FG |
3009 | |
3010 | const char** DaemonServer::get_tracked_conf_keys() const | |
3011 | { | |
3012 | static const char *KEYS[] = { | |
3013 | "mgr_stats_threshold", | |
3014 | "mgr_stats_period", | |
3015 | nullptr | |
3016 | }; | |
3017 | ||
3018 | return KEYS; | |
3019 | } | |
3020 | ||
11fdf7f2 TL |
3021 | void DaemonServer::handle_conf_change(const ConfigProxy& conf, |
3022 | const std::set <std::string> &changed) | |
3efd9988 | 3023 | { |
3efd9988 FG |
3024 | |
3025 | if (changed.count("mgr_stats_threshold") || changed.count("mgr_stats_period")) { | |
3026 | dout(4) << "Updating stats threshold/period on " | |
3027 | << daemon_connections.size() << " clients" << dendl; | |
3028 | // Send a fresh MMgrConfigure to all clients, so that they can follow | |
3029 | // the new policy for transmitting stats | |
9f95a23c | 3030 | finisher.queue(new LambdaContext([this](int r) { |
11fdf7f2 TL |
3031 | std::lock_guard l(lock); |
3032 | for (auto &c : daemon_connections) { | |
3033 | _send_configure(c); | |
3034 | } | |
3035 | })); | |
3efd9988 FG |
3036 | } |
3037 | } | |
3038 | ||
3039 | void DaemonServer::_send_configure(ConnectionRef c) | |
3040 | { | |
9f95a23c | 3041 | ceph_assert(ceph_mutex_is_locked_by_me(lock)); |
3efd9988 | 3042 | |
9f95a23c | 3043 | auto configure = make_message<MMgrConfigure>(); |
11fdf7f2 TL |
3044 | configure->stats_period = g_conf().get_val<int64_t>("mgr_stats_period"); |
3045 | configure->stats_threshold = g_conf().get_val<int64_t>("mgr_stats_threshold"); | |
3046 | ||
3047 | if (c->peer_is_osd()) { | |
3048 | configure->osd_perf_metric_queries = | |
3049 | osd_perf_metric_collector.get_queries(); | |
f67539c2 TL |
3050 | } else if (c->peer_is_mds()) { |
3051 | configure->metric_config_message = | |
3052 | MetricConfigMessage(MDSConfigPayload(mds_perf_metric_collector.get_queries())); | |
11fdf7f2 TL |
3053 | } |
3054 | ||
9f95a23c | 3055 | c->send_message2(configure); |
3efd9988 FG |
3056 | } |
3057 | ||
9f95a23c | 3058 | MetricQueryID DaemonServer::add_osd_perf_query( |
11fdf7f2 TL |
3059 | const OSDPerfMetricQuery &query, |
3060 | const std::optional<OSDPerfMetricLimit> &limit) | |
3061 | { | |
3062 | return osd_perf_metric_collector.add_query(query, limit); | |
3063 | } | |
3064 | ||
9f95a23c | 3065 | int DaemonServer::remove_osd_perf_query(MetricQueryID query_id) |
11fdf7f2 TL |
3066 | { |
3067 | return osd_perf_metric_collector.remove_query(query_id); | |
3068 | } | |
3069 | ||
f67539c2 TL |
3070 | int DaemonServer::get_osd_perf_counters(OSDPerfCollector *collector) |
3071 | { | |
3072 | return osd_perf_metric_collector.get_counters(collector); | |
3073 | } | |
3074 | ||
3075 | MetricQueryID DaemonServer::add_mds_perf_query( | |
3076 | const MDSPerfMetricQuery &query, | |
3077 | const std::optional<MDSPerfMetricLimit> &limit) | |
3078 | { | |
3079 | return mds_perf_metric_collector.add_query(query, limit); | |
3080 | } | |
3081 | ||
3082 | int DaemonServer::remove_mds_perf_query(MetricQueryID query_id) | |
3083 | { | |
3084 | return mds_perf_metric_collector.remove_query(query_id); | |
3085 | } | |
3086 | ||
33c7a0ef TL |
3087 | void DaemonServer::reregister_mds_perf_queries() |
3088 | { | |
3089 | mds_perf_metric_collector.reregister_queries(); | |
3090 | } | |
3091 | ||
f67539c2 | 3092 | int DaemonServer::get_mds_perf_counters(MDSPerfCollector *collector) |
11fdf7f2 | 3093 | { |
f67539c2 | 3094 | return mds_perf_metric_collector.get_counters(collector); |
11fdf7f2 | 3095 | } |