]> git.proxmox.com Git - ceph.git/blob - ceph/src/mgr/DaemonState.h
8c21305a9c236ec2786ec43ea9d483a064eed102
[ceph.git] / ceph / src / mgr / DaemonState.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2016 John Spray <john.spray@redhat.com>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 */
13
14 #ifndef DAEMON_STATE_H_
15 #define DAEMON_STATE_H_
16
17 #include <map>
18 #include <string>
19 #include <memory>
20 #include <set>
21 #include <boost/circular_buffer.hpp>
22
23 #include "common/RWLock.h"
24 #include "include/str_map.h"
25
26 #include "msg/msg_types.h"
27
28 // For PerfCounterType
29 #include "messages/MMgrReport.h"
30 #include "DaemonKey.h"
31
32 namespace ceph {
33 class Formatter;
34 }
35
36 // An instance of a performance counter type, within
37 // a particular daemon.
38 class PerfCounterInstance
39 {
40 class DataPoint
41 {
42 public:
43 utime_t t;
44 uint64_t v;
45 DataPoint(utime_t t_, uint64_t v_)
46 : t(t_), v(v_)
47 {}
48 };
49
50 class AvgDataPoint
51 {
52 public:
53 utime_t t;
54 uint64_t s;
55 uint64_t c;
56 AvgDataPoint(utime_t t_, uint64_t s_, uint64_t c_)
57 : t(t_), s(s_), c(c_)
58 {}
59 };
60
61 boost::circular_buffer<DataPoint> buffer;
62 boost::circular_buffer<AvgDataPoint> avg_buffer;
63
64 uint64_t get_current() const;
65
66 public:
67 const boost::circular_buffer<DataPoint> & get_data() const
68 {
69 return buffer;
70 }
71 const DataPoint& get_latest_data() const
72 {
73 return buffer.back();
74 }
75 const boost::circular_buffer<AvgDataPoint> & get_data_avg() const
76 {
77 return avg_buffer;
78 }
79 const AvgDataPoint& get_latest_data_avg() const
80 {
81 return avg_buffer.back();
82 }
83 void push(utime_t t, uint64_t const &v);
84 void push_avg(utime_t t, uint64_t const &s, uint64_t const &c);
85
86 PerfCounterInstance(enum perfcounter_type_d type)
87 {
88 if (type & PERFCOUNTER_LONGRUNAVG)
89 avg_buffer = boost::circular_buffer<AvgDataPoint>(20);
90 else
91 buffer = boost::circular_buffer<DataPoint>(20);
92 };
93 };
94
95
96 typedef std::map<std::string, PerfCounterType> PerfCounterTypes;
97
98 // Performance counters for one daemon
99 class DaemonPerfCounters
100 {
101 public:
102 // The record of perf stat types, shared between daemons
103 PerfCounterTypes &types;
104
105 explicit DaemonPerfCounters(PerfCounterTypes &types_)
106 : types(types_)
107 {}
108
109 std::map<std::string, PerfCounterInstance> instances;
110
111 void update(const MMgrReport& report);
112
113 void clear()
114 {
115 instances.clear();
116 }
117 };
118
119 // The state that we store about one daemon
120 class DaemonState
121 {
122 public:
123 ceph::mutex lock = ceph::make_mutex("DaemonState::lock");
124
125 DaemonKey key;
126
127 // The hostname where daemon was last seen running (extracted
128 // from the metadata)
129 std::string hostname;
130
131 // The metadata (hostname, version, etc) sent from the daemon
132 std::map<std::string, std::string> metadata;
133
134 /// device ids -> devname, derived from metadata[device_ids]
135 std::map<std::string,std::string> devices;
136
137 /// device ids -> by-path, derived from metadata[device_ids]
138 std::map<std::string,std::string> devices_bypath;
139
140 // TODO: this can be generalized to other daemons
141 std::vector<DaemonHealthMetric> daemon_health_metrics;
142
143 // Ephemeral state
144 bool service_daemon = false;
145 utime_t service_status_stamp;
146 std::map<std::string, std::string> service_status;
147 utime_t last_service_beacon;
148
149 // running config
150 std::map<std::string,std::map<int32_t,std::string>> config;
151
152 // mon config values we failed to set
153 std::map<std::string,std::string> ignored_mon_config;
154
155 // compiled-in config defaults (rarely used, so we leave them encoded!)
156 bufferlist config_defaults_bl;
157 std::map<std::string,std::string> config_defaults;
158
159 // The perf counters received in MMgrReport messages
160 DaemonPerfCounters perf_counters;
161
162 explicit DaemonState(PerfCounterTypes &types_)
163 : perf_counters(types_)
164 {
165 }
166
167 void set_metadata(const std::map<std::string,std::string>& m) {
168 devices.clear();
169 devices_bypath.clear();
170 metadata = m;
171 auto p = m.find("device_ids");
172 if (p != m.end()) {
173 map<std::string,std::string> devs, paths; // devname -> id or path
174 get_str_map(p->second, &devs, ",; ");
175 auto q = m.find("device_paths");
176 if (q != m.end()) {
177 get_str_map(q->second, &paths, ",; ");
178 }
179 for (auto& i : devs) {
180 if (i.second.size()) { // skip blank ids
181 devices[i.second] = i.first; // id -> devname
182 auto j = paths.find(i.first);
183 if (j != paths.end()) {
184 devices_bypath[i.second] = j->second; // id -> path
185 }
186 }
187 }
188 }
189 p = m.find("hostname");
190 if (p != m.end()) {
191 hostname = p->second;
192 }
193 }
194
195 const std::map<std::string,std::string>& _get_config_defaults() {
196 if (config_defaults.empty() &&
197 config_defaults_bl.length()) {
198 auto p = config_defaults_bl.cbegin();
199 try {
200 decode(config_defaults, p);
201 } catch (buffer::error& e) {
202 }
203 }
204 return config_defaults;
205 }
206 };
207
208 typedef std::shared_ptr<DaemonState> DaemonStatePtr;
209 typedef std::map<DaemonKey, DaemonStatePtr> DaemonStateCollection;
210
211
212 struct DeviceState : public RefCountedObject
213 {
214 std::string devid;
215 /// (server,devname,path)
216 std::set<std::tuple<std::string,std::string,std::string>> attachments;
217 std::set<DaemonKey> daemons;
218
219 std::map<string,string> metadata; ///< persistent metadata
220
221 pair<utime_t,utime_t> life_expectancy; ///< when device failure is expected
222 utime_t life_expectancy_stamp; ///< when life expectency was recorded
223 float wear_level = -1; ///< SSD wear level (negative if unknown)
224
225 void set_metadata(map<string,string>&& m);
226
227 void set_life_expectancy(utime_t from, utime_t to, utime_t now);
228 void rm_life_expectancy();
229
230 void set_wear_level(float wear);
231
232 string get_life_expectancy_str(utime_t now) const;
233
234 /// true of we can be safely forgotten/removed from memory
235 bool empty() const {
236 return daemons.empty() && metadata.empty();
237 }
238
239 void dump(Formatter *f) const;
240 void print(ostream& out) const;
241
242 private:
243 FRIEND_MAKE_REF(DeviceState);
244 DeviceState(const std::string& n) : devid(n) {}
245 };
246
247 /**
248 * Fuse the collection of per-daemon metadata from Ceph into
249 * a view that can be queried by service type, ID or also
250 * by server (aka fqdn).
251 */
252 class DaemonStateIndex
253 {
254 private:
255 mutable ceph::shared_mutex lock =
256 ceph::make_shared_mutex("DaemonStateIndex", true, true, true);
257
258 std::map<std::string, DaemonStateCollection> by_server;
259 DaemonStateCollection all;
260 std::set<DaemonKey> updating;
261
262 std::map<std::string,ceph::ref_t<DeviceState>> devices;
263
264 void _erase(const DaemonKey& dmk);
265
266 ceph::ref_t<DeviceState> _get_or_create_device(const std::string& dev) {
267 auto em = devices.try_emplace(dev, nullptr);
268 auto& d = em.first->second;
269 if (em.second) {
270 d = ceph::make_ref<DeviceState>(dev);
271 }
272 return d;
273 }
274 void _erase_device(const ceph::ref_t<DeviceState>& d) {
275 devices.erase(d->devid);
276 }
277
278 public:
279 DaemonStateIndex() {}
280
281 // FIXME: shouldn't really be public, maybe construct DaemonState
282 // objects internally to avoid this.
283 PerfCounterTypes types;
284
285 void insert(DaemonStatePtr dm);
286 void _insert(DaemonStatePtr dm);
287 bool exists(const DaemonKey &key) const;
288 DaemonStatePtr get(const DaemonKey &key);
289 void rm(const DaemonKey &key);
290 void _rm(const DaemonKey &key);
291
292 // Note that these return by value rather than reference to avoid
293 // callers needing to stay in lock while using result. Callers must
294 // still take the individual DaemonState::lock on each entry though.
295 DaemonStateCollection get_by_server(const std::string &hostname) const;
296 DaemonStateCollection get_by_service(const std::string &svc_name) const;
297 DaemonStateCollection get_all() const {return all;}
298
299 template<typename Callback, typename...Args>
300 auto with_daemons_by_server(Callback&& cb, Args&&... args) const ->
301 decltype(cb(by_server, std::forward<Args>(args)...)) {
302 std::shared_lock l{lock};
303
304 return std::forward<Callback>(cb)(by_server, std::forward<Args>(args)...);
305 }
306
307 template<typename Callback, typename...Args>
308 bool with_device(const std::string& dev,
309 Callback&& cb, Args&&... args) const {
310 std::shared_lock l{lock};
311 auto p = devices.find(dev);
312 if (p == devices.end()) {
313 return false;
314 }
315 std::forward<Callback>(cb)(*p->second, std::forward<Args>(args)...);
316 return true;
317 }
318
319 template<typename Callback, typename...Args>
320 bool with_device_write(const std::string& dev,
321 Callback&& cb, Args&&... args) {
322 std::unique_lock l{lock};
323 auto p = devices.find(dev);
324 if (p == devices.end()) {
325 return false;
326 }
327 std::forward<Callback>(cb)(*p->second, std::forward<Args>(args)...);
328 if (p->second->empty()) {
329 _erase_device(p->second);
330 }
331 return true;
332 }
333
334 template<typename Callback, typename...Args>
335 void with_device_create(const std::string& dev,
336 Callback&& cb, Args&&... args) {
337 std::unique_lock l{lock};
338 auto d = _get_or_create_device(dev);
339 std::forward<Callback>(cb)(*d, std::forward<Args>(args)...);
340 }
341
342 template<typename Callback, typename...Args>
343 void with_devices(Callback&& cb, Args&&... args) const {
344 std::shared_lock l{lock};
345 for (auto& i : devices) {
346 std::forward<Callback>(cb)(*i.second, std::forward<Args>(args)...);
347 }
348 }
349
350 template<typename CallbackInitial, typename Callback, typename...Args>
351 void with_devices2(CallbackInitial&& cbi, // with lock taken
352 Callback&& cb, // for each device
353 Args&&... args) const {
354 std::shared_lock l{lock};
355 cbi();
356 for (auto& i : devices) {
357 std::forward<Callback>(cb)(*i.second, std::forward<Args>(args)...);
358 }
359 }
360
361 void list_devids_by_server(const std::string& server,
362 std::set<std::string> *ls) {
363 auto m = get_by_server(server);
364 for (auto& i : m) {
365 std::lock_guard l(i.second->lock);
366 for (auto& j : i.second->devices) {
367 ls->insert(j.first);
368 }
369 }
370 }
371
372 void notify_updating(const DaemonKey &k) {
373 std::unique_lock l{lock};
374 updating.insert(k);
375 }
376 void clear_updating(const DaemonKey &k) {
377 std::unique_lock l{lock};
378 updating.erase(k);
379 }
380 bool is_updating(const DaemonKey &k) {
381 std::shared_lock l{lock};
382 return updating.count(k) > 0;
383 }
384
385 void update_metadata(DaemonStatePtr state,
386 const map<string,string>& meta) {
387 // remove and re-insert in case the device metadata changed
388 std::unique_lock l{lock};
389 _rm(state->key);
390 {
391 std::lock_guard l2{state->lock};
392 state->set_metadata(meta);
393 }
394 _insert(state);
395 }
396
397 /**
398 * Remove state for all daemons of this type whose names are
399 * not present in `names_exist`. Use this function when you have
400 * a cluster map and want to ensure that anything absent in the map
401 * is also absent in this class.
402 */
403 void cull(const std::string& svc_name,
404 const std::set<std::string>& names_exist);
405 void cull_services(const std::set<std::string>& types_exist);
406 };
407
408 #endif
409