git.proxmox.com Git - ceph.git/blob - ceph/src/mgr/DaemonState.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / mgr / DaemonState.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2016 John Spray <john.spray@redhat.com>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 */
13
14 #ifndef DAEMON_STATE_H_
15 #define DAEMON_STATE_H_
16
17 #include <map>
18 #include <string>
19 #include <memory>
20 #include <set>
21 #include <boost/circular_buffer.hpp>
22
23 #include "include/str_map.h"
24
25 #include "msg/msg_types.h"
26
27 // For PerfCounterType
28 #include "messages/MMgrReport.h"
29 #include "DaemonKey.h"
30
// Forward declaration only; the full Formatter definition is not needed to
// declare functions that take a pointer to it.
namespace ceph { class Formatter; }
34
35 // An instance of a performance counter type, within
36 // a particular daemon.
37 class PerfCounterInstance
38 {
39 class DataPoint
40 {
41 public:
42 utime_t t;
43 uint64_t v;
44 DataPoint(utime_t t_, uint64_t v_)
45 : t(t_), v(v_)
46 {}
47 };
48
49 class AvgDataPoint
50 {
51 public:
52 utime_t t;
53 uint64_t s;
54 uint64_t c;
55 AvgDataPoint(utime_t t_, uint64_t s_, uint64_t c_)
56 : t(t_), s(s_), c(c_)
57 {}
58 };
59
60 boost::circular_buffer<DataPoint> buffer;
61 boost::circular_buffer<AvgDataPoint> avg_buffer;
62
63 uint64_t get_current() const;
64
65 public:
66 const boost::circular_buffer<DataPoint> & get_data() const
67 {
68 return buffer;
69 }
70 const DataPoint& get_latest_data() const
71 {
72 return buffer.back();
73 }
74 const boost::circular_buffer<AvgDataPoint> & get_data_avg() const
75 {
76 return avg_buffer;
77 }
78 const AvgDataPoint& get_latest_data_avg() const
79 {
80 return avg_buffer.back();
81 }
82 void push(utime_t t, uint64_t const &v);
83 void push_avg(utime_t t, uint64_t const &s, uint64_t const &c);
84
85 PerfCounterInstance(enum perfcounter_type_d type)
86 {
87 if (type & PERFCOUNTER_LONGRUNAVG)
88 avg_buffer = boost::circular_buffer<AvgDataPoint>(20);
89 else
90 buffer = boost::circular_buffer<DataPoint>(20);
91 };
92 };
93
94
95 typedef std::map<std::string, PerfCounterType> PerfCounterTypes;
96
97 // Performance counters for one daemon
98 class DaemonPerfCounters
99 {
100 public:
101 // The record of perf stat types, shared between daemons
102 PerfCounterTypes &types;
103
104 explicit DaemonPerfCounters(PerfCounterTypes &types_)
105 : types(types_)
106 {}
107
108 std::map<std::string, PerfCounterInstance> instances;
109
110 void update(const MMgrReport& report);
111
112 void clear()
113 {
114 instances.clear();
115 }
116 };
117
118 // The state that we store about one daemon
119 class DaemonState
120 {
121 public:
122 ceph::mutex lock = ceph::make_mutex("DaemonState::lock");
123
124 DaemonKey key;
125
126 // The hostname where daemon was last seen running (extracted
127 // from the metadata)
128 std::string hostname;
129
130 // The metadata (hostname, version, etc) sent from the daemon
131 std::map<std::string, std::string> metadata;
132
133 /// device ids -> devname, derived from metadata[device_ids]
134 std::map<std::string,std::string> devices;
135
136 /// device ids -> by-path, derived from metadata[device_ids]
137 std::map<std::string,std::string> devices_bypath;
138
139 // TODO: this can be generalized to other daemons
140 std::vector<DaemonHealthMetric> daemon_health_metrics;
141
142 // Ephemeral state
143 bool service_daemon = false;
144 utime_t service_status_stamp;
145 std::map<std::string, std::string> service_status;
146 utime_t last_service_beacon;
147
148 // running config
149 std::map<std::string,std::map<int32_t,std::string>> config;
150
151 // mon config values we failed to set
152 std::map<std::string,std::string> ignored_mon_config;
153
154 // compiled-in config defaults (rarely used, so we leave them encoded!)
155 bufferlist config_defaults_bl;
156 std::map<std::string,std::string> config_defaults;
157
158 // The perf counters received in MMgrReport messages
159 DaemonPerfCounters perf_counters;
160
161 explicit DaemonState(PerfCounterTypes &types_)
162 : perf_counters(types_)
163 {
164 }
165 void set_metadata(const std::map<std::string,std::string>& m);
166 const std::map<std::string,std::string>& _get_config_defaults();
167 };
168
169 typedef std::shared_ptr<DaemonState> DaemonStatePtr;
170 typedef std::map<DaemonKey, DaemonStatePtr> DaemonStateCollection;
171
172
173 struct DeviceState : public RefCountedObject
174 {
175 std::string devid;
176 /// (server,devname,path)
177 std::set<std::tuple<std::string,std::string,std::string>> attachments;
178 std::set<DaemonKey> daemons;
179
180 std::map<std::string,std::string> metadata; ///< persistent metadata
181
182 std::pair<utime_t,utime_t> life_expectancy; ///< when device failure is expected
183 utime_t life_expectancy_stamp; ///< when life expectency was recorded
184 float wear_level = -1; ///< SSD wear level (negative if unknown)
185
186 void set_metadata(std::map<std::string,std::string>&& m);
187
188 void set_life_expectancy(utime_t from, utime_t to, utime_t now);
189 void rm_life_expectancy();
190
191 void set_wear_level(float wear);
192
193 std::string get_life_expectancy_str(utime_t now) const;
194
195 /// true of we can be safely forgotten/removed from memory
196 bool empty() const {
197 return daemons.empty() && metadata.empty();
198 }
199
200 void dump(Formatter *f) const;
201 void print(std::ostream& out) const;
202
203 private:
204 FRIEND_MAKE_REF(DeviceState);
205 DeviceState(const std::string& n) : devid(n) {}
206 };
207
208 /**
209 * Fuse the collection of per-daemon metadata from Ceph into
210 * a view that can be queried by service type, ID or also
211 * by server (aka fqdn).
212 */
213 class DaemonStateIndex
214 {
215 private:
216 mutable ceph::shared_mutex lock =
217 ceph::make_shared_mutex("DaemonStateIndex", true, true, true);
218
219 std::map<std::string, DaemonStateCollection> by_server;
220 DaemonStateCollection all;
221 std::set<DaemonKey> updating;
222
223 std::map<std::string,ceph::ref_t<DeviceState>> devices;
224
225 void _erase(const DaemonKey& dmk);
226
227 ceph::ref_t<DeviceState> _get_or_create_device(const std::string& dev) {
228 auto em = devices.try_emplace(dev, nullptr);
229 auto& d = em.first->second;
230 if (em.second) {
231 d = ceph::make_ref<DeviceState>(dev);
232 }
233 return d;
234 }
235 void _erase_device(const ceph::ref_t<DeviceState>& d) {
236 devices.erase(d->devid);
237 }
238
239 public:
240 DaemonStateIndex() {}
241
242 // FIXME: shouldn't really be public, maybe construct DaemonState
243 // objects internally to avoid this.
244 PerfCounterTypes types;
245
246 void insert(DaemonStatePtr dm);
247 void _insert(DaemonStatePtr dm);
248 bool exists(const DaemonKey &key) const;
249 DaemonStatePtr get(const DaemonKey &key);
250 void rm(const DaemonKey &key);
251 void _rm(const DaemonKey &key);
252
253 // Note that these return by value rather than reference to avoid
254 // callers needing to stay in lock while using result. Callers must
255 // still take the individual DaemonState::lock on each entry though.
256 DaemonStateCollection get_by_server(const std::string &hostname) const;
257 DaemonStateCollection get_by_service(const std::string &svc_name) const;
258 DaemonStateCollection get_all() const {return all;}
259
260 template<typename Callback, typename...Args>
261 auto with_daemons_by_server(Callback&& cb, Args&&... args) const ->
262 decltype(cb(by_server, std::forward<Args>(args)...)) {
263 std::shared_lock l{lock};
264
265 return std::forward<Callback>(cb)(by_server, std::forward<Args>(args)...);
266 }
267
268 template<typename Callback, typename...Args>
269 bool with_device(const std::string& dev,
270 Callback&& cb, Args&&... args) const {
271 std::shared_lock l{lock};
272 auto p = devices.find(dev);
273 if (p == devices.end()) {
274 return false;
275 }
276 std::forward<Callback>(cb)(*p->second, std::forward<Args>(args)...);
277 return true;
278 }
279
280 template<typename Callback, typename...Args>
281 bool with_device_write(const std::string& dev,
282 Callback&& cb, Args&&... args) {
283 std::unique_lock l{lock};
284 auto p = devices.find(dev);
285 if (p == devices.end()) {
286 return false;
287 }
288 std::forward<Callback>(cb)(*p->second, std::forward<Args>(args)...);
289 if (p->second->empty()) {
290 _erase_device(p->second);
291 }
292 return true;
293 }
294
295 template<typename Callback, typename...Args>
296 void with_device_create(const std::string& dev,
297 Callback&& cb, Args&&... args) {
298 std::unique_lock l{lock};
299 auto d = _get_or_create_device(dev);
300 std::forward<Callback>(cb)(*d, std::forward<Args>(args)...);
301 }
302
303 template<typename Callback, typename...Args>
304 void with_devices(Callback&& cb, Args&&... args) const {
305 std::shared_lock l{lock};
306 for (auto& i : devices) {
307 std::forward<Callback>(cb)(*i.second, std::forward<Args>(args)...);
308 }
309 }
310
311 template<typename CallbackInitial, typename Callback, typename...Args>
312 void with_devices2(CallbackInitial&& cbi, // with lock taken
313 Callback&& cb, // for each device
314 Args&&... args) const {
315 std::shared_lock l{lock};
316 cbi();
317 for (auto& i : devices) {
318 std::forward<Callback>(cb)(*i.second, std::forward<Args>(args)...);
319 }
320 }
321
322 void list_devids_by_server(const std::string& server,
323 std::set<std::string> *ls) {
324 auto m = get_by_server(server);
325 for (auto& i : m) {
326 std::lock_guard l(i.second->lock);
327 for (auto& j : i.second->devices) {
328 ls->insert(j.first);
329 }
330 }
331 }
332
333 void notify_updating(const DaemonKey &k) {
334 std::unique_lock l{lock};
335 updating.insert(k);
336 }
337 void clear_updating(const DaemonKey &k) {
338 std::unique_lock l{lock};
339 updating.erase(k);
340 }
341 bool is_updating(const DaemonKey &k) {
342 std::shared_lock l{lock};
343 return updating.count(k) > 0;
344 }
345
346 void update_metadata(DaemonStatePtr state,
347 const std::map<std::string,std::string>& meta) {
348 // remove and re-insert in case the device metadata changed
349 std::unique_lock l{lock};
350 _rm(state->key);
351 {
352 std::lock_guard l2{state->lock};
353 state->set_metadata(meta);
354 }
355 _insert(state);
356 }
357
358 /**
359 * Remove state for all daemons of this type whose names are
360 * not present in `names_exist`. Use this function when you have
361 * a cluster map and want to ensure that anything absent in the map
362 * is also absent in this class.
363 */
364 void cull(const std::string& svc_name,
365 const std::set<std::string>& names_exist);
366 void cull_services(const std::set<std::string>& types_exist);
367 };
368
369 #endif
370