]> git.proxmox.com Git - ceph.git/blob - ceph/src/mgr/DaemonState.h
import ceph 14.2.5
[ceph.git] / ceph / src / mgr / DaemonState.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2016 John Spray <john.spray@redhat.com>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 */
13
14 #ifndef DAEMON_STATE_H_
15 #define DAEMON_STATE_H_
16
17 #include <map>
18 #include <string>
19 #include <memory>
20 #include <set>
21 #include <boost/circular_buffer.hpp>
22
23 #include "common/RWLock.h"
24 #include "include/str_map.h"
25
26 #include "msg/msg_types.h"
27
28 // For PerfCounterType
29 #include "messages/MMgrReport.h"
30
31 namespace ceph {
32 class Formatter;
33 }
34
// Key that uniquely identifies one daemon within a cluster, stored as a
// pair of strings.
using DaemonKey = std::pair<std::string, std::string>;

// Render a DaemonKey as "<first>.<second>".
static inline std::string to_string(const DaemonKey& dk) {
  std::string rendered = dk.first;
  rendered += ".";
  rendered += dk.second;
  return rendered;
}
41
42 // An instance of a performance counter type, within
43 // a particular daemon.
44 class PerfCounterInstance
45 {
46 class DataPoint
47 {
48 public:
49 utime_t t;
50 uint64_t v;
51 DataPoint(utime_t t_, uint64_t v_)
52 : t(t_), v(v_)
53 {}
54 };
55
56 class AvgDataPoint
57 {
58 public:
59 utime_t t;
60 uint64_t s;
61 uint64_t c;
62 AvgDataPoint(utime_t t_, uint64_t s_, uint64_t c_)
63 : t(t_), s(s_), c(c_)
64 {}
65 };
66
67 boost::circular_buffer<DataPoint> buffer;
68 boost::circular_buffer<AvgDataPoint> avg_buffer;
69
70 uint64_t get_current() const;
71
72 public:
73 const boost::circular_buffer<DataPoint> & get_data() const
74 {
75 return buffer;
76 }
77 const DataPoint& get_latest_data() const
78 {
79 return buffer.back();
80 }
81 const boost::circular_buffer<AvgDataPoint> & get_data_avg() const
82 {
83 return avg_buffer;
84 }
85 const AvgDataPoint& get_latest_data_avg() const
86 {
87 return avg_buffer.back();
88 }
89 void push(utime_t t, uint64_t const &v);
90 void push_avg(utime_t t, uint64_t const &s, uint64_t const &c);
91
92 PerfCounterInstance(enum perfcounter_type_d type)
93 {
94 if (type & PERFCOUNTER_LONGRUNAVG)
95 avg_buffer = boost::circular_buffer<AvgDataPoint>(20);
96 else
97 buffer = boost::circular_buffer<DataPoint>(20);
98 };
99 };
100
101
102 typedef std::map<std::string, PerfCounterType> PerfCounterTypes;
103
104 // Performance counters for one daemon
105 class DaemonPerfCounters
106 {
107 public:
108 // The record of perf stat types, shared between daemons
109 PerfCounterTypes &types;
110
111 explicit DaemonPerfCounters(PerfCounterTypes &types_)
112 : types(types_)
113 {}
114
115 std::map<std::string, PerfCounterInstance> instances;
116
117 void update(MMgrReport *report);
118
119 void clear()
120 {
121 instances.clear();
122 }
123 };
124
125 // The state that we store about one daemon
126 class DaemonState
127 {
128 public:
129 Mutex lock = {"DaemonState::lock"};
130
131 DaemonKey key;
132
133 // The hostname where daemon was last seen running (extracted
134 // from the metadata)
135 std::string hostname;
136
137 // The metadata (hostname, version, etc) sent from the daemon
138 std::map<std::string, std::string> metadata;
139
140 /// device ids -> devname, derived from metadata[device_ids]
141 std::map<std::string,std::string> devices;
142
143 // TODO: this can be generalized to other daemons
144 std::vector<DaemonHealthMetric> daemon_health_metrics;
145
146 // Ephemeral state
147 bool service_daemon = false;
148 utime_t service_status_stamp;
149 std::map<std::string, std::string> service_status;
150 utime_t last_service_beacon;
151
152 // running config
153 std::map<std::string,std::map<int32_t,std::string>> config;
154
155 // mon config values we failed to set
156 std::map<std::string,std::string> ignored_mon_config;
157
158 // compiled-in config defaults (rarely used, so we leave them encoded!)
159 bufferlist config_defaults_bl;
160 std::map<std::string,std::string> config_defaults;
161
162 // The perf counters received in MMgrReport messages
163 DaemonPerfCounters perf_counters;
164
165 explicit DaemonState(PerfCounterTypes &types_)
166 : perf_counters(types_)
167 {
168 }
169
170 void set_metadata(const std::map<std::string,std::string>& m) {
171 devices.clear();
172 metadata = m;
173 auto p = m.find("device_ids");
174 if (p != m.end()) {
175 map<std::string,std::string> devs;
176 get_str_map(p->second, &devs, ",; ");
177 for (auto& i : devs) {
178 if (i.second.size()) { // skip blank ids
179 devices[i.second] = i.first;
180 }
181 }
182 }
183 p = m.find("hostname");
184 if (p != m.end()) {
185 hostname = p->second;
186 }
187 }
188
189 const std::map<std::string,std::string>& _get_config_defaults() {
190 if (config_defaults.empty() &&
191 config_defaults_bl.length()) {
192 auto p = config_defaults_bl.cbegin();
193 try {
194 decode(config_defaults, p);
195 } catch (buffer::error& e) {
196 }
197 }
198 return config_defaults;
199 }
200 };
201
202 typedef std::shared_ptr<DaemonState> DaemonStatePtr;
203 typedef std::map<DaemonKey, DaemonStatePtr> DaemonStateCollection;
204
205
206 struct DeviceState : public RefCountedObject
207 {
208 std::string devid;
209 std::set<pair<std::string,std::string>> devnames; ///< (server,devname)
210 std::set<DaemonKey> daemons;
211
212 std::map<string,string> metadata; ///< persistent metadata
213
214 pair<utime_t,utime_t> life_expectancy; ///< when device failure is expected
215 utime_t life_expectancy_stamp; ///< when life expectency was recorded
216
217 DeviceState(const std::string& n)
218 : RefCountedObject(nullptr, 0),
219 devid(n) {}
220
221 void set_metadata(map<string,string>&& m);
222
223 void set_life_expectancy(utime_t from, utime_t to, utime_t now);
224 void rm_life_expectancy();
225
226 string get_life_expectancy_str(utime_t now) const;
227
228 /// true of we can be safely forgotten/removed from memory
229 bool empty() const {
230 return daemons.empty() && metadata.empty();
231 }
232
233 void dump(Formatter *f) const;
234 void print(ostream& out) const;
235 };
236
237 typedef boost::intrusive_ptr<DeviceState> DeviceStateRef;
238
239 /**
240 * Fuse the collection of per-daemon metadata from Ceph into
241 * a view that can be queried by service type, ID or also
242 * by server (aka fqdn).
243 */
244 class DaemonStateIndex
245 {
246 private:
247 mutable RWLock lock = {"DaemonStateIndex", true, true, true};
248
249 std::map<std::string, DaemonStateCollection> by_server;
250 DaemonStateCollection all;
251 std::set<DaemonKey> updating;
252
253 std::map<std::string,DeviceStateRef> devices;
254
255 void _erase(const DaemonKey& dmk);
256
257 DeviceStateRef _get_or_create_device(const std::string& dev) {
258 auto p = devices.find(dev);
259 if (p != devices.end()) {
260 return p->second;
261 }
262 devices[dev] = new DeviceState(dev);
263 return devices[dev];
264 }
265 void _erase_device(DeviceStateRef d) {
266 devices.erase(d->devid);
267 }
268
269 public:
270 DaemonStateIndex() {}
271
272 // FIXME: shouldn't really be public, maybe construct DaemonState
273 // objects internally to avoid this.
274 PerfCounterTypes types;
275
276 void insert(DaemonStatePtr dm);
277 void _insert(DaemonStatePtr dm);
278 bool exists(const DaemonKey &key) const;
279 DaemonStatePtr get(const DaemonKey &key);
280 void rm(const DaemonKey &key);
281 void _rm(const DaemonKey &key);
282
283 // Note that these return by value rather than reference to avoid
284 // callers needing to stay in lock while using result. Callers must
285 // still take the individual DaemonState::lock on each entry though.
286 DaemonStateCollection get_by_server(const std::string &hostname) const;
287 DaemonStateCollection get_by_service(const std::string &svc_name) const;
288 DaemonStateCollection get_all() const {return all;}
289
290 template<typename Callback, typename...Args>
291 auto with_daemons_by_server(Callback&& cb, Args&&... args) const ->
292 decltype(cb(by_server, std::forward<Args>(args)...)) {
293 RWLock::RLocker l(lock);
294
295 return std::forward<Callback>(cb)(by_server, std::forward<Args>(args)...);
296 }
297
298 template<typename Callback, typename...Args>
299 bool with_device(const std::string& dev,
300 Callback&& cb, Args&&... args) const {
301 RWLock::RLocker l(lock);
302 auto p = devices.find(dev);
303 if (p == devices.end()) {
304 return false;
305 }
306 std::forward<Callback>(cb)(*p->second, std::forward<Args>(args)...);
307 return true;
308 }
309
310 template<typename Callback, typename...Args>
311 bool with_device_write(const std::string& dev,
312 Callback&& cb, Args&&... args) {
313 RWLock::WLocker l(lock);
314 auto p = devices.find(dev);
315 if (p == devices.end()) {
316 return false;
317 }
318 std::forward<Callback>(cb)(*p->second, std::forward<Args>(args)...);
319 if (p->second->empty()) {
320 _erase_device(p->second);
321 }
322 return true;
323 }
324
325 template<typename Callback, typename...Args>
326 void with_device_create(const std::string& dev,
327 Callback&& cb, Args&&... args) {
328 RWLock::WLocker l(lock);
329 auto d = _get_or_create_device(dev);
330 std::forward<Callback>(cb)(*d, std::forward<Args>(args)...);
331 }
332
333 template<typename Callback, typename...Args>
334 void with_devices(Callback&& cb, Args&&... args) const {
335 RWLock::RLocker l(lock);
336 for (auto& i : devices) {
337 std::forward<Callback>(cb)(*i.second, std::forward<Args>(args)...);
338 }
339 }
340
341 template<typename CallbackInitial, typename Callback, typename...Args>
342 void with_devices2(CallbackInitial&& cbi, // with lock taken
343 Callback&& cb, // for each device
344 Args&&... args) const {
345 RWLock::RLocker l(lock);
346 cbi();
347 for (auto& i : devices) {
348 std::forward<Callback>(cb)(*i.second, std::forward<Args>(args)...);
349 }
350 }
351
352 void list_devids_by_server(const std::string& server,
353 std::set<std::string> *ls) {
354 auto m = get_by_server(server);
355 for (auto& i : m) {
356 std::lock_guard l(i.second->lock);
357 for (auto& j : i.second->devices) {
358 ls->insert(j.first);
359 }
360 }
361 }
362
363 void notify_updating(const DaemonKey &k) {
364 RWLock::WLocker l(lock);
365 updating.insert(k);
366 }
367 void clear_updating(const DaemonKey &k) {
368 RWLock::WLocker l(lock);
369 updating.erase(k);
370 }
371 bool is_updating(const DaemonKey &k) {
372 RWLock::RLocker l(lock);
373 return updating.count(k) > 0;
374 }
375
376 void update_metadata(DaemonStatePtr state,
377 const map<string,string>& meta) {
378 // remove and re-insert in case the device metadata changed
379 RWLock::WLocker l(lock);
380 _rm(state->key);
381 {
382 Mutex::Locker l2(state->lock);
383 state->set_metadata(meta);
384 }
385 _insert(state);
386 }
387
388 /**
389 * Remove state for all daemons of this type whose names are
390 * not present in `names_exist`. Use this function when you have
391 * a cluster map and want to ensure that anything absent in the map
392 * is also absent in this class.
393 */
394 void cull(const std::string& svc_name,
395 const std::set<std::string>& names_exist);
396 };
397
398 #endif
399