]> git.proxmox.com Git - ceph.git/blame - ceph/src/mgr/DaemonState.h
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / mgr / DaemonState.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2016 John Spray <john.spray@redhat.com>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 */
13
14#ifndef DAEMON_STATE_H_
15#define DAEMON_STATE_H_
16
17#include <map>
18#include <string>
19#include <memory>
20#include <set>
21#include <boost/circular_buffer.hpp>
22
3efd9988 23#include "common/RWLock.h"
11fdf7f2 24#include "include/str_map.h"
7c673cae
FG
25
26#include "msg/msg_types.h"
27
28// For PerfCounterType
29#include "messages/MMgrReport.h"
30
11fdf7f2
TL
// Forward declaration only: this header refers to the formatter solely
// through a pointer parameter, so the full class definition is not needed.
namespace ceph {
  class Formatter;
}
7c673cae
FG
34
// Unique reference to a daemon within a cluster: a pair of strings that
// to_string() renders as "<first>.<second>".
typedef std::pair<std::string, std::string> DaemonKey;

/**
 * Render a DaemonKey as "<first>.<second>".
 *
 * @param dk key to format
 * @return both halves joined by a single '.'; empty halves are kept as-is,
 *         so an all-empty key yields "."
 */
static inline std::string to_string(const DaemonKey& dk) {
  std::string out;
  // size the buffer once instead of building chained temporaries
  out.reserve(dk.first.size() + 1 + dk.second.size());
  out += dk.first;
  out += '.';
  out += dk.second;
  return out;
}
41
7c673cae
FG
42// An instance of a performance counter type, within
43// a particular daemon.
44class PerfCounterInstance
45{
46 class DataPoint
47 {
48 public:
49 utime_t t;
50 uint64_t v;
51 DataPoint(utime_t t_, uint64_t v_)
52 : t(t_), v(v_)
53 {}
54 };
55
28e407b8
AA
56 class AvgDataPoint
57 {
58 public:
59 utime_t t;
60 uint64_t s;
61 uint64_t c;
62 AvgDataPoint(utime_t t_, uint64_t s_, uint64_t c_)
63 : t(t_), s(s_), c(c_)
64 {}
65 };
66
7c673cae 67 boost::circular_buffer<DataPoint> buffer;
28e407b8
AA
68 boost::circular_buffer<AvgDataPoint> avg_buffer;
69
7c673cae
FG
70 uint64_t get_current() const;
71
72 public:
73 const boost::circular_buffer<DataPoint> & get_data() const
74 {
75 return buffer;
76 }
11fdf7f2
TL
77 const DataPoint& get_latest_data() const
78 {
79 return buffer.back();
80 }
28e407b8
AA
81 const boost::circular_buffer<AvgDataPoint> & get_data_avg() const
82 {
83 return avg_buffer;
84 }
11fdf7f2
TL
85 const AvgDataPoint& get_latest_data_avg() const
86 {
87 return avg_buffer.back();
88 }
7c673cae 89 void push(utime_t t, uint64_t const &v);
28e407b8
AA
90 void push_avg(utime_t t, uint64_t const &s, uint64_t const &c);
91
92 PerfCounterInstance(enum perfcounter_type_d type)
93 {
94 if (type & PERFCOUNTER_LONGRUNAVG)
95 avg_buffer = boost::circular_buffer<AvgDataPoint>(20);
96 else
97 buffer = boost::circular_buffer<DataPoint>(20);
98 };
7c673cae
FG
99};
100
101
102typedef std::map<std::string, PerfCounterType> PerfCounterTypes;
103
104// Performance counters for one daemon
105class DaemonPerfCounters
106{
107 public:
108 // The record of perf stat types, shared between daemons
109 PerfCounterTypes &types;
110
11fdf7f2 111 explicit DaemonPerfCounters(PerfCounterTypes &types_)
7c673cae
FG
112 : types(types_)
113 {}
114
115 std::map<std::string, PerfCounterInstance> instances;
116
7c673cae
FG
117 void update(MMgrReport *report);
118
119 void clear()
120 {
121 instances.clear();
7c673cae
FG
122 }
123};
124
125// The state that we store about one daemon
126class DaemonState
127{
128 public:
c07f9fc5
FG
129 Mutex lock = {"DaemonState::lock"};
130
7c673cae
FG
131 DaemonKey key;
132
133 // The hostname where daemon was last seen running (extracted
134 // from the metadata)
135 std::string hostname;
136
137 // The metadata (hostname, version, etc) sent from the daemon
138 std::map<std::string, std::string> metadata;
139
11fdf7f2
TL
140 /// device ids -> devname, derived from metadata[device_ids]
141 std::map<std::string,std::string> devices;
142
b32b8144 143 // TODO: this can be generalized to other daemons
11fdf7f2 144 std::vector<DaemonHealthMetric> daemon_health_metrics;
b32b8144 145
224ce89b
WB
146 // Ephemeral state
147 bool service_daemon = false;
148 utime_t service_status_stamp;
149 std::map<std::string, std::string> service_status;
150 utime_t last_service_beacon;
151
11fdf7f2
TL
152 // running config
153 std::map<std::string,std::map<int32_t,std::string>> config;
154
155 // mon config values we failed to set
156 std::map<std::string,std::string> ignored_mon_config;
157
158 // compiled-in config defaults (rarely used, so we leave them encoded!)
159 bufferlist config_defaults_bl;
160 std::map<std::string,std::string> config_defaults;
161
7c673cae
FG
162 // The perf counters received in MMgrReport messages
163 DaemonPerfCounters perf_counters;
164
11fdf7f2 165 explicit DaemonState(PerfCounterTypes &types_)
7c673cae
FG
166 : perf_counters(types_)
167 {
168 }
11fdf7f2
TL
169
170 void set_metadata(const std::map<std::string,std::string>& m) {
171 devices.clear();
172 metadata = m;
173 auto p = m.find("device_ids");
174 if (p != m.end()) {
175 map<std::string,std::string> devs;
176 get_str_map(p->second, &devs, ",; ");
177 for (auto& i : devs) {
178 if (i.second.size()) { // skip blank ids
179 devices[i.second] = i.first;
180 }
181 }
182 }
eafe8130
TL
183 p = m.find("hostname");
184 if (p != m.end()) {
185 hostname = p->second;
186 }
11fdf7f2
TL
187 }
188
189 const std::map<std::string,std::string>& _get_config_defaults() {
190 if (config_defaults.empty() &&
191 config_defaults_bl.length()) {
192 auto p = config_defaults_bl.cbegin();
193 try {
194 decode(config_defaults, p);
195 } catch (buffer::error& e) {
196 }
197 }
198 return config_defaults;
199 }
7c673cae
FG
200};
201
202typedef std::shared_ptr<DaemonState> DaemonStatePtr;
203typedef std::map<DaemonKey, DaemonStatePtr> DaemonStateCollection;
204
205
11fdf7f2
TL
206struct DeviceState : public RefCountedObject
207{
208 std::string devid;
209 std::set<pair<std::string,std::string>> devnames; ///< (server,devname)
210 std::set<DaemonKey> daemons;
7c673cae 211
11fdf7f2
TL
212 std::map<string,string> metadata; ///< persistent metadata
213
214 pair<utime_t,utime_t> life_expectancy; ///< when device failure is expected
215 utime_t life_expectancy_stamp; ///< when life expectency was recorded
216
217 DeviceState(const std::string& n)
218 : RefCountedObject(nullptr, 0),
219 devid(n) {}
220
221 void set_metadata(map<string,string>&& m);
222
223 void set_life_expectancy(utime_t from, utime_t to, utime_t now);
224 void rm_life_expectancy();
225
226 string get_life_expectancy_str(utime_t now) const;
227
228 /// true of we can be safely forgotten/removed from memory
229 bool empty() const {
230 return daemons.empty() && metadata.empty();
231 }
232
233 void dump(Formatter *f) const;
234 void print(ostream& out) const;
235};
236
237typedef boost::intrusive_ptr<DeviceState> DeviceStateRef;
7c673cae
FG
238
239/**
240 * Fuse the collection of per-daemon metadata from Ceph into
241 * a view that can be queried by service type, ID or also
242 * by server (aka fqdn).
243 */
244class DaemonStateIndex
245{
11fdf7f2 246private:
3efd9988
FG
247 mutable RWLock lock = {"DaemonStateIndex", true, true, true};
248
7c673cae
FG
249 std::map<std::string, DaemonStateCollection> by_server;
250 DaemonStateCollection all;
7c673cae
FG
251 std::set<DaemonKey> updating;
252
11fdf7f2
TL
253 std::map<std::string,DeviceStateRef> devices;
254
3efd9988 255 void _erase(const DaemonKey& dmk);
7c673cae 256
11fdf7f2
TL
257 DeviceStateRef _get_or_create_device(const std::string& dev) {
258 auto p = devices.find(dev);
259 if (p != devices.end()) {
260 return p->second;
261 }
262 devices[dev] = new DeviceState(dev);
263 return devices[dev];
264 }
265 void _erase_device(DeviceStateRef d) {
266 devices.erase(d->devid);
267 }
268
269public:
3efd9988 270 DaemonStateIndex() {}
7c673cae
FG
271
272 // FIXME: shouldn't really be public, maybe construct DaemonState
273 // objects internally to avoid this.
274 PerfCounterTypes types;
275
276 void insert(DaemonStatePtr dm);
11fdf7f2 277 void _insert(DaemonStatePtr dm);
7c673cae
FG
278 bool exists(const DaemonKey &key) const;
279 DaemonStatePtr get(const DaemonKey &key);
11fdf7f2
TL
280 void rm(const DaemonKey &key);
281 void _rm(const DaemonKey &key);
3efd9988
FG
282
283 // Note that these return by value rather than reference to avoid
284 // callers needing to stay in lock while using result. Callers must
285 // still take the individual DaemonState::lock on each entry though.
7c673cae 286 DaemonStateCollection get_by_server(const std::string &hostname) const;
224ce89b 287 DaemonStateCollection get_by_service(const std::string &svc_name) const;
3efd9988
FG
288 DaemonStateCollection get_all() const {return all;}
289
290 template<typename Callback, typename...Args>
291 auto with_daemons_by_server(Callback&& cb, Args&&... args) const ->
292 decltype(cb(by_server, std::forward<Args>(args)...)) {
293 RWLock::RLocker l(lock);
294
295 return std::forward<Callback>(cb)(by_server, std::forward<Args>(args)...);
7c673cae
FG
296 }
297
11fdf7f2
TL
298 template<typename Callback, typename...Args>
299 bool with_device(const std::string& dev,
300 Callback&& cb, Args&&... args) const {
301 RWLock::RLocker l(lock);
302 auto p = devices.find(dev);
303 if (p == devices.end()) {
304 return false;
305 }
306 std::forward<Callback>(cb)(*p->second, std::forward<Args>(args)...);
307 return true;
308 }
309
310 template<typename Callback, typename...Args>
311 bool with_device_write(const std::string& dev,
312 Callback&& cb, Args&&... args) {
313 RWLock::WLocker l(lock);
314 auto p = devices.find(dev);
315 if (p == devices.end()) {
316 return false;
317 }
318 std::forward<Callback>(cb)(*p->second, std::forward<Args>(args)...);
319 if (p->second->empty()) {
320 _erase_device(p->second);
321 }
322 return true;
323 }
324
325 template<typename Callback, typename...Args>
326 void with_device_create(const std::string& dev,
327 Callback&& cb, Args&&... args) {
328 RWLock::WLocker l(lock);
329 auto d = _get_or_create_device(dev);
330 std::forward<Callback>(cb)(*d, std::forward<Args>(args)...);
331 }
332
333 template<typename Callback, typename...Args>
334 void with_devices(Callback&& cb, Args&&... args) const {
335 RWLock::RLocker l(lock);
336 for (auto& i : devices) {
337 std::forward<Callback>(cb)(*i.second, std::forward<Args>(args)...);
338 }
339 }
340
341 template<typename CallbackInitial, typename Callback, typename...Args>
342 void with_devices2(CallbackInitial&& cbi, // with lock taken
343 Callback&& cb, // for each device
344 Args&&... args) const {
345 RWLock::RLocker l(lock);
346 cbi();
347 for (auto& i : devices) {
348 std::forward<Callback>(cb)(*i.second, std::forward<Args>(args)...);
349 }
350 }
351
352 void list_devids_by_server(const std::string& server,
353 std::set<std::string> *ls) {
354 auto m = get_by_server(server);
355 for (auto& i : m) {
356 std::lock_guard l(i.second->lock);
357 for (auto& j : i.second->devices) {
358 ls->insert(j.first);
359 }
360 }
361 }
362
3efd9988
FG
363 void notify_updating(const DaemonKey &k) {
364 RWLock::WLocker l(lock);
365 updating.insert(k);
366 }
367 void clear_updating(const DaemonKey &k) {
368 RWLock::WLocker l(lock);
369 updating.erase(k);
370 }
371 bool is_updating(const DaemonKey &k) {
372 RWLock::RLocker l(lock);
373 return updating.count(k) > 0;
374 }
7c673cae 375
11fdf7f2
TL
376 void update_metadata(DaemonStatePtr state,
377 const map<string,string>& meta) {
378 // remove and re-insert in case the device metadata changed
379 RWLock::WLocker l(lock);
380 _rm(state->key);
381 {
382 Mutex::Locker l2(state->lock);
383 state->set_metadata(meta);
384 }
385 _insert(state);
386 }
387
7c673cae
FG
388 /**
389 * Remove state for all daemons of this type whose names are
390 * not present in `names_exist`. Use this function when you have
391 * a cluster map and want to ensure that anything absent in the map
392 * is also absent in this class.
393 */
224ce89b
WB
394 void cull(const std::string& svc_name,
395 const std::set<std::string>& names_exist);
92f5a8d4 396 void cull_services(const std::set<std::string>& types_exist);
7c673cae
FG
397};
398
399#endif
400