1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2016 John Spray <john.spray@redhat.com>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
14 #ifndef DAEMON_STATE_H_
15 #define DAEMON_STATE_H_
21 #include <boost/circular_buffer.hpp>
23 #include "common/RWLock.h"
24 #include "include/str_map.h"
26 #include "msg/msg_types.h"
28 // For PerfCounterType
29 #include "messages/MMgrReport.h"
30 #include "DaemonKey.h"
36 // An instance of a performance counter type, within
37 // a particular daemon.
38 class PerfCounterInstance
// Plain sample constructor: a utime_t timestamp plus one 64-bit value.
45 DataPoint(utime_t t_
, uint64_t v_
)
// Averaged sample constructor: a utime_t timestamp plus two 64-bit values.
// NOTE(review): s_/c_ are presumably a running sum and a sample count for
// long-running averages -- confirm against the push_avg() definition.
56 AvgDataPoint(utime_t t_
, uint64_t s_
, uint64_t c_
)
// Sample storage. The constructor below sizes these circular buffers to
// 20 elements.
61 boost::circular_buffer
<DataPoint
> buffer
;
62 boost::circular_buffer
<AvgDataPoint
> avg_buffer
;
// Declared here, defined out of line.
64 uint64_t get_current() const;
// Returns a const reference to a circular buffer of DataPoint.
67 const boost::circular_buffer
<DataPoint
> & get_data() const
// NOTE(review): presumably buffer.back(), mirroring get_latest_data_avg();
// if so it must not be called while the buffer is empty -- confirm callers.
71 const DataPoint
& get_latest_data() const
// Returns a const reference to a circular buffer of AvgDataPoint.
75 const boost::circular_buffer
<AvgDataPoint
> & get_data_avg() const
// Last element of avg_buffer; back() requires a non-empty buffer.
79 const AvgDataPoint
& get_latest_data_avg() const
81 return avg_buffer
.back();
// Record a new plain / averaged sample (both defined out of line).
83 void push(utime_t t
, uint64_t const &v
);
84 void push_avg(utime_t t
, uint64_t const &s
, uint64_t const &c
);
// The counter type decides which buffer gets storage: when the
// PERFCOUNTER_LONGRUNAVG bit is set, avg_buffer is given room for 20
// samples. `buffer` is sized to 20 by the second assignment.
// NOTE(review): the second assignment is presumably the else-branch, so
// only one of the two buffers is usable per instance -- confirm.
86 PerfCounterInstance(enum perfcounter_type_d type
)
88 if (type
& PERFCOUNTER_LONGRUNAVG
)
89 avg_buffer
= boost::circular_buffer
<AvgDataPoint
>(20);
91 buffer
= boost::circular_buffer
<DataPoint
>(20);
96 typedef std::map
<std::string
, PerfCounterType
> PerfCounterTypes
;
98 // Performance counters for one daemon
99 class DaemonPerfCounters
102 // The record of perf stat types, shared between daemons
// Held by reference: the referenced PerfCounterTypes object must outlive
// this DaemonPerfCounters.
103 PerfCounterTypes
&types
;
// Takes the shared type registry by (non-const) reference.
105 explicit DaemonPerfCounters(PerfCounterTypes
&types_
)
// One PerfCounterInstance per string key.
// NOTE(review): the key is presumably the counter path/name -- confirm
// against update().
109 std::map
<std::string
, PerfCounterInstance
> instances
;
// Consumes one MMgrReport message; defined out of line.
111 void update(const MMgrReport
& report
);
119 // The state that we store about one daemon
// Per-daemon mutex. Code in this header takes it before calling
// set_metadata() and before iterating `devices`.
123 ceph::mutex lock
= ceph::make_mutex("DaemonState::lock");
127 // The hostname where daemon was last seen running (extracted
128 // from the metadata)
129 std::string hostname
;
131 // The metadata (hostname, version, etc) sent from the daemon
132 std::map
<std::string
, std::string
> metadata
;
134 /// device ids -> devname, derived from metadata[device_ids]
135 std::map
<std::string
,std::string
> devices
;
137 /// device ids -> by-path, derived from metadata[device_ids]
138 std::map
<std::string
,std::string
> devices_bypath
;
140 // TODO: this can be generalized to other daemons
141 std::vector
<DaemonHealthMetric
> daemon_health_metrics
;
// Service-daemon bookkeeping: a flag (default false), a status map with
// its timestamp, and the time of the last beacon.
// NOTE(review): meanings are inferred from the field names; the writers
// live outside this header.
144 bool service_daemon
= false;
145 utime_t service_status_stamp
;
146 std::map
<std::string
, std::string
> service_status
;
147 utime_t last_service_beacon
;
// Per-option values: option name -> (int32_t key -> value string).
// NOTE(review): the int32_t key is presumably the config source/level --
// confirm.
150 std::map
<std::string
,std::map
<int32_t,std::string
>> config
;
152 // mon config values we failed to set
153 std::map
<std::string
,std::string
> ignored_mon_config
;
155 // compiled-in config defaults (rarely used, so we leave them encoded!)
// config_defaults is filled lazily from config_defaults_bl by
// _get_config_defaults().
156 bufferlist config_defaults_bl
;
157 std::map
<std::string
,std::string
> config_defaults
;
159 // The perf counters received in MMgrReport messages
160 DaemonPerfCounters perf_counters
;
// Passes the shared perf counter type registry on to perf_counters.
162 explicit DaemonState(PerfCounterTypes
&types_
)
163 : perf_counters(types_
)
// Rebuild the device maps and the hostname from a daemon metadata map.
//
// Reads three keys of `m`:
//   "device_ids"   -> parsed into devname -> id pairs
//   "device_paths" -> parsed into devname -> path pairs
//   "hostname"     -> copied into `hostname`
// Both device strings are split by get_str_map() using the delimiter set
// ",; ".
//
// DaemonStateIndex::update_metadata() calls this with this->lock held.
// NOTE(review): each find() result is dereferenced below; the end()
// guards are presumably on lines not shown here -- confirm before
// touching.
167 void set_metadata(const std::map
<std::string
,std::string
>& m
) {
// Drop the previously derived by-path map before rebuilding it.
169 devices_bypath
.clear();
171 auto p
= m
.find("device_ids");
173 map
<std::string
,std::string
> devs
, paths
; // devname -> id or path
174 get_str_map(p
->second
, &devs
, ",; ");
175 auto q
= m
.find("device_paths");
177 get_str_map(q
->second
, &paths
, ",; ");
// Invert devname -> id into id -> devname, and attach the path of the
// same devname when one was reported.
179 for (auto& i
: devs
) {
180 if (i
.second
.size()) { // skip blank ids
181 devices
[i
.second
] = i
.first
; // id -> devname
182 auto j
= paths
.find(i
.first
);
183 if (j
!= paths
.end()) {
184 devices_bypath
[i
.second
] = j
->second
; // id -> path
// `p` is reused for the hostname lookup.
189 p
= m
.find("hostname");
191 hostname
= p
->second
;
// Return the compiled-in config defaults, decoding them on demand.
//
// The encoded form (config_defaults_bl) is decoded into config_defaults
// only when the map is still empty and the buffer is non-empty.
// A buffer::error raised by decode() is caught here.
// NOTE(review): the handler body is not shown; presumably the error is
// swallowed and the possibly empty map is returned -- confirm.
// NOTE(review): no locking is visible in this function; presumably the
// caller holds DaemonState::lock -- confirm.
195 const std::map
<std::string
,std::string
>& _get_config_defaults() {
196 if (config_defaults
.empty() &&
197 config_defaults_bl
.length()) {
198 auto p
= config_defaults_bl
.cbegin();
200 decode(config_defaults
, p
);
201 } catch (buffer::error
& e
) {
204 return config_defaults
;
208 typedef std::shared_ptr
<DaemonState
> DaemonStatePtr
;
209 typedef std::map
<DaemonKey
, DaemonStatePtr
> DaemonStateCollection
;
// State kept about one device. Reference counted (RefCountedObject) and
// held through ceph::ref_t<DeviceState> in DaemonStateIndex::devices.
212 struct DeviceState
: public RefCountedObject
215 /// (server,devname,path)
216 std::set
<std::tuple
<std::string
,std::string
,std::string
>> attachments
;
/// keys of the daemons associated with this device
217 std::set
<DaemonKey
> daemons
;
219 std::map
<string
,string
> metadata
; ///< persistent metadata
221 pair
<utime_t
,utime_t
> life_expectancy
; ///< when device failure is expected
222 utime_t life_expectancy_stamp
; ///< when life expectancy was recorded
223 float wear_level
= -1; ///< SSD wear level (negative if unknown)
// The mutators and formatters below are defined out of line.
225 void set_metadata(map
<string
,string
>&& m
);
227 void set_life_expectancy(utime_t from
, utime_t to
, utime_t now
);
228 void rm_life_expectancy();
230 void set_wear_level(float wear
);
232 string
get_life_expectancy_str(utime_t now
) const;
234 /// true if we can be safely forgotten/removed from memory
// i.e. no daemon refers to the device and no persistent metadata is set;
// DaemonStateIndex::with_device_write() erases the device when this holds.
236 return daemons
.empty() && metadata
.empty();
239 void dump(Formatter
*f
) const;
240 void print(ostream
& out
) const;
// Instances are created through ceph::make_ref<DeviceState>(devid); the
// constructor only records the device id.
243 FRIEND_MAKE_REF(DeviceState
);
244 DeviceState(const std::string
& n
) : devid(n
) {}
248 * Fuse the collection of per-daemon metadata from Ceph into
249 * a view that can be queried by service type, ID or also
250 * by server (aka fqdn).
252 class DaemonStateIndex
// Reader/writer lock for the containers below. `mutable` so that the
// const accessors (with_device, with_devices, ...) can take it shared.
255 mutable ceph::shared_mutex lock
=
256 ceph::make_shared_mutex("DaemonStateIndex", true, true, true);
// Daemon collections keyed by a string.
// NOTE(review): the key is presumably the hostname, as in
// get_by_server(hostname) -- confirm.
258 std::map
<std::string
, DaemonStateCollection
> by_server
;
// Every known daemon, keyed by DaemonKey.
259 DaemonStateCollection all
;
// Keys queried by is_updating().
// NOTE(review): presumably filled by notify_updating() and emptied by
// clear_updating() -- confirm.
260 std::set
<DaemonKey
> updating
;
// Device id -> reference-counted DeviceState.
262 std::map
<std::string
,ceph::ref_t
<DeviceState
>> devices
;
// Defined out of line.
264 void _erase(const DaemonKey
& dmk
);
// Look up the DeviceState for `dev`, creating it when needed.
//
// try_emplace() inserts a null placeholder only if `dev` is not yet a
// key; `d` then refers to the mapped ref (new or existing), and a fresh
// DeviceState is assigned to it.
// NOTE(review): the assignment is presumably guarded by a null check on
// `d`, and `d` is presumably the return value -- confirm.
// with_device_create() calls this with `lock` held exclusively.
266 ceph::ref_t
<DeviceState
> _get_or_create_device(const std::string
& dev
) {
267 auto em
= devices
.try_emplace(dev
, nullptr);
268 auto& d
= em
.first
->second
;
270 d
= ceph::make_ref
<DeviceState
>(dev
);
274 void _erase_device(const ceph::ref_t
<DeviceState
>& d
) {
275 devices
.erase(d
->devid
);
// Nothing to set up beyond the in-class member initializers.
279 DaemonStateIndex() {}
281 // FIXME: shouldn't really be public, maybe construct DaemonState
282 // objects internally to avoid this.
283 PerfCounterTypes types
;
// Daemon insertion, lookup and removal; all defined out of line.
// NOTE(review): the underscore-prefixed variants presumably expect `lock`
// to be held by the caller already (as with _erase_device in
// with_device_write) -- confirm in the implementation file.
285 void insert(DaemonStatePtr dm
);
286 void _insert(DaemonStatePtr dm
);
287 bool exists(const DaemonKey
&key
) const;
288 DaemonStatePtr
get(const DaemonKey
&key
);
289 void rm(const DaemonKey
&key
);
290 void _rm(const DaemonKey
&key
);
292 // Note that these return by value rather than reference to avoid
293 // callers needing to stay in lock while using result. Callers must
294 // still take the individual DaemonState::lock on each entry though.
295 DaemonStateCollection
get_by_server(const std::string
&hostname
) const;
296 DaemonStateCollection
get_by_service(const std::string
&svc_name
) const;
// Returns a copy of `all`.
// NOTE(review): unlike the with_* helpers in this class, no lock is taken
// in this inline body -- confirm that callers do not race with writers.
297 DaemonStateCollection
get_all() const {return all
;}
299 template<typename Callback
, typename
...Args
>
300 auto with_daemons_by_server(Callback
&& cb
, Args
&&... args
) const ->
301 decltype(cb(by_server
, std::forward
<Args
>(args
)...)) {
302 std::shared_lock l
{lock
};
304 return std::forward
<Callback
>(cb
)(by_server
, std::forward
<Args
>(args
)...);
// Look up device `dev` under the shared lock and, when it exists, invoke
// cb(device, args...) on it.
// NOTE(review): presumably returns false when the device is unknown and
// true after the callback ran -- confirm.
307 template<typename Callback
, typename
...Args
>
308 bool with_device(const std::string
& dev
,
309 Callback
&& cb
, Args
&&... args
) const {
310 std::shared_lock l
{lock
};
311 auto p
= devices
.find(dev
);
312 if (p
== devices
.end()) {
// The callback receives the DeviceState itself, not the ref_t handle.
315 std::forward
<Callback
>(cb
)(*p
->second
, std::forward
<Args
>(args
)...);
// Like with_device(), but takes `lock` exclusively so the callback may
// modify the device. A device that reports empty() after the callback is
// erased from the index.
// NOTE(review): presumably returns false when the device is unknown and
// true otherwise -- confirm.
319 template<typename Callback
, typename
...Args
>
320 bool with_device_write(const std::string
& dev
,
321 Callback
&& cb
, Args
&&... args
) {
322 std::unique_lock l
{lock
};
323 auto p
= devices
.find(dev
);
324 if (p
== devices
.end()) {
327 std::forward
<Callback
>(cb
)(*p
->second
, std::forward
<Args
>(args
)...);
// `p` must not be used after the erase below: it is invalidated.
328 if (p
->second
->empty()) {
329 _erase_device(p
->second
);
334 template<typename Callback
, typename
...Args
>
335 void with_device_create(const std::string
& dev
,
336 Callback
&& cb
, Args
&&... args
) {
337 std::unique_lock l
{lock
};
338 auto d
= _get_or_create_device(dev
);
339 std::forward
<Callback
>(cb
)(*d
, std::forward
<Args
>(args
)...);
342 template<typename Callback
, typename
...Args
>
343 void with_devices(Callback
&& cb
, Args
&&... args
) const {
344 std::shared_lock l
{lock
};
345 for (auto& i
: devices
) {
346 std::forward
<Callback
>(cb
)(*i
.second
, std::forward
<Args
>(args
)...);
// Variant of with_devices() that takes an additional callable `cbi`,
// documented below as "with lock taken", next to the per-device callback.
// NOTE(review): `cbi` is presumably invoked once, after the shared lock is
// acquired and before the loop -- confirm.
// NOTE(review): `args` is perfect-forwarded on every loop iteration; an
// rvalue argument could be moved-from after the first device. Consider
// passing `args...` as lvalues inside the loop.
350 template<typename CallbackInitial
, typename Callback
, typename
...Args
>
351 void with_devices2(CallbackInitial
&& cbi
, // with lock taken
352 Callback
&& cb
, // for each device
353 Args
&&... args
) const {
354 std::shared_lock l
{lock
};
356 for (auto& i
: devices
) {
357 std::forward
<Callback
>(cb
)(*i
.second
, std::forward
<Args
>(args
)...);
// Collect device ids of the daemons on `server` into `*ls`.
//
// get_by_server() returns its result by value, so each daemon is visited
// without holding the index lock; every daemon's own lock is taken while
// its `devices` map is read.
// NOTE(review): presumably each device id (the map key) is inserted into
// `*ls` -- confirm.
361 void list_devids_by_server(const std::string
& server
,
362 std::set
<std::string
> *ls
) {
363 auto m
= get_by_server(server
);
365 std::lock_guard
l(i
.second
->lock
);
366 for (auto& j
: i
.second
->devices
) {
// Takes `lock` exclusively.
// NOTE(review): presumably records `k` in `updating` -- confirm.
372 void notify_updating(const DaemonKey
&k
) {
373 std::unique_lock l
{lock
};
// Takes `lock` exclusively.
// NOTE(review): presumably removes `k` from `updating` -- confirm.
376 void clear_updating(const DaemonKey
&k
) {
377 std::unique_lock l
{lock
};
380 bool is_updating(const DaemonKey
&k
) {
381 std::shared_lock l
{lock
};
382 return updating
.count(k
) > 0;
// Replace the metadata of an already tracked daemon.
//
// Lock order: the index lock is taken exclusively first, then the
// daemon's own lock around set_metadata().
// NOTE(review): per the comment below, the daemon is presumably removed
// from the index before and re-inserted after the metadata change --
// confirm.
385 void update_metadata(DaemonStatePtr state
,
386 const map
<string
,string
>& meta
) {
387 // remove and re-insert in case the device metadata changed
388 std::unique_lock l
{lock
};
391 std::lock_guard l2
{state
->lock
};
392 state
->set_metadata(meta
);
398 * Remove state for all daemons of this type whose names are
399 * not present in `names_exist`. Use this function when you have
400 * a cluster map and want to ensure that anything absent in the map
401 * is also absent in this class.
// Defined out of line.
403 void cull(const std::string
& svc_name
,
404 const std::set
<std::string
>& names_exist
);
// Defined out of line.
// NOTE(review): presumably the service-type counterpart of cull(): drops
// state for service types missing from `types_exist` -- confirm.
405 void cull_services(const std::set
<std::string
>& types_exist
);