1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2011 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 */
15 #ifndef CEPH_HEARTBEATMAP_H
16 #define CEPH_HEARTBEATMAP_H
23 #include "common/ceph_time.h"
24 #include "common/ceph_mutex.h"
25 #include "include/common_fwd.h"
/*
 * Maintain a set of handles for internal subsystems to periodically
 * check in with a health check and timeout. Each user can register
 * and get a handle they can use to set or reset a timeout.
 *
 * A simple is_healthy() method checks for any users who are not within
 * their grace period for a heartbeat.
 */
// Per-worker heartbeat record.  A pointer to one of these is the handle
// returned by add_worker() and later passed to reset_timeout(),
// clear_timeout() and remove_worker().
struct heartbeat_handle_d {
  // Worker label; fixed at construction.
  const std::string name;
  // Thread associated with the worker; starts out as 0.
  pthread_t thread_id = 0;
  // TODO: use atomic<time_point>, once we can ditch GCC 4.8
  // Both deadlines start at 0.
  // NOTE(review): presumably 0 means "no timeout armed" -- confirm in the .cc.
  std::atomic<unsigned> timeout{0};
  std::atomic<unsigned> suicide_timeout{0};
  // Grace periods matching the two deadlines above; both start at 0.
  time_t grace = 0;
  time_t suicide_grace = 0;
  // Iterator into a std::list<heartbeat_handle_d*>.
  // NOTE(review): presumably this handle's slot in HeartbeatMap::m_workers
  // -- confirm against the .cc.
  std::list<heartbeat_handle_d*>::iterator list_item;

  // Create a handle labelled `n`; every other field keeps its zero default
  // and list_item is left default-constructed.
  explicit heartbeat_handle_d(const std::string& n)
    : name(n)
  {}
};
55 // register/unregister
56 heartbeat_handle_d
*add_worker(const std::string
& name
, pthread_t thread_id
);
57 void remove_worker(const heartbeat_handle_d
*h
);
59 // reset the timeout so that it expects another touch within grace amount of time
60 void reset_timeout(heartbeat_handle_d
*h
,
61 ceph::coarse_mono_clock::rep grace
,
62 ceph::coarse_mono_clock::rep suicide_grace
);
63 // clear the timeout so that it's not checked on
64 void clear_timeout(heartbeat_handle_d
*h
);
66 // return false if any of the timeouts are currently expired.
// If is_healthy() holds, touch the file named by cct->_conf->heartbeat_file.
void check_touch_file();
72 // get the number of unhealthy workers
73 int get_unhealthy_workers() const;
75 // get the number of total workers
76 int get_total_workers() const;
78 explicit HeartbeatMap(CephContext
*cct
);
83 ceph::shared_mutex m_rwlock
=
84 ceph::make_shared_mutex("HeartbeatMap::m_rwlock");
85 ceph::coarse_mono_clock::time_point m_inject_unhealthy_until
;
86 std::list
<heartbeat_handle_d
*> m_workers
;
87 std::atomic
<unsigned> m_unhealthy_workers
= { 0 };
88 std::atomic
<unsigned> m_total_workers
= { 0 };
90 bool _check(const heartbeat_handle_d
*h
, const char *who
,
91 ceph::coarse_mono_clock::rep now
);