]> git.proxmox.com Git - ceph.git/blob - ceph/src/common/HeartbeatMap.h
import ceph quincy 17.2.6
[ceph.git] / ceph / src / common / HeartbeatMap.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2011 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #ifndef CEPH_HEARTBEATMAP_H
16 #define CEPH_HEARTBEATMAP_H
17
18 #include <list>
19 #include <atomic>
20 #include <string>
21 #include <pthread.h>
22
23 #include "common/ceph_time.h"
24 #include "common/ceph_mutex.h"
25 #include "include/common_fwd.h"
26
27 namespace ceph {
28
29 /*
30 * HeartbeatMap -
31 *
32 * Maintain a set of handles for internal subsystems to periodically
33 * check in with a health check and timeout. Each user can register
34 * and get a handle they can use to set or reset a timeout.
35 *
36 * A simple is_healthy() method checks for any users who are not within
37 * their grace period for a heartbeat.
38 */
39
40 struct heartbeat_handle_d {
41 const std::string name;
42 pthread_t thread_id = 0;
43 using clock = ceph::coarse_mono_clock;
44 using time = ceph::coarse_mono_time;
45 std::atomic<time> timeout = clock::zero();
46 std::atomic<time> suicide_timeout = clock::zero();
47 ceph::timespan grace = ceph::timespan::zero();
48 ceph::timespan suicide_grace = ceph::timespan::zero();
49 std::list<heartbeat_handle_d*>::iterator list_item;
50
51 explicit heartbeat_handle_d(const std::string& n)
52 : name(n)
53 { }
54 };
55
56 class HeartbeatMap {
57 public:
58 // register/unregister
59 heartbeat_handle_d *add_worker(const std::string& name, pthread_t thread_id);
60 void remove_worker(const heartbeat_handle_d *h);
61
62 // reset the timeout so that it expects another touch within grace amount of time
63 void reset_timeout(heartbeat_handle_d *h,
64 ceph::timespan grace,
65 ceph::timespan suicide_grace);
66 // clear the timeout so that it's not checked on
67 void clear_timeout(heartbeat_handle_d *h);
68
69 // return false if any of the timeouts are currently expired.
70 bool is_healthy();
71
72 // touch cct->_conf->heartbeat_file if is_healthy()
73 void check_touch_file();
74
75 // get the number of unhealthy workers
76 int get_unhealthy_workers() const;
77
78 // get the number of total workers
79 int get_total_workers() const;
80
81 explicit HeartbeatMap(CephContext *cct);
82 ~HeartbeatMap();
83
84 private:
85 using clock = ceph::coarse_mono_clock;
86 CephContext *m_cct;
87 ceph::shared_mutex m_rwlock =
88 ceph::make_shared_mutex("HeartbeatMap::m_rwlock");
89 clock::time_point m_inject_unhealthy_until;
90 std::list<heartbeat_handle_d*> m_workers;
91 std::atomic<unsigned> m_unhealthy_workers = { 0 };
92 std::atomic<unsigned> m_total_workers = { 0 };
93
94 bool _check(const heartbeat_handle_d *h, const char *who,
95 ceph::coarse_mono_time now);
96 };
97
98 }
99 #endif