]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2011 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #ifndef CEPH_HEARTBEATMAP_H | |
16 | #define CEPH_HEARTBEATMAP_H | |
17 | ||
7c673cae | 18 | #include <list> |
31f18b77 FG |
19 | #include <atomic> |
20 | #include <string> | |
31f18b77 | 21 | #include <pthread.h> |
7c673cae | 22 | |
11fdf7f2 | 23 | #include "common/ceph_time.h" |
9f95a23c TL |
24 | #include "common/ceph_mutex.h" |
25 | #include "include/common_fwd.h" | |
7c673cae FG |
26 | |
27 | namespace ceph { | |
28 | ||
29 | /* | |
30 | * HeartbeatMap - | |
31 | * | |
32 | * Maintain a set of handles for internal subsystems to periodically | |
33 | * check in with a health check and timeout. Each user can register | |
34 | * and get a handle they can use to set or reset a timeout. | |
35 | * | |
36 | * A simple is_healthy() method checks for any users who are not within | |
37 | * their grace period for a heartbeat. | |
38 | */ | |
39 | ||
40 | struct heartbeat_handle_d { | |
41 | const std::string name; | |
f67539c2 TL |
42 | pthread_t thread_id = 0; |
43 | using clock = ceph::coarse_mono_clock; | |
44 | using time = ceph::coarse_mono_time; | |
45 | std::atomic<time> timeout = clock::zero(); | |
46 | std::atomic<time> suicide_timeout = clock::zero(); | |
47 | ceph::timespan grace = ceph::timespan::zero(); | |
48 | ceph::timespan suicide_grace = ceph::timespan::zero(); | |
7c673cae FG |
49 | std::list<heartbeat_handle_d*>::iterator list_item; |
50 | ||
51 | explicit heartbeat_handle_d(const std::string& n) | |
f67539c2 | 52 | : name(n) |
7c673cae FG |
53 | { } |
54 | }; | |
55 | ||
56 | class HeartbeatMap { | |
57 | public: | |
58 | // register/unregister | |
59 | heartbeat_handle_d *add_worker(const std::string& name, pthread_t thread_id); | |
60 | void remove_worker(const heartbeat_handle_d *h); | |
61 | ||
62 | // reset the timeout so that it expects another touch within grace amount of time | |
11fdf7f2 | 63 | void reset_timeout(heartbeat_handle_d *h, |
f67539c2 TL |
64 | ceph::timespan grace, |
65 | ceph::timespan suicide_grace); | |
7c673cae FG |
66 | // clear the timeout so that it's not checked on |
67 | void clear_timeout(heartbeat_handle_d *h); | |
68 | ||
69 | // return false if any of the timeouts are currently expired. | |
70 | bool is_healthy(); | |
71 | ||
72 | // touch cct->_conf->heartbeat_file if is_healthy() | |
73 | void check_touch_file(); | |
74 | ||
75 | // get the number of unhealthy workers | |
76 | int get_unhealthy_workers() const; | |
77 | ||
78 | // get the number of total workers | |
79 | int get_total_workers() const; | |
80 | ||
81 | explicit HeartbeatMap(CephContext *cct); | |
82 | ~HeartbeatMap(); | |
83 | ||
84 | private: | |
f67539c2 | 85 | using clock = ceph::coarse_mono_clock; |
7c673cae | 86 | CephContext *m_cct; |
9f95a23c TL |
87 | ceph::shared_mutex m_rwlock = |
88 | ceph::make_shared_mutex("HeartbeatMap::m_rwlock"); | |
f67539c2 | 89 | clock::time_point m_inject_unhealthy_until; |
7c673cae | 90 | std::list<heartbeat_handle_d*> m_workers; |
31f18b77 FG |
91 | std::atomic<unsigned> m_unhealthy_workers = { 0 }; |
92 | std::atomic<unsigned> m_total_workers = { 0 }; | |
7c673cae | 93 | |
11fdf7f2 | 94 | bool _check(const heartbeat_handle_d *h, const char *who, |
f67539c2 | 95 | ceph::coarse_mono_time now); |
7c673cae FG |
96 | }; |
97 | ||
98 | } | |
99 | #endif |