]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2011 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #ifndef CEPH_HEARTBEATMAP_H | |
16 | #define CEPH_HEARTBEATMAP_H | |
17 | ||
7c673cae | 18 | #include <list> |
31f18b77 FG |
19 | #include <atomic> |
20 | #include <string> | |
31f18b77 | 21 | #include <pthread.h> |
7c673cae | 22 | |
11fdf7f2 | 23 | #include "common/ceph_time.h" |
9f95a23c TL |
24 | #include "common/ceph_mutex.h" |
25 | #include "include/common_fwd.h" | |
7c673cae FG |
26 | |
27 | namespace ceph { | |
28 | ||
29 | /* | |
30 | * HeartbeatMap - | |
31 | * | |
32 | * Maintain a set of handles for internal subsystems to periodically | |
33 | * check in with a health check and timeout. Each user can register | |
34 | * and get a handle they can use to set or reset a timeout. | |
35 | * | |
36 | * A simple is_healthy() method checks for any users who are not within | |
37 | * their grace period for a heartbeat. | |
38 | */ | |
39 | ||
/**
 * Per-worker heartbeat record.
 *
 * HeartbeatMap::add_worker() returns a pointer to one of these, and the
 * other HeartbeatMap calls (reset_timeout(), clear_timeout(),
 * remove_worker()) take that pointer back.
 *
 * Every field except `name` starts out zero / value-initialized; `name`
 * is fixed at construction.
 */
struct heartbeat_handle_d {
  /// Worker name supplied at construction; immutable afterwards.
  const std::string name;

  /// Thread associated with this worker.  POSIX leaves pthread_t opaque
  /// (it need not be an arithmetic type), so it is value-initialized
  /// instead of being assigned the integer literal 0.
  pthread_t thread_id{};

  // TODO: use atomic<time_point>, once we can ditch GCC 4.8
  /// Deadlines for this worker, both initially 0.
  /// NOTE(review): units and the meaning of 0 are defined by the
  /// HeartbeatMap implementation, which is not visible in this header.
  std::atomic<unsigned> timeout{0};
  std::atomic<unsigned> suicide_timeout{0};

  /// Grace periods for this worker, both initially 0.
  time_t grace = 0;
  time_t suicide_grace = 0;

  /// Iterator into a std::list<heartbeat_handle_d*>.
  /// NOTE(review): presumably this handle's position in
  /// HeartbeatMap::m_workers -- confirm against the implementation.
  std::list<heartbeat_handle_d*>::iterator list_item;

  /// Build a handle named @p n; all other state keeps its default above.
  explicit heartbeat_handle_d(const std::string& n)
    : name(n)
  { }
};
52 | ||
53 | class HeartbeatMap { | |
54 | public: | |
55 | // register/unregister | |
56 | heartbeat_handle_d *add_worker(const std::string& name, pthread_t thread_id); | |
57 | void remove_worker(const heartbeat_handle_d *h); | |
58 | ||
59 | // reset the timeout so that it expects another touch within grace amount of time | |
11fdf7f2 TL |
60 | void reset_timeout(heartbeat_handle_d *h, |
61 | ceph::coarse_mono_clock::rep grace, | |
62 | ceph::coarse_mono_clock::rep suicide_grace); | |
7c673cae FG |
63 | // clear the timeout so that it's not checked on |
64 | void clear_timeout(heartbeat_handle_d *h); | |
65 | ||
66 | // return false if any of the timeouts are currently expired. | |
67 | bool is_healthy(); | |
68 | ||
69 | // touch cct->_conf->heartbeat_file if is_healthy() | |
70 | void check_touch_file(); | |
71 | ||
72 | // get the number of unhealthy workers | |
73 | int get_unhealthy_workers() const; | |
74 | ||
75 | // get the number of total workers | |
76 | int get_total_workers() const; | |
77 | ||
78 | explicit HeartbeatMap(CephContext *cct); | |
79 | ~HeartbeatMap(); | |
80 | ||
81 | private: | |
82 | CephContext *m_cct; | |
9f95a23c TL |
83 | ceph::shared_mutex m_rwlock = |
84 | ceph::make_shared_mutex("HeartbeatMap::m_rwlock"); | |
11fdf7f2 | 85 | ceph::coarse_mono_clock::time_point m_inject_unhealthy_until; |
7c673cae | 86 | std::list<heartbeat_handle_d*> m_workers; |
31f18b77 FG |
87 | std::atomic<unsigned> m_unhealthy_workers = { 0 }; |
88 | std::atomic<unsigned> m_total_workers = { 0 }; | |
7c673cae | 89 | |
11fdf7f2 TL |
90 | bool _check(const heartbeat_handle_d *h, const char *who, |
91 | ceph::coarse_mono_clock::rep now); | |
7c673cae FG |
92 | }; |
93 | ||
94 | } | |
95 | #endif |