]> git.proxmox.com Git - ceph.git/blame - ceph/src/common/HeartbeatMap.h
import 15.2.0 Octopus source
[ceph.git] / ceph / src / common / HeartbeatMap.h
CommitLineData
7c673cae
FG
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2011 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 *
 */
14
15#ifndef CEPH_HEARTBEATMAP_H
16#define CEPH_HEARTBEATMAP_H
17
7c673cae 18#include <list>
31f18b77
FG
19#include <atomic>
20#include <string>
31f18b77 21#include <pthread.h>
7c673cae 22
11fdf7f2 23#include "common/ceph_time.h"
9f95a23c
TL
24#include "common/ceph_mutex.h"
25#include "include/common_fwd.h"
7c673cae
FG
26
27namespace ceph {
28
/*
 * HeartbeatMap -
 *
 * Maintain a set of handles for internal subsystems to periodically
 * check in with a health check and timeout.  Each user can register
 * and get a handle they can use to set or reset a timeout.
 *
 * A simple is_healthy() method checks for any users who are not within
 * their grace period for a heartbeat.
 */
39
/**
 * Per-worker heartbeat handle.
 *
 * Each user that registers with the heartbeat map gets one of these and
 * uses it to set, reset, or clear its timeout.
 */
struct heartbeat_handle_d {
  /// Name given at construction; immutable afterwards.
  const std::string name;

  /// Thread identifier.  Value-initialized rather than assigned the literal
  /// 0, because pthread_t is an opaque type that need not be arithmetic.
  /// NOTE(review): presumably filled in by HeartbeatMap::add_worker() -- confirm.
  pthread_t thread_id{};

  // TODO: use atomic<time_point>, once we can ditch GCC 4.8
  /// Both start at 0.
  /// NOTE(review): 0 presumably means "no timeout armed" -- confirm in the .cc.
  std::atomic<unsigned> timeout{0};
  std::atomic<unsigned> suicide_timeout{0};

  /// Grace periods, both starting at 0.
  /// NOTE(review): presumably the values last passed to
  /// HeartbeatMap::reset_timeout(); units are not visible here -- confirm.
  time_t grace = 0;
  time_t suicide_grace = 0;

  /// Iterator into a list of handle pointers.
  /// NOTE(review): presumably this handle's position in
  /// HeartbeatMap::m_workers -- confirm.
  std::list<heartbeat_handle_d*>::iterator list_item;

  /// Construct a handle named @p n; every other field takes its default.
  explicit heartbeat_handle_d(const std::string& n)
    : name(n)
  { }
};
52
53class HeartbeatMap {
54 public:
55 // register/unregister
56 heartbeat_handle_d *add_worker(const std::string& name, pthread_t thread_id);
57 void remove_worker(const heartbeat_handle_d *h);
58
59 // reset the timeout so that it expects another touch within grace amount of time
11fdf7f2
TL
60 void reset_timeout(heartbeat_handle_d *h,
61 ceph::coarse_mono_clock::rep grace,
62 ceph::coarse_mono_clock::rep suicide_grace);
7c673cae
FG
63 // clear the timeout so that it's not checked on
64 void clear_timeout(heartbeat_handle_d *h);
65
66 // return false if any of the timeouts are currently expired.
67 bool is_healthy();
68
69 // touch cct->_conf->heartbeat_file if is_healthy()
70 void check_touch_file();
71
72 // get the number of unhealthy workers
73 int get_unhealthy_workers() const;
74
75 // get the number of total workers
76 int get_total_workers() const;
77
78 explicit HeartbeatMap(CephContext *cct);
79 ~HeartbeatMap();
80
81 private:
82 CephContext *m_cct;
9f95a23c
TL
83 ceph::shared_mutex m_rwlock =
84 ceph::make_shared_mutex("HeartbeatMap::m_rwlock");
11fdf7f2 85 ceph::coarse_mono_clock::time_point m_inject_unhealthy_until;
7c673cae 86 std::list<heartbeat_handle_d*> m_workers;
31f18b77
FG
87 std::atomic<unsigned> m_unhealthy_workers = { 0 };
88 std::atomic<unsigned> m_total_workers = { 0 };
7c673cae 89
11fdf7f2
TL
90 bool _check(const heartbeat_handle_d *h, const char *who,
91 ceph::coarse_mono_clock::rep now);
7c673cae
FG
92};
93
94}
95#endif