1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2019 Red Hat
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
16 #include "include/types.h"
// Connectivity state reported for a single monitor rank: a connected /
// not-connected flag and a reliability score per peer, stamped with the
// election epoch and a version counter within that epoch.
// NOTE(review): both maps are keyed by int; presumably the key is the other
// mon's rank -- confirm against the code that fills them.
18 struct ConnectionReport
{
19 int rank
= -1; // mon rank this state belongs to
20 std::map
<int, bool> current
; // true if connected to the other mon
21 std::map
<int, double> history
; // [0-1]; the connection reliability
22 epoch_t epoch
= 0; // the (local) election epoch the ConnectionReport came from
23 uint64_t epoch_version
= 0; // version of the ConnectionReport within the epoch
// Serialize this report into bl. The encoding is opened with
// ENCODE_START(1, 1, bl) and epoch_version is one of the fields written.
// NOTE(review): the two numeric ENCODE_START arguments are presumably the
// struct version and the compat version -- confirm against the encoding
// macros' definition.
24 void encode(bufferlist
& bl
) const {
25 ENCODE_START(1, 1, bl
);
30 encode(epoch_version
, bl
);
// Deserialize this report from the bufferlist iterator bl; epoch_version is
// one of the fields read. The iterator is taken by non-const reference.
// NOTE(review): presumably the read order mirrors encode() -- confirm.
33 void decode(bufferlist::const_iterator
& bl
) {
39 decode(epoch_version
, bl
);
// Member-wise equality: true only when rank, current, history, epoch and
// epoch_version all compare equal to those of o.
// Note that history holds doubles, so the map comparison is an exact
// floating-point comparison.
42 bool operator==(const ConnectionReport
& o
) const {
43 return o
.rank
== rank
&& o
.current
== current
&&
44 o
.history
== history
&& o
.epoch
== epoch
&&
45 o
.epoch_version
== epoch_version
;
// Stream-output operator for a ConnectionReport; declared here as a friend,
// defined elsewhere.
47 friend std::ostream
& operator<<(std::ostream
&o
, const ConnectionReport
& c
);
// Const member taking a ceph::Formatter pointer; defined elsewhere.
// NOTE(review): presumably writes this report's fields to f -- confirm.
49 void dump(ceph::Formatter
*f
) const;
// Static helper taking a list of ConnectionReport pointers by reference;
// defined elsewhere.
// NOTE(review): presumably appends heap-allocated sample instances to o
// for encoding tests -- confirm who owns and frees them.
50 static void generate_test_instances(std::list
<ConnectionReport
*>& o
);
// Macro defined outside this file.
// NOTE(review): presumably generates free encode()/decode() overloads that
// forward to ConnectionReport's member functions -- confirm against the
// macro definition.
52 WRITE_CLASS_ENCODER(ConnectionReport
);
57 * Get the rank of the running daemon.
58 * It can be -1, meaning unknown/invalid, or it
60 * You should not invoke the function get_total_connection_score()
61 * with an unknown rank.
// Pure virtual and const: every implementation must supply its rank as an
// int without modifying itself.
63 virtual int get_my_rank() const = 0;
65 * Asks our owner to encode us and persist it to disk.
66 * Presently we do this every tenth update.
// Pure virtual, no arguments, no return value: the implementation decides
// how the scores are written out.
68 virtual void persist_connectivity_scores() = 0;
// Virtual destructor with an empty body, so an implementation can be
// destroyed correctly through a RankProvider pointer.
69 virtual ~RankProvider() {}
// Keeps this rank's own ConnectionReport (my_reports) together with the
// reports received from peers (peer_reports), updates scores as live/dead
// intervals are reported, and can be encoded for storing on disk or for
// sending to peers.
72 class ConnectionTracker
{
75 * Receive a report from a peer and update our internal state
76 * if the peer has newer data.
// o is the peer's tracker; it is taken by const reference and so is not
// modified by this call.
78 void receive_peer_report(const ConnectionTracker
& o
);
80 * Bump up the epoch to the specified number.
81 * Validates that it is > current epoch and resets
82 * version to 0; returns false if not.
// e is the epoch to move to. The bool result reports whether the bump was
// accepted; it is false when e does not exceed the current epoch.
84 bool increase_epoch(epoch_t e
);
86 * Bump up the version within our epoch.
87 * If the new version is a multiple of ten, we also persist it.
// Takes no arguments and returns nothing; defined elsewhere.
89 void increase_version();
92 * Report that a connection to a peer rank has been considered alive for
93 * the given time duration. We assume that units_alive is <= the time
94 * since the previous reporting call.
95 * (Or, more precisely, we assume that the total amount of time
96 * passed in is less than or equal to the time which has actually
97 * passed -- you can report a 10-second death immediately followed
98 * by reporting 5 seconds of liveness if your metrics are delayed.)
// peer_rank: rank of the peer the connection is to.
// units_alive: length of the interval during which it was considered alive.
// NOTE(review): the time unit is not fixed by this declaration; presumably
// it must match the unit used for set_half_life() -- confirm.
100 void report_live_connection(int peer_rank
, double units_alive
);
102 * Report that a connection to a peer rank has been considered dead for
103 * the given time duration, analogous to that above.
// peer_rank: rank of the peer the connection is to.
// units_dead: length of the interval during which it was considered dead.
// NOTE(review): the time unit is not fixed by this declaration; presumably
// it must match the unit used for set_half_life() -- confirm.
105 void report_dead_connection(int peer_rank
, double units_dead
);
107 * Set the half-life for dropping connection state
108 * out of the ongoing score.
109 * Whenever you add a new data point:
110 * new_score = old_score * ( 1 - units / (2d)) + (units/(2d))
111 * where units is the units reported alive (for dead, you subtract them).
// d is the half-life value that appears as "d" in the score-update formula
// documented for this method.
113 void set_half_life(double d
) {
117 * Get the total connection score of a rank across
118 * all peers, and the count of how many electors think it's alive.
119 * For this summation, if a rank reports a peer as down, its score is zero.
// peer_rank: the rank whose score is being asked for.
// rating, live_count: pointers through which the total score and the count
// of electors that consider the rank alive are handed back (the function
// itself returns void).
// NOTE(review): whether null pointers are tolerated is not visible here --
// confirm before passing nullptr.
121 void get_total_connection_score(int peer_rank
, double *rating
,
122 int *live_count
) const;
124 * Encode this ConnectionTracker. Useful both for storing on disk
125 * and for sending off to peers for decoding and import
126 * with receive_peer_report() above.
// Const member: writes this tracker into bl without modifying the tracker.
128 void encode(bufferlist
&bl
) const;
// Reads this tracker's state from the bufferlist iterator bl, which is
// taken by non-const reference.
129 void decode(bufferlist::const_iterator
& bl
);
131 * Get a bufferlist containing the ConnectionTracker.
132 * This is like encode() but holds a copy so it
133 * doesn't re-encode on every invocation.
// Returns a const reference to a bufferlist rather than a copy.
// NOTE(review): the member function is not const, presumably because it may
// have to fill the cached copy first; the reference's validity after later
// updates to the tracker should be confirmed against the implementation.
135 const bufferlist
& get_encoded_bl();
// ConnectionReports held for other ranks, keyed by int.
// NOTE(review): presumably the key is the reporting peer's rank -- confirm.
139 std::map
<int,ConnectionReport
> peer_reports
;
// This tracker's own report; the constructors and notify_rank_changed()
// write its rank field.
140 ConnectionReport my_reports
;
// Persistence interval; the default constructor uses 10.
// NOTE(review): presumably the number of version bumps between calls to
// persist_connectivity_scores() -- confirm against increase_version().
// The default member initializer guarantees a defined value in any
// constructor that leaves this member out of its initializer list (the
// constructor taking a bufferlist does); constructors that do list it
// still override the default.
int persist_interval = 10;
// Returns the tracker's stored rank member.
146 int get_my_rank() const { return rank
; }
// Mutable and const overloads returning a pointer to a ConnectionReport
// selected by the int p; defined elsewhere.
// NOTE(review): presumably p is a rank; what is returned for a rank with no
// stored report (nullptr or a newly created entry) should be confirmed
// against the implementation.
147 ConnectionReport
*reports(int p
);
148 const ConnectionReport
*reports(int p
) const;
// Empties peer_reports and replaces my_reports with a default-constructed
// ConnectionReport (whose rank defaults to -1).
150 void clear_peer_reports() {
152 peer_reports
.clear();
153 my_reports
= ConnectionReport();
// Default constructor: epoch 0, version 0, no owner, rank -1,
// persist_interval 10, and a half_life of 12*60*60.
// NOTE(review): 12*60*60 is presumably twelve hours expressed in seconds --
// confirm the unit.
157 ConnectionTracker() : epoch(0), version(0), half_life(12*60*60),
158 owner(NULL
), rank(-1), persist_interval(10) {
// Construct a tracker for the given owner and rank with half-life hl.
// epoch and version start at 0; owner, rank, half_life and persist_interval
// are taken from the arguments.
// The parameter named rank shadows the member of the same name, so the
// assignment in the body copies the parameter's value into my_reports.rank.
160 ConnectionTracker(RankProvider
*o
, int rank
, double hl
,
162 epoch(0), version(0),
163 half_life(hl
), owner(o
), rank(rank
), persist_interval(persist_i
) {
164 my_reports
.rank
= rank
;
// Construct from an encoded bufferlist. Members start as epoch 0, version 0,
// half_life 0, no owner and rank -1, and a const iterator is taken at the
// beginning of bl.
// NOTE(review): persist_interval is absent from this initializer list, so
// it keeps whatever its declaration provides unless decoding assigns it --
// confirm that it always ends up initialized.
166 ConnectionTracker(const bufferlist
& bl
) :
167 epoch(0), version(0),
168 half_life(0), owner(NULL
), rank(-1)
170 auto bi
= bl
.cbegin();
// Copy constructor: epoch, version, half_life, owner, rank and
// persist_interval are copied in the initializer list; peer_reports and
// my_reports are then copy-assigned in the body.
// The owner pointer is copied as-is, so both trackers refer to the same
// RankProvider.
173 ConnectionTracker(const ConnectionTracker
& o
) :
174 epoch(o
.epoch
), version(o
.version
),
175 half_life(o
.half_life
), owner(o
.owner
), rank(o
.rank
),
176 persist_interval(o
.persist_interval
)
178 peer_reports
= o
.peer_reports
;
179 my_reports
= o
.my_reports
;
// Reset notification: simply forwards to clear_peer_reports().
181 void notify_reset() { clear_peer_reports(); }
// Rank-change notification. Does nothing when new_rank equals the current
// rank. Otherwise the peer_reports entries stored under the old rank and
// under new_rank are both erased, and my_reports.rank is set to new_rank.
182 void notify_rank_changed(int new_rank
) {
183 if (new_rank
== rank
) return;
184 peer_reports
.erase(rank
);
185 peer_reports
.erase(new_rank
);
186 my_reports
.rank
= new_rank
;
// Declared here, defined elsewhere; rank_removed identifies the rank that
// went away.
// NOTE(review): how the remaining ranks' stored reports are adjusted is not
// visible from this declaration -- see the implementation.
190 void notify_rank_removed(int rank_removed
);
// Stream-output operator for a ConnectionTracker; friend so it can read the
// tracker's members. Defined elsewhere.
191 friend std::ostream
& operator<<(std::ostream
& o
, const ConnectionTracker
& c
);
// Friend free functions returning pointers to a ConnectionReport and to a
// map of ConnectionReports for the given tracker; defined elsewhere.
// NOTE(review): presumably these expose my_reports and peer_reports for
// tests -- confirm against their definitions.
192 friend ConnectionReport
*get_connection_reports(ConnectionTracker
& ct
);
193 friend std::map
<int,ConnectionReport
> *get_peer_reports(ConnectionTracker
& ct
);
// Const member taking a ceph::Formatter pointer; defined elsewhere.
// NOTE(review): presumably writes the tracker's state to f -- confirm.
194 void dump(ceph::Formatter
*f
) const;
// Static helper taking a list of ConnectionTracker pointers by reference;
// defined elsewhere.
// NOTE(review): presumably appends heap-allocated sample instances to o
// for encoding tests -- confirm who owns and frees them.
195 static void generate_test_instances(std::list
<ConnectionTracker
*>& o
);
// Macro defined outside this file.
// NOTE(review): presumably generates free encode()/decode() overloads that
// forward to ConnectionTracker's member functions -- confirm against the
// macro definition.
198 WRITE_CLASS_ENCODER(ConnectionTracker
);