]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2019 Red Hat | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #pragma once | |
16 | #include "include/types.h" | |
17 | ||
18 | struct ConnectionReport { | |
19 | int rank = -1; // mon rank this state belongs to | |
20 | std::map<int, bool> current; // true if connected to the other mon | |
21 | std::map<int, double> history; // [0-1]; the connection reliability | |
22 | epoch_t epoch = 0; // the (local) election epoch the ConnectionReport came from | |
23 | uint64_t epoch_version = 0; // version of the ConnectionReport within the epoch | |
24 | void encode(bufferlist& bl) const { | |
25 | ENCODE_START(1, 1, bl); | |
26 | encode(rank, bl); | |
27 | encode(current, bl); | |
28 | encode(history, bl); | |
29 | encode(epoch, bl); | |
30 | encode(epoch_version, bl); | |
31 | ENCODE_FINISH(bl); | |
32 | } | |
33 | void decode(bufferlist::const_iterator& bl) { | |
34 | DECODE_START(1, bl); | |
35 | decode(rank, bl); | |
36 | decode(current, bl); | |
37 | decode(history, bl); | |
38 | decode(epoch, bl); | |
39 | decode(epoch_version, bl); | |
40 | DECODE_FINISH(bl); | |
41 | } | |
42 | bool operator==(const ConnectionReport& o) const { | |
43 | return o.rank == rank && o.current == current && | |
44 | o.history == history && o.epoch == epoch && | |
45 | o.epoch_version == epoch_version; | |
46 | } | |
47 | friend std::ostream& operator<<(std::ostream&o, const ConnectionReport& c); | |
48 | ||
49 | void dump(ceph::Formatter *f) const; | |
50 | static void generate_test_instances(std::list<ConnectionReport*>& o); | |
51 | }; | |
52 | WRITE_CLASS_ENCODER(ConnectionReport); | |
53 | ||
/**
 * Interface implemented by whoever owns a ConnectionTracker: it tells
 * the tracker which rank the daemon runs as and persists the tracker's
 * state on request.
 */
class RankProvider {
public:
  virtual ~RankProvider() = default;

  /**
   * Report the rank of the running daemon.
   * A value of -1 means the rank is unknown/invalid; otherwise it is
   * an actual rank. (The earlier wording of this comment said ">1";
   * presumably ">= 0" was meant -- TODO confirm.)
   * Do not invoke get_total_connection_score() while the rank is
   * unknown.
   */
  virtual int get_my_rank() const = 0;

  /**
   * Ask the owner to encode the tracker and persist it to disk.
   * Presently this is done every tenth update.
   */
  virtual void persist_connectivity_scores() = 0;
};
71 | ||
72 | class ConnectionTracker { | |
73 | public: | |
74 | /** | |
75 | * Receive a report from a peer and update our internal state | |
76 | * if the peer has newer data. | |
77 | */ | |
78 | void receive_peer_report(const ConnectionTracker& o); | |
79 | /** | |
80 | * Bump up the epoch to the specified number. | |
81 | * Validates that it is > current epoch and resets | |
82 | * version to 0; returns false if not. | |
83 | */ | |
84 | bool increase_epoch(epoch_t e); | |
85 | /** | |
86 | * Bump up the version within our epoch. | |
87 | * If the new version is a multiple of ten, we also persist it. | |
88 | */ | |
89 | void increase_version(); | |
90 | ||
91 | /** | |
92 | * Report a connection to a peer rank has been considered alive for | |
93 | * the given time duration. We assume the units_alive is <= the time | |
94 | * since the previous reporting call. | |
95 | * (Or, more precisely, we assume that the total amount of time | |
96 | * passed in is less than or equal to the time which has actually | |
97 | * passed -- you can report a 10-second death immediately followed | |
98 | * by reporting 5 seconds of liveness if your metrics are delayed.) | |
99 | */ | |
100 | void report_live_connection(int peer_rank, double units_alive); | |
101 | /** | |
102 | * Report a connection to a peer rank has been considered dead for | |
103 | * the given time duration, analogous to that above. | |
104 | */ | |
105 | void report_dead_connection(int peer_rank, double units_dead); | |
106 | /** | |
107 | * Set the half-life for dropping connection state | |
108 | * out of the ongoing score. | |
109 | * Whenever you add a new data point: | |
110 | * new_score = old_score * ( 1 - units / (2d)) + (units/(2d)) | |
111 | * where units is the units reported alive (for dead, you subtract them). | |
112 | */ | |
113 | void set_half_life(double d) { | |
114 | half_life = d; | |
115 | } | |
116 | /** | |
117 | * Get the total connection score of a rank across | |
118 | * all peers, and the count of how many electors think it's alive. | |
119 | * For this summation, if a rank reports a peer as down its score is zero. | |
120 | */ | |
121 | void get_total_connection_score(int peer_rank, double *rating, | |
122 | int *live_count) const; | |
123 | /** | |
124 | * Encode this ConnectionTracker. Useful both for storing on disk | |
125 | * and for sending off to peers for decoding and import | |
126 | * with receive_peer_report() above. | |
127 | */ | |
128 | void encode(bufferlist &bl) const; | |
129 | void decode(bufferlist::const_iterator& bl); | |
130 | /** | |
131 | * Get a bufferlist containing the ConnectionTracker. | |
132 | * This is like encode() but holds a copy so it | |
133 | * doesn't re-encode on every invocation. | |
134 | */ | |
135 | const bufferlist& get_encoded_bl(); | |
136 | private: | |
137 | epoch_t epoch; | |
138 | uint64_t version; | |
20effc67 | 139 | std::map<int,ConnectionReport> peer_reports; |
f67539c2 TL |
140 | ConnectionReport my_reports; |
141 | double half_life; | |
142 | RankProvider *owner; | |
143 | int rank; | |
144 | int persist_interval; | |
145 | bufferlist encoding; | |
146 | int get_my_rank() const { return rank; } | |
147 | ConnectionReport *reports(int p); | |
148 | const ConnectionReport *reports(int p) const; | |
149 | ||
150 | void clear_peer_reports() { | |
151 | encoding.clear(); | |
152 | peer_reports.clear(); | |
153 | my_reports = ConnectionReport(); | |
154 | } | |
155 | ||
156 | public: | |
157 | ConnectionTracker() : epoch(0), version(0), half_life(12*60*60), | |
158 | owner(NULL), rank(-1), persist_interval(10) { | |
159 | } | |
160 | ConnectionTracker(RankProvider *o, int rank, double hl, | |
161 | int persist_i) : | |
162 | epoch(0), version(0), | |
163 | half_life(hl), owner(o), rank(rank), persist_interval(persist_i) { | |
164 | my_reports.rank = rank; | |
165 | } | |
166 | ConnectionTracker(const bufferlist& bl) : | |
167 | epoch(0), version(0), | |
168 | half_life(0), owner(NULL), rank(-1) | |
169 | { | |
170 | auto bi = bl.cbegin(); | |
171 | decode(bi); | |
172 | } | |
173 | ConnectionTracker(const ConnectionTracker& o) : | |
174 | epoch(o.epoch), version(o.version), | |
175 | half_life(o.half_life), owner(o.owner), rank(o.rank), | |
176 | persist_interval(o.persist_interval) | |
177 | { | |
178 | peer_reports = o.peer_reports; | |
179 | my_reports = o.my_reports; | |
180 | } | |
181 | void notify_reset() { clear_peer_reports(); } | |
182 | void notify_rank_changed(int new_rank) { | |
183 | if (new_rank == rank) return; | |
184 | peer_reports.erase(rank); | |
185 | peer_reports.erase(new_rank); | |
186 | my_reports.rank = new_rank; | |
187 | rank = new_rank; | |
188 | encoding.clear(); | |
189 | } | |
190 | void notify_rank_removed(int rank_removed); | |
191 | friend std::ostream& operator<<(std::ostream& o, const ConnectionTracker& c); | |
192 | friend ConnectionReport *get_connection_reports(ConnectionTracker& ct); | |
20effc67 | 193 | friend std::map<int,ConnectionReport> *get_peer_reports(ConnectionTracker& ct); |
f67539c2 TL |
194 | void dump(ceph::Formatter *f) const; |
195 | static void generate_test_instances(std::list<ConnectionTracker*>& o); | |
196 | }; | |
197 | ||
198 | WRITE_CLASS_ENCODER(ConnectionTracker); |