]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #ifndef CEPH_MMDSBEACON_H | |
16 | #define CEPH_MMDSBEACON_H | |
17 | ||
18 | #include "messages/PaxosServiceMessage.h" | |
19 | ||
20 | #include "include/types.h" | |
21 | ||
22 | #include "mds/MDSMap.h" | |
23 | ||
24 | ||
25 | ||
/**
 * Identifier for each kind of health metric an MDS can report to the mon.
 *
 * A mon that knows a given ID can implement special behaviour for that
 * metric.  The numeric values are written out explicitly because
 * MDSHealthMetric encodes the type as a 16-bit integer, so the numbers
 * themselves (not just the names) are what peers exchange.
 */
enum mds_metric_t {
  MDS_HEALTH_NULL                     = 0,   // "no metric" placeholder
  MDS_HEALTH_TRIM                     = 1,
  MDS_HEALTH_CLIENT_RECALL            = 2,
  MDS_HEALTH_CLIENT_LATE_RELEASE      = 3,
  MDS_HEALTH_CLIENT_RECALL_MANY       = 4,
  MDS_HEALTH_CLIENT_LATE_RELEASE_MANY = 5,
  MDS_HEALTH_CLIENT_OLDEST_TID        = 6,
  MDS_HEALTH_CLIENT_OLDEST_TID_MANY   = 7,
  MDS_HEALTH_DAMAGE                   = 8,
  MDS_HEALTH_READ_ONLY                = 9,
  MDS_HEALTH_SLOW_REQUEST             = 10,
  MDS_HEALTH_CACHE_OVERSIZED          = 11
};
45 | ||
224ce89b WB |
46 | static inline const char *mds_metric_name(mds_metric_t m) |
47 | { | |
48 | switch (m) { | |
49 | case MDS_HEALTH_TRIM: return "MDS_TRIM"; | |
50 | case MDS_HEALTH_CLIENT_RECALL: return "MDS_CLIENT_RECALL"; | |
51 | case MDS_HEALTH_CLIENT_LATE_RELEASE: return "MDS_CLIENT_LATE_RELEASE"; | |
52 | case MDS_HEALTH_CLIENT_RECALL_MANY: return "MDS_CLIENT_RECALL_MANY"; | |
53 | case MDS_HEALTH_CLIENT_LATE_RELEASE_MANY: return "MDS_CLIENT_LATE_RELEASE_MANY"; | |
54 | case MDS_HEALTH_CLIENT_OLDEST_TID: return "MDS_CLIENT_OLDEST_TID"; | |
55 | case MDS_HEALTH_CLIENT_OLDEST_TID_MANY: return "MDS_CLIENT_OLDEST_TID_MANY"; | |
56 | case MDS_HEALTH_DAMAGE: return "MDS_DAMAGE"; | |
57 | case MDS_HEALTH_READ_ONLY: return "MDS_READ_ONLY"; | |
58 | case MDS_HEALTH_SLOW_REQUEST: return "MDS_SLOW_REQUEST"; | |
59 | case MDS_HEALTH_CACHE_OVERSIZED: return "MDS_CACHE_OVERSIZED"; | |
60 | default: | |
61 | return "???"; | |
62 | } | |
63 | } | |
64 | ||
65 | static inline const char *mds_metric_summary(mds_metric_t m) | |
66 | { | |
67 | switch (m) { | |
68 | case MDS_HEALTH_TRIM: | |
69 | return "%num% MDSs behind on trimming"; | |
70 | case MDS_HEALTH_CLIENT_RECALL: | |
71 | return "%num% clients failing to respond to cache pressure"; | |
72 | case MDS_HEALTH_CLIENT_LATE_RELEASE: | |
73 | return "%num% clients failing to respond to capability release"; | |
74 | case MDS_HEALTH_CLIENT_RECALL_MANY: | |
75 | return "%num% MDSs have many clients failing to respond to cache pressure"; | |
76 | case MDS_HEALTH_CLIENT_LATE_RELEASE_MANY: | |
77 | return "%num% MDSs have many clients failing to respond to capability " | |
78 | "release"; | |
79 | case MDS_HEALTH_CLIENT_OLDEST_TID: | |
80 | return "%num% clients failing to advance oldest client/flush tid"; | |
81 | case MDS_HEALTH_CLIENT_OLDEST_TID_MANY: | |
82 | return "%num% MDSs have clients failing to advance oldest client/flush tid"; | |
83 | case MDS_HEALTH_DAMAGE: | |
84 | return "%num% MDSs report damaged metadata"; | |
85 | case MDS_HEALTH_READ_ONLY: | |
86 | return "%num% MDSs are read only"; | |
87 | case MDS_HEALTH_SLOW_REQUEST: | |
88 | return "%num% MDSs report slow requests"; | |
89 | case MDS_HEALTH_CACHE_OVERSIZED: | |
90 | return "%num% MDSs report oversized cache"; | |
91 | default: | |
92 | return "???"; | |
93 | } | |
94 | } | |
95 | ||
7c673cae FG |
96 | /** |
97 | * This structure is designed to allow some flexibility in how we emit health | |
98 | * complaints, such that: | |
99 | * - The mon doesn't have to have foreknowledge of all possible metrics: we can | |
100 | * implement new messages in the MDS and have the mon pass them through to the user | |
101 | * (enables us to do complex checks inside the MDS, and allows mon to be older version | |
102 | * than MDS) | |
103 | * - The mon has enough information to perform reductions on some types of metric, for | |
104 | * example complaints about the same client from multiple MDSs where we might want | |
105 | * to reduce three "client X is stale on MDS y" metrics into one "client X is stale | |
106 | * on 3 MDSs" message. | |
107 | */ | |
108 | struct MDSHealthMetric | |
109 | { | |
110 | mds_metric_t type; | |
111 | health_status_t sev; | |
112 | std::string message; | |
113 | std::map<std::string, std::string> metadata; | |
114 | ||
115 | void encode(bufferlist& bl) const { | |
116 | ENCODE_START(1, 1, bl); | |
117 | assert(type != MDS_HEALTH_NULL); | |
118 | ::encode((uint16_t)type, bl); | |
119 | ::encode((uint8_t)sev, bl); | |
120 | ::encode(message, bl); | |
121 | ::encode(metadata, bl); | |
122 | ENCODE_FINISH(bl); | |
123 | } | |
124 | ||
125 | void decode(bufferlist::iterator& bl) { | |
126 | DECODE_START(1, bl); | |
127 | ::decode((uint16_t&)type, bl); | |
128 | assert(type != MDS_HEALTH_NULL); | |
129 | ::decode((uint8_t&)sev, bl); | |
130 | ::decode(message, bl); | |
131 | ::decode(metadata, bl); | |
132 | DECODE_FINISH(bl); | |
133 | } | |
134 | ||
135 | bool operator==(MDSHealthMetric const &other) const | |
136 | { | |
137 | return (type == other.type && sev == other.sev && message == other.message); | |
138 | } | |
139 | ||
140 | MDSHealthMetric() : type(MDS_HEALTH_NULL), sev(HEALTH_OK) {} | |
141 | MDSHealthMetric(mds_metric_t type_, health_status_t sev_, std::string const &message_) | |
142 | : type(type_), sev(sev_), message(message_) {} | |
143 | }; | |
144 | WRITE_CLASS_ENCODER(MDSHealthMetric) | |
145 | ||
146 | ||
147 | /** | |
148 | * Health metrics send by the MDS to the mon, so that the mon can generate | |
149 | * user friendly warnings about undesirable states. | |
150 | */ | |
151 | struct MDSHealth | |
152 | { | |
153 | std::list<MDSHealthMetric> metrics; | |
154 | ||
155 | void encode(bufferlist& bl) const { | |
156 | ENCODE_START(1, 1, bl); | |
157 | ::encode(metrics, bl); | |
158 | ENCODE_FINISH(bl); | |
159 | } | |
160 | ||
161 | void decode(bufferlist::iterator& bl) { | |
162 | DECODE_START(1, bl); | |
163 | ::decode(metrics, bl); | |
164 | DECODE_FINISH(bl); | |
165 | } | |
166 | ||
167 | bool operator==(MDSHealth const &other) const | |
168 | { | |
169 | return metrics == other.metrics; | |
170 | } | |
171 | }; | |
172 | WRITE_CLASS_ENCODER(MDSHealth) | |
173 | ||
174 | ||
175 | class MMDSBeacon : public PaxosServiceMessage { | |
176 | ||
177 | static const int HEAD_VERSION = 7; | |
178 | static const int COMPAT_VERSION = 6; | |
179 | ||
180 | uuid_d fsid; | |
181 | mds_gid_t global_id; | |
182 | string name; | |
183 | ||
184 | MDSMap::DaemonState state; | |
185 | version_t seq; | |
186 | ||
187 | mds_rank_t standby_for_rank; | |
188 | string standby_for_name; | |
189 | fs_cluster_id_t standby_for_fscid; | |
190 | bool standby_replay; | |
191 | ||
192 | CompatSet compat; | |
193 | ||
194 | MDSHealth health; | |
195 | ||
196 | map<string, string> sys_info; | |
197 | ||
198 | uint64_t mds_features; | |
199 | ||
200 | public: | |
201 | MMDSBeacon() | |
202 | : PaxosServiceMessage(MSG_MDS_BEACON, 0, HEAD_VERSION, COMPAT_VERSION), | |
203 | global_id(0), state(MDSMap::STATE_NULL), standby_for_rank(MDS_RANK_NONE), | |
204 | standby_for_fscid(FS_CLUSTER_ID_NONE), standby_replay(false), | |
205 | mds_features(0) | |
206 | { } | |
207 | MMDSBeacon(const uuid_d &f, mds_gid_t g, string& n, epoch_t les, MDSMap::DaemonState st, version_t se, uint64_t feat) : | |
208 | PaxosServiceMessage(MSG_MDS_BEACON, les, HEAD_VERSION, COMPAT_VERSION), | |
209 | fsid(f), global_id(g), name(n), state(st), seq(se), | |
210 | standby_for_rank(MDS_RANK_NONE), standby_for_fscid(FS_CLUSTER_ID_NONE), | |
211 | standby_replay(false), mds_features(feat) { | |
212 | } | |
213 | private: | |
214 | ~MMDSBeacon() override {} | |
215 | ||
216 | public: | |
217 | uuid_d& get_fsid() { return fsid; } | |
218 | mds_gid_t get_global_id() { return global_id; } | |
219 | string& get_name() { return name; } | |
220 | epoch_t get_last_epoch_seen() { return version; } | |
221 | MDSMap::DaemonState get_state() { return state; } | |
222 | version_t get_seq() { return seq; } | |
223 | const char *get_type_name() const override { return "mdsbeacon"; } | |
224 | mds_rank_t get_standby_for_rank() { return standby_for_rank; } | |
225 | const string& get_standby_for_name() { return standby_for_name; } | |
226 | const fs_cluster_id_t& get_standby_for_fscid() { return standby_for_fscid; } | |
227 | bool get_standby_replay() const { return standby_replay; } | |
228 | uint64_t get_mds_features() const { return mds_features; } | |
229 | ||
230 | CompatSet const& get_compat() const { return compat; } | |
231 | void set_compat(const CompatSet& c) { compat = c; } | |
232 | ||
233 | MDSHealth const& get_health() const { return health; } | |
234 | void set_health(const MDSHealth &h) { health = h; } | |
235 | ||
236 | void set_standby_for_rank(mds_rank_t r) { standby_for_rank = r; } | |
237 | void set_standby_for_name(string& n) { standby_for_name = n; } | |
238 | void set_standby_for_name(const char* c) { standby_for_name.assign(c); } | |
239 | void set_standby_for_fscid(fs_cluster_id_t f) { standby_for_fscid = f; } | |
240 | void set_standby_replay(bool r) { standby_replay = r; } | |
241 | ||
242 | const map<string, string>& get_sys_info() const { return sys_info; } | |
243 | void set_sys_info(const map<string, string>& i) { sys_info = i; } | |
244 | ||
245 | void print(ostream& out) const override { | |
246 | out << "mdsbeacon(" << global_id << "/" << name << " " << ceph_mds_state_name(state) | |
247 | << " seq " << seq << " v" << version << ")"; | |
248 | } | |
249 | ||
250 | void encode_payload(uint64_t features) override { | |
251 | paxos_encode(); | |
252 | ::encode(fsid, payload); | |
253 | ::encode(global_id, payload); | |
254 | ::encode((__u32)state, payload); | |
255 | ::encode(seq, payload); | |
256 | ::encode(name, payload); | |
257 | ::encode(standby_for_rank, payload); | |
258 | ::encode(standby_for_name, payload); | |
259 | ::encode(compat, payload); | |
260 | ::encode(health, payload); | |
261 | if (state == MDSMap::STATE_BOOT) { | |
262 | ::encode(sys_info, payload); | |
263 | } | |
264 | ::encode(mds_features, payload); | |
265 | ::encode(standby_for_fscid, payload); | |
266 | ::encode(standby_replay, payload); | |
267 | } | |
268 | void decode_payload() override { | |
269 | bufferlist::iterator p = payload.begin(); | |
270 | paxos_decode(p); | |
271 | ::decode(fsid, p); | |
272 | ::decode(global_id, p); | |
273 | ::decode((__u32&)state, p); | |
274 | ::decode(seq, p); | |
275 | ::decode(name, p); | |
276 | ::decode(standby_for_rank, p); | |
277 | ::decode(standby_for_name, p); | |
278 | ::decode(compat, p); | |
279 | ::decode(health, p); | |
280 | if (state == MDSMap::STATE_BOOT) { | |
281 | ::decode(sys_info, p); | |
282 | } | |
283 | ::decode(mds_features, p); | |
284 | ::decode(standby_for_fscid, p); | |
285 | if (header.version >= 7) { | |
286 | ::decode(standby_replay, p); | |
287 | } | |
288 | ||
289 | if (header.version < 7 && state == MDSMap::STATE_STANDBY_REPLAY) { | |
290 | // Old MDS daemons request the state, instead of explicitly | |
291 | // advertising that they are configured as a replay daemon. | |
292 | standby_replay = true; | |
293 | state = MDSMap::STATE_STANDBY; | |
294 | } | |
295 | } | |
296 | }; | |
297 | ||
298 | #endif |