]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | #ifndef CEPH_MDSMAP_H | |
17 | #define CEPH_MDSMAP_H | |
18 | ||
19 | #include <errno.h> | |
20 | ||
21 | #include "include/types.h" | |
22 | #include "common/Clock.h" | |
23 | #include "msg/Message.h" | |
224ce89b | 24 | #include "include/health.h" |
7c673cae FG |
25 | |
26 | #include <set> | |
27 | #include <map> | |
28 | #include <string> | |
31f18b77 | 29 | #include <algorithm> |
7c673cae FG |
30 | |
31 | #include "common/config.h" | |
32 | ||
33 | #include "include/CompatSet.h" | |
34 | #include "include/ceph_features.h" | |
35 | #include "common/Formatter.h" | |
36 | #include "mds/mdstypes.h" | |
37 | ||
38 | /* | |
39 | ||
40 | boot --> standby, creating, or starting. | |
41 | ||
42 | ||
43 | dne ----> creating -----> active* | |
44 | ^ ^___________/ / ^ ^ | |
45 | | / / | | |
46 | destroying / / | | |
47 | ^ / / | | |
48 | | / / | | |
49 | stopped <---- stopping* <-/ / | | |
50 | \ / | | |
51 | ----- starting* ----/ | | |
52 | | | |
53 | failed | | |
54 | \ | | |
55 | \--> replay* --> reconnect* --> rejoin* | |
56 | ||
57 | * = can fail | |
58 | ||
59 | */ | |
60 | ||
61 | class CephContext; | |
224ce89b | 62 | class health_check_map_t; |
7c673cae FG |
63 | |
64 | extern CompatSet get_mdsmap_compat_set_all(); | |
65 | extern CompatSet get_mdsmap_compat_set_default(); | |
66 | extern CompatSet get_mdsmap_compat_set_base(); // pre v0.20 | |
67 | ||
// Incompat feature descriptors for the MDS CompatSet.
// Each feature must have a unique numeric id: FILE_LAYOUT_V2 previously reused
// id 8 (already taken by NOANCHOR), which made the two features
// indistinguishable by id. It now uses the next free id, 9.
#define MDS_FEATURE_INCOMPAT_BASE CompatSet::Feature(1, "base v0.20")
#define MDS_FEATURE_INCOMPAT_CLIENTRANGES CompatSet::Feature(2, "client writeable ranges")
#define MDS_FEATURE_INCOMPAT_FILELAYOUT CompatSet::Feature(3, "default file layouts on dirs")
#define MDS_FEATURE_INCOMPAT_DIRINODE CompatSet::Feature(4, "dir inode in separate object")
#define MDS_FEATURE_INCOMPAT_ENCODING CompatSet::Feature(5, "mds uses versioned encoding")
#define MDS_FEATURE_INCOMPAT_OMAPDIRFRAG CompatSet::Feature(6, "dirfrag is stored in omap")
#define MDS_FEATURE_INCOMPAT_INLINE CompatSet::Feature(7, "mds uses inline data")
#define MDS_FEATURE_INCOMPAT_NOANCHOR CompatSet::Feature(8, "no anchor table")
#define MDS_FEATURE_INCOMPAT_FILE_LAYOUT_V2 CompatSet::Feature(9, "file layout v2")

// Name given to a filesystem when none is specified (used by the MDSMap constructor).
#define MDS_FS_NAME_DEFAULT "cephfs"
79 | ||
80 | class MDSMap { | |
81 | public: | |
82 | /* These states are the union of the set of possible states of an MDS daemon, | |
83 | * and the set of possible states of an MDS rank */ | |
84 | typedef enum { | |
85 | // States of an MDS daemon not currently holding a rank | |
86 | // ==================================================== | |
87 | STATE_NULL = CEPH_MDS_STATE_NULL, // null value for fns returning this type. | |
88 | STATE_BOOT = CEPH_MDS_STATE_BOOT, // up, boot announcement. destiny unknown. | |
89 | STATE_STANDBY = CEPH_MDS_STATE_STANDBY, // up, idle. waiting for assignment by monitor. | |
90 | STATE_STANDBY_REPLAY = CEPH_MDS_STATE_STANDBY_REPLAY, // up, replaying active node, ready to take over. | |
91 | ||
92 | // States of an MDS rank, and of any MDS daemon holding that rank | |
93 | // ============================================================== | |
94 | STATE_STOPPED = CEPH_MDS_STATE_STOPPED, // down, once existed, but no subtrees. empty log. may not be held by a daemon. | |
95 | ||
96 | STATE_CREATING = CEPH_MDS_STATE_CREATING, // up, creating MDS instance (new journal, idalloc..). | |
97 | STATE_STARTING = CEPH_MDS_STATE_STARTING, // up, starting prior stopped MDS instance. | |
98 | ||
99 | STATE_REPLAY = CEPH_MDS_STATE_REPLAY, // up, starting prior failed instance. scanning journal. | |
100 | STATE_RESOLVE = CEPH_MDS_STATE_RESOLVE, // up, disambiguating distributed operations (import, rename, etc.) | |
101 | STATE_RECONNECT = CEPH_MDS_STATE_RECONNECT, // up, reconnect to clients | |
102 | STATE_REJOIN = CEPH_MDS_STATE_REJOIN, // up, replayed journal, rejoining distributed cache | |
103 | STATE_CLIENTREPLAY = CEPH_MDS_STATE_CLIENTREPLAY, // up, active | |
104 | STATE_ACTIVE = CEPH_MDS_STATE_ACTIVE, // up, active | |
105 | STATE_STOPPING = CEPH_MDS_STATE_STOPPING, // up, exporting metadata (-> standby or out) | |
106 | STATE_DNE = CEPH_MDS_STATE_DNE, // down, rank does not exist | |
107 | ||
108 | // State which a daemon may send to MDSMonitor in its beacon | |
109 | // to indicate that offline repair is required. Daemon must stop | |
110 | // immediately after indicating this state. | |
111 | STATE_DAMAGED = CEPH_MDS_STATE_DAMAGED | |
112 | ||
113 | /* | |
114 | * In addition to explicit states, an MDS rank implicitly in state: | |
115 | * - STOPPED if it is not currently associated with an MDS daemon gid but it | |
116 | * is in MDSMap::stopped | |
117 | * - FAILED if it is not currently associated with an MDS daemon gid but it | |
118 | * is in MDSMap::failed | |
119 | * - DNE if it is not currently associated with an MDS daemon gid and it is | |
120 | * missing from both MDSMap::failed and MDSMap::stopped | |
121 | */ | |
122 | } DaemonState; | |
123 | ||
124 | struct mds_info_t { | |
125 | mds_gid_t global_id; | |
126 | std::string name; | |
127 | mds_rank_t rank; | |
128 | int32_t inc; | |
129 | MDSMap::DaemonState state; | |
130 | version_t state_seq; | |
131 | entity_addr_t addr; | |
132 | utime_t laggy_since; | |
133 | mds_rank_t standby_for_rank; | |
134 | std::string standby_for_name; | |
135 | fs_cluster_id_t standby_for_fscid; | |
136 | bool standby_replay; | |
137 | std::set<mds_rank_t> export_targets; | |
138 | uint64_t mds_features; | |
139 | ||
140 | mds_info_t() : global_id(MDS_GID_NONE), rank(MDS_RANK_NONE), inc(0), | |
141 | state(STATE_STANDBY), state_seq(0), | |
142 | standby_for_rank(MDS_RANK_NONE), | |
143 | standby_for_fscid(FS_CLUSTER_ID_NONE), | |
144 | standby_replay(false) | |
145 | { } | |
146 | ||
147 | bool laggy() const { return !(laggy_since == utime_t()); } | |
148 | void clear_laggy() { laggy_since = utime_t(); } | |
149 | ||
150 | entity_inst_t get_inst() const { return entity_inst_t(entity_name_t::MDS(rank), addr); } | |
151 | ||
152 | void encode(bufferlist& bl, uint64_t features) const { | |
153 | if ((features & CEPH_FEATURE_MDSENC) == 0 ) encode_unversioned(bl); | |
154 | else encode_versioned(bl, features); | |
155 | } | |
156 | void decode(bufferlist::iterator& p); | |
157 | void dump(Formatter *f) const; | |
158 | void print_summary(ostream &out) const; | |
159 | static void generate_test_instances(list<mds_info_t*>& ls); | |
160 | private: | |
161 | void encode_versioned(bufferlist& bl, uint64_t features) const; | |
162 | void encode_unversioned(bufferlist& bl) const; | |
163 | }; | |
164 | ||
165 | ||
166 | protected: | |
167 | // base map | |
168 | epoch_t epoch; | |
169 | bool enabled; | |
170 | std::string fs_name; | |
171 | uint32_t flags; // flags | |
172 | epoch_t last_failure; // mds epoch of last failure | |
173 | epoch_t last_failure_osd_epoch; // osd epoch of last failure; any mds entering replay needs | |
174 | // at least this osdmap to ensure the blacklist propagates. | |
175 | utime_t created, modified; | |
176 | ||
177 | mds_rank_t tableserver; // which MDS has snaptable | |
178 | mds_rank_t root; // which MDS has root directory | |
179 | ||
180 | __u32 session_timeout; | |
181 | __u32 session_autoclose; | |
182 | uint64_t max_file_size; | |
183 | ||
31f18b77 | 184 | std::vector<int64_t> data_pools; // file data pools available to clients (via an ioctl). first is the default. |
7c673cae FG |
185 | int64_t cas_pool; // where CAS objects go |
186 | int64_t metadata_pool; // where fs metadata objects go | |
187 | ||
188 | /* | |
189 | * in: the set of logical mds #'s that define the cluster. this is the set | |
190 | * of mds's the metadata may be distributed over. | |
191 | * up: map from logical mds #'s to the addrs filling those roles. | |
192 | * failed: subset of @in that are failed. | |
193 | * stopped: set of nodes that have been initialized, but are not active. | |
194 | * | |
195 | * @up + @failed = @in. @in * @stopped = {}. | |
196 | */ | |
197 | ||
198 | mds_rank_t max_mds; /* The maximum number of active MDSes. Also, the maximum rank. */ | |
199 | mds_rank_t standby_count_wanted; | |
200 | string balancer; /* The name/version of the mantle balancer (i.e. the rados obj name) */ | |
201 | ||
202 | std::set<mds_rank_t> in; // currently defined cluster | |
203 | ||
204 | // which ranks are failed, stopped, damaged (i.e. not held by a daemon) | |
205 | std::set<mds_rank_t> failed, stopped, damaged; | |
206 | std::map<mds_rank_t, mds_gid_t> up; // who is in those roles | |
207 | std::map<mds_gid_t, mds_info_t> mds_info; | |
208 | ||
209 | uint8_t ever_allowed_features; //< bitmap of features the cluster has allowed | |
210 | uint8_t explicitly_allowed_features; //< bitmap of features explicitly enabled | |
211 | ||
212 | bool inline_data_enabled; | |
213 | ||
214 | uint64_t cached_up_features; | |
215 | ||
216 | public: | |
217 | CompatSet compat; | |
218 | ||
219 | friend class MDSMonitor; | |
220 | friend class Filesystem; | |
221 | friend class FSMap; | |
222 | ||
223 | public: | |
224 | MDSMap() | |
225 | : epoch(0), enabled(false), fs_name(MDS_FS_NAME_DEFAULT), | |
226 | flags(CEPH_MDSMAP_DEFAULTS), last_failure(0), | |
227 | last_failure_osd_epoch(0), | |
228 | tableserver(0), root(0), | |
229 | session_timeout(0), | |
230 | session_autoclose(0), | |
231 | max_file_size(0), | |
232 | cas_pool(-1), | |
233 | metadata_pool(-1), | |
234 | max_mds(0), | |
235 | standby_count_wanted(-1), | |
236 | ever_allowed_features(0), | |
237 | explicitly_allowed_features(0), | |
238 | inline_data_enabled(false), | |
239 | cached_up_features(0) | |
240 | { } | |
241 | ||
242 | bool get_inline_data_enabled() const { return inline_data_enabled; } | |
243 | void set_inline_data_enabled(bool enabled) { inline_data_enabled = enabled; } | |
244 | ||
245 | utime_t get_session_timeout() const { | |
246 | return utime_t(session_timeout,0); | |
247 | } | |
248 | uint64_t get_max_filesize() const { return max_file_size; } | |
249 | void set_max_filesize(uint64_t m) { max_file_size = m; } | |
250 | ||
251 | int get_flags() const { return flags; } | |
252 | bool test_flag(int f) const { return flags & f; } | |
253 | void set_flag(int f) { flags |= f; } | |
254 | void clear_flag(int f) { flags &= ~f; } | |
255 | ||
256 | const std::string &get_fs_name() const {return fs_name;} | |
257 | ||
258 | void set_snaps_allowed() { | |
259 | set_flag(CEPH_MDSMAP_ALLOW_SNAPS); | |
260 | ever_allowed_features |= CEPH_MDSMAP_ALLOW_SNAPS; | |
261 | explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_SNAPS; | |
262 | } | |
263 | void clear_snaps_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_SNAPS); } | |
264 | bool allows_snaps() const { return test_flag(CEPH_MDSMAP_ALLOW_SNAPS); } | |
265 | ||
266 | void set_multimds_allowed() { | |
267 | set_flag(CEPH_MDSMAP_ALLOW_MULTIMDS); | |
268 | ever_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS; | |
269 | explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS; | |
270 | } | |
271 | void clear_multimds_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_MULTIMDS); } | |
272 | bool allows_multimds() const { return test_flag(CEPH_MDSMAP_ALLOW_MULTIMDS); } | |
273 | ||
274 | void set_dirfrags_allowed() { | |
275 | set_flag(CEPH_MDSMAP_ALLOW_DIRFRAGS); | |
276 | ever_allowed_features |= CEPH_MDSMAP_ALLOW_DIRFRAGS; | |
277 | explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_DIRFRAGS; | |
278 | } | |
279 | void clear_dirfrags_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_DIRFRAGS); } | |
280 | bool allows_dirfrags() const { return test_flag(CEPH_MDSMAP_ALLOW_DIRFRAGS); } | |
281 | ||
282 | epoch_t get_epoch() const { return epoch; } | |
283 | void inc_epoch() { epoch++; } | |
284 | ||
285 | bool get_enabled() const { return enabled; } | |
286 | ||
287 | const utime_t& get_created() const { return created; } | |
288 | void set_created(utime_t ct) { modified = created = ct; } | |
289 | const utime_t& get_modified() const { return modified; } | |
290 | void set_modified(utime_t mt) { modified = mt; } | |
291 | ||
292 | epoch_t get_last_failure() const { return last_failure; } | |
293 | epoch_t get_last_failure_osd_epoch() const { return last_failure_osd_epoch; } | |
294 | ||
295 | mds_rank_t get_max_mds() const { return max_mds; } | |
296 | void set_max_mds(mds_rank_t m) { max_mds = m; } | |
297 | ||
298 | mds_rank_t get_standby_count_wanted(mds_rank_t standby_daemon_count) const { | |
299 | assert(standby_daemon_count >= 0); | |
300 | std::set<mds_rank_t> s; | |
301 | get_standby_replay_mds_set(s); | |
302 | mds_rank_t standbys_avail = (mds_rank_t)s.size()+standby_daemon_count; | |
303 | mds_rank_t wanted = std::max(0, standby_count_wanted); | |
304 | return wanted > standbys_avail ? wanted - standbys_avail : 0; | |
305 | } | |
306 | void set_standby_count_wanted(mds_rank_t n) { standby_count_wanted = n; } | |
307 | bool check_health(mds_rank_t standby_daemon_count); | |
308 | ||
309 | const std::string get_balancer() const { return balancer; } | |
310 | void set_balancer(std::string val) { balancer.assign(val); } | |
311 | ||
312 | mds_rank_t get_tableserver() const { return tableserver; } | |
313 | mds_rank_t get_root() const { return root; } | |
314 | ||
31f18b77 | 315 | const std::vector<int64_t> &get_data_pools() const { return data_pools; } |
7c673cae FG |
316 | int64_t get_first_data_pool() const { return *data_pools.begin(); } |
317 | int64_t get_metadata_pool() const { return metadata_pool; } | |
318 | bool is_data_pool(int64_t poolid) const { | |
31f18b77 | 319 | return std::binary_search(data_pools.begin(), data_pools.end(), poolid); |
7c673cae FG |
320 | } |
321 | ||
322 | bool pool_in_use(int64_t poolid) const { | |
323 | return get_enabled() && (is_data_pool(poolid) || metadata_pool == poolid); | |
324 | } | |
325 | ||
326 | const std::map<mds_gid_t,mds_info_t>& get_mds_info() const { return mds_info; } | |
327 | const mds_info_t& get_mds_info_gid(mds_gid_t gid) const { | |
328 | return mds_info.at(gid); | |
329 | } | |
330 | const mds_info_t& get_mds_info(mds_rank_t m) const { | |
331 | assert(up.count(m) && mds_info.count(up.at(m))); | |
332 | return mds_info.at(up.at(m)); | |
333 | } | |
334 | mds_gid_t find_mds_gid_by_name(const std::string& s) const { | |
335 | for (std::map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin(); | |
336 | p != mds_info.end(); | |
337 | ++p) { | |
338 | if (p->second.name == s) { | |
339 | return p->first; | |
340 | } | |
341 | } | |
342 | return MDS_GID_NONE; | |
343 | } | |
344 | ||
345 | // counts | |
346 | unsigned get_num_in_mds() const { | |
347 | return in.size(); | |
348 | } | |
349 | unsigned get_num_up_mds() const { | |
350 | return up.size(); | |
351 | } | |
31f18b77 FG |
352 | mds_rank_t get_last_in_mds() const { |
353 | auto p = in.rbegin(); | |
354 | return p == in.rend() ? MDS_RANK_NONE : *p; | |
355 | } | |
7c673cae FG |
356 | int get_num_failed_mds() const { |
357 | return failed.size(); | |
358 | } | |
359 | unsigned get_num_mds(int state) const { | |
360 | unsigned n = 0; | |
361 | for (std::map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin(); | |
362 | p != mds_info.end(); | |
363 | ++p) | |
364 | if (p->second.state == state) ++n; | |
365 | return n; | |
366 | } | |
367 | ||
368 | // data pools | |
369 | void add_data_pool(int64_t poolid) { | |
31f18b77 | 370 | data_pools.push_back(poolid); |
7c673cae FG |
371 | } |
372 | int remove_data_pool(int64_t poolid) { | |
31f18b77 | 373 | std::vector<int64_t>::iterator p = std::find(data_pools.begin(), data_pools.end(), poolid); |
7c673cae FG |
374 | if (p == data_pools.end()) |
375 | return -ENOENT; | |
376 | data_pools.erase(p); | |
377 | return 0; | |
378 | } | |
379 | ||
380 | // sets | |
381 | void get_mds_set(std::set<mds_rank_t>& s) const { | |
382 | s = in; | |
383 | } | |
384 | void get_up_mds_set(std::set<mds_rank_t>& s) const { | |
385 | for (std::map<mds_rank_t, mds_gid_t>::const_iterator p = up.begin(); | |
386 | p != up.end(); | |
387 | ++p) | |
388 | s.insert(p->first); | |
389 | } | |
390 | void get_active_mds_set(std::set<mds_rank_t>& s) const { | |
391 | get_mds_set(s, MDSMap::STATE_ACTIVE); | |
392 | } | |
393 | void get_standby_replay_mds_set(std::set<mds_rank_t>& s) const { | |
394 | get_mds_set(s, MDSMap::STATE_STANDBY_REPLAY); | |
395 | } | |
396 | void get_failed_mds_set(std::set<mds_rank_t>& s) const { | |
397 | s = failed; | |
398 | } | |
399 | ||
400 | // features | |
401 | uint64_t get_up_features() { | |
402 | if (!cached_up_features) { | |
403 | bool first = true; | |
404 | for (std::map<mds_rank_t, mds_gid_t>::const_iterator p = up.begin(); | |
405 | p != up.end(); | |
406 | ++p) { | |
407 | std::map<mds_gid_t, mds_info_t>::const_iterator q = | |
408 | mds_info.find(p->second); | |
409 | assert(q != mds_info.end()); | |
410 | if (first) { | |
411 | cached_up_features = q->second.mds_features; | |
412 | first = false; | |
413 | } else { | |
414 | cached_up_features &= q->second.mds_features; | |
415 | } | |
416 | } | |
417 | } | |
418 | return cached_up_features; | |
419 | } | |
420 | ||
421 | /** | |
422 | * Get MDS ranks which are in but not up. | |
423 | */ | |
424 | void get_down_mds_set(std::set<mds_rank_t> *s) const | |
425 | { | |
426 | assert(s != NULL); | |
427 | s->insert(failed.begin(), failed.end()); | |
428 | s->insert(damaged.begin(), damaged.end()); | |
429 | } | |
430 | ||
431 | int get_failed() const { | |
432 | if (!failed.empty()) return *failed.begin(); | |
433 | return -1; | |
434 | } | |
435 | void get_stopped_mds_set(std::set<mds_rank_t>& s) const { | |
436 | s = stopped; | |
437 | } | |
438 | void get_recovery_mds_set(std::set<mds_rank_t>& s) const { | |
439 | s = failed; | |
440 | for (const auto& p : damaged) | |
441 | s.insert(p); | |
442 | for (const auto& p : mds_info) | |
443 | if (p.second.state >= STATE_REPLAY && p.second.state <= STATE_STOPPING) | |
444 | s.insert(p.second.rank); | |
445 | } | |
446 | ||
447 | void | |
448 | get_clientreplay_or_active_or_stopping_mds_set(std::set<mds_rank_t>& s) const { | |
449 | for (std::map<mds_gid_t, mds_info_t>::const_iterator p = mds_info.begin(); | |
450 | p != mds_info.end(); | |
451 | ++p) | |
452 | if (p->second.state >= STATE_CLIENTREPLAY && p->second.state <= STATE_STOPPING) | |
453 | s.insert(p->second.rank); | |
454 | } | |
455 | void get_mds_set(std::set<mds_rank_t>& s, DaemonState state) const { | |
456 | for (std::map<mds_gid_t, mds_info_t>::const_iterator p = mds_info.begin(); | |
457 | p != mds_info.end(); | |
458 | ++p) | |
459 | if (p->second.state == state) | |
460 | s.insert(p->second.rank); | |
461 | } | |
462 | ||
463 | void get_health(list<pair<health_status_t,std::string> >& summary, | |
464 | list<pair<health_status_t,std::string> > *detail) const; | |
465 | ||
224ce89b WB |
466 | void get_health_checks(health_check_map_t *checks) const; |
467 | ||
7c673cae FG |
468 | typedef enum |
469 | { | |
470 | AVAILABLE = 0, | |
471 | TRANSIENT_UNAVAILABLE = 1, | |
472 | STUCK_UNAVAILABLE = 2 | |
473 | ||
474 | } availability_t; | |
475 | ||
476 | /** | |
477 | * Return indication of whether cluster is available. This is a | |
478 | * heuristic for clients to see if they should bother waiting to talk to | |
479 | * MDSs, or whether they should error out at startup/mount. | |
480 | * | |
481 | * A TRANSIENT_UNAVAILABLE result indicates that the cluster is in a | |
482 | * transition state like replaying, or is potentially about the fail over. | |
483 | * Clients should wait for an updated map before making a final decision | |
484 | * about whether the filesystem is mountable. | |
485 | * | |
486 | * A STUCK_UNAVAILABLE result indicates that we can't see a way that | |
487 | * the cluster is about to recover on its own, so it'll probably require | |
488 | * administrator intervention: clients should probaly not bother trying | |
489 | * to mount. | |
490 | */ | |
491 | availability_t is_cluster_available() const; | |
492 | ||
493 | // mds states | |
494 | bool is_down(mds_rank_t m) const { return up.count(m) == 0; } | |
495 | bool is_up(mds_rank_t m) const { return up.count(m); } | |
496 | bool is_in(mds_rank_t m) const { return up.count(m) || failed.count(m); } | |
497 | bool is_out(mds_rank_t m) const { return !is_in(m); } | |
498 | ||
499 | bool is_failed(mds_rank_t m) const { return failed.count(m); } | |
500 | bool is_stopped(mds_rank_t m) const { return stopped.count(m); } | |
501 | ||
502 | bool is_dne(mds_rank_t m) const { return in.count(m) == 0; } | |
503 | bool is_dne_gid(mds_gid_t gid) const { return mds_info.count(gid) == 0; } | |
504 | ||
505 | /** | |
506 | * Get MDS rank state if the rank is up, else STATE_NULL | |
507 | */ | |
508 | DaemonState get_state(mds_rank_t m) const { | |
509 | std::map<mds_rank_t, mds_gid_t>::const_iterator u = up.find(m); | |
510 | if (u == up.end()) | |
511 | return STATE_NULL; | |
512 | return get_state_gid(u->second); | |
513 | } | |
514 | ||
515 | /** | |
516 | * Get MDS daemon status by GID | |
517 | */ | |
518 | DaemonState get_state_gid(mds_gid_t gid) const { | |
519 | std::map<mds_gid_t,mds_info_t>::const_iterator i = mds_info.find(gid); | |
520 | if (i == mds_info.end()) | |
521 | return STATE_NULL; | |
522 | return i->second.state; | |
523 | } | |
524 | ||
525 | const mds_info_t& get_info(const mds_rank_t m) const { | |
526 | return mds_info.at(up.at(m)); | |
527 | } | |
528 | const mds_info_t& get_info_gid(const mds_gid_t gid) const { | |
529 | return mds_info.at(gid); | |
530 | } | |
531 | ||
532 | bool is_boot(mds_rank_t m) const { return get_state(m) == STATE_BOOT; } | |
533 | bool is_creating(mds_rank_t m) const { return get_state(m) == STATE_CREATING; } | |
534 | bool is_starting(mds_rank_t m) const { return get_state(m) == STATE_STARTING; } | |
535 | bool is_replay(mds_rank_t m) const { return get_state(m) == STATE_REPLAY; } | |
536 | bool is_resolve(mds_rank_t m) const { return get_state(m) == STATE_RESOLVE; } | |
537 | bool is_reconnect(mds_rank_t m) const { return get_state(m) == STATE_RECONNECT; } | |
538 | bool is_rejoin(mds_rank_t m) const { return get_state(m) == STATE_REJOIN; } | |
539 | bool is_clientreplay(mds_rank_t m) const { return get_state(m) == STATE_CLIENTREPLAY; } | |
540 | bool is_active(mds_rank_t m) const { return get_state(m) == STATE_ACTIVE; } | |
541 | bool is_stopping(mds_rank_t m) const { return get_state(m) == STATE_STOPPING; } | |
542 | bool is_active_or_stopping(mds_rank_t m) const { | |
543 | return is_active(m) || is_stopping(m); | |
544 | } | |
545 | bool is_clientreplay_or_active_or_stopping(mds_rank_t m) const { | |
546 | return is_clientreplay(m) || is_active(m) || is_stopping(m); | |
547 | } | |
548 | ||
549 | bool is_followable(mds_rank_t m) const { | |
550 | return (is_resolve(m) || | |
551 | is_replay(m) || | |
552 | is_rejoin(m) || | |
553 | is_clientreplay(m) || | |
554 | is_active(m) || | |
555 | is_stopping(m)); | |
556 | } | |
557 | ||
558 | bool is_laggy_gid(mds_gid_t gid) const { | |
559 | if (!mds_info.count(gid)) | |
560 | return false; | |
561 | std::map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.find(gid); | |
562 | return p->second.laggy(); | |
563 | } | |
564 | ||
565 | // degraded = some recovery in process. fixes active membership and | |
566 | // recovery_set. | |
567 | bool is_degraded() const { | |
568 | if (!failed.empty() || !damaged.empty()) | |
569 | return true; | |
570 | for (std::map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin(); | |
571 | p != mds_info.end(); | |
572 | ++p) | |
573 | if (p->second.state >= STATE_REPLAY && p->second.state <= STATE_CLIENTREPLAY) | |
574 | return true; | |
575 | return false; | |
576 | } | |
577 | bool is_any_failed() const { | |
578 | return failed.size(); | |
579 | } | |
580 | bool is_resolving() const { | |
581 | return | |
582 | get_num_mds(STATE_RESOLVE) > 0 && | |
583 | get_num_mds(STATE_REPLAY) == 0 && | |
584 | failed.empty() && damaged.empty(); | |
585 | } | |
586 | bool is_rejoining() const { | |
587 | // nodes are rejoining cache state | |
588 | return | |
589 | get_num_mds(STATE_REJOIN) > 0 && | |
590 | get_num_mds(STATE_REPLAY) == 0 && | |
591 | get_num_mds(STATE_RECONNECT) == 0 && | |
592 | get_num_mds(STATE_RESOLVE) == 0 && | |
593 | failed.empty() && damaged.empty(); | |
594 | } | |
595 | bool is_stopped() const { | |
596 | return up.empty(); | |
597 | } | |
598 | ||
599 | /** | |
600 | * Get whether a rank is 'up', i.e. has | |
601 | * an MDS daemon's entity_inst_t associated | |
602 | * with it. | |
603 | */ | |
604 | bool have_inst(mds_rank_t m) const { | |
605 | return up.count(m); | |
606 | } | |
607 | ||
608 | /** | |
609 | * Get the MDS daemon entity_inst_t for a rank | |
610 | * known to be up. | |
611 | */ | |
612 | const entity_inst_t get_inst(mds_rank_t m) { | |
613 | assert(up.count(m)); | |
614 | return mds_info[up[m]].get_inst(); | |
615 | } | |
616 | const entity_addr_t get_addr(mds_rank_t m) { | |
617 | assert(up.count(m)); | |
618 | return mds_info[up[m]].addr; | |
619 | } | |
620 | ||
621 | /** | |
622 | * Get the MDS daemon entity_inst_t for a rank, | |
623 | * if it is up. | |
624 | * | |
625 | * @return true if the rank was up and the inst | |
626 | * was populated, else false. | |
627 | */ | |
628 | bool get_inst(mds_rank_t m, entity_inst_t& inst) { | |
629 | if (up.count(m)) { | |
630 | inst = get_inst(m); | |
631 | return true; | |
632 | } | |
633 | return false; | |
634 | } | |
635 | ||
636 | mds_rank_t get_rank_gid(mds_gid_t gid) const { | |
637 | if (mds_info.count(gid)) { | |
638 | return mds_info.at(gid).rank; | |
639 | } else { | |
640 | return MDS_RANK_NONE; | |
641 | } | |
642 | } | |
643 | ||
644 | int get_inc_gid(mds_gid_t gid) const { | |
645 | auto mds_info_entry = mds_info.find(gid); | |
646 | if (mds_info_entry != mds_info.end()) | |
647 | return mds_info_entry->second.inc; | |
648 | return -1; | |
649 | } | |
650 | void encode(bufferlist& bl, uint64_t features) const; | |
651 | void decode(bufferlist::iterator& p); | |
652 | void decode(bufferlist& bl) { | |
653 | bufferlist::iterator p = bl.begin(); | |
654 | decode(p); | |
655 | } | |
656 | ||
657 | ||
658 | void print(ostream& out) const; | |
659 | void print_summary(Formatter *f, ostream *out) const; | |
660 | ||
661 | void dump(Formatter *f) const; | |
662 | static void generate_test_instances(list<MDSMap*>& ls); | |
663 | ||
664 | static bool state_transition_valid(DaemonState prev, DaemonState next); | |
665 | }; | |
666 | WRITE_CLASS_ENCODER_FEATURES(MDSMap::mds_info_t) | |
667 | WRITE_CLASS_ENCODER_FEATURES(MDSMap) | |
668 | ||
669 | inline ostream& operator<<(ostream &out, const MDSMap &m) { | |
670 | m.print_summary(NULL, &out); | |
671 | return out; | |
672 | } | |
673 | ||
674 | #endif |