]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | #ifndef CEPH_MDSMAP_H | |
17 | #define CEPH_MDSMAP_H | |
18 | ||
19 | #include <errno.h> | |
20 | ||
21 | #include "include/types.h" | |
22 | #include "common/Clock.h" | |
23 | #include "msg/Message.h" | |
224ce89b | 24 | #include "include/health.h" |
7c673cae FG |
25 | |
26 | #include <set> | |
27 | #include <map> | |
28 | #include <string> | |
31f18b77 | 29 | #include <algorithm> |
7c673cae FG |
30 | |
31 | #include "common/config.h" | |
32 | ||
33 | #include "include/CompatSet.h" | |
34 | #include "include/ceph_features.h" | |
35 | #include "common/Formatter.h" | |
36 | #include "mds/mdstypes.h" | |
37 | ||
38 | /* | |
39 | ||
40 | boot --> standby, creating, or starting. | |
41 | ||
42 | ||
43 | dne ----> creating -----> active* | |
44 | ^ ^___________/ / ^ ^ | |
45 | | / / | | |
46 | destroying / / | | |
47 | ^ / / | | |
48 | | / / | | |
49 | stopped <---- stopping* <-/ / | | |
50 | \ / | | |
51 | ----- starting* ----/ | | |
52 | | | |
53 | failed | | |
54 | \ | | |
55 | \--> replay* --> reconnect* --> rejoin* | |
56 | ||
57 | * = can fail | |
58 | ||
59 | */ | |
60 | ||
61 | class CephContext; | |
224ce89b | 62 | class health_check_map_t; |
7c673cae FG |
63 | |
64 | extern CompatSet get_mdsmap_compat_set_all(); | |
65 | extern CompatSet get_mdsmap_compat_set_default(); | |
66 | extern CompatSet get_mdsmap_compat_set_base(); // pre v0.20 | |
67 | ||
// MDS "incompat" features. Each macro expands to a CompatSet::Feature built
// from a numeric feature id and a human-readable name.
68 | #define MDS_FEATURE_INCOMPAT_BASE CompatSet::Feature(1, "base v0.20") | |
69 | #define MDS_FEATURE_INCOMPAT_CLIENTRANGES CompatSet::Feature(2, "client writeable ranges") | |
70 | #define MDS_FEATURE_INCOMPAT_FILELAYOUT CompatSet::Feature(3, "default file layouts on dirs") | |
71 | #define MDS_FEATURE_INCOMPAT_DIRINODE CompatSet::Feature(4, "dir inode in separate object") | |
72 | #define MDS_FEATURE_INCOMPAT_ENCODING CompatSet::Feature(5, "mds uses versioned encoding") | |
73 | #define MDS_FEATURE_INCOMPAT_OMAPDIRFRAG CompatSet::Feature(6, "dirfrag is stored in omap") | |
74 | #define MDS_FEATURE_INCOMPAT_INLINE CompatSet::Feature(7, "mds uses inline data") | |
75 | #define MDS_FEATURE_INCOMPAT_NOANCHOR CompatSet::Feature(8, "no anchor table") | |
// NOTE(review): FILE_LAYOUT_V2 reuses id 8, the same id as NOANCHOR on the
// previous line, while every other feature here has a unique id. This looks
// unintentional. Presumably the id is what gets stored in encoded compat sets,
// so confirm the upgrade path for existing maps before renumbering.
76 | #define MDS_FEATURE_INCOMPAT_FILE_LAYOUT_V2 CompatSet::Feature(8, "file layout v2") | |
77 | ||
78 | #define MDS_FS_NAME_DEFAULT "cephfs" | |
79 | ||
80 | class MDSMap { | |
81 | public: | |
82 | /* These states are the union of the set of possible states of an MDS daemon, | |
83 | * and the set of possible states of an MDS rank */ | |
84 | typedef enum { | |
85 | // States of an MDS daemon not currently holding a rank | |
86 | // ==================================================== | |
87 | STATE_NULL = CEPH_MDS_STATE_NULL, // null value for fns returning this type. | |
88 | STATE_BOOT = CEPH_MDS_STATE_BOOT, // up, boot announcement. destiny unknown. | |
89 | STATE_STANDBY = CEPH_MDS_STATE_STANDBY, // up, idle. waiting for assignment by monitor. | |
90 | STATE_STANDBY_REPLAY = CEPH_MDS_STATE_STANDBY_REPLAY, // up, replaying active node, ready to take over. | |
91 | ||
92 | // States of an MDS rank, and of any MDS daemon holding that rank | |
93 | // ============================================================== | |
94 | STATE_STOPPED = CEPH_MDS_STATE_STOPPED, // down, once existed, but no subtrees. empty log. may not be held by a daemon. | |
95 | ||
96 | STATE_CREATING = CEPH_MDS_STATE_CREATING, // up, creating MDS instance (new journal, idalloc..). | |
97 | STATE_STARTING = CEPH_MDS_STATE_STARTING, // up, starting prior stopped MDS instance. | |
98 | ||
99 | STATE_REPLAY = CEPH_MDS_STATE_REPLAY, // up, starting prior failed instance. scanning journal. | |
100 | STATE_RESOLVE = CEPH_MDS_STATE_RESOLVE, // up, disambiguating distributed operations (import, rename, etc.) | |
101 | STATE_RECONNECT = CEPH_MDS_STATE_RECONNECT, // up, reconnect to clients | |
102 | STATE_REJOIN = CEPH_MDS_STATE_REJOIN, // up, replayed journal, rejoining distributed cache | |
103 | STATE_CLIENTREPLAY = CEPH_MDS_STATE_CLIENTREPLAY, // up, replaying client operations; is_degraded() still treats this state as recovering | |
104 | STATE_ACTIVE = CEPH_MDS_STATE_ACTIVE, // up, active | |
105 | STATE_STOPPING = CEPH_MDS_STATE_STOPPING, // up, exporting metadata (-> standby or out) | |
106 | STATE_DNE = CEPH_MDS_STATE_DNE, // down, rank does not exist | |
107 | ||
108 | // State which a daemon may send to MDSMonitor in its beacon | |
109 | // to indicate that offline repair is required. Daemon must stop | |
110 | // immediately after indicating this state. | |
111 | STATE_DAMAGED = CEPH_MDS_STATE_DAMAGED | |
112 | ||
113 | /* | |
114 | * In addition to explicit states, an MDS rank implicitly in state: | |
115 | * - STOPPED if it is not currently associated with an MDS daemon gid but it | |
116 | * is in MDSMap::stopped | |
117 | * - FAILED if it is not currently associated with an MDS daemon gid but it | |
118 | * is in MDSMap::failed | |
119 | * - DNE if it is not currently associated with an MDS daemon gid and it is | |
120 | * missing from both MDSMap::failed and MDSMap::stopped | |
121 | */ | |
122 | } DaemonState; | |
123 | ||
124 | struct mds_info_t { | |
125 | mds_gid_t global_id; | |
126 | std::string name; | |
127 | mds_rank_t rank; | |
128 | int32_t inc; | |
129 | MDSMap::DaemonState state; | |
130 | version_t state_seq; | |
131 | entity_addr_t addr; | |
132 | utime_t laggy_since; | |
133 | mds_rank_t standby_for_rank; | |
134 | std::string standby_for_name; | |
135 | fs_cluster_id_t standby_for_fscid; | |
136 | bool standby_replay; | |
137 | std::set<mds_rank_t> export_targets; | |
c07f9fc5 | 138 | uint64_t mds_features = 0; |
7c673cae FG |
139 | |
140 | mds_info_t() : global_id(MDS_GID_NONE), rank(MDS_RANK_NONE), inc(0), | |
141 | state(STATE_STANDBY), state_seq(0), | |
142 | standby_for_rank(MDS_RANK_NONE), | |
143 | standby_for_fscid(FS_CLUSTER_ID_NONE), | |
144 | standby_replay(false) | |
145 | { } | |
146 | ||
147 | bool laggy() const { return !(laggy_since == utime_t()); } | |
148 | void clear_laggy() { laggy_since = utime_t(); } | |
149 | ||
150 | entity_inst_t get_inst() const { return entity_inst_t(entity_name_t::MDS(rank), addr); } | |
151 | ||
152 | void encode(bufferlist& bl, uint64_t features) const { | |
153 | if ((features & CEPH_FEATURE_MDSENC) == 0 ) encode_unversioned(bl); | |
154 | else encode_versioned(bl, features); | |
155 | } | |
156 | void decode(bufferlist::iterator& p); | |
157 | void dump(Formatter *f) const; | |
158 | void print_summary(ostream &out) const; | |
d2e6a577 FG |
159 | |
160 | // The long form name for use in cluster log messages` | |
161 | std::string human_name() const; | |
162 | ||
7c673cae FG |
163 | static void generate_test_instances(list<mds_info_t*>& ls); |
164 | private: | |
165 | void encode_versioned(bufferlist& bl, uint64_t features) const; | |
166 | void encode_unversioned(bufferlist& bl) const; | |
167 | }; | |
168 | ||
169 | ||
170 | protected: | |
171 | // base map | |
172 | epoch_t epoch; | |
173 | bool enabled; | |
174 | std::string fs_name; | |
175 | uint32_t flags; // flags | |
176 | epoch_t last_failure; // mds epoch of last failure | |
177 | epoch_t last_failure_osd_epoch; // osd epoch of last failure; any mds entering replay needs | |
178 | // at least this osdmap to ensure the blacklist propagates. | |
179 | utime_t created, modified; | |
180 | ||
181 | mds_rank_t tableserver; // which MDS has snaptable | |
182 | mds_rank_t root; // which MDS has root directory | |
183 | ||
184 | __u32 session_timeout; | |
185 | __u32 session_autoclose; | |
186 | uint64_t max_file_size; | |
187 | ||
31f18b77 | 188 | std::vector<int64_t> data_pools; // file data pools available to clients (via an ioctl). first is the default. |
7c673cae FG |
189 | int64_t cas_pool; // where CAS objects go |
190 | int64_t metadata_pool; // where fs metadata objects go | |
191 | ||
192 | /* | |
193 | * in: the set of logical mds #'s that define the cluster. this is the set | |
194 | * of mds's the metadata may be distributed over. | |
195 | * up: map from logical mds #'s to the addrs filling those roles. | |
196 | * failed: subset of @in that are failed. | |
197 | * stopped: set of nodes that have been initialized, but are not active. | |
198 | * | |
199 | * @up + @failed = @in. @in * @stopped = {}. | |
200 | */ | |
201 | ||
202 | mds_rank_t max_mds; /* The maximum number of active MDSes. Also, the maximum rank. */ | |
203 | mds_rank_t standby_count_wanted; | |
204 | string balancer; /* The name/version of the mantle balancer (i.e. the rados obj name) */ | |
205 | ||
206 | std::set<mds_rank_t> in; // currently defined cluster | |
207 | ||
208 | // which ranks are failed, stopped, damaged (i.e. not held by a daemon) | |
209 | std::set<mds_rank_t> failed, stopped, damaged; | |
210 | std::map<mds_rank_t, mds_gid_t> up; // who is in those roles | |
211 | std::map<mds_gid_t, mds_info_t> mds_info; | |
212 | ||
213 | uint8_t ever_allowed_features; //< bitmap of features the cluster has allowed | |
214 | uint8_t explicitly_allowed_features; //< bitmap of features explicitly enabled | |
215 | ||
216 | bool inline_data_enabled; | |
217 | ||
218 | uint64_t cached_up_features; | |
219 | ||
220 | public: | |
221 | CompatSet compat; | |
222 | ||
223 | friend class MDSMonitor; | |
224 | friend class Filesystem; | |
225 | friend class FSMap; | |
226 | ||
227 | public: | |
228 | MDSMap() | |
229 | : epoch(0), enabled(false), fs_name(MDS_FS_NAME_DEFAULT), | |
230 | flags(CEPH_MDSMAP_DEFAULTS), last_failure(0), | |
231 | last_failure_osd_epoch(0), | |
232 | tableserver(0), root(0), | |
233 | session_timeout(0), | |
234 | session_autoclose(0), | |
235 | max_file_size(0), | |
236 | cas_pool(-1), | |
237 | metadata_pool(-1), | |
238 | max_mds(0), | |
239 | standby_count_wanted(-1), | |
240 | ever_allowed_features(0), | |
241 | explicitly_allowed_features(0), | |
242 | inline_data_enabled(false), | |
243 | cached_up_features(0) | |
244 | { } | |
245 | ||
246 | bool get_inline_data_enabled() const { return inline_data_enabled; } | |
247 | void set_inline_data_enabled(bool enabled) { inline_data_enabled = enabled; } | |
248 | ||
249 | utime_t get_session_timeout() const { | |
250 | return utime_t(session_timeout,0); | |
251 | } | |
252 | uint64_t get_max_filesize() const { return max_file_size; } | |
253 | void set_max_filesize(uint64_t m) { max_file_size = m; } | |
254 | ||
255 | int get_flags() const { return flags; } | |
256 | bool test_flag(int f) const { return flags & f; } | |
257 | void set_flag(int f) { flags |= f; } | |
258 | void clear_flag(int f) { flags &= ~f; } | |
259 | ||
260 | const std::string &get_fs_name() const {return fs_name;} | |
261 | ||
262 | void set_snaps_allowed() { | |
263 | set_flag(CEPH_MDSMAP_ALLOW_SNAPS); | |
264 | ever_allowed_features |= CEPH_MDSMAP_ALLOW_SNAPS; | |
265 | explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_SNAPS; | |
266 | } | |
267 | void clear_snaps_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_SNAPS); } | |
268 | bool allows_snaps() const { return test_flag(CEPH_MDSMAP_ALLOW_SNAPS); } | |
269 | ||
270 | void set_multimds_allowed() { | |
271 | set_flag(CEPH_MDSMAP_ALLOW_MULTIMDS); | |
272 | ever_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS; | |
273 | explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS; | |
274 | } | |
275 | void clear_multimds_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_MULTIMDS); } | |
276 | bool allows_multimds() const { return test_flag(CEPH_MDSMAP_ALLOW_MULTIMDS); } | |
277 | ||
278 | void set_dirfrags_allowed() { | |
279 | set_flag(CEPH_MDSMAP_ALLOW_DIRFRAGS); | |
280 | ever_allowed_features |= CEPH_MDSMAP_ALLOW_DIRFRAGS; | |
281 | explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_DIRFRAGS; | |
282 | } | |
283 | void clear_dirfrags_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_DIRFRAGS); } | |
284 | bool allows_dirfrags() const { return test_flag(CEPH_MDSMAP_ALLOW_DIRFRAGS); } | |
285 | ||
286 | epoch_t get_epoch() const { return epoch; } | |
287 | void inc_epoch() { epoch++; } | |
288 | ||
289 | bool get_enabled() const { return enabled; } | |
290 | ||
291 | const utime_t& get_created() const { return created; } | |
292 | void set_created(utime_t ct) { modified = created = ct; } | |
293 | const utime_t& get_modified() const { return modified; } | |
294 | void set_modified(utime_t mt) { modified = mt; } | |
295 | ||
296 | epoch_t get_last_failure() const { return last_failure; } | |
297 | epoch_t get_last_failure_osd_epoch() const { return last_failure_osd_epoch; } | |
298 | ||
299 | mds_rank_t get_max_mds() const { return max_mds; } | |
300 | void set_max_mds(mds_rank_t m) { max_mds = m; } | |
301 | ||
302 | mds_rank_t get_standby_count_wanted(mds_rank_t standby_daemon_count) const { | |
303 | assert(standby_daemon_count >= 0); | |
304 | std::set<mds_rank_t> s; | |
305 | get_standby_replay_mds_set(s); | |
306 | mds_rank_t standbys_avail = (mds_rank_t)s.size()+standby_daemon_count; | |
307 | mds_rank_t wanted = std::max(0, standby_count_wanted); | |
308 | return wanted > standbys_avail ? wanted - standbys_avail : 0; | |
309 | } | |
310 | void set_standby_count_wanted(mds_rank_t n) { standby_count_wanted = n; } | |
311 | bool check_health(mds_rank_t standby_daemon_count); | |
312 | ||
313 | const std::string get_balancer() const { return balancer; } | |
314 | void set_balancer(std::string val) { balancer.assign(val); } | |
315 | ||
316 | mds_rank_t get_tableserver() const { return tableserver; } | |
317 | mds_rank_t get_root() const { return root; } | |
318 | ||
31f18b77 | 319 | const std::vector<int64_t> &get_data_pools() const { return data_pools; } |
7c673cae FG |
320 | int64_t get_first_data_pool() const { return *data_pools.begin(); } |
321 | int64_t get_metadata_pool() const { return metadata_pool; } | |
322 | bool is_data_pool(int64_t poolid) const { | |
c07f9fc5 FG |
323 | auto p = std::find(data_pools.begin(), data_pools.end(), poolid); |
324 | if (p == data_pools.end()) | |
325 | return false; | |
326 | return true; | |
7c673cae FG |
327 | } |
328 | ||
329 | bool pool_in_use(int64_t poolid) const { | |
330 | return get_enabled() && (is_data_pool(poolid) || metadata_pool == poolid); | |
331 | } | |
332 | ||
333 | const std::map<mds_gid_t,mds_info_t>& get_mds_info() const { return mds_info; } | |
334 | const mds_info_t& get_mds_info_gid(mds_gid_t gid) const { | |
335 | return mds_info.at(gid); | |
336 | } | |
337 | const mds_info_t& get_mds_info(mds_rank_t m) const { | |
338 | assert(up.count(m) && mds_info.count(up.at(m))); | |
339 | return mds_info.at(up.at(m)); | |
340 | } | |
341 | mds_gid_t find_mds_gid_by_name(const std::string& s) const { | |
342 | for (std::map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin(); | |
343 | p != mds_info.end(); | |
344 | ++p) { | |
345 | if (p->second.name == s) { | |
346 | return p->first; | |
347 | } | |
348 | } | |
349 | return MDS_GID_NONE; | |
350 | } | |
351 | ||
352 | // counts | |
353 | unsigned get_num_in_mds() const { | |
354 | return in.size(); | |
355 | } | |
356 | unsigned get_num_up_mds() const { | |
357 | return up.size(); | |
358 | } | |
31f18b77 FG |
359 | mds_rank_t get_last_in_mds() const { |
360 | auto p = in.rbegin(); | |
361 | return p == in.rend() ? MDS_RANK_NONE : *p; | |
362 | } | |
7c673cae FG |
363 | int get_num_failed_mds() const { |
364 | return failed.size(); | |
365 | } | |
366 | unsigned get_num_mds(int state) const { | |
367 | unsigned n = 0; | |
368 | for (std::map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin(); | |
369 | p != mds_info.end(); | |
370 | ++p) | |
371 | if (p->second.state == state) ++n; | |
372 | return n; | |
373 | } | |
374 | ||
375 | // data pools | |
376 | void add_data_pool(int64_t poolid) { | |
31f18b77 | 377 | data_pools.push_back(poolid); |
7c673cae FG |
378 | } |
379 | int remove_data_pool(int64_t poolid) { | |
31f18b77 | 380 | std::vector<int64_t>::iterator p = std::find(data_pools.begin(), data_pools.end(), poolid); |
7c673cae FG |
381 | if (p == data_pools.end()) |
382 | return -ENOENT; | |
383 | data_pools.erase(p); | |
384 | return 0; | |
385 | } | |
386 | ||
387 | // sets | |
388 | void get_mds_set(std::set<mds_rank_t>& s) const { | |
389 | s = in; | |
390 | } | |
391 | void get_up_mds_set(std::set<mds_rank_t>& s) const { | |
392 | for (std::map<mds_rank_t, mds_gid_t>::const_iterator p = up.begin(); | |
393 | p != up.end(); | |
394 | ++p) | |
395 | s.insert(p->first); | |
396 | } | |
397 | void get_active_mds_set(std::set<mds_rank_t>& s) const { | |
398 | get_mds_set(s, MDSMap::STATE_ACTIVE); | |
399 | } | |
400 | void get_standby_replay_mds_set(std::set<mds_rank_t>& s) const { | |
401 | get_mds_set(s, MDSMap::STATE_STANDBY_REPLAY); | |
402 | } | |
403 | void get_failed_mds_set(std::set<mds_rank_t>& s) const { | |
404 | s = failed; | |
405 | } | |
406 | ||
407 | // features | |
408 | uint64_t get_up_features() { | |
409 | if (!cached_up_features) { | |
410 | bool first = true; | |
411 | for (std::map<mds_rank_t, mds_gid_t>::const_iterator p = up.begin(); | |
412 | p != up.end(); | |
413 | ++p) { | |
414 | std::map<mds_gid_t, mds_info_t>::const_iterator q = | |
415 | mds_info.find(p->second); | |
416 | assert(q != mds_info.end()); | |
417 | if (first) { | |
418 | cached_up_features = q->second.mds_features; | |
419 | first = false; | |
420 | } else { | |
421 | cached_up_features &= q->second.mds_features; | |
422 | } | |
423 | } | |
424 | } | |
425 | return cached_up_features; | |
426 | } | |
427 | ||
428 | /** | |
429 | * Get MDS ranks which are in but not up. | |
430 | */ | |
431 | void get_down_mds_set(std::set<mds_rank_t> *s) const | |
432 | { | |
433 | assert(s != NULL); | |
434 | s->insert(failed.begin(), failed.end()); | |
435 | s->insert(damaged.begin(), damaged.end()); | |
436 | } | |
437 | ||
438 | int get_failed() const { | |
439 | if (!failed.empty()) return *failed.begin(); | |
440 | return -1; | |
441 | } | |
442 | void get_stopped_mds_set(std::set<mds_rank_t>& s) const { | |
443 | s = stopped; | |
444 | } | |
445 | void get_recovery_mds_set(std::set<mds_rank_t>& s) const { | |
446 | s = failed; | |
447 | for (const auto& p : damaged) | |
448 | s.insert(p); | |
449 | for (const auto& p : mds_info) | |
450 | if (p.second.state >= STATE_REPLAY && p.second.state <= STATE_STOPPING) | |
451 | s.insert(p.second.rank); | |
452 | } | |
453 | ||
454 | void | |
455 | get_clientreplay_or_active_or_stopping_mds_set(std::set<mds_rank_t>& s) const { | |
456 | for (std::map<mds_gid_t, mds_info_t>::const_iterator p = mds_info.begin(); | |
457 | p != mds_info.end(); | |
458 | ++p) | |
459 | if (p->second.state >= STATE_CLIENTREPLAY && p->second.state <= STATE_STOPPING) | |
460 | s.insert(p->second.rank); | |
461 | } | |
462 | void get_mds_set(std::set<mds_rank_t>& s, DaemonState state) const { | |
463 | for (std::map<mds_gid_t, mds_info_t>::const_iterator p = mds_info.begin(); | |
464 | p != mds_info.end(); | |
465 | ++p) | |
466 | if (p->second.state == state) | |
467 | s.insert(p->second.rank); | |
468 | } | |
469 | ||
470 | void get_health(list<pair<health_status_t,std::string> >& summary, | |
471 | list<pair<health_status_t,std::string> > *detail) const; | |
472 | ||
224ce89b WB |
473 | void get_health_checks(health_check_map_t *checks) const; |
474 | ||
7c673cae FG |
475 | typedef enum |
476 | { | |
477 | AVAILABLE = 0, | |
478 | TRANSIENT_UNAVAILABLE = 1, | |
479 | STUCK_UNAVAILABLE = 2 | |
480 | ||
481 | } availability_t; | |
482 | ||
483 | /** | |
484 | * Return indication of whether cluster is available. This is a | |
485 | * heuristic for clients to see if they should bother waiting to talk to | |
486 | * MDSs, or whether they should error out at startup/mount. | |
487 | * | |
488 | * A TRANSIENT_UNAVAILABLE result indicates that the cluster is in a | |
489 | * transition state like replaying, or is potentially about to fail over. | |
490 | * Clients should wait for an updated map before making a final decision | |
491 | * about whether the filesystem is mountable. | |
492 | * | |
493 | * A STUCK_UNAVAILABLE result indicates that we can't see a way that | |
494 | * the cluster is about to recover on its own, so it'll probably require | |
495 | * administrator intervention: clients should probably not bother trying | |
496 | * to mount. | |
497 | */ | |
498 | availability_t is_cluster_available() const; | |
499 | ||
500 | // mds states | |
501 | bool is_down(mds_rank_t m) const { return up.count(m) == 0; } | |
502 | bool is_up(mds_rank_t m) const { return up.count(m); } | |
503 | bool is_in(mds_rank_t m) const { return up.count(m) || failed.count(m); } | |
504 | bool is_out(mds_rank_t m) const { return !is_in(m); } | |
505 | ||
506 | bool is_failed(mds_rank_t m) const { return failed.count(m); } | |
507 | bool is_stopped(mds_rank_t m) const { return stopped.count(m); } | |
508 | ||
509 | bool is_dne(mds_rank_t m) const { return in.count(m) == 0; } | |
510 | bool is_dne_gid(mds_gid_t gid) const { return mds_info.count(gid) == 0; } | |
511 | ||
512 | /** | |
513 | * Get MDS rank state if the rank is up, else STATE_NULL | |
514 | */ | |
515 | DaemonState get_state(mds_rank_t m) const { | |
516 | std::map<mds_rank_t, mds_gid_t>::const_iterator u = up.find(m); | |
517 | if (u == up.end()) | |
518 | return STATE_NULL; | |
519 | return get_state_gid(u->second); | |
520 | } | |
521 | ||
522 | /** | |
523 | * Get MDS daemon status by GID | |
524 | */ | |
525 | DaemonState get_state_gid(mds_gid_t gid) const { | |
526 | std::map<mds_gid_t,mds_info_t>::const_iterator i = mds_info.find(gid); | |
527 | if (i == mds_info.end()) | |
528 | return STATE_NULL; | |
529 | return i->second.state; | |
530 | } | |
531 | ||
532 | const mds_info_t& get_info(const mds_rank_t m) const { | |
533 | return mds_info.at(up.at(m)); | |
534 | } | |
535 | const mds_info_t& get_info_gid(const mds_gid_t gid) const { | |
536 | return mds_info.at(gid); | |
537 | } | |
538 | ||
539 | bool is_boot(mds_rank_t m) const { return get_state(m) == STATE_BOOT; } | |
540 | bool is_creating(mds_rank_t m) const { return get_state(m) == STATE_CREATING; } | |
541 | bool is_starting(mds_rank_t m) const { return get_state(m) == STATE_STARTING; } | |
542 | bool is_replay(mds_rank_t m) const { return get_state(m) == STATE_REPLAY; } | |
543 | bool is_resolve(mds_rank_t m) const { return get_state(m) == STATE_RESOLVE; } | |
544 | bool is_reconnect(mds_rank_t m) const { return get_state(m) == STATE_RECONNECT; } | |
545 | bool is_rejoin(mds_rank_t m) const { return get_state(m) == STATE_REJOIN; } | |
546 | bool is_clientreplay(mds_rank_t m) const { return get_state(m) == STATE_CLIENTREPLAY; } | |
547 | bool is_active(mds_rank_t m) const { return get_state(m) == STATE_ACTIVE; } | |
548 | bool is_stopping(mds_rank_t m) const { return get_state(m) == STATE_STOPPING; } | |
549 | bool is_active_or_stopping(mds_rank_t m) const { | |
550 | return is_active(m) || is_stopping(m); | |
551 | } | |
552 | bool is_clientreplay_or_active_or_stopping(mds_rank_t m) const { | |
553 | return is_clientreplay(m) || is_active(m) || is_stopping(m); | |
554 | } | |
555 | ||
556 | bool is_followable(mds_rank_t m) const { | |
557 | return (is_resolve(m) || | |
558 | is_replay(m) || | |
559 | is_rejoin(m) || | |
560 | is_clientreplay(m) || | |
561 | is_active(m) || | |
562 | is_stopping(m)); | |
563 | } | |
564 | ||
565 | bool is_laggy_gid(mds_gid_t gid) const { | |
566 | if (!mds_info.count(gid)) | |
567 | return false; | |
568 | std::map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.find(gid); | |
569 | return p->second.laggy(); | |
570 | } | |
571 | ||
572 | // degraded = some recovery in process. fixes active membership and | |
573 | // recovery_set. | |
574 | bool is_degraded() const { | |
575 | if (!failed.empty() || !damaged.empty()) | |
576 | return true; | |
577 | for (std::map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin(); | |
578 | p != mds_info.end(); | |
579 | ++p) | |
580 | if (p->second.state >= STATE_REPLAY && p->second.state <= STATE_CLIENTREPLAY) | |
581 | return true; | |
582 | return false; | |
583 | } | |
584 | bool is_any_failed() const { | |
585 | return failed.size(); | |
586 | } | |
587 | bool is_resolving() const { | |
588 | return | |
589 | get_num_mds(STATE_RESOLVE) > 0 && | |
590 | get_num_mds(STATE_REPLAY) == 0 && | |
591 | failed.empty() && damaged.empty(); | |
592 | } | |
593 | bool is_rejoining() const { | |
594 | // nodes are rejoining cache state | |
595 | return | |
596 | get_num_mds(STATE_REJOIN) > 0 && | |
597 | get_num_mds(STATE_REPLAY) == 0 && | |
598 | get_num_mds(STATE_RECONNECT) == 0 && | |
599 | get_num_mds(STATE_RESOLVE) == 0 && | |
600 | failed.empty() && damaged.empty(); | |
601 | } | |
602 | bool is_stopped() const { | |
603 | return up.empty(); | |
604 | } | |
605 | ||
606 | /** | |
607 | * Get whether a rank is 'up', i.e. has | |
608 | * an MDS daemon's entity_inst_t associated | |
609 | * with it. | |
610 | */ | |
611 | bool have_inst(mds_rank_t m) const { | |
612 | return up.count(m); | |
613 | } | |
614 | ||
615 | /** | |
616 | * Get the MDS daemon entity_inst_t for a rank | |
617 | * known to be up. | |
618 | */ | |
619 | const entity_inst_t get_inst(mds_rank_t m) { | |
620 | assert(up.count(m)); | |
621 | return mds_info[up[m]].get_inst(); | |
622 | } | |
623 | const entity_addr_t get_addr(mds_rank_t m) { | |
624 | assert(up.count(m)); | |
625 | return mds_info[up[m]].addr; | |
626 | } | |
627 | ||
628 | /** | |
629 | * Get the MDS daemon entity_inst_t for a rank, | |
630 | * if it is up. | |
631 | * | |
632 | * @return true if the rank was up and the inst | |
633 | * was populated, else false. | |
634 | */ | |
635 | bool get_inst(mds_rank_t m, entity_inst_t& inst) { | |
636 | if (up.count(m)) { | |
637 | inst = get_inst(m); | |
638 | return true; | |
639 | } | |
640 | return false; | |
641 | } | |
642 | ||
643 | mds_rank_t get_rank_gid(mds_gid_t gid) const { | |
644 | if (mds_info.count(gid)) { | |
645 | return mds_info.at(gid).rank; | |
646 | } else { | |
647 | return MDS_RANK_NONE; | |
648 | } | |
649 | } | |
650 | ||
651 | int get_inc_gid(mds_gid_t gid) const { | |
652 | auto mds_info_entry = mds_info.find(gid); | |
653 | if (mds_info_entry != mds_info.end()) | |
654 | return mds_info_entry->second.inc; | |
655 | return -1; | |
656 | } | |
657 | void encode(bufferlist& bl, uint64_t features) const; | |
658 | void decode(bufferlist::iterator& p); | |
659 | void decode(bufferlist& bl) { | |
660 | bufferlist::iterator p = bl.begin(); | |
661 | decode(p); | |
662 | } | |
3efd9988 | 663 | void sanitize(std::function<bool(int64_t pool)> pool_exists); |
7c673cae FG |
664 | |
665 | void print(ostream& out) const; | |
666 | void print_summary(Formatter *f, ostream *out) const; | |
667 | ||
668 | void dump(Formatter *f) const; | |
669 | static void generate_test_instances(list<MDSMap*>& ls); | |
670 | ||
671 | static bool state_transition_valid(DaemonState prev, DaemonState next); | |
672 | }; | |
673 | WRITE_CLASS_ENCODER_FEATURES(MDSMap::mds_info_t) | |
674 | WRITE_CLASS_ENCODER_FEATURES(MDSMap) | |
675 | ||
676 | inline ostream& operator<<(ostream &out, const MDSMap &m) { | |
677 | m.print_summary(NULL, &out); | |
678 | return out; | |
679 | } | |
680 | ||
681 | #endif |