]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | #ifndef CEPH_MDSMAP_H | |
17 | #define CEPH_MDSMAP_H | |
18 | ||
94b18763 FG |
19 | #include <algorithm> |
20 | #include <map> | |
21 | #include <set> | |
22 | #include <string> | |
23 | #include <boost/utility/string_view.hpp> | |
24 | ||
7c673cae FG |
25 | #include <errno.h> |
26 | ||
27 | #include "include/types.h" | |
28 | #include "common/Clock.h" | |
29 | #include "msg/Message.h" | |
224ce89b | 30 | #include "include/health.h" |
7c673cae | 31 | |
7c673cae FG |
32 | #include "common/config.h" |
33 | ||
34 | #include "include/CompatSet.h" | |
35 | #include "include/ceph_features.h" | |
36 | #include "common/Formatter.h" | |
37 | #include "mds/mdstypes.h" | |
38 | ||
39 | /* | |
40 | ||
41 | boot --> standby, creating, or starting. | |
42 | ||
43 | ||
44 | dne ----> creating -----> active* | |
45 | ^ ^___________/ / ^ ^ | |
46 | | / / | | |
47 | destroying / / | | |
48 | ^ / / | | |
49 | | / / | | |
50 | stopped <---- stopping* <-/ / | | |
51 | \ / | | |
52 | ----- starting* ----/ | | |
53 | | | |
54 | failed | | |
55 | \ | | |
56 | \--> replay* --> reconnect* --> rejoin* | |
57 | ||
58 | * = can fail | |
59 | ||
60 | */ | |
61 | ||
62 | class CephContext; | |
224ce89b | 63 | class health_check_map_t; |
7c673cae FG |
64 | |
65 | extern CompatSet get_mdsmap_compat_set_all(); | |
66 | extern CompatSet get_mdsmap_compat_set_default(); | |
67 | extern CompatSet get_mdsmap_compat_set_base(); // pre v0.20 | |
68 | ||
69 | #define MDS_FEATURE_INCOMPAT_BASE CompatSet::Feature(1, "base v0.20") | |
70 | #define MDS_FEATURE_INCOMPAT_CLIENTRANGES CompatSet::Feature(2, "client writeable ranges") | |
71 | #define MDS_FEATURE_INCOMPAT_FILELAYOUT CompatSet::Feature(3, "default file layouts on dirs") | |
72 | #define MDS_FEATURE_INCOMPAT_DIRINODE CompatSet::Feature(4, "dir inode in separate object") | |
73 | #define MDS_FEATURE_INCOMPAT_ENCODING CompatSet::Feature(5, "mds uses versioned encoding") | |
74 | #define MDS_FEATURE_INCOMPAT_OMAPDIRFRAG CompatSet::Feature(6, "dirfrag is stored in omap") | |
75 | #define MDS_FEATURE_INCOMPAT_INLINE CompatSet::Feature(7, "mds uses inline data") | |
76 | #define MDS_FEATURE_INCOMPAT_NOANCHOR CompatSet::Feature(8, "no anchor table") | |
b32b8144 | 77 | #define MDS_FEATURE_INCOMPAT_FILE_LAYOUT_V2 CompatSet::Feature(9, "file layout v2") |
7c673cae FG |
78 | |
79 | #define MDS_FS_NAME_DEFAULT "cephfs" | |
80 | ||
81 | class MDSMap { | |
82 | public: | |
83 | /* These states are the union of the set of possible states of an MDS daemon, | |
84 | * and the set of possible states of an MDS rank */ | |
85 | typedef enum { | |
86 | // States of an MDS daemon not currently holding a rank | |
87 | // ==================================================== | |
88 | STATE_NULL = CEPH_MDS_STATE_NULL, // null value for fns returning this type. | |
89 | STATE_BOOT = CEPH_MDS_STATE_BOOT, // up, boot announcement. destiny unknown. | |
90 | STATE_STANDBY = CEPH_MDS_STATE_STANDBY, // up, idle. waiting for assignment by monitor. | |
91 | STATE_STANDBY_REPLAY = CEPH_MDS_STATE_STANDBY_REPLAY, // up, replaying active node, ready to take over. | |
92 | ||
93 | // States of an MDS rank, and of any MDS daemon holding that rank | |
94 | // ============================================================== | |
95 | STATE_STOPPED = CEPH_MDS_STATE_STOPPED, // down, once existed, but no subtrees. empty log. may not be held by a daemon. | |
96 | ||
97 | STATE_CREATING = CEPH_MDS_STATE_CREATING, // up, creating MDS instance (new journal, idalloc..). | |
98 | STATE_STARTING = CEPH_MDS_STATE_STARTING, // up, starting prior stopped MDS instance. | |
99 | ||
100 | STATE_REPLAY = CEPH_MDS_STATE_REPLAY, // up, starting prior failed instance. scanning journal. | |
101 | STATE_RESOLVE = CEPH_MDS_STATE_RESOLVE, // up, disambiguating distributed operations (import, rename, etc.) | |
102 | STATE_RECONNECT = CEPH_MDS_STATE_RECONNECT, // up, reconnect to clients | |
103 | STATE_REJOIN = CEPH_MDS_STATE_REJOIN, // up, replayed journal, rejoining distributed cache | |
104 | STATE_CLIENTREPLAY = CEPH_MDS_STATE_CLIENTREPLAY, // up, active | |
105 | STATE_ACTIVE = CEPH_MDS_STATE_ACTIVE, // up, active | |
106 | STATE_STOPPING = CEPH_MDS_STATE_STOPPING, // up, exporting metadata (-> standby or out) | |
107 | STATE_DNE = CEPH_MDS_STATE_DNE, // down, rank does not exist | |
108 | ||
109 | // State which a daemon may send to MDSMonitor in its beacon | |
110 | // to indicate that offline repair is required. Daemon must stop | |
111 | // immediately after indicating this state. | |
112 | STATE_DAMAGED = CEPH_MDS_STATE_DAMAGED | |
113 | ||
114 | /* | |
115 | * In addition to explicit states, an MDS rank implicitly in state: | |
116 | * - STOPPED if it is not currently associated with an MDS daemon gid but it | |
117 | * is in MDSMap::stopped | |
118 | * - FAILED if it is not currently associated with an MDS daemon gid but it | |
119 | * is in MDSMap::failed | |
120 | * - DNE if it is not currently associated with an MDS daemon gid and it is | |
121 | * missing from both MDSMap::failed and MDSMap::stopped | |
122 | */ | |
123 | } DaemonState; | |
124 | ||
125 | struct mds_info_t { | |
126 | mds_gid_t global_id; | |
127 | std::string name; | |
128 | mds_rank_t rank; | |
129 | int32_t inc; | |
130 | MDSMap::DaemonState state; | |
131 | version_t state_seq; | |
132 | entity_addr_t addr; | |
133 | utime_t laggy_since; | |
134 | mds_rank_t standby_for_rank; | |
135 | std::string standby_for_name; | |
136 | fs_cluster_id_t standby_for_fscid; | |
137 | bool standby_replay; | |
138 | std::set<mds_rank_t> export_targets; | |
c07f9fc5 | 139 | uint64_t mds_features = 0; |
7c673cae FG |
140 | |
141 | mds_info_t() : global_id(MDS_GID_NONE), rank(MDS_RANK_NONE), inc(0), | |
142 | state(STATE_STANDBY), state_seq(0), | |
143 | standby_for_rank(MDS_RANK_NONE), | |
144 | standby_for_fscid(FS_CLUSTER_ID_NONE), | |
145 | standby_replay(false) | |
146 | { } | |
147 | ||
148 | bool laggy() const { return !(laggy_since == utime_t()); } | |
149 | void clear_laggy() { laggy_since = utime_t(); } | |
150 | ||
151 | entity_inst_t get_inst() const { return entity_inst_t(entity_name_t::MDS(rank), addr); } | |
152 | ||
153 | void encode(bufferlist& bl, uint64_t features) const { | |
154 | if ((features & CEPH_FEATURE_MDSENC) == 0 ) encode_unversioned(bl); | |
155 | else encode_versioned(bl, features); | |
156 | } | |
157 | void decode(bufferlist::iterator& p); | |
158 | void dump(Formatter *f) const; | |
159 | void print_summary(ostream &out) const; | |
d2e6a577 FG |
160 | |
161 | // The long form name for use in cluster log messages` | |
162 | std::string human_name() const; | |
163 | ||
7c673cae FG |
164 | static void generate_test_instances(list<mds_info_t*>& ls); |
165 | private: | |
166 | void encode_versioned(bufferlist& bl, uint64_t features) const; | |
167 | void encode_unversioned(bufferlist& bl) const; | |
168 | }; | |
169 | ||
170 | ||
171 | protected: | |
172 | // base map | |
173 | epoch_t epoch; | |
174 | bool enabled; | |
175 | std::string fs_name; | |
176 | uint32_t flags; // flags | |
177 | epoch_t last_failure; // mds epoch of last failure | |
178 | epoch_t last_failure_osd_epoch; // osd epoch of last failure; any mds entering replay needs | |
179 | // at least this osdmap to ensure the blacklist propagates. | |
180 | utime_t created, modified; | |
181 | ||
182 | mds_rank_t tableserver; // which MDS has snaptable | |
183 | mds_rank_t root; // which MDS has root directory | |
184 | ||
185 | __u32 session_timeout; | |
186 | __u32 session_autoclose; | |
187 | uint64_t max_file_size; | |
188 | ||
31f18b77 | 189 | std::vector<int64_t> data_pools; // file data pools available to clients (via an ioctl). first is the default. |
7c673cae FG |
190 | int64_t cas_pool; // where CAS objects go |
191 | int64_t metadata_pool; // where fs metadata objects go | |
192 | ||
193 | /* | |
194 | * in: the set of logical mds #'s that define the cluster. this is the set | |
195 | * of mds's the metadata may be distributed over. | |
196 | * up: map from logical mds #'s to the addrs filling those roles. | |
197 | * failed: subset of @in that are failed. | |
198 | * stopped: set of nodes that have been initialized, but are not active. | |
199 | * | |
200 | * @up + @failed = @in. @in * @stopped = {}. | |
201 | */ | |
202 | ||
203 | mds_rank_t max_mds; /* The maximum number of active MDSes. Also, the maximum rank. */ | |
204 | mds_rank_t standby_count_wanted; | |
205 | string balancer; /* The name/version of the mantle balancer (i.e. the rados obj name) */ | |
206 | ||
207 | std::set<mds_rank_t> in; // currently defined cluster | |
208 | ||
209 | // which ranks are failed, stopped, damaged (i.e. not held by a daemon) | |
210 | std::set<mds_rank_t> failed, stopped, damaged; | |
211 | std::map<mds_rank_t, mds_gid_t> up; // who is in those roles | |
212 | std::map<mds_gid_t, mds_info_t> mds_info; | |
213 | ||
214 | uint8_t ever_allowed_features; //< bitmap of features the cluster has allowed | |
215 | uint8_t explicitly_allowed_features; //< bitmap of features explicitly enabled | |
216 | ||
217 | bool inline_data_enabled; | |
218 | ||
219 | uint64_t cached_up_features; | |
220 | ||
221 | public: | |
222 | CompatSet compat; | |
223 | ||
224 | friend class MDSMonitor; | |
225 | friend class Filesystem; | |
226 | friend class FSMap; | |
227 | ||
228 | public: | |
229 | MDSMap() | |
230 | : epoch(0), enabled(false), fs_name(MDS_FS_NAME_DEFAULT), | |
231 | flags(CEPH_MDSMAP_DEFAULTS), last_failure(0), | |
232 | last_failure_osd_epoch(0), | |
233 | tableserver(0), root(0), | |
234 | session_timeout(0), | |
235 | session_autoclose(0), | |
236 | max_file_size(0), | |
237 | cas_pool(-1), | |
238 | metadata_pool(-1), | |
239 | max_mds(0), | |
240 | standby_count_wanted(-1), | |
241 | ever_allowed_features(0), | |
242 | explicitly_allowed_features(0), | |
243 | inline_data_enabled(false), | |
244 | cached_up_features(0) | |
245 | { } | |
246 | ||
247 | bool get_inline_data_enabled() const { return inline_data_enabled; } | |
248 | void set_inline_data_enabled(bool enabled) { inline_data_enabled = enabled; } | |
249 | ||
250 | utime_t get_session_timeout() const { | |
251 | return utime_t(session_timeout,0); | |
252 | } | |
b32b8144 FG |
253 | |
254 | utime_t get_session_autoclose() const { | |
255 | return utime_t(session_autoclose, 0); | |
256 | } | |
257 | ||
7c673cae FG |
258 | uint64_t get_max_filesize() const { return max_file_size; } |
259 | void set_max_filesize(uint64_t m) { max_file_size = m; } | |
260 | ||
261 | int get_flags() const { return flags; } | |
262 | bool test_flag(int f) const { return flags & f; } | |
263 | void set_flag(int f) { flags |= f; } | |
264 | void clear_flag(int f) { flags &= ~f; } | |
265 | ||
94b18763 | 266 | boost::string_view get_fs_name() const {return fs_name;} |
7c673cae FG |
267 | |
268 | void set_snaps_allowed() { | |
269 | set_flag(CEPH_MDSMAP_ALLOW_SNAPS); | |
270 | ever_allowed_features |= CEPH_MDSMAP_ALLOW_SNAPS; | |
271 | explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_SNAPS; | |
272 | } | |
273 | void clear_snaps_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_SNAPS); } | |
274 | bool allows_snaps() const { return test_flag(CEPH_MDSMAP_ALLOW_SNAPS); } | |
275 | ||
276 | void set_multimds_allowed() { | |
277 | set_flag(CEPH_MDSMAP_ALLOW_MULTIMDS); | |
278 | ever_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS; | |
279 | explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS; | |
280 | } | |
281 | void clear_multimds_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_MULTIMDS); } | |
282 | bool allows_multimds() const { return test_flag(CEPH_MDSMAP_ALLOW_MULTIMDS); } | |
283 | ||
284 | void set_dirfrags_allowed() { | |
285 | set_flag(CEPH_MDSMAP_ALLOW_DIRFRAGS); | |
286 | ever_allowed_features |= CEPH_MDSMAP_ALLOW_DIRFRAGS; | |
287 | explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_DIRFRAGS; | |
288 | } | |
289 | void clear_dirfrags_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_DIRFRAGS); } | |
290 | bool allows_dirfrags() const { return test_flag(CEPH_MDSMAP_ALLOW_DIRFRAGS); } | |
291 | ||
292 | epoch_t get_epoch() const { return epoch; } | |
293 | void inc_epoch() { epoch++; } | |
294 | ||
295 | bool get_enabled() const { return enabled; } | |
296 | ||
297 | const utime_t& get_created() const { return created; } | |
298 | void set_created(utime_t ct) { modified = created = ct; } | |
299 | const utime_t& get_modified() const { return modified; } | |
300 | void set_modified(utime_t mt) { modified = mt; } | |
301 | ||
302 | epoch_t get_last_failure() const { return last_failure; } | |
303 | epoch_t get_last_failure_osd_epoch() const { return last_failure_osd_epoch; } | |
304 | ||
305 | mds_rank_t get_max_mds() const { return max_mds; } | |
306 | void set_max_mds(mds_rank_t m) { max_mds = m; } | |
307 | ||
308 | mds_rank_t get_standby_count_wanted(mds_rank_t standby_daemon_count) const { | |
309 | assert(standby_daemon_count >= 0); | |
310 | std::set<mds_rank_t> s; | |
311 | get_standby_replay_mds_set(s); | |
312 | mds_rank_t standbys_avail = (mds_rank_t)s.size()+standby_daemon_count; | |
313 | mds_rank_t wanted = std::max(0, standby_count_wanted); | |
314 | return wanted > standbys_avail ? wanted - standbys_avail : 0; | |
315 | } | |
316 | void set_standby_count_wanted(mds_rank_t n) { standby_count_wanted = n; } | |
317 | bool check_health(mds_rank_t standby_daemon_count); | |
318 | ||
319 | const std::string get_balancer() const { return balancer; } | |
320 | void set_balancer(std::string val) { balancer.assign(val); } | |
321 | ||
322 | mds_rank_t get_tableserver() const { return tableserver; } | |
323 | mds_rank_t get_root() const { return root; } | |
324 | ||
31f18b77 | 325 | const std::vector<int64_t> &get_data_pools() const { return data_pools; } |
7c673cae FG |
326 | int64_t get_first_data_pool() const { return *data_pools.begin(); } |
327 | int64_t get_metadata_pool() const { return metadata_pool; } | |
328 | bool is_data_pool(int64_t poolid) const { | |
c07f9fc5 FG |
329 | auto p = std::find(data_pools.begin(), data_pools.end(), poolid); |
330 | if (p == data_pools.end()) | |
331 | return false; | |
332 | return true; | |
7c673cae FG |
333 | } |
334 | ||
335 | bool pool_in_use(int64_t poolid) const { | |
336 | return get_enabled() && (is_data_pool(poolid) || metadata_pool == poolid); | |
337 | } | |
338 | ||
339 | const std::map<mds_gid_t,mds_info_t>& get_mds_info() const { return mds_info; } | |
340 | const mds_info_t& get_mds_info_gid(mds_gid_t gid) const { | |
341 | return mds_info.at(gid); | |
342 | } | |
343 | const mds_info_t& get_mds_info(mds_rank_t m) const { | |
344 | assert(up.count(m) && mds_info.count(up.at(m))); | |
345 | return mds_info.at(up.at(m)); | |
346 | } | |
94b18763 | 347 | mds_gid_t find_mds_gid_by_name(boost::string_view s) const { |
7c673cae FG |
348 | for (std::map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin(); |
349 | p != mds_info.end(); | |
350 | ++p) { | |
351 | if (p->second.name == s) { | |
352 | return p->first; | |
353 | } | |
354 | } | |
355 | return MDS_GID_NONE; | |
356 | } | |
357 | ||
358 | // counts | |
359 | unsigned get_num_in_mds() const { | |
360 | return in.size(); | |
361 | } | |
362 | unsigned get_num_up_mds() const { | |
363 | return up.size(); | |
364 | } | |
31f18b77 FG |
365 | mds_rank_t get_last_in_mds() const { |
366 | auto p = in.rbegin(); | |
367 | return p == in.rend() ? MDS_RANK_NONE : *p; | |
368 | } | |
7c673cae FG |
369 | int get_num_failed_mds() const { |
370 | return failed.size(); | |
371 | } | |
372 | unsigned get_num_mds(int state) const { | |
373 | unsigned n = 0; | |
374 | for (std::map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin(); | |
375 | p != mds_info.end(); | |
376 | ++p) | |
377 | if (p->second.state == state) ++n; | |
378 | return n; | |
379 | } | |
380 | ||
381 | // data pools | |
382 | void add_data_pool(int64_t poolid) { | |
31f18b77 | 383 | data_pools.push_back(poolid); |
7c673cae FG |
384 | } |
385 | int remove_data_pool(int64_t poolid) { | |
31f18b77 | 386 | std::vector<int64_t>::iterator p = std::find(data_pools.begin(), data_pools.end(), poolid); |
7c673cae FG |
387 | if (p == data_pools.end()) |
388 | return -ENOENT; | |
389 | data_pools.erase(p); | |
390 | return 0; | |
391 | } | |
392 | ||
393 | // sets | |
394 | void get_mds_set(std::set<mds_rank_t>& s) const { | |
395 | s = in; | |
396 | } | |
397 | void get_up_mds_set(std::set<mds_rank_t>& s) const { | |
398 | for (std::map<mds_rank_t, mds_gid_t>::const_iterator p = up.begin(); | |
399 | p != up.end(); | |
400 | ++p) | |
401 | s.insert(p->first); | |
402 | } | |
403 | void get_active_mds_set(std::set<mds_rank_t>& s) const { | |
404 | get_mds_set(s, MDSMap::STATE_ACTIVE); | |
405 | } | |
406 | void get_standby_replay_mds_set(std::set<mds_rank_t>& s) const { | |
407 | get_mds_set(s, MDSMap::STATE_STANDBY_REPLAY); | |
408 | } | |
409 | void get_failed_mds_set(std::set<mds_rank_t>& s) const { | |
410 | s = failed; | |
411 | } | |
412 | ||
413 | // features | |
414 | uint64_t get_up_features() { | |
415 | if (!cached_up_features) { | |
416 | bool first = true; | |
417 | for (std::map<mds_rank_t, mds_gid_t>::const_iterator p = up.begin(); | |
418 | p != up.end(); | |
419 | ++p) { | |
420 | std::map<mds_gid_t, mds_info_t>::const_iterator q = | |
421 | mds_info.find(p->second); | |
422 | assert(q != mds_info.end()); | |
423 | if (first) { | |
424 | cached_up_features = q->second.mds_features; | |
425 | first = false; | |
426 | } else { | |
427 | cached_up_features &= q->second.mds_features; | |
428 | } | |
429 | } | |
430 | } | |
431 | return cached_up_features; | |
432 | } | |
433 | ||
434 | /** | |
435 | * Get MDS ranks which are in but not up. | |
436 | */ | |
437 | void get_down_mds_set(std::set<mds_rank_t> *s) const | |
438 | { | |
439 | assert(s != NULL); | |
440 | s->insert(failed.begin(), failed.end()); | |
441 | s->insert(damaged.begin(), damaged.end()); | |
442 | } | |
443 | ||
444 | int get_failed() const { | |
445 | if (!failed.empty()) return *failed.begin(); | |
446 | return -1; | |
447 | } | |
448 | void get_stopped_mds_set(std::set<mds_rank_t>& s) const { | |
449 | s = stopped; | |
450 | } | |
451 | void get_recovery_mds_set(std::set<mds_rank_t>& s) const { | |
452 | s = failed; | |
453 | for (const auto& p : damaged) | |
454 | s.insert(p); | |
455 | for (const auto& p : mds_info) | |
456 | if (p.second.state >= STATE_REPLAY && p.second.state <= STATE_STOPPING) | |
457 | s.insert(p.second.rank); | |
458 | } | |
459 | ||
460 | void | |
461 | get_clientreplay_or_active_or_stopping_mds_set(std::set<mds_rank_t>& s) const { | |
462 | for (std::map<mds_gid_t, mds_info_t>::const_iterator p = mds_info.begin(); | |
463 | p != mds_info.end(); | |
464 | ++p) | |
465 | if (p->second.state >= STATE_CLIENTREPLAY && p->second.state <= STATE_STOPPING) | |
466 | s.insert(p->second.rank); | |
467 | } | |
468 | void get_mds_set(std::set<mds_rank_t>& s, DaemonState state) const { | |
469 | for (std::map<mds_gid_t, mds_info_t>::const_iterator p = mds_info.begin(); | |
470 | p != mds_info.end(); | |
471 | ++p) | |
472 | if (p->second.state == state) | |
473 | s.insert(p->second.rank); | |
474 | } | |
475 | ||
476 | void get_health(list<pair<health_status_t,std::string> >& summary, | |
477 | list<pair<health_status_t,std::string> > *detail) const; | |
478 | ||
224ce89b WB |
479 | void get_health_checks(health_check_map_t *checks) const; |
480 | ||
7c673cae FG |
481 | typedef enum |
482 | { | |
483 | AVAILABLE = 0, | |
484 | TRANSIENT_UNAVAILABLE = 1, | |
485 | STUCK_UNAVAILABLE = 2 | |
486 | ||
487 | } availability_t; | |
488 | ||
489 | /** | |
490 | * Return indication of whether cluster is available. This is a | |
491 | * heuristic for clients to see if they should bother waiting to talk to | |
492 | * MDSs, or whether they should error out at startup/mount. | |
493 | * | |
494 | * A TRANSIENT_UNAVAILABLE result indicates that the cluster is in a | |
495 | * transition state like replaying, or is potentially about the fail over. | |
496 | * Clients should wait for an updated map before making a final decision | |
497 | * about whether the filesystem is mountable. | |
498 | * | |
499 | * A STUCK_UNAVAILABLE result indicates that we can't see a way that | |
500 | * the cluster is about to recover on its own, so it'll probably require | |
501 | * administrator intervention: clients should probaly not bother trying | |
502 | * to mount. | |
503 | */ | |
504 | availability_t is_cluster_available() const; | |
505 | ||
506 | // mds states | |
507 | bool is_down(mds_rank_t m) const { return up.count(m) == 0; } | |
508 | bool is_up(mds_rank_t m) const { return up.count(m); } | |
509 | bool is_in(mds_rank_t m) const { return up.count(m) || failed.count(m); } | |
510 | bool is_out(mds_rank_t m) const { return !is_in(m); } | |
511 | ||
512 | bool is_failed(mds_rank_t m) const { return failed.count(m); } | |
513 | bool is_stopped(mds_rank_t m) const { return stopped.count(m); } | |
514 | ||
515 | bool is_dne(mds_rank_t m) const { return in.count(m) == 0; } | |
516 | bool is_dne_gid(mds_gid_t gid) const { return mds_info.count(gid) == 0; } | |
517 | ||
518 | /** | |
519 | * Get MDS rank state if the rank is up, else STATE_NULL | |
520 | */ | |
521 | DaemonState get_state(mds_rank_t m) const { | |
522 | std::map<mds_rank_t, mds_gid_t>::const_iterator u = up.find(m); | |
523 | if (u == up.end()) | |
524 | return STATE_NULL; | |
525 | return get_state_gid(u->second); | |
526 | } | |
527 | ||
528 | /** | |
529 | * Get MDS daemon status by GID | |
530 | */ | |
531 | DaemonState get_state_gid(mds_gid_t gid) const { | |
532 | std::map<mds_gid_t,mds_info_t>::const_iterator i = mds_info.find(gid); | |
533 | if (i == mds_info.end()) | |
534 | return STATE_NULL; | |
535 | return i->second.state; | |
536 | } | |
537 | ||
538 | const mds_info_t& get_info(const mds_rank_t m) const { | |
539 | return mds_info.at(up.at(m)); | |
540 | } | |
541 | const mds_info_t& get_info_gid(const mds_gid_t gid) const { | |
542 | return mds_info.at(gid); | |
543 | } | |
544 | ||
545 | bool is_boot(mds_rank_t m) const { return get_state(m) == STATE_BOOT; } | |
546 | bool is_creating(mds_rank_t m) const { return get_state(m) == STATE_CREATING; } | |
547 | bool is_starting(mds_rank_t m) const { return get_state(m) == STATE_STARTING; } | |
548 | bool is_replay(mds_rank_t m) const { return get_state(m) == STATE_REPLAY; } | |
549 | bool is_resolve(mds_rank_t m) const { return get_state(m) == STATE_RESOLVE; } | |
550 | bool is_reconnect(mds_rank_t m) const { return get_state(m) == STATE_RECONNECT; } | |
551 | bool is_rejoin(mds_rank_t m) const { return get_state(m) == STATE_REJOIN; } | |
552 | bool is_clientreplay(mds_rank_t m) const { return get_state(m) == STATE_CLIENTREPLAY; } | |
553 | bool is_active(mds_rank_t m) const { return get_state(m) == STATE_ACTIVE; } | |
554 | bool is_stopping(mds_rank_t m) const { return get_state(m) == STATE_STOPPING; } | |
555 | bool is_active_or_stopping(mds_rank_t m) const { | |
556 | return is_active(m) || is_stopping(m); | |
557 | } | |
558 | bool is_clientreplay_or_active_or_stopping(mds_rank_t m) const { | |
559 | return is_clientreplay(m) || is_active(m) || is_stopping(m); | |
560 | } | |
561 | ||
562 | bool is_followable(mds_rank_t m) const { | |
563 | return (is_resolve(m) || | |
564 | is_replay(m) || | |
565 | is_rejoin(m) || | |
566 | is_clientreplay(m) || | |
567 | is_active(m) || | |
568 | is_stopping(m)); | |
569 | } | |
570 | ||
571 | bool is_laggy_gid(mds_gid_t gid) const { | |
572 | if (!mds_info.count(gid)) | |
573 | return false; | |
574 | std::map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.find(gid); | |
575 | return p->second.laggy(); | |
576 | } | |
577 | ||
578 | // degraded = some recovery in process. fixes active membership and | |
579 | // recovery_set. | |
580 | bool is_degraded() const { | |
581 | if (!failed.empty() || !damaged.empty()) | |
582 | return true; | |
583 | for (std::map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin(); | |
584 | p != mds_info.end(); | |
585 | ++p) | |
586 | if (p->second.state >= STATE_REPLAY && p->second.state <= STATE_CLIENTREPLAY) | |
587 | return true; | |
588 | return false; | |
589 | } | |
590 | bool is_any_failed() const { | |
591 | return failed.size(); | |
592 | } | |
593 | bool is_resolving() const { | |
594 | return | |
595 | get_num_mds(STATE_RESOLVE) > 0 && | |
596 | get_num_mds(STATE_REPLAY) == 0 && | |
597 | failed.empty() && damaged.empty(); | |
598 | } | |
599 | bool is_rejoining() const { | |
600 | // nodes are rejoining cache state | |
601 | return | |
602 | get_num_mds(STATE_REJOIN) > 0 && | |
603 | get_num_mds(STATE_REPLAY) == 0 && | |
604 | get_num_mds(STATE_RECONNECT) == 0 && | |
605 | get_num_mds(STATE_RESOLVE) == 0 && | |
606 | failed.empty() && damaged.empty(); | |
607 | } | |
608 | bool is_stopped() const { | |
609 | return up.empty(); | |
610 | } | |
611 | ||
612 | /** | |
613 | * Get whether a rank is 'up', i.e. has | |
614 | * an MDS daemon's entity_inst_t associated | |
615 | * with it. | |
616 | */ | |
617 | bool have_inst(mds_rank_t m) const { | |
618 | return up.count(m); | |
619 | } | |
620 | ||
621 | /** | |
622 | * Get the MDS daemon entity_inst_t for a rank | |
623 | * known to be up. | |
624 | */ | |
625 | const entity_inst_t get_inst(mds_rank_t m) { | |
626 | assert(up.count(m)); | |
627 | return mds_info[up[m]].get_inst(); | |
628 | } | |
629 | const entity_addr_t get_addr(mds_rank_t m) { | |
630 | assert(up.count(m)); | |
631 | return mds_info[up[m]].addr; | |
632 | } | |
633 | ||
634 | /** | |
635 | * Get the MDS daemon entity_inst_t for a rank, | |
636 | * if it is up. | |
637 | * | |
638 | * @return true if the rank was up and the inst | |
639 | * was populated, else false. | |
640 | */ | |
641 | bool get_inst(mds_rank_t m, entity_inst_t& inst) { | |
642 | if (up.count(m)) { | |
643 | inst = get_inst(m); | |
644 | return true; | |
645 | } | |
646 | return false; | |
647 | } | |
648 | ||
649 | mds_rank_t get_rank_gid(mds_gid_t gid) const { | |
650 | if (mds_info.count(gid)) { | |
651 | return mds_info.at(gid).rank; | |
652 | } else { | |
653 | return MDS_RANK_NONE; | |
654 | } | |
655 | } | |
656 | ||
657 | int get_inc_gid(mds_gid_t gid) const { | |
658 | auto mds_info_entry = mds_info.find(gid); | |
659 | if (mds_info_entry != mds_info.end()) | |
660 | return mds_info_entry->second.inc; | |
661 | return -1; | |
662 | } | |
663 | void encode(bufferlist& bl, uint64_t features) const; | |
664 | void decode(bufferlist::iterator& p); | |
665 | void decode(bufferlist& bl) { | |
666 | bufferlist::iterator p = bl.begin(); | |
667 | decode(p); | |
668 | } | |
3efd9988 | 669 | void sanitize(std::function<bool(int64_t pool)> pool_exists); |
7c673cae FG |
670 | |
671 | void print(ostream& out) const; | |
672 | void print_summary(Formatter *f, ostream *out) const; | |
673 | ||
674 | void dump(Formatter *f) const; | |
675 | static void generate_test_instances(list<MDSMap*>& ls); | |
676 | ||
677 | static bool state_transition_valid(DaemonState prev, DaemonState next); | |
678 | }; | |
679 | WRITE_CLASS_ENCODER_FEATURES(MDSMap::mds_info_t) | |
680 | WRITE_CLASS_ENCODER_FEATURES(MDSMap) | |
681 | ||
// Stream insertion for MDSMap: emits the one-line summary form
// (delegates to MDSMap::print_summary with no Formatter).
inline ostream& operator<<(ostream &out, const MDSMap &m) {
  m.print_summary(NULL, &out);
  return out;
}
686 | ||
687 | #endif |