]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | #ifndef CEPH_MDSMAP_H | |
17 | #define CEPH_MDSMAP_H | |
18 | ||
94b18763 FG |
19 | #include <algorithm> |
20 | #include <map> | |
21 | #include <set> | |
22 | #include <string> | |
23 | #include <boost/utility/string_view.hpp> | |
24 | ||
7c673cae FG |
25 | #include <errno.h> |
26 | ||
27 | #include "include/types.h" | |
28 | #include "common/Clock.h" | |
29 | #include "msg/Message.h" | |
224ce89b | 30 | #include "include/health.h" |
7c673cae | 31 | |
7c673cae FG |
32 | #include "common/config.h" |
33 | ||
34 | #include "include/CompatSet.h" | |
35 | #include "include/ceph_features.h" | |
36 | #include "common/Formatter.h" | |
37 | #include "mds/mdstypes.h" | |
38 | ||
39 | /* | |
40 | ||
41 | boot --> standby, creating, or starting. | |
42 | ||
43 | ||
44 | dne ----> creating -----> active* | |
45 | ^ ^___________/ / ^ ^ | |
46 | | / / | | |
47 | destroying / / | | |
48 | ^ / / | | |
49 | | / / | | |
50 | stopped <---- stopping* <-/ / | | |
51 | \ / | | |
52 | ----- starting* ----/ | | |
53 | | | |
54 | failed | | |
55 | \ | | |
56 | \--> replay* --> reconnect* --> rejoin* | |
57 | ||
58 | * = can fail | |
59 | ||
60 | */ | |
61 | ||
62 | class CephContext; | |
224ce89b | 63 | class health_check_map_t; |
7c673cae | 64 | |
7c673cae FG |
65 | #define MDS_FEATURE_INCOMPAT_BASE CompatSet::Feature(1, "base v0.20") |
66 | #define MDS_FEATURE_INCOMPAT_CLIENTRANGES CompatSet::Feature(2, "client writeable ranges") | |
67 | #define MDS_FEATURE_INCOMPAT_FILELAYOUT CompatSet::Feature(3, "default file layouts on dirs") | |
68 | #define MDS_FEATURE_INCOMPAT_DIRINODE CompatSet::Feature(4, "dir inode in separate object") | |
69 | #define MDS_FEATURE_INCOMPAT_ENCODING CompatSet::Feature(5, "mds uses versioned encoding") | |
70 | #define MDS_FEATURE_INCOMPAT_OMAPDIRFRAG CompatSet::Feature(6, "dirfrag is stored in omap") | |
71 | #define MDS_FEATURE_INCOMPAT_INLINE CompatSet::Feature(7, "mds uses inline data") | |
72 | #define MDS_FEATURE_INCOMPAT_NOANCHOR CompatSet::Feature(8, "no anchor table") | |
b32b8144 | 73 | #define MDS_FEATURE_INCOMPAT_FILE_LAYOUT_V2 CompatSet::Feature(9, "file layout v2") |
7c673cae FG |
74 | |
75 | #define MDS_FS_NAME_DEFAULT "cephfs" | |
76 | ||
77 | class MDSMap { | |
78 | public: | |
79 | /* These states are the union of the set of possible states of an MDS daemon, | |
80 | * and the set of possible states of an MDS rank */ | |
81 | typedef enum { | |
82 | // States of an MDS daemon not currently holding a rank | |
83 | // ==================================================== | |
84 | STATE_NULL = CEPH_MDS_STATE_NULL, // null value for fns returning this type. | |
85 | STATE_BOOT = CEPH_MDS_STATE_BOOT, // up, boot announcement. destiny unknown. | |
86 | STATE_STANDBY = CEPH_MDS_STATE_STANDBY, // up, idle. waiting for assignment by monitor. | |
87 | STATE_STANDBY_REPLAY = CEPH_MDS_STATE_STANDBY_REPLAY, // up, replaying active node, ready to take over. | |
88 | ||
89 | // States of an MDS rank, and of any MDS daemon holding that rank | |
90 | // ============================================================== | |
91 | STATE_STOPPED = CEPH_MDS_STATE_STOPPED, // down, once existed, but no subtrees. empty log. may not be held by a daemon. | |
92 | ||
93 | STATE_CREATING = CEPH_MDS_STATE_CREATING, // up, creating MDS instance (new journal, idalloc..). | |
94 | STATE_STARTING = CEPH_MDS_STATE_STARTING, // up, starting prior stopped MDS instance. | |
95 | ||
96 | STATE_REPLAY = CEPH_MDS_STATE_REPLAY, // up, starting prior failed instance. scanning journal. | |
97 | STATE_RESOLVE = CEPH_MDS_STATE_RESOLVE, // up, disambiguating distributed operations (import, rename, etc.) | |
98 | STATE_RECONNECT = CEPH_MDS_STATE_RECONNECT, // up, reconnect to clients | |
99 | STATE_REJOIN = CEPH_MDS_STATE_REJOIN, // up, replayed journal, rejoining distributed cache | |
100 | STATE_CLIENTREPLAY = CEPH_MDS_STATE_CLIENTREPLAY, // up, active | |
101 | STATE_ACTIVE = CEPH_MDS_STATE_ACTIVE, // up, active | |
102 | STATE_STOPPING = CEPH_MDS_STATE_STOPPING, // up, exporting metadata (-> standby or out) | |
103 | STATE_DNE = CEPH_MDS_STATE_DNE, // down, rank does not exist | |
104 | ||
105 | // State which a daemon may send to MDSMonitor in its beacon | |
106 | // to indicate that offline repair is required. Daemon must stop | |
107 | // immediately after indicating this state. | |
108 | STATE_DAMAGED = CEPH_MDS_STATE_DAMAGED | |
109 | ||
110 | /* | |
111 | * In addition to explicit states, an MDS rank implicitly in state: | |
112 | * - STOPPED if it is not currently associated with an MDS daemon gid but it | |
113 | * is in MDSMap::stopped | |
114 | * - FAILED if it is not currently associated with an MDS daemon gid but it | |
115 | * is in MDSMap::failed | |
116 | * - DNE if it is not currently associated with an MDS daemon gid and it is | |
117 | * missing from both MDSMap::failed and MDSMap::stopped | |
118 | */ | |
119 | } DaemonState; | |
120 | ||
121 | struct mds_info_t { | |
122 | mds_gid_t global_id; | |
123 | std::string name; | |
124 | mds_rank_t rank; | |
125 | int32_t inc; | |
126 | MDSMap::DaemonState state; | |
127 | version_t state_seq; | |
128 | entity_addr_t addr; | |
129 | utime_t laggy_since; | |
130 | mds_rank_t standby_for_rank; | |
131 | std::string standby_for_name; | |
132 | fs_cluster_id_t standby_for_fscid; | |
133 | bool standby_replay; | |
134 | std::set<mds_rank_t> export_targets; | |
c07f9fc5 | 135 | uint64_t mds_features = 0; |
7c673cae FG |
136 | |
137 | mds_info_t() : global_id(MDS_GID_NONE), rank(MDS_RANK_NONE), inc(0), | |
138 | state(STATE_STANDBY), state_seq(0), | |
139 | standby_for_rank(MDS_RANK_NONE), | |
140 | standby_for_fscid(FS_CLUSTER_ID_NONE), | |
141 | standby_replay(false) | |
142 | { } | |
143 | ||
144 | bool laggy() const { return !(laggy_since == utime_t()); } | |
145 | void clear_laggy() { laggy_since = utime_t(); } | |
146 | ||
147 | entity_inst_t get_inst() const { return entity_inst_t(entity_name_t::MDS(rank), addr); } | |
148 | ||
149 | void encode(bufferlist& bl, uint64_t features) const { | |
150 | if ((features & CEPH_FEATURE_MDSENC) == 0 ) encode_unversioned(bl); | |
151 | else encode_versioned(bl, features); | |
152 | } | |
153 | void decode(bufferlist::iterator& p); | |
154 | void dump(Formatter *f) const; | |
155 | void print_summary(ostream &out) const; | |
d2e6a577 FG |
156 | |
157 | // The long form name for use in cluster log messages
158 | std::string human_name() const; | |
159 | ||
7c673cae FG |
160 | static void generate_test_instances(list<mds_info_t*>& ls); |
161 | private: | |
162 | void encode_versioned(bufferlist& bl, uint64_t features) const; | |
163 | void encode_unversioned(bufferlist& bl) const; | |
164 | }; | |
165 | ||
1adf2230 AA |
166 | static CompatSet get_compat_set_all(); |
167 | static CompatSet get_compat_set_default(); | |
168 | static CompatSet get_compat_set_base(); // pre v0.20 | |
7c673cae FG |
169 | |
170 | protected: | |
171 | // base map | |
1adf2230 AA |
172 | epoch_t epoch = 0; |
173 | bool enabled = false; | |
174 | std::string fs_name = MDS_FS_NAME_DEFAULT; | |
175 | uint32_t flags = CEPH_MDSMAP_DEFAULTS; // flags | |
176 | epoch_t last_failure = 0; // mds epoch of last failure | |
177 | epoch_t last_failure_osd_epoch = 0; // osd epoch of last failure; any mds entering replay needs | |
7c673cae | 178 | // at least this osdmap to ensure the blacklist propagates. |
1adf2230 AA |
179 | utime_t created; |
180 | utime_t modified; | |
7c673cae | 181 | |
1adf2230 AA |
182 | mds_rank_t tableserver = 0; // which MDS has snaptable |
183 | mds_rank_t root = 0; // which MDS has root directory | |
7c673cae | 184 | |
1adf2230 AA |
185 | __u32 session_timeout = 60; |
186 | __u32 session_autoclose = 300; | |
187 | uint64_t max_file_size = 1ULL<<40; /* 1TB */ | |
7c673cae | 188 | |
31f18b77 | 189 | std::vector<int64_t> data_pools; // file data pools available to clients (via an ioctl). first is the default. |
1adf2230 AA |
190 | int64_t cas_pool = -1; // where CAS objects go |
191 | int64_t metadata_pool = -1; // where fs metadata objects go | |
7c673cae FG |
192 | |
193 | /* | |
194 | * in: the set of logical mds #'s that define the cluster. this is the set | |
195 | * of mds's the metadata may be distributed over. | |
196 | * up: map from logical mds #'s to the addrs filling those roles. | |
197 | * failed: subset of @in that are failed. | |
198 | * stopped: set of nodes that have been initialized, but are not active. | |
199 | * | |
200 | * @up + @failed = @in. @in * @stopped = {}. | |
201 | */ | |
202 | ||
1adf2230 AA |
203 | mds_rank_t max_mds = 1; /* The maximum number of active MDSes. Also, the maximum rank. */ |
204 | mds_rank_t standby_count_wanted = -1; | |
7c673cae FG |
205 | string balancer; /* The name/version of the mantle balancer (i.e. the rados obj name) */ |
206 | ||
207 | std::set<mds_rank_t> in; // currently defined cluster | |
208 | ||
209 | // which ranks are failed, stopped, damaged (i.e. not held by a daemon) | |
210 | std::set<mds_rank_t> failed, stopped, damaged; | |
211 | std::map<mds_rank_t, mds_gid_t> up; // who is in those roles | |
212 | std::map<mds_gid_t, mds_info_t> mds_info; | |
213 | ||
1adf2230 AA |
214 | uint8_t ever_allowed_features = 0; //< bitmap of features the cluster has allowed |
215 | uint8_t explicitly_allowed_features = 0; //< bitmap of features explicitly enabled | |
7c673cae | 216 | |
1adf2230 | 217 | bool inline_data_enabled = false; |
7c673cae | 218 | |
1adf2230 | 219 | uint64_t cached_up_features = 0; |
7c673cae FG |
220 | |
221 | public: | |
222 | CompatSet compat; | |
223 | ||
224 | friend class MDSMonitor; | |
225 | friend class Filesystem; | |
226 | friend class FSMap; | |
227 | ||
228 | public: | |
7c673cae FG |
229 | bool get_inline_data_enabled() const { return inline_data_enabled; } |
230 | void set_inline_data_enabled(bool enabled) { inline_data_enabled = enabled; } | |
231 | ||
232 | utime_t get_session_timeout() const { | |
233 | return utime_t(session_timeout,0); | |
234 | } | |
b32b8144 FG |
235 | |
236 | utime_t get_session_autoclose() const { | |
237 | return utime_t(session_autoclose, 0); | |
238 | } | |
239 | ||
7c673cae FG |
240 | uint64_t get_max_filesize() const { return max_file_size; } |
241 | void set_max_filesize(uint64_t m) { max_file_size = m; } | |
242 | ||
243 | int get_flags() const { return flags; } | |
244 | bool test_flag(int f) const { return flags & f; } | |
245 | void set_flag(int f) { flags |= f; } | |
246 | void clear_flag(int f) { flags &= ~f; } | |
247 | ||
94b18763 | 248 | boost::string_view get_fs_name() const {return fs_name;} |
7c673cae FG |
249 | |
250 | void set_snaps_allowed() { | |
251 | set_flag(CEPH_MDSMAP_ALLOW_SNAPS); | |
252 | ever_allowed_features |= CEPH_MDSMAP_ALLOW_SNAPS; | |
253 | explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_SNAPS; | |
254 | } | |
255 | void clear_snaps_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_SNAPS); } | |
256 | bool allows_snaps() const { return test_flag(CEPH_MDSMAP_ALLOW_SNAPS); } | |
257 | ||
258 | void set_multimds_allowed() { | |
259 | set_flag(CEPH_MDSMAP_ALLOW_MULTIMDS); | |
260 | ever_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS; | |
261 | explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS; | |
262 | } | |
263 | void clear_multimds_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_MULTIMDS); } | |
264 | bool allows_multimds() const { return test_flag(CEPH_MDSMAP_ALLOW_MULTIMDS); } | |
265 | ||
266 | void set_dirfrags_allowed() { | |
267 | set_flag(CEPH_MDSMAP_ALLOW_DIRFRAGS); | |
268 | ever_allowed_features |= CEPH_MDSMAP_ALLOW_DIRFRAGS; | |
269 | explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_DIRFRAGS; | |
270 | } | |
271 | void clear_dirfrags_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_DIRFRAGS); } | |
272 | bool allows_dirfrags() const { return test_flag(CEPH_MDSMAP_ALLOW_DIRFRAGS); } | |
273 | ||
274 | epoch_t get_epoch() const { return epoch; } | |
275 | void inc_epoch() { epoch++; } | |
276 | ||
277 | bool get_enabled() const { return enabled; } | |
278 | ||
279 | const utime_t& get_created() const { return created; } | |
280 | void set_created(utime_t ct) { modified = created = ct; } | |
281 | const utime_t& get_modified() const { return modified; } | |
282 | void set_modified(utime_t mt) { modified = mt; } | |
283 | ||
284 | epoch_t get_last_failure() const { return last_failure; } | |
285 | epoch_t get_last_failure_osd_epoch() const { return last_failure_osd_epoch; } | |
286 | ||
287 | mds_rank_t get_max_mds() const { return max_mds; } | |
288 | void set_max_mds(mds_rank_t m) { max_mds = m; } | |
289 | ||
290 | mds_rank_t get_standby_count_wanted(mds_rank_t standby_daemon_count) const { | |
291 | assert(standby_daemon_count >= 0); | |
292 | std::set<mds_rank_t> s; | |
293 | get_standby_replay_mds_set(s); | |
294 | mds_rank_t standbys_avail = (mds_rank_t)s.size()+standby_daemon_count; | |
295 | mds_rank_t wanted = std::max(0, standby_count_wanted); | |
296 | return wanted > standbys_avail ? wanted - standbys_avail : 0; | |
297 | } | |
298 | void set_standby_count_wanted(mds_rank_t n) { standby_count_wanted = n; } | |
299 | bool check_health(mds_rank_t standby_daemon_count); | |
300 | ||
301 | const std::string get_balancer() const { return balancer; } | |
302 | void set_balancer(std::string val) { balancer.assign(val); } | |
303 | ||
304 | mds_rank_t get_tableserver() const { return tableserver; } | |
305 | mds_rank_t get_root() const { return root; } | |
306 | ||
31f18b77 | 307 | const std::vector<int64_t> &get_data_pools() const { return data_pools; } |
7c673cae FG |
308 | int64_t get_first_data_pool() const { return *data_pools.begin(); } |
309 | int64_t get_metadata_pool() const { return metadata_pool; } | |
310 | bool is_data_pool(int64_t poolid) const { | |
c07f9fc5 FG |
311 | auto p = std::find(data_pools.begin(), data_pools.end(), poolid); |
312 | if (p == data_pools.end()) | |
313 | return false; | |
314 | return true; | |
7c673cae FG |
315 | } |
316 | ||
317 | bool pool_in_use(int64_t poolid) const { | |
318 | return get_enabled() && (is_data_pool(poolid) || metadata_pool == poolid); | |
319 | } | |
320 | ||
321 | const std::map<mds_gid_t,mds_info_t>& get_mds_info() const { return mds_info; } | |
322 | const mds_info_t& get_mds_info_gid(mds_gid_t gid) const { | |
323 | return mds_info.at(gid); | |
324 | } | |
325 | const mds_info_t& get_mds_info(mds_rank_t m) const { | |
326 | assert(up.count(m) && mds_info.count(up.at(m))); | |
327 | return mds_info.at(up.at(m)); | |
328 | } | |
94b18763 | 329 | mds_gid_t find_mds_gid_by_name(boost::string_view s) const { |
7c673cae FG |
330 | for (std::map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin(); |
331 | p != mds_info.end(); | |
332 | ++p) { | |
333 | if (p->second.name == s) { | |
334 | return p->first; | |
335 | } | |
336 | } | |
337 | return MDS_GID_NONE; | |
338 | } | |
339 | ||
340 | // counts | |
341 | unsigned get_num_in_mds() const { | |
342 | return in.size(); | |
343 | } | |
344 | unsigned get_num_up_mds() const { | |
345 | return up.size(); | |
346 | } | |
31f18b77 FG |
347 | mds_rank_t get_last_in_mds() const { |
348 | auto p = in.rbegin(); | |
349 | return p == in.rend() ? MDS_RANK_NONE : *p; | |
350 | } | |
7c673cae FG |
351 | int get_num_failed_mds() const { |
352 | return failed.size(); | |
353 | } | |
354 | unsigned get_num_mds(int state) const { | |
355 | unsigned n = 0; | |
356 | for (std::map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin(); | |
357 | p != mds_info.end(); | |
358 | ++p) | |
359 | if (p->second.state == state) ++n; | |
360 | return n; | |
361 | } | |
362 | ||
363 | // data pools | |
364 | void add_data_pool(int64_t poolid) { | |
31f18b77 | 365 | data_pools.push_back(poolid); |
7c673cae FG |
366 | } |
367 | int remove_data_pool(int64_t poolid) { | |
31f18b77 | 368 | std::vector<int64_t>::iterator p = std::find(data_pools.begin(), data_pools.end(), poolid); |
7c673cae FG |
369 | if (p == data_pools.end()) |
370 | return -ENOENT; | |
371 | data_pools.erase(p); | |
372 | return 0; | |
373 | } | |
374 | ||
375 | // sets | |
376 | void get_mds_set(std::set<mds_rank_t>& s) const { | |
377 | s = in; | |
378 | } | |
379 | void get_up_mds_set(std::set<mds_rank_t>& s) const { | |
380 | for (std::map<mds_rank_t, mds_gid_t>::const_iterator p = up.begin(); | |
381 | p != up.end(); | |
382 | ++p) | |
383 | s.insert(p->first); | |
384 | } | |
385 | void get_active_mds_set(std::set<mds_rank_t>& s) const { | |
386 | get_mds_set(s, MDSMap::STATE_ACTIVE); | |
387 | } | |
388 | void get_standby_replay_mds_set(std::set<mds_rank_t>& s) const { | |
389 | get_mds_set(s, MDSMap::STATE_STANDBY_REPLAY); | |
390 | } | |
391 | void get_failed_mds_set(std::set<mds_rank_t>& s) const { | |
392 | s = failed; | |
393 | } | |
394 | ||
395 | // features | |
396 | uint64_t get_up_features() { | |
397 | if (!cached_up_features) { | |
398 | bool first = true; | |
399 | for (std::map<mds_rank_t, mds_gid_t>::const_iterator p = up.begin(); | |
400 | p != up.end(); | |
401 | ++p) { | |
402 | std::map<mds_gid_t, mds_info_t>::const_iterator q = | |
403 | mds_info.find(p->second); | |
404 | assert(q != mds_info.end()); | |
405 | if (first) { | |
406 | cached_up_features = q->second.mds_features; | |
407 | first = false; | |
408 | } else { | |
409 | cached_up_features &= q->second.mds_features; | |
410 | } | |
411 | } | |
412 | } | |
413 | return cached_up_features; | |
414 | } | |
415 | ||
416 | /** | |
417 | * Get MDS ranks which are in but not up. | |
418 | */ | |
419 | void get_down_mds_set(std::set<mds_rank_t> *s) const | |
420 | { | |
421 | assert(s != NULL); | |
422 | s->insert(failed.begin(), failed.end()); | |
423 | s->insert(damaged.begin(), damaged.end()); | |
424 | } | |
425 | ||
426 | int get_failed() const { | |
427 | if (!failed.empty()) return *failed.begin(); | |
428 | return -1; | |
429 | } | |
430 | void get_stopped_mds_set(std::set<mds_rank_t>& s) const { | |
431 | s = stopped; | |
432 | } | |
433 | void get_recovery_mds_set(std::set<mds_rank_t>& s) const { | |
434 | s = failed; | |
435 | for (const auto& p : damaged) | |
436 | s.insert(p); | |
437 | for (const auto& p : mds_info) | |
438 | if (p.second.state >= STATE_REPLAY && p.second.state <= STATE_STOPPING) | |
439 | s.insert(p.second.rank); | |
440 | } | |
441 | ||
1adf2230 | 442 | void get_mds_set_lower_bound(std::set<mds_rank_t>& s, DaemonState first) const { |
7c673cae FG |
443 | for (std::map<mds_gid_t, mds_info_t>::const_iterator p = mds_info.begin(); |
444 | p != mds_info.end(); | |
445 | ++p) | |
1adf2230 | 446 | if (p->second.state >= first && p->second.state <= STATE_STOPPING) |
7c673cae FG |
447 | s.insert(p->second.rank); |
448 | } | |
449 | void get_mds_set(std::set<mds_rank_t>& s, DaemonState state) const { | |
450 | for (std::map<mds_gid_t, mds_info_t>::const_iterator p = mds_info.begin(); | |
451 | p != mds_info.end(); | |
452 | ++p) | |
453 | if (p->second.state == state) | |
454 | s.insert(p->second.rank); | |
455 | } | |
456 | ||
457 | void get_health(list<pair<health_status_t,std::string> >& summary, | |
458 | list<pair<health_status_t,std::string> > *detail) const; | |
459 | ||
224ce89b WB |
460 | void get_health_checks(health_check_map_t *checks) const; |
461 | ||
7c673cae FG |
462 | typedef enum |
463 | { | |
464 | AVAILABLE = 0, | |
465 | TRANSIENT_UNAVAILABLE = 1, | |
466 | STUCK_UNAVAILABLE = 2 | |
467 | ||
468 | } availability_t; | |
469 | ||
470 | /** | |
471 | * Return indication of whether cluster is available. This is a | |
472 | * heuristic for clients to see if they should bother waiting to talk to | |
473 | * MDSs, or whether they should error out at startup/mount. | |
474 | * | |
475 | * A TRANSIENT_UNAVAILABLE result indicates that the cluster is in a | |
476 | * transition state like replaying, or is potentially about to fail over. | |
477 | * Clients should wait for an updated map before making a final decision | |
478 | * about whether the filesystem is mountable. | |
479 | * | |
480 | * A STUCK_UNAVAILABLE result indicates that we can't see a way that | |
481 | * the cluster is about to recover on its own, so it'll probably require | |
482 | * administrator intervention: clients should probably not bother trying | |
483 | * to mount. | |
484 | */ | |
485 | availability_t is_cluster_available() const; | |
486 | ||
487 | // mds states | |
488 | bool is_down(mds_rank_t m) const { return up.count(m) == 0; } | |
489 | bool is_up(mds_rank_t m) const { return up.count(m); } | |
490 | bool is_in(mds_rank_t m) const { return up.count(m) || failed.count(m); } | |
491 | bool is_out(mds_rank_t m) const { return !is_in(m); } | |
492 | ||
493 | bool is_failed(mds_rank_t m) const { return failed.count(m); } | |
494 | bool is_stopped(mds_rank_t m) const { return stopped.count(m); } | |
495 | ||
496 | bool is_dne(mds_rank_t m) const { return in.count(m) == 0; } | |
497 | bool is_dne_gid(mds_gid_t gid) const { return mds_info.count(gid) == 0; } | |
498 | ||
499 | /** | |
500 | * Get MDS rank state if the rank is up, else STATE_NULL | |
501 | */ | |
502 | DaemonState get_state(mds_rank_t m) const { | |
503 | std::map<mds_rank_t, mds_gid_t>::const_iterator u = up.find(m); | |
504 | if (u == up.end()) | |
505 | return STATE_NULL; | |
506 | return get_state_gid(u->second); | |
507 | } | |
508 | ||
509 | /** | |
510 | * Get MDS daemon status by GID | |
511 | */ | |
512 | DaemonState get_state_gid(mds_gid_t gid) const { | |
513 | std::map<mds_gid_t,mds_info_t>::const_iterator i = mds_info.find(gid); | |
514 | if (i == mds_info.end()) | |
515 | return STATE_NULL; | |
516 | return i->second.state; | |
517 | } | |
518 | ||
519 | const mds_info_t& get_info(const mds_rank_t m) const { | |
520 | return mds_info.at(up.at(m)); | |
521 | } | |
522 | const mds_info_t& get_info_gid(const mds_gid_t gid) const { | |
523 | return mds_info.at(gid); | |
524 | } | |
525 | ||
526 | bool is_boot(mds_rank_t m) const { return get_state(m) == STATE_BOOT; } | |
527 | bool is_creating(mds_rank_t m) const { return get_state(m) == STATE_CREATING; } | |
528 | bool is_starting(mds_rank_t m) const { return get_state(m) == STATE_STARTING; } | |
529 | bool is_replay(mds_rank_t m) const { return get_state(m) == STATE_REPLAY; } | |
530 | bool is_resolve(mds_rank_t m) const { return get_state(m) == STATE_RESOLVE; } | |
531 | bool is_reconnect(mds_rank_t m) const { return get_state(m) == STATE_RECONNECT; } | |
532 | bool is_rejoin(mds_rank_t m) const { return get_state(m) == STATE_REJOIN; } | |
533 | bool is_clientreplay(mds_rank_t m) const { return get_state(m) == STATE_CLIENTREPLAY; } | |
534 | bool is_active(mds_rank_t m) const { return get_state(m) == STATE_ACTIVE; } | |
535 | bool is_stopping(mds_rank_t m) const { return get_state(m) == STATE_STOPPING; } | |
536 | bool is_active_or_stopping(mds_rank_t m) const { | |
537 | return is_active(m) || is_stopping(m); | |
538 | } | |
539 | bool is_clientreplay_or_active_or_stopping(mds_rank_t m) const { | |
540 | return is_clientreplay(m) || is_active(m) || is_stopping(m); | |
541 | } | |
542 | ||
543 | bool is_followable(mds_rank_t m) const { | |
544 | return (is_resolve(m) || | |
545 | is_replay(m) || | |
546 | is_rejoin(m) || | |
547 | is_clientreplay(m) || | |
548 | is_active(m) || | |
549 | is_stopping(m)); | |
550 | } | |
551 | ||
552 | bool is_laggy_gid(mds_gid_t gid) const { | |
553 | if (!mds_info.count(gid)) | |
554 | return false; | |
555 | std::map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.find(gid); | |
556 | return p->second.laggy(); | |
557 | } | |
558 | ||
559 | // degraded = some recovery in process. fixes active membership and | |
560 | // recovery_set. | |
561 | bool is_degraded() const { | |
562 | if (!failed.empty() || !damaged.empty()) | |
563 | return true; | |
564 | for (std::map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin(); | |
565 | p != mds_info.end(); | |
566 | ++p) | |
567 | if (p->second.state >= STATE_REPLAY && p->second.state <= STATE_CLIENTREPLAY) | |
568 | return true; | |
569 | return false; | |
570 | } | |
571 | bool is_any_failed() const { | |
572 | return failed.size(); | |
573 | } | |
574 | bool is_resolving() const { | |
575 | return | |
576 | get_num_mds(STATE_RESOLVE) > 0 && | |
577 | get_num_mds(STATE_REPLAY) == 0 && | |
578 | failed.empty() && damaged.empty(); | |
579 | } | |
580 | bool is_rejoining() const { | |
581 | // nodes are rejoining cache state | |
582 | return | |
583 | get_num_mds(STATE_REJOIN) > 0 && | |
584 | get_num_mds(STATE_REPLAY) == 0 && | |
585 | get_num_mds(STATE_RECONNECT) == 0 && | |
586 | get_num_mds(STATE_RESOLVE) == 0 && | |
587 | failed.empty() && damaged.empty(); | |
588 | } | |
589 | bool is_stopped() const { | |
590 | return up.empty(); | |
591 | } | |
592 | ||
593 | /** | |
594 | * Get whether a rank is 'up', i.e. has | |
595 | * an MDS daemon's entity_inst_t associated | |
596 | * with it. | |
597 | */ | |
598 | bool have_inst(mds_rank_t m) const { | |
599 | return up.count(m); | |
600 | } | |
601 | ||
602 | /** | |
603 | * Get the MDS daemon entity_inst_t for a rank | |
604 | * known to be up. | |
605 | */ | |
606 | const entity_inst_t get_inst(mds_rank_t m) { | |
607 | assert(up.count(m)); | |
608 | return mds_info[up[m]].get_inst(); | |
609 | } | |
610 | const entity_addr_t get_addr(mds_rank_t m) { | |
611 | assert(up.count(m)); | |
612 | return mds_info[up[m]].addr; | |
613 | } | |
614 | ||
615 | /** | |
616 | * Get the MDS daemon entity_inst_t for a rank, | |
617 | * if it is up. | |
618 | * | |
619 | * @return true if the rank was up and the inst | |
620 | * was populated, else false. | |
621 | */ | |
622 | bool get_inst(mds_rank_t m, entity_inst_t& inst) { | |
623 | if (up.count(m)) { | |
624 | inst = get_inst(m); | |
625 | return true; | |
626 | } | |
627 | return false; | |
628 | } | |
629 | ||
630 | mds_rank_t get_rank_gid(mds_gid_t gid) const { | |
631 | if (mds_info.count(gid)) { | |
632 | return mds_info.at(gid).rank; | |
633 | } else { | |
634 | return MDS_RANK_NONE; | |
635 | } | |
636 | } | |
637 | ||
638 | int get_inc_gid(mds_gid_t gid) const { | |
639 | auto mds_info_entry = mds_info.find(gid); | |
640 | if (mds_info_entry != mds_info.end()) | |
641 | return mds_info_entry->second.inc; | |
642 | return -1; | |
643 | } | |
644 | void encode(bufferlist& bl, uint64_t features) const; | |
645 | void decode(bufferlist::iterator& p); | |
646 | void decode(bufferlist& bl) { | |
647 | bufferlist::iterator p = bl.begin(); | |
648 | decode(p); | |
649 | } | |
3efd9988 | 650 | void sanitize(std::function<bool(int64_t pool)> pool_exists); |
7c673cae FG |
651 | |
652 | void print(ostream& out) const; | |
653 | void print_summary(Formatter *f, ostream *out) const; | |
654 | ||
655 | void dump(Formatter *f) const; | |
656 | static void generate_test_instances(list<MDSMap*>& ls); | |
657 | ||
658 | static bool state_transition_valid(DaemonState prev, DaemonState next); | |
659 | }; | |
660 | WRITE_CLASS_ENCODER_FEATURES(MDSMap::mds_info_t) | |
661 | WRITE_CLASS_ENCODER_FEATURES(MDSMap) | |
662 | ||
663 | inline ostream& operator<<(ostream &out, const MDSMap &m) { | |
664 | m.print_summary(NULL, &out); | |
665 | return out; | |
666 | } | |
667 | ||
668 | #endif |