]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/MDSMap.h
bump version to 18.2.4-pve3
[ceph.git] / ceph / src / mds / MDSMap.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
7c673cae
FG
15#ifndef CEPH_MDSMAP_H
16#define CEPH_MDSMAP_H
17
94b18763
FG
18#include <algorithm>
19#include <map>
20#include <set>
21#include <string>
11fdf7f2 22#include <string_view>
94b18763 23
7c673cae
FG
24#include <errno.h>
25
26#include "include/types.h"
9f95a23c 27#include "include/ceph_features.h"
224ce89b 28#include "include/health.h"
9f95a23c
TL
29#include "include/CompatSet.h"
30#include "include/common_fwd.h"
7c673cae 31
9f95a23c
TL
32#include "common/Clock.h"
33#include "common/Formatter.h"
34#include "common/ceph_releases.h"
7c673cae
FG
35#include "common/config.h"
36
7c673cae 37#include "mds/mdstypes.h"
f67539c2 38#include "mds/cephfs_features.h"
7c673cae 39
20effc67
TL
40static inline const auto MDS_FEATURE_INCOMPAT_BASE = CompatSet::Feature(1, "base v0.20");
41static inline const auto MDS_FEATURE_INCOMPAT_CLIENTRANGES = CompatSet::Feature(2, "client writeable ranges");
42static inline const auto MDS_FEATURE_INCOMPAT_FILELAYOUT = CompatSet::Feature(3, "default file layouts on dirs");
43static inline const auto MDS_FEATURE_INCOMPAT_DIRINODE = CompatSet::Feature(4, "dir inode in separate object");
44static inline const auto MDS_FEATURE_INCOMPAT_ENCODING = CompatSet::Feature(5, "mds uses versioned encoding");
45static inline const auto MDS_FEATURE_INCOMPAT_OMAPDIRFRAG = CompatSet::Feature(6, "dirfrag is stored in omap");
46static inline const auto MDS_FEATURE_INCOMPAT_INLINE = CompatSet::Feature(7, "mds uses inline data");
47static inline const auto MDS_FEATURE_INCOMPAT_NOANCHOR = CompatSet::Feature(8, "no anchor table");
48static inline const auto MDS_FEATURE_INCOMPAT_FILE_LAYOUT_V2 = CompatSet::Feature(9, "file layout v2");
49static inline const auto MDS_FEATURE_INCOMPAT_SNAPREALM_V2 = CompatSet::Feature(10, "snaprealm v2");
7c673cae
FG
50
51#define MDS_FS_NAME_DEFAULT "cephfs"
52
f38dd50b
TL
53/*
54 * Maximum size of xattrs the MDS can handle per inode by default. This
55 * includes the attribute name and 4+4 bytes for the key/value sizes.
56 */
57#define MDS_MAX_XATTR_SIZE (1<<16) /* 64K */
58
9f95a23c
TL
59class health_check_map_t;
60
7c673cae
FG
61class MDSMap {
62public:
63 /* These states are the union of the set of possible states of an MDS daemon,
11fdf7f2 64 * and the set of possible states of an MDS rank. See
2a845540
TL
65 * doc/cephfs/mds-states.rst for state descriptions and a visual state diagram, and
66 * doc/cephfs/mds-state-diagram.dot to update the diagram.
11fdf7f2 67 */
7c673cae
FG
68 typedef enum {
69 // States of an MDS daemon not currently holding a rank
70 // ====================================================
71 STATE_NULL = CEPH_MDS_STATE_NULL, // null value for fns returning this type.
72 STATE_BOOT = CEPH_MDS_STATE_BOOT, // up, boot announcement. destiny unknown.
73 STATE_STANDBY = CEPH_MDS_STATE_STANDBY, // up, idle. waiting for assignment by monitor.
7c673cae
FG
74
75 // States of an MDS rank, and of any MDS daemon holding that rank
76 // ==============================================================
2a845540 77 STATE_STANDBY_REPLAY = CEPH_MDS_STATE_STANDBY_REPLAY, // up, replaying active node, ready to take over and not serving clients. Note: Up to two MDS hold the rank being replayed.
7c673cae
FG
78 STATE_STOPPED = CEPH_MDS_STATE_STOPPED, // down, once existed, but no subtrees. empty log. may not be held by a daemon.
79
80 STATE_CREATING = CEPH_MDS_STATE_CREATING, // up, creating MDS instance (new journal, idalloc..).
81 STATE_STARTING = CEPH_MDS_STATE_STARTING, // up, starting prior stopped MDS instance.
82
83 STATE_REPLAY = CEPH_MDS_STATE_REPLAY, // up, starting prior failed instance. scanning journal.
84 STATE_RESOLVE = CEPH_MDS_STATE_RESOLVE, // up, disambiguating distributed operations (import, rename, etc.)
85 STATE_RECONNECT = CEPH_MDS_STATE_RECONNECT, // up, reconnect to clients
86 STATE_REJOIN = CEPH_MDS_STATE_REJOIN, // up, replayed journal, rejoining distributed cache
87 STATE_CLIENTREPLAY = CEPH_MDS_STATE_CLIENTREPLAY, // up, active
88 STATE_ACTIVE = CEPH_MDS_STATE_ACTIVE, // up, active
89 STATE_STOPPING = CEPH_MDS_STATE_STOPPING, // up, exporting metadata (-> standby or out)
90 STATE_DNE = CEPH_MDS_STATE_DNE, // down, rank does not exist
91
92 // State which a daemon may send to MDSMonitor in its beacon
93 // to indicate that offline repair is required. Daemon must stop
94 // immediately after indicating this state.
95 STATE_DAMAGED = CEPH_MDS_STATE_DAMAGED
96
97 /*
98 * In addition to explicit states, an MDS rank implicitly in state:
99 * - STOPPED if it is not currently associated with an MDS daemon gid but it
100 * is in MDSMap::stopped
101 * - FAILED if it is not currently associated with an MDS daemon gid but it
102 * is in MDSMap::failed
103 * - DNE if it is not currently associated with an MDS daemon gid and it is
104 * missing from both MDSMap::failed and MDSMap::stopped
105 */
106 } DaemonState;
107
9f95a23c
TL
// Result type of is_cluster_available().
enum availability_t {
  AVAILABLE = 0,
  TRANSIENT_UNAVAILABLE = 1,
  STUCK_UNAVAILABLE = 2
};
115
116 struct mds_info_t {
522d829b
TL
117 enum mds_flags : uint64_t {
118 FROZEN = 1 << 0,
119 };
120
11fdf7f2 121 mds_info_t() = default;
7c673cae
FG
122
123 bool laggy() const { return !(laggy_since == utime_t()); }
124 void clear_laggy() { laggy_since = utime_t(); }
125
11fdf7f2
TL
126 bool is_degraded() const {
127 return STATE_REPLAY <= state && state <= STATE_CLIENTREPLAY;
128 }
129
130 void freeze() { flags |= mds_flags::FROZEN; }
131 void unfreeze() { flags &= ~mds_flags::FROZEN; }
132 bool is_frozen() const { return flags&mds_flags::FROZEN; }
133
134 const entity_addrvec_t& get_addrs() const {
135 return addrs;
136 }
7c673cae 137
f67539c2 138 void encode(ceph::buffer::list& bl, uint64_t features) const {
7c673cae
FG
139 if ((features & CEPH_FEATURE_MDSENC) == 0 ) encode_unversioned(bl);
140 else encode_versioned(bl, features);
141 }
f67539c2
TL
142 void decode(ceph::buffer::list::const_iterator& p);
143 void dump(ceph::Formatter *f) const;
9f95a23c 144 void dump(std::ostream&) const;
d2e6a577
FG
145
146 // The long form name for use in cluster log messages`
147 std::string human_name() const;
148
9f95a23c
TL
149 static void generate_test_instances(std::list<mds_info_t*>& ls);
150
151 mds_gid_t global_id = MDS_GID_NONE;
152 std::string name;
153 mds_rank_t rank = MDS_RANK_NONE;
154 int32_t inc = 0;
155 MDSMap::DaemonState state = STATE_STANDBY;
156 version_t state_seq = 0;
157 entity_addrvec_t addrs;
158 utime_t laggy_since;
159 std::set<mds_rank_t> export_targets;
160 fs_cluster_id_t join_fscid = FS_CLUSTER_ID_NONE;
161 uint64_t mds_features = 0;
162 uint64_t flags = 0;
522d829b 163 CompatSet compat;
7c673cae 164 private:
f67539c2
TL
165 void encode_versioned(ceph::buffer::list& bl, uint64_t features) const;
166 void encode_unversioned(ceph::buffer::list& bl) const;
7c673cae
FG
167 };
168
7c673cae
FG
169 friend class MDSMonitor;
170 friend class Filesystem;
171 friend class FSMap;
172
9f95a23c
TL
173 static CompatSet get_compat_set_all();
174 static CompatSet get_compat_set_default();
175 static CompatSet get_compat_set_base(); // pre v0.20
a4b75251 176 static CompatSet get_compat_set_v16_2_4(); // pre-v16.2.5 CompatSet in MDS beacon
9f95a23c 177
522d829b
TL
178 static MDSMap create_null_mdsmap() {
179 MDSMap null_map;
180 /* Use the largest epoch so it's always bigger than whatever the MDS has. */
181 null_map.epoch = std::numeric_limits<decltype(epoch)>::max();
182 return null_map;
183 }
184
7c673cae
FG
185 bool get_inline_data_enabled() const { return inline_data_enabled; }
186 void set_inline_data_enabled(bool enabled) { inline_data_enabled = enabled; }
187
188 utime_t get_session_timeout() const {
189 return utime_t(session_timeout,0);
190 }
f64942e4
AA
191 void set_session_timeout(uint32_t t) {
192 session_timeout = t;
193 }
b32b8144
FG
194
195 utime_t get_session_autoclose() const {
196 return utime_t(session_autoclose, 0);
197 }
f64942e4
AA
198 void set_session_autoclose(uint32_t t) {
199 session_autoclose = t;
200 }
b32b8144 201
7c673cae
FG
202 uint64_t get_max_filesize() const { return max_file_size; }
203 void set_max_filesize(uint64_t m) { max_file_size = m; }
11fdf7f2 204
f38dd50b
TL
205 uint64_t get_max_xattr_size() const { return max_xattr_size; }
206 void set_max_xattr_size(uint64_t m) { max_xattr_size = m; }
207
f67539c2
TL
208 void set_min_compat_client(ceph_release_t version);
209
210 void add_required_client_feature(size_t bit) {
211 required_client_features.insert(bit);
212 }
213 void remove_required_client_feature(size_t bit) {
214 required_client_features.erase(bit);
215 }
216 const auto& get_required_client_features() const {
217 return required_client_features;
218 }
7c673cae
FG
219
220 int get_flags() const { return flags; }
221 bool test_flag(int f) const { return flags & f; }
222 void set_flag(int f) { flags |= f; }
223 void clear_flag(int f) { flags &= ~f; }
224
11fdf7f2 225 std::string_view get_fs_name() const {return fs_name;}
20effc67 226 void set_fs_name(std::string new_fs_name) { fs_name = std::move(new_fs_name); }
7c673cae
FG
227
228 void set_snaps_allowed() {
229 set_flag(CEPH_MDSMAP_ALLOW_SNAPS);
230 ever_allowed_features |= CEPH_MDSMAP_ALLOW_SNAPS;
231 explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_SNAPS;
232 }
233 void clear_snaps_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_SNAPS); }
234 bool allows_snaps() const { return test_flag(CEPH_MDSMAP_ALLOW_SNAPS); }
11fdf7f2 235 bool was_snaps_ever_allowed() const { return ever_allowed_features & CEPH_MDSMAP_ALLOW_SNAPS; }
7c673cae 236
11fdf7f2
TL
237 void set_standby_replay_allowed() {
238 set_flag(CEPH_MDSMAP_ALLOW_STANDBY_REPLAY);
239 ever_allowed_features |= CEPH_MDSMAP_ALLOW_STANDBY_REPLAY;
240 explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_STANDBY_REPLAY;
7c673cae 241 }
11fdf7f2
TL
242 void clear_standby_replay_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_STANDBY_REPLAY); }
243 bool allows_standby_replay() const { return test_flag(CEPH_MDSMAP_ALLOW_STANDBY_REPLAY); }
244 bool was_standby_replay_ever_allowed() const { return ever_allowed_features & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY; }
7c673cae 245
f38dd50b
TL
246 void set_balance_automate() {
247 set_flag(CEPH_MDSMAP_BALANCE_AUTOMATE);
248 ever_allowed_features |= CEPH_MDSMAP_BALANCE_AUTOMATE;
249 explicitly_allowed_features |= CEPH_MDSMAP_BALANCE_AUTOMATE;
250 }
251 void clear_balance_automate() { clear_flag(CEPH_MDSMAP_BALANCE_AUTOMATE); }
252 bool allows_balance_automate() const { return test_flag(CEPH_MDSMAP_BALANCE_AUTOMATE); }
253 bool was_balance_automate_ever_allowed() const { return ever_allowed_features & CEPH_MDSMAP_BALANCE_AUTOMATE; }
254
11fdf7f2
TL
255 void set_multimds_snaps_allowed() {
256 set_flag(CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS);
257 ever_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS;
258 explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS;
7c673cae 259 }
11fdf7f2
TL
260 void clear_multimds_snaps_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS); }
261 bool allows_multimds_snaps() const { return test_flag(CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS); }
20effc67 262 bool joinable() const { return !test_flag(CEPH_MDSMAP_NOT_JOINABLE); }
7c673cae
FG
263
264 epoch_t get_epoch() const { return epoch; }
265 void inc_epoch() { epoch++; }
266
267 bool get_enabled() const { return enabled; }
268
269 const utime_t& get_created() const { return created; }
270 void set_created(utime_t ct) { modified = created = ct; }
271 const utime_t& get_modified() const { return modified; }
272 void set_modified(utime_t mt) { modified = mt; }
273
274 epoch_t get_last_failure() const { return last_failure; }
275 epoch_t get_last_failure_osd_epoch() const { return last_failure_osd_epoch; }
276
277 mds_rank_t get_max_mds() const { return max_mds; }
278 void set_max_mds(mds_rank_t m) { max_mds = m; }
11fdf7f2
TL
279 void set_old_max_mds() { old_max_mds = max_mds; }
280 mds_rank_t get_old_max_mds() const { return old_max_mds; }
7c673cae
FG
281
282 mds_rank_t get_standby_count_wanted(mds_rank_t standby_daemon_count) const {
11fdf7f2 283 ceph_assert(standby_daemon_count >= 0);
7c673cae
FG
284 std::set<mds_rank_t> s;
285 get_standby_replay_mds_set(s);
286 mds_rank_t standbys_avail = (mds_rank_t)s.size()+standby_daemon_count;
287 mds_rank_t wanted = std::max(0, standby_count_wanted);
288 return wanted > standbys_avail ? wanted - standbys_avail : 0;
289 }
290 void set_standby_count_wanted(mds_rank_t n) { standby_count_wanted = n; }
291 bool check_health(mds_rank_t standby_daemon_count);
292
293 const std::string get_balancer() const { return balancer; }
294 void set_balancer(std::string val) { balancer.assign(val); }
295
1e59de90
TL
296 const std::bitset<MAX_MDS>& get_bal_rank_mask_bitset() const;
297 void set_bal_rank_mask(std::string val);
298 unsigned get_num_mdss_in_rank_mask_bitset() const { return num_mdss_in_rank_mask_bitset; }
299 void update_num_mdss_in_rank_mask_bitset();
300 int hex2bin(std::string hex_string, std::string &bin_string, unsigned int max_bits, std::ostream& ss) const;
301
// Selector passed to check_special_bal_rank_mask().
enum bal_rank_mask_type_t {
  BAL_RANK_MASK_TYPE_ANY = 0,
  BAL_RANK_MASK_TYPE_ALL = 1,
  BAL_RANK_MASK_TYPE_NONE = 2,
};
308
309 const bool check_special_bal_rank_mask(std::string val, bal_rank_mask_type_t type) const;
310
7c673cae
FG
311 mds_rank_t get_tableserver() const { return tableserver; }
312 mds_rank_t get_root() const { return root; }
313
31f18b77 314 const std::vector<int64_t> &get_data_pools() const { return data_pools; }
7c673cae
FG
315 int64_t get_first_data_pool() const { return *data_pools.begin(); }
316 int64_t get_metadata_pool() const { return metadata_pool; }
317 bool is_data_pool(int64_t poolid) const {
c07f9fc5
FG
318 auto p = std::find(data_pools.begin(), data_pools.end(), poolid);
319 if (p == data_pools.end())
320 return false;
321 return true;
7c673cae
FG
322 }
323
324 bool pool_in_use(int64_t poolid) const {
325 return get_enabled() && (is_data_pool(poolid) || metadata_pool == poolid);
326 }
327
9f95a23c
TL
328 const auto& get_mds_info() const { return mds_info; }
329 const auto& get_mds_info_gid(mds_gid_t gid) const {
7c673cae
FG
330 return mds_info.at(gid);
331 }
332 const mds_info_t& get_mds_info(mds_rank_t m) const {
11fdf7f2 333 ceph_assert(up.count(m) && mds_info.count(up.at(m)));
7c673cae
FG
334 return mds_info.at(up.at(m));
335 }
9f95a23c 336 mds_gid_t find_mds_gid_by_name(std::string_view s) const;
7c673cae
FG
337
338 // counts
339 unsigned get_num_in_mds() const {
340 return in.size();
341 }
342 unsigned get_num_up_mds() const {
343 return up.size();
344 }
31f18b77
FG
345 mds_rank_t get_last_in_mds() const {
346 auto p = in.rbegin();
347 return p == in.rend() ? MDS_RANK_NONE : *p;
348 }
7c673cae
FG
349 int get_num_failed_mds() const {
350 return failed.size();
351 }
f67539c2
TL
352 unsigned get_num_standby_replay_mds() const {
353 unsigned num = 0;
354 for (auto& i : mds_info) {
355 if (i.second.state == MDSMap::STATE_STANDBY_REPLAY) {
356 ++num;
357 }
358 }
359 return num;
360 }
9f95a23c 361 unsigned get_num_mds(int state) const;
7c673cae
FG
362 // data pools
363 void add_data_pool(int64_t poolid) {
31f18b77 364 data_pools.push_back(poolid);
7c673cae
FG
365 }
366 int remove_data_pool(int64_t poolid) {
31f18b77 367 std::vector<int64_t>::iterator p = std::find(data_pools.begin(), data_pools.end(), poolid);
7c673cae 368 if (p == data_pools.end())
f67539c2 369 return -CEPHFS_ENOENT;
7c673cae
FG
370 data_pools.erase(p);
371 return 0;
372 }
373
374 // sets
375 void get_mds_set(std::set<mds_rank_t>& s) const {
376 s = in;
377 }
9f95a23c 378 void get_up_mds_set(std::set<mds_rank_t>& s) const;
7c673cae
FG
379 void get_active_mds_set(std::set<mds_rank_t>& s) const {
380 get_mds_set(s, MDSMap::STATE_ACTIVE);
381 }
382 void get_standby_replay_mds_set(std::set<mds_rank_t>& s) const {
383 get_mds_set(s, MDSMap::STATE_STANDBY_REPLAY);
384 }
385 void get_failed_mds_set(std::set<mds_rank_t>& s) const {
386 s = failed;
387 }
f67539c2
TL
388 void get_damaged_mds_set(std::set<mds_rank_t>& s) const {
389 s = damaged;
390 }
7c673cae
FG
391
392 // features
9f95a23c 393 uint64_t get_up_features();
7c673cae
FG
394
395 /**
396 * Get MDS ranks which are in but not up.
397 */
398 void get_down_mds_set(std::set<mds_rank_t> *s) const
399 {
11fdf7f2 400 ceph_assert(s != NULL);
7c673cae
FG
401 s->insert(failed.begin(), failed.end());
402 s->insert(damaged.begin(), damaged.end());
403 }
404
405 int get_failed() const {
406 if (!failed.empty()) return *failed.begin();
407 return -1;
408 }
409 void get_stopped_mds_set(std::set<mds_rank_t>& s) const {
410 s = stopped;
411 }
9f95a23c
TL
412 void get_recovery_mds_set(std::set<mds_rank_t>& s) const;
413
414 void get_mds_set_lower_bound(std::set<mds_rank_t>& s, DaemonState first) const;
415 void get_mds_set(std::set<mds_rank_t>& s, DaemonState state) const;
7c673cae 416
f67539c2
TL
417 void get_health(std::list<std::pair<health_status_t,std::string> >& summary,
418 std::list<std::pair<health_status_t,std::string> > *detail) const;
7c673cae 419
224ce89b
WB
420 void get_health_checks(health_check_map_t *checks) const;
421
7c673cae
FG
422 /**
423 * Return indication of whether cluster is available. This is a
424 * heuristic for clients to see if they should bother waiting to talk to
425 * MDSs, or whether they should error out at startup/mount.
426 *
427 * A TRANSIENT_UNAVAILABLE result indicates that the cluster is in a
428 * transition state like replaying, or is potentially about to fail over.
429 * Clients should wait for an updated map before making a final decision
430 * about whether the filesystem is mountable.
431 *
432 * A STUCK_UNAVAILABLE result indicates that we can't see a way that
433 * the cluster is about to recover on its own, so it'll probably require
11fdf7f2 434 * administrator intervention: clients should probably not bother trying
7c673cae
FG
435 * to mount.
436 */
437 availability_t is_cluster_available() const;
438
11fdf7f2
TL
439 /**
440 * Return whether this MDSMap is suitable for resizing based on the state
441 * of the ranks.
442 */
443 bool is_resizeable() const {
444 return !is_degraded() &&
445 get_num_mds(CEPH_MDS_STATE_CREATING) == 0 &&
446 get_num_mds(CEPH_MDS_STATE_STARTING) == 0 &&
447 get_num_mds(CEPH_MDS_STATE_STOPPING) == 0;
448 }
449
7c673cae
FG
450 // mds states
451 bool is_down(mds_rank_t m) const { return up.count(m) == 0; }
452 bool is_up(mds_rank_t m) const { return up.count(m); }
453 bool is_in(mds_rank_t m) const { return up.count(m) || failed.count(m); }
454 bool is_out(mds_rank_t m) const { return !is_in(m); }
455
456 bool is_failed(mds_rank_t m) const { return failed.count(m); }
457 bool is_stopped(mds_rank_t m) const { return stopped.count(m); }
458
459 bool is_dne(mds_rank_t m) const { return in.count(m) == 0; }
460 bool is_dne_gid(mds_gid_t gid) const { return mds_info.count(gid) == 0; }
461
462 /**
9f95a23c 463 * Get MDS daemon status by GID
7c673cae 464 */
9f95a23c
TL
465 auto get_state_gid(mds_gid_t gid) const {
466 auto it = mds_info.find(gid);
467 if (it == mds_info.end())
7c673cae 468 return STATE_NULL;
9f95a23c 469 return it->second.state;
7c673cae
FG
470 }
471
472 /**
9f95a23c 473 * Get MDS rank state if the rank is up, else STATE_NULL
7c673cae 474 */
9f95a23c
TL
475 auto get_state(mds_rank_t m) const {
476 auto it = up.find(m);
477 if (it == up.end())
7c673cae 478 return STATE_NULL;
9f95a23c 479 return get_state_gid(it->second);
7c673cae
FG
480 }
481
a4b75251
TL
482 auto get_gid(mds_rank_t r) const {
483 return up.at(r);
484 }
9f95a23c 485 const auto& get_info(mds_rank_t m) const {
7c673cae
FG
486 return mds_info.at(up.at(m));
487 }
9f95a23c 488 const auto& get_info_gid(mds_gid_t gid) const {
7c673cae
FG
489 return mds_info.at(gid);
490 }
491
492 bool is_boot(mds_rank_t m) const { return get_state(m) == STATE_BOOT; }
20effc67
TL
493 bool is_bootstrapping(mds_rank_t m) const {
494 return is_creating(m) || is_starting(m) || is_replay(m);
495 }
7c673cae
FG
496 bool is_creating(mds_rank_t m) const { return get_state(m) == STATE_CREATING; }
497 bool is_starting(mds_rank_t m) const { return get_state(m) == STATE_STARTING; }
498 bool is_replay(mds_rank_t m) const { return get_state(m) == STATE_REPLAY; }
499 bool is_resolve(mds_rank_t m) const { return get_state(m) == STATE_RESOLVE; }
500 bool is_reconnect(mds_rank_t m) const { return get_state(m) == STATE_RECONNECT; }
501 bool is_rejoin(mds_rank_t m) const { return get_state(m) == STATE_REJOIN; }
502 bool is_clientreplay(mds_rank_t m) const { return get_state(m) == STATE_CLIENTREPLAY; }
503 bool is_active(mds_rank_t m) const { return get_state(m) == STATE_ACTIVE; }
504 bool is_stopping(mds_rank_t m) const { return get_state(m) == STATE_STOPPING; }
505 bool is_active_or_stopping(mds_rank_t m) const {
506 return is_active(m) || is_stopping(m);
507 }
508 bool is_clientreplay_or_active_or_stopping(mds_rank_t m) const {
509 return is_clientreplay(m) || is_active(m) || is_stopping(m);
510 }
511
9f95a23c 512 mds_gid_t get_standby_replay(mds_rank_t r) const;
11fdf7f2
TL
513 bool has_standby_replay(mds_rank_t r) const {
514 return get_standby_replay(r) != MDS_GID_NONE;
515 }
516
517 bool is_followable(mds_rank_t r) const {
518 if (auto it1 = up.find(r); it1 != up.end()) {
519 if (auto it2 = mds_info.find(it1->second); it2 != mds_info.end()) {
520 auto& info = it2->second;
521 if (!info.is_degraded() && !has_standby_replay(r)) {
522 return true;
523 }
524 }
525 }
526 return false;
7c673cae
FG
527 }
528
529 bool is_laggy_gid(mds_gid_t gid) const {
11fdf7f2
TL
530 auto it = mds_info.find(gid);
531 return it == mds_info.end() ? false : it->second.laggy();
7c673cae
FG
532 }
533
534 // degraded = some recovery in process. fixes active membership and
535 // recovery_set.
9f95a23c 536 bool is_degraded() const;
7c673cae 537 bool is_any_failed() const {
f67539c2
TL
538 return !failed.empty();
539 }
540 bool is_any_damaged() const {
541 return !damaged.empty();
7c673cae
FG
542 }
543 bool is_resolving() const {
544 return
545 get_num_mds(STATE_RESOLVE) > 0 &&
546 get_num_mds(STATE_REPLAY) == 0 &&
547 failed.empty() && damaged.empty();
548 }
549 bool is_rejoining() const {
550 // nodes are rejoining cache state
551 return
552 get_num_mds(STATE_REJOIN) > 0 &&
553 get_num_mds(STATE_REPLAY) == 0 &&
554 get_num_mds(STATE_RECONNECT) == 0 &&
555 get_num_mds(STATE_RESOLVE) == 0 &&
556 failed.empty() && damaged.empty();
557 }
558 bool is_stopped() const {
559 return up.empty();
560 }
561
562 /**
563 * Get whether a rank is 'up', i.e. has
564 * an MDS daemon's entity_inst_t associated
565 * with it.
566 */
567 bool have_inst(mds_rank_t m) const {
568 return up.count(m);
569 }
570
571 /**
572 * Get the MDS daemon entity_inst_t for a rank
573 * known to be up.
574 */
11fdf7f2
TL
575 entity_addrvec_t get_addrs(mds_rank_t m) const {
576 return mds_info.at(up.at(m)).get_addrs();
7c673cae
FG
577 }
578
7c673cae
FG
579 mds_rank_t get_rank_gid(mds_gid_t gid) const {
580 if (mds_info.count(gid)) {
581 return mds_info.at(gid).rank;
582 } else {
583 return MDS_RANK_NONE;
584 }
585 }
586
f64942e4
AA
587 /**
588 * Get MDS rank incarnation (converted to mds_gid_t) if the rank is up, else MDS_GID_NONE
589 */
590 mds_gid_t get_incarnation(mds_rank_t m) const {
a4b75251
TL
591 auto it = up.find(m);
592 if (it == up.end())
f64942e4 593 return MDS_GID_NONE;
a4b75251 594 return (mds_gid_t)get_inc_gid(it->second);
f64942e4
AA
595 }
596
7c673cae
FG
597 int get_inc_gid(mds_gid_t gid) const {
598 auto mds_info_entry = mds_info.find(gid);
599 if (mds_info_entry != mds_info.end())
600 return mds_info_entry->second.inc;
601 return -1;
602 }
f67539c2
TL
603 void encode(ceph::buffer::list& bl, uint64_t features) const;
604 void decode(ceph::buffer::list::const_iterator& p);
605 void decode(const ceph::buffer::list& bl) {
11fdf7f2 606 auto p = bl.cbegin();
7c673cae
FG
607 decode(p);
608 }
11fdf7f2 609 void sanitize(const std::function<bool(int64_t pool)>& pool_exists);
7c673cae 610
f67539c2
TL
611 void print(std::ostream& out) const;
612 void print_summary(ceph::Formatter *f, std::ostream *out) const;
20effc67 613 void print_flags(std::ostream& out) const;
7c673cae 614
f67539c2 615 void dump(ceph::Formatter *f) const;
20effc67 616 void dump_flags_state(Formatter *f) const;
9f95a23c 617 static void generate_test_instances(std::list<MDSMap*>& ls);
7c673cae
FG
618
619 static bool state_transition_valid(DaemonState prev, DaemonState next);
9f95a23c
TL
620
621 CompatSet compat;
622protected:
623 // base map
624 epoch_t epoch = 0;
625 bool enabled = false;
626 std::string fs_name = MDS_FS_NAME_DEFAULT;
627 uint32_t flags = CEPH_MDSMAP_DEFAULTS; // flags
628 epoch_t last_failure = 0; // mds epoch of last failure
629 epoch_t last_failure_osd_epoch = 0; // osd epoch of last failure; any mds entering replay needs
f67539c2 630 // at least this osdmap to ensure the blocklist propagates.
9f95a23c
TL
631 utime_t created;
632 utime_t modified;
633
634 mds_rank_t tableserver = 0; // which MDS has snaptable
635 mds_rank_t root = 0; // which MDS has root directory
636
637 __u32 session_timeout = 60;
638 __u32 session_autoclose = 300;
639 uint64_t max_file_size = 1ULL<<40; /* 1TB */
640
f38dd50b
TL
641 uint64_t max_xattr_size = MDS_MAX_XATTR_SIZE;
642
f67539c2 643 feature_bitset_t required_client_features;
9f95a23c
TL
644
645 std::vector<int64_t> data_pools; // file data pools available to clients (via an ioctl). first is the default.
646 int64_t cas_pool = -1; // where CAS objects go
647 int64_t metadata_pool = -1; // where fs metadata objects go
648
649 /*
650 * in: the set of logical mds #'s that define the cluster. this is the set
651 * of mds's the metadata may be distributed over.
652 * up: map from logical mds #'s to the addrs filling those roles.
653 * failed: subset of @in that are failed.
654 * stopped: set of nodes that have been initialized, but are not active.
655 *
656 * @up + @failed = @in. @in * @stopped = {}.
657 */
658
659 mds_rank_t max_mds = 1; /* The maximum number of active MDSes. Also, the maximum rank. */
660 mds_rank_t old_max_mds = 0; /* Value to restore when MDS cluster is marked up */
661 mds_rank_t standby_count_wanted = -1;
f67539c2 662 std::string balancer; /* The name/version of the mantle balancer (i.e. the rados obj name) */
9f95a23c 663
1e59de90
TL
664 std::string bal_rank_mask = "-1";
665 std::bitset<MAX_MDS> bal_rank_mask_bitset;
666 uint32_t num_mdss_in_rank_mask_bitset;
667
9f95a23c
TL
668 std::set<mds_rank_t> in; // currently defined cluster
669
670 // which ranks are failed, stopped, damaged (i.e. not held by a daemon)
671 std::set<mds_rank_t> failed, stopped, damaged;
672 std::map<mds_rank_t, mds_gid_t> up; // who is in those roles
673 std::map<mds_gid_t, mds_info_t> mds_info;
674
675 uint8_t ever_allowed_features = 0; //< bitmap of features the cluster has allowed
676 uint8_t explicitly_allowed_features = 0; //< bitmap of features explicitly enabled
677
678 bool inline_data_enabled = false;
679
680 uint64_t cached_up_features = 0;
20effc67
TL
681private:
682 inline static const std::map<int, std::string> flag_display = {
683 {CEPH_MDSMAP_NOT_JOINABLE, "joinable"}, //inverse for user display
684 {CEPH_MDSMAP_ALLOW_SNAPS, "allow_snaps"},
685 {CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS, "allow_multimds_snaps"},
1e59de90 686 {CEPH_MDSMAP_ALLOW_STANDBY_REPLAY, "allow_standby_replay"},
f38dd50b
TL
687 {CEPH_MDSMAP_REFUSE_CLIENT_SESSION, "refuse_client_session"},
688 {CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS, "refuse_standby_for_another_fs"},
689 {CEPH_MDSMAP_BALANCE_AUTOMATE, "balance_automate"}
20effc67 690 };
7c673cae
FG
691};
692WRITE_CLASS_ENCODER_FEATURES(MDSMap::mds_info_t)
693WRITE_CLASS_ENCODER_FEATURES(MDSMap)
694
f67539c2 695inline std::ostream& operator<<(std::ostream &out, const MDSMap &m) {
7c673cae
FG
696 m.print_summary(NULL, &out);
697 return out;
698}
699
9f95a23c
TL
700inline std::ostream& operator<<(std::ostream& o, const MDSMap::mds_info_t& info) {
701 info.dump(o);
702 return o;
703}
7c673cae 704#endif