]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
7c673cae FG |
15 | #ifndef CEPH_MDSMAP_H |
16 | #define CEPH_MDSMAP_H | |
17 | ||
94b18763 FG |
18 | #include <algorithm> |
19 | #include <map> | |
20 | #include <set> | |
21 | #include <string> | |
11fdf7f2 | 22 | #include <string_view> |
94b18763 | 23 | |
7c673cae FG |
24 | #include <errno.h> |
25 | ||
26 | #include "include/types.h" | |
9f95a23c | 27 | #include "include/ceph_features.h" |
224ce89b | 28 | #include "include/health.h" |
9f95a23c TL |
29 | #include "include/CompatSet.h" |
30 | #include "include/common_fwd.h" | |
7c673cae | 31 | |
9f95a23c TL |
32 | #include "common/Clock.h" |
33 | #include "common/Formatter.h" | |
34 | #include "common/ceph_releases.h" | |
7c673cae FG |
35 | #include "common/config.h" |
36 | ||
7c673cae | 37 | #include "mds/mdstypes.h" |
f67539c2 | 38 | #include "mds/cephfs_features.h" |
7c673cae | 39 | |
20effc67 TL |
40 | static inline const auto MDS_FEATURE_INCOMPAT_BASE = CompatSet::Feature(1, "base v0.20"); |
41 | static inline const auto MDS_FEATURE_INCOMPAT_CLIENTRANGES = CompatSet::Feature(2, "client writeable ranges"); | |
42 | static inline const auto MDS_FEATURE_INCOMPAT_FILELAYOUT = CompatSet::Feature(3, "default file layouts on dirs"); | |
43 | static inline const auto MDS_FEATURE_INCOMPAT_DIRINODE = CompatSet::Feature(4, "dir inode in separate object"); | |
44 | static inline const auto MDS_FEATURE_INCOMPAT_ENCODING = CompatSet::Feature(5, "mds uses versioned encoding"); | |
45 | static inline const auto MDS_FEATURE_INCOMPAT_OMAPDIRFRAG = CompatSet::Feature(6, "dirfrag is stored in omap"); | |
46 | static inline const auto MDS_FEATURE_INCOMPAT_INLINE = CompatSet::Feature(7, "mds uses inline data"); | |
47 | static inline const auto MDS_FEATURE_INCOMPAT_NOANCHOR = CompatSet::Feature(8, "no anchor table"); | |
48 | static inline const auto MDS_FEATURE_INCOMPAT_FILE_LAYOUT_V2 = CompatSet::Feature(9, "file layout v2"); | |
49 | static inline const auto MDS_FEATURE_INCOMPAT_SNAPREALM_V2 = CompatSet::Feature(10, "snaprealm v2"); | |
7c673cae FG |
50 | |
51 | #define MDS_FS_NAME_DEFAULT "cephfs" | |
52 | ||
f38dd50b TL |
53 | /* |
54 | * Maximum size of xattrs the MDS can handle per inode by default. This | |
55 | * includes the attribute name and 4+4 bytes for the key/value sizes. | |
56 | */ | |
57 | #define MDS_MAX_XATTR_SIZE (1<<16) /* 64K */ | |
58 | ||
9f95a23c TL |
59 | class health_check_map_t; |
60 | ||
7c673cae FG |
61 | class MDSMap { |
62 | public: | |
63 | /* These states are the union of the set of possible states of an MDS daemon, | |
11fdf7f2 | 64 | * and the set of possible states of an MDS rank. See |
2a845540 TL |
65 | * doc/cephfs/mds-states.rst for state descriptions and a visual state diagram, and |
66 | * doc/cephfs/mds-state-diagram.dot to update the diagram. | |
11fdf7f2 | 67 | */ |
7c673cae FG |
68 | typedef enum { |
69 | // States of an MDS daemon not currently holding a rank | |
70 | // ==================================================== | |
71 | STATE_NULL = CEPH_MDS_STATE_NULL, // null value for fns returning this type. | |
72 | STATE_BOOT = CEPH_MDS_STATE_BOOT, // up, boot announcement. destiny unknown. | |
73 | STATE_STANDBY = CEPH_MDS_STATE_STANDBY, // up, idle. waiting for assignment by monitor. | |
7c673cae FG |
74 | |
75 | // States of an MDS rank, and of any MDS daemon holding that rank | |
76 | // ============================================================== | |
2a845540 | 77 | STATE_STANDBY_REPLAY = CEPH_MDS_STATE_STANDBY_REPLAY, // up, replaying active node, ready to take over and not serving clients. Note: Up to two MDS hold the rank being replayed. |
7c673cae FG |
78 | STATE_STOPPED = CEPH_MDS_STATE_STOPPED, // down, once existed, but no subtrees. empty log. may not be held by a daemon. |
79 | ||
80 | STATE_CREATING = CEPH_MDS_STATE_CREATING, // up, creating MDS instance (new journal, idalloc..). | |
81 | STATE_STARTING = CEPH_MDS_STATE_STARTING, // up, starting prior stopped MDS instance. | |
82 | ||
83 | STATE_REPLAY = CEPH_MDS_STATE_REPLAY, // up, starting prior failed instance. scanning journal. | |
84 | STATE_RESOLVE = CEPH_MDS_STATE_RESOLVE, // up, disambiguating distributed operations (import, rename, etc.) | |
85 | STATE_RECONNECT = CEPH_MDS_STATE_RECONNECT, // up, reconnect to clients | |
86 | STATE_REJOIN = CEPH_MDS_STATE_REJOIN, // up, replayed journal, rejoining distributed cache | |
87 | STATE_CLIENTREPLAY = CEPH_MDS_STATE_CLIENTREPLAY, // up, active | |
88 | STATE_ACTIVE = CEPH_MDS_STATE_ACTIVE, // up, active | |
89 | STATE_STOPPING = CEPH_MDS_STATE_STOPPING, // up, exporting metadata (-> standby or out) | |
90 | STATE_DNE = CEPH_MDS_STATE_DNE, // down, rank does not exist | |
91 | ||
92 | // State which a daemon may send to MDSMonitor in its beacon | |
93 | // to indicate that offline repair is required. Daemon must stop | |
94 | // immediately after indicating this state. | |
95 | STATE_DAMAGED = CEPH_MDS_STATE_DAMAGED | |
96 | ||
97 | /* | |
98 | * In addition to explicit states, an MDS rank is implicitly in state: | |
99 | * - STOPPED if it is not currently associated with an MDS daemon gid but it | |
100 | * is in MDSMap::stopped | |
101 | * - FAILED if it is not currently associated with an MDS daemon gid but it | |
102 | * is in MDSMap::failed | |
103 | * - DNE if it is not currently associated with an MDS daemon gid and it is | |
104 | * missing from both MDSMap::failed and MDSMap::stopped | |
105 | */ | |
106 | } DaemonState; | |
107 | ||
9f95a23c TL |
108 | typedef enum |
109 | { | |
110 | AVAILABLE = 0, | |
111 | TRANSIENT_UNAVAILABLE = 1, | |
112 | STUCK_UNAVAILABLE = 2 | |
7c673cae | 113 | |
9f95a23c TL |
114 | } availability_t; |
115 | ||
116 | struct mds_info_t { | |
522d829b TL |
117 | enum mds_flags : uint64_t { |
118 | FROZEN = 1 << 0, | |
119 | }; | |
120 | ||
11fdf7f2 | 121 | mds_info_t() = default; |
7c673cae FG |
122 | |
123 | bool laggy() const { return !(laggy_since == utime_t()); } | |
124 | void clear_laggy() { laggy_since = utime_t(); } | |
125 | ||
11fdf7f2 TL |
126 | bool is_degraded() const { |
127 | return STATE_REPLAY <= state && state <= STATE_CLIENTREPLAY; | |
128 | } | |
129 | ||
130 | void freeze() { flags |= mds_flags::FROZEN; } | |
131 | void unfreeze() { flags &= ~mds_flags::FROZEN; } | |
132 | bool is_frozen() const { return flags&mds_flags::FROZEN; } | |
133 | ||
134 | const entity_addrvec_t& get_addrs() const { | |
135 | return addrs; | |
136 | } | |
7c673cae | 137 | |
f67539c2 | 138 | void encode(ceph::buffer::list& bl, uint64_t features) const { |
7c673cae FG |
139 | if ((features & CEPH_FEATURE_MDSENC) == 0 ) encode_unversioned(bl); |
140 | else encode_versioned(bl, features); | |
141 | } | |
f67539c2 TL |
142 | void decode(ceph::buffer::list::const_iterator& p); |
143 | void dump(ceph::Formatter *f) const; | |
9f95a23c | 144 | void dump(std::ostream&) const; |
d2e6a577 FG |
145 | |
146 | // The long form name for use in cluster log messages |
147 | std::string human_name() const; | |
148 | ||
9f95a23c TL |
149 | static void generate_test_instances(std::list<mds_info_t*>& ls); |
150 | ||
151 | mds_gid_t global_id = MDS_GID_NONE; | |
152 | std::string name; | |
153 | mds_rank_t rank = MDS_RANK_NONE; | |
154 | int32_t inc = 0; | |
155 | MDSMap::DaemonState state = STATE_STANDBY; | |
156 | version_t state_seq = 0; | |
157 | entity_addrvec_t addrs; | |
158 | utime_t laggy_since; | |
159 | std::set<mds_rank_t> export_targets; | |
160 | fs_cluster_id_t join_fscid = FS_CLUSTER_ID_NONE; | |
161 | uint64_t mds_features = 0; | |
162 | uint64_t flags = 0; | |
522d829b | 163 | CompatSet compat; |
7c673cae | 164 | private: |
f67539c2 TL |
165 | void encode_versioned(ceph::buffer::list& bl, uint64_t features) const; |
166 | void encode_unversioned(ceph::buffer::list& bl) const; | |
7c673cae FG |
167 | }; |
168 | ||
7c673cae FG |
169 | friend class MDSMonitor; |
170 | friend class Filesystem; | |
171 | friend class FSMap; | |
172 | ||
9f95a23c TL |
173 | static CompatSet get_compat_set_all(); |
174 | static CompatSet get_compat_set_default(); | |
175 | static CompatSet get_compat_set_base(); // pre v0.20 | |
a4b75251 | 176 | static CompatSet get_compat_set_v16_2_4(); // pre-v16.2.5 CompatSet in MDS beacon |
9f95a23c | 177 | |
522d829b TL |
178 | static MDSMap create_null_mdsmap() { |
179 | MDSMap null_map; | |
180 | /* Use the largest epoch so it's always bigger than whatever the MDS has. */ | |
181 | null_map.epoch = std::numeric_limits<decltype(epoch)>::max(); | |
182 | return null_map; | |
183 | } | |
184 | ||
7c673cae FG |
185 | bool get_inline_data_enabled() const { return inline_data_enabled; } |
186 | void set_inline_data_enabled(bool enabled) { inline_data_enabled = enabled; } | |
187 | ||
188 | utime_t get_session_timeout() const { | |
189 | return utime_t(session_timeout,0); | |
190 | } | |
f64942e4 AA |
191 | void set_session_timeout(uint32_t t) { |
192 | session_timeout = t; | |
193 | } | |
b32b8144 FG |
194 | |
195 | utime_t get_session_autoclose() const { | |
196 | return utime_t(session_autoclose, 0); | |
197 | } | |
f64942e4 AA |
198 | void set_session_autoclose(uint32_t t) { |
199 | session_autoclose = t; | |
200 | } | |
b32b8144 | 201 | |
7c673cae FG |
202 | uint64_t get_max_filesize() const { return max_file_size; } |
203 | void set_max_filesize(uint64_t m) { max_file_size = m; } | |
11fdf7f2 | 204 | |
f38dd50b TL |
205 | uint64_t get_max_xattr_size() const { return max_xattr_size; } |
206 | void set_max_xattr_size(uint64_t m) { max_xattr_size = m; } | |
207 | ||
f67539c2 TL |
208 | void set_min_compat_client(ceph_release_t version); |
209 | ||
210 | void add_required_client_feature(size_t bit) { | |
211 | required_client_features.insert(bit); | |
212 | } | |
213 | void remove_required_client_feature(size_t bit) { | |
214 | required_client_features.erase(bit); | |
215 | } | |
216 | const auto& get_required_client_features() const { | |
217 | return required_client_features; | |
218 | } | |
7c673cae FG |
219 | |
220 | int get_flags() const { return flags; } | |
221 | bool test_flag(int f) const { return flags & f; } | |
222 | void set_flag(int f) { flags |= f; } | |
223 | void clear_flag(int f) { flags &= ~f; } | |
224 | ||
11fdf7f2 | 225 | std::string_view get_fs_name() const {return fs_name;} |
20effc67 | 226 | void set_fs_name(std::string new_fs_name) { fs_name = std::move(new_fs_name); } |
7c673cae FG |
227 | |
228 | void set_snaps_allowed() { | |
229 | set_flag(CEPH_MDSMAP_ALLOW_SNAPS); | |
230 | ever_allowed_features |= CEPH_MDSMAP_ALLOW_SNAPS; | |
231 | explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_SNAPS; | |
232 | } | |
233 | void clear_snaps_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_SNAPS); } | |
234 | bool allows_snaps() const { return test_flag(CEPH_MDSMAP_ALLOW_SNAPS); } | |
11fdf7f2 | 235 | bool was_snaps_ever_allowed() const { return ever_allowed_features & CEPH_MDSMAP_ALLOW_SNAPS; } |
7c673cae | 236 | |
11fdf7f2 TL |
237 | void set_standby_replay_allowed() { |
238 | set_flag(CEPH_MDSMAP_ALLOW_STANDBY_REPLAY); | |
239 | ever_allowed_features |= CEPH_MDSMAP_ALLOW_STANDBY_REPLAY; | |
240 | explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_STANDBY_REPLAY; | |
7c673cae | 241 | } |
11fdf7f2 TL |
242 | void clear_standby_replay_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_STANDBY_REPLAY); } |
243 | bool allows_standby_replay() const { return test_flag(CEPH_MDSMAP_ALLOW_STANDBY_REPLAY); } | |
244 | bool was_standby_replay_ever_allowed() const { return ever_allowed_features & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY; } | |
7c673cae | 245 | |
f38dd50b TL |
246 | void set_balance_automate() { |
247 | set_flag(CEPH_MDSMAP_BALANCE_AUTOMATE); | |
248 | ever_allowed_features |= CEPH_MDSMAP_BALANCE_AUTOMATE; | |
249 | explicitly_allowed_features |= CEPH_MDSMAP_BALANCE_AUTOMATE; | |
250 | } | |
251 | void clear_balance_automate() { clear_flag(CEPH_MDSMAP_BALANCE_AUTOMATE); } | |
252 | bool allows_balance_automate() const { return test_flag(CEPH_MDSMAP_BALANCE_AUTOMATE); } | |
253 | bool was_balance_automate_ever_allowed() const { return ever_allowed_features & CEPH_MDSMAP_BALANCE_AUTOMATE; } | |
254 | ||
11fdf7f2 TL |
255 | void set_multimds_snaps_allowed() { |
256 | set_flag(CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS); | |
257 | ever_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS; | |
258 | explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS; | |
7c673cae | 259 | } |
11fdf7f2 TL |
260 | void clear_multimds_snaps_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS); } |
261 | bool allows_multimds_snaps() const { return test_flag(CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS); } | |
20effc67 | 262 | bool joinable() const { return !test_flag(CEPH_MDSMAP_NOT_JOINABLE); } |
7c673cae FG |
263 | |
264 | epoch_t get_epoch() const { return epoch; } | |
265 | void inc_epoch() { epoch++; } | |
266 | ||
267 | bool get_enabled() const { return enabled; } | |
268 | ||
269 | const utime_t& get_created() const { return created; } | |
270 | void set_created(utime_t ct) { modified = created = ct; } | |
271 | const utime_t& get_modified() const { return modified; } | |
272 | void set_modified(utime_t mt) { modified = mt; } | |
273 | ||
274 | epoch_t get_last_failure() const { return last_failure; } | |
275 | epoch_t get_last_failure_osd_epoch() const { return last_failure_osd_epoch; } | |
276 | ||
277 | mds_rank_t get_max_mds() const { return max_mds; } | |
278 | void set_max_mds(mds_rank_t m) { max_mds = m; } | |
11fdf7f2 TL |
279 | void set_old_max_mds() { old_max_mds = max_mds; } |
280 | mds_rank_t get_old_max_mds() const { return old_max_mds; } | |
7c673cae FG |
281 | |
282 | mds_rank_t get_standby_count_wanted(mds_rank_t standby_daemon_count) const { | |
11fdf7f2 | 283 | ceph_assert(standby_daemon_count >= 0); |
7c673cae FG |
284 | std::set<mds_rank_t> s; |
285 | get_standby_replay_mds_set(s); | |
286 | mds_rank_t standbys_avail = (mds_rank_t)s.size()+standby_daemon_count; | |
287 | mds_rank_t wanted = std::max(0, standby_count_wanted); | |
288 | return wanted > standbys_avail ? wanted - standbys_avail : 0; | |
289 | } | |
290 | void set_standby_count_wanted(mds_rank_t n) { standby_count_wanted = n; } | |
291 | bool check_health(mds_rank_t standby_daemon_count); | |
292 | ||
293 | const std::string get_balancer() const { return balancer; } | |
294 | void set_balancer(std::string val) { balancer.assign(val); } | |
295 | ||
1e59de90 TL |
296 | const std::bitset<MAX_MDS>& get_bal_rank_mask_bitset() const; |
297 | void set_bal_rank_mask(std::string val); | |
298 | unsigned get_num_mdss_in_rank_mask_bitset() const { return num_mdss_in_rank_mask_bitset; } | |
299 | void update_num_mdss_in_rank_mask_bitset(); | |
300 | int hex2bin(std::string hex_string, std::string &bin_string, unsigned int max_bits, std::ostream& ss) const; | |
301 | ||
302 | typedef enum | |
303 | { | |
304 | BAL_RANK_MASK_TYPE_ANY = 0, | |
305 | BAL_RANK_MASK_TYPE_ALL = 1, | |
306 | BAL_RANK_MASK_TYPE_NONE = 2, | |
307 | } bal_rank_mask_type_t; | |
308 | ||
309 | const bool check_special_bal_rank_mask(std::string val, bal_rank_mask_type_t type) const; | |
310 | ||
7c673cae FG |
311 | mds_rank_t get_tableserver() const { return tableserver; } |
312 | mds_rank_t get_root() const { return root; } | |
313 | ||
31f18b77 | 314 | const std::vector<int64_t> &get_data_pools() const { return data_pools; } |
7c673cae FG |
315 | int64_t get_first_data_pool() const { return *data_pools.begin(); } |
316 | int64_t get_metadata_pool() const { return metadata_pool; } | |
317 | bool is_data_pool(int64_t poolid) const { | |
c07f9fc5 FG |
318 | auto p = std::find(data_pools.begin(), data_pools.end(), poolid); |
319 | if (p == data_pools.end()) | |
320 | return false; | |
321 | return true; | |
7c673cae FG |
322 | } |
323 | ||
324 | bool pool_in_use(int64_t poolid) const { | |
325 | return get_enabled() && (is_data_pool(poolid) || metadata_pool == poolid); | |
326 | } | |
327 | ||
9f95a23c TL |
328 | const auto& get_mds_info() const { return mds_info; } |
329 | const auto& get_mds_info_gid(mds_gid_t gid) const { | |
7c673cae FG |
330 | return mds_info.at(gid); |
331 | } | |
332 | const mds_info_t& get_mds_info(mds_rank_t m) const { | |
11fdf7f2 | 333 | ceph_assert(up.count(m) && mds_info.count(up.at(m))); |
7c673cae FG |
334 | return mds_info.at(up.at(m)); |
335 | } | |
9f95a23c | 336 | mds_gid_t find_mds_gid_by_name(std::string_view s) const; |
7c673cae FG |
337 | |
338 | // counts | |
339 | unsigned get_num_in_mds() const { | |
340 | return in.size(); | |
341 | } | |
342 | unsigned get_num_up_mds() const { | |
343 | return up.size(); | |
344 | } | |
31f18b77 FG |
345 | mds_rank_t get_last_in_mds() const { |
346 | auto p = in.rbegin(); | |
347 | return p == in.rend() ? MDS_RANK_NONE : *p; | |
348 | } | |
7c673cae FG |
349 | int get_num_failed_mds() const { |
350 | return failed.size(); | |
351 | } | |
f67539c2 TL |
352 | unsigned get_num_standby_replay_mds() const { |
353 | unsigned num = 0; | |
354 | for (auto& i : mds_info) { | |
355 | if (i.second.state == MDSMap::STATE_STANDBY_REPLAY) { | |
356 | ++num; | |
357 | } | |
358 | } | |
359 | return num; | |
360 | } | |
9f95a23c | 361 | unsigned get_num_mds(int state) const; |
7c673cae FG |
362 | // data pools |
363 | void add_data_pool(int64_t poolid) { | |
31f18b77 | 364 | data_pools.push_back(poolid); |
7c673cae FG |
365 | } |
366 | int remove_data_pool(int64_t poolid) { | |
31f18b77 | 367 | std::vector<int64_t>::iterator p = std::find(data_pools.begin(), data_pools.end(), poolid); |
7c673cae | 368 | if (p == data_pools.end()) |
f67539c2 | 369 | return -CEPHFS_ENOENT; |
7c673cae FG |
370 | data_pools.erase(p); |
371 | return 0; | |
372 | } | |
373 | ||
374 | // sets | |
375 | void get_mds_set(std::set<mds_rank_t>& s) const { | |
376 | s = in; | |
377 | } | |
9f95a23c | 378 | void get_up_mds_set(std::set<mds_rank_t>& s) const; |
7c673cae FG |
379 | void get_active_mds_set(std::set<mds_rank_t>& s) const { |
380 | get_mds_set(s, MDSMap::STATE_ACTIVE); | |
381 | } | |
382 | void get_standby_replay_mds_set(std::set<mds_rank_t>& s) const { | |
383 | get_mds_set(s, MDSMap::STATE_STANDBY_REPLAY); | |
384 | } | |
385 | void get_failed_mds_set(std::set<mds_rank_t>& s) const { | |
386 | s = failed; | |
387 | } | |
f67539c2 TL |
388 | void get_damaged_mds_set(std::set<mds_rank_t>& s) const { |
389 | s = damaged; | |
390 | } | |
7c673cae FG |
391 | |
392 | // features | |
9f95a23c | 393 | uint64_t get_up_features(); |
7c673cae FG |
394 | |
395 | /** | |
396 | * Get MDS ranks which are in but not up. | |
397 | */ | |
398 | void get_down_mds_set(std::set<mds_rank_t> *s) const | |
399 | { | |
11fdf7f2 | 400 | ceph_assert(s != NULL); |
7c673cae FG |
401 | s->insert(failed.begin(), failed.end()); |
402 | s->insert(damaged.begin(), damaged.end()); | |
403 | } | |
404 | ||
405 | int get_failed() const { | |
406 | if (!failed.empty()) return *failed.begin(); | |
407 | return -1; | |
408 | } | |
409 | void get_stopped_mds_set(std::set<mds_rank_t>& s) const { | |
410 | s = stopped; | |
411 | } | |
9f95a23c TL |
412 | void get_recovery_mds_set(std::set<mds_rank_t>& s) const; |
413 | ||
414 | void get_mds_set_lower_bound(std::set<mds_rank_t>& s, DaemonState first) const; | |
415 | void get_mds_set(std::set<mds_rank_t>& s, DaemonState state) const; | |
7c673cae | 416 | |
f67539c2 TL |
417 | void get_health(std::list<std::pair<health_status_t,std::string> >& summary, |
418 | std::list<std::pair<health_status_t,std::string> > *detail) const; | |
7c673cae | 419 | |
224ce89b WB |
420 | void get_health_checks(health_check_map_t *checks) const; |
421 | ||
7c673cae FG |
422 | /** |
423 | * Return indication of whether cluster is available. This is a | |
424 | * heuristic for clients to see if they should bother waiting to talk to | |
425 | * MDSs, or whether they should error out at startup/mount. | |
426 | * | |
427 | * A TRANSIENT_UNAVAILABLE result indicates that the cluster is in a | |
428 | * transition state like replaying, or is potentially about the fail over. | |
429 | * Clients should wait for an updated map before making a final decision | |
430 | * about whether the filesystem is mountable. | |
431 | * | |
432 | * A STUCK_UNAVAILABLE result indicates that we can't see a way that | |
433 | * the cluster is about to recover on its own, so it'll probably require | |
11fdf7f2 | 434 | * administrator intervention: clients should probably not bother trying |
7c673cae FG |
435 | * to mount. |
436 | */ | |
437 | availability_t is_cluster_available() const; | |
438 | ||
11fdf7f2 TL |
439 | /** |
440 | * Return whether this MDSMap is suitable for resizing based on the state | |
441 | * of the ranks. | |
442 | */ | |
443 | bool is_resizeable() const { | |
444 | return !is_degraded() && | |
445 | get_num_mds(CEPH_MDS_STATE_CREATING) == 0 && | |
446 | get_num_mds(CEPH_MDS_STATE_STARTING) == 0 && | |
447 | get_num_mds(CEPH_MDS_STATE_STOPPING) == 0; | |
448 | } | |
449 | ||
7c673cae FG |
450 | // mds states |
451 | bool is_down(mds_rank_t m) const { return up.count(m) == 0; } | |
452 | bool is_up(mds_rank_t m) const { return up.count(m); } | |
453 | bool is_in(mds_rank_t m) const { return up.count(m) || failed.count(m); } | |
454 | bool is_out(mds_rank_t m) const { return !is_in(m); } | |
455 | ||
456 | bool is_failed(mds_rank_t m) const { return failed.count(m); } | |
457 | bool is_stopped(mds_rank_t m) const { return stopped.count(m); } | |
458 | ||
459 | bool is_dne(mds_rank_t m) const { return in.count(m) == 0; } | |
460 | bool is_dne_gid(mds_gid_t gid) const { return mds_info.count(gid) == 0; } | |
461 | ||
462 | /** | |
9f95a23c | 463 | * Get MDS daemon status by GID |
7c673cae | 464 | */ |
9f95a23c TL |
465 | auto get_state_gid(mds_gid_t gid) const { |
466 | auto it = mds_info.find(gid); | |
467 | if (it == mds_info.end()) | |
7c673cae | 468 | return STATE_NULL; |
9f95a23c | 469 | return it->second.state; |
7c673cae FG |
470 | } |
471 | ||
472 | /** | |
9f95a23c | 473 | * Get MDS rank state if the rank is up, else STATE_NULL |
7c673cae | 474 | */ |
9f95a23c TL |
475 | auto get_state(mds_rank_t m) const { |
476 | auto it = up.find(m); | |
477 | if (it == up.end()) | |
7c673cae | 478 | return STATE_NULL; |
9f95a23c | 479 | return get_state_gid(it->second); |
7c673cae FG |
480 | } |
481 | ||
a4b75251 TL |
482 | auto get_gid(mds_rank_t r) const { |
483 | return up.at(r); | |
484 | } | |
9f95a23c | 485 | const auto& get_info(mds_rank_t m) const { |
7c673cae FG |
486 | return mds_info.at(up.at(m)); |
487 | } | |
9f95a23c | 488 | const auto& get_info_gid(mds_gid_t gid) const { |
7c673cae FG |
489 | return mds_info.at(gid); |
490 | } | |
491 | ||
492 | bool is_boot(mds_rank_t m) const { return get_state(m) == STATE_BOOT; } | |
20effc67 TL |
493 | bool is_bootstrapping(mds_rank_t m) const { |
494 | return is_creating(m) || is_starting(m) || is_replay(m); | |
495 | } | |
7c673cae FG |
496 | bool is_creating(mds_rank_t m) const { return get_state(m) == STATE_CREATING; } |
497 | bool is_starting(mds_rank_t m) const { return get_state(m) == STATE_STARTING; } | |
498 | bool is_replay(mds_rank_t m) const { return get_state(m) == STATE_REPLAY; } | |
499 | bool is_resolve(mds_rank_t m) const { return get_state(m) == STATE_RESOLVE; } | |
500 | bool is_reconnect(mds_rank_t m) const { return get_state(m) == STATE_RECONNECT; } | |
501 | bool is_rejoin(mds_rank_t m) const { return get_state(m) == STATE_REJOIN; } | |
502 | bool is_clientreplay(mds_rank_t m) const { return get_state(m) == STATE_CLIENTREPLAY; } | |
503 | bool is_active(mds_rank_t m) const { return get_state(m) == STATE_ACTIVE; } | |
504 | bool is_stopping(mds_rank_t m) const { return get_state(m) == STATE_STOPPING; } | |
505 | bool is_active_or_stopping(mds_rank_t m) const { | |
506 | return is_active(m) || is_stopping(m); | |
507 | } | |
508 | bool is_clientreplay_or_active_or_stopping(mds_rank_t m) const { | |
509 | return is_clientreplay(m) || is_active(m) || is_stopping(m); | |
510 | } | |
511 | ||
9f95a23c | 512 | mds_gid_t get_standby_replay(mds_rank_t r) const; |
11fdf7f2 TL |
513 | bool has_standby_replay(mds_rank_t r) const { |
514 | return get_standby_replay(r) != MDS_GID_NONE; | |
515 | } | |
516 | ||
517 | bool is_followable(mds_rank_t r) const { | |
518 | if (auto it1 = up.find(r); it1 != up.end()) { | |
519 | if (auto it2 = mds_info.find(it1->second); it2 != mds_info.end()) { | |
520 | auto& info = it2->second; | |
521 | if (!info.is_degraded() && !has_standby_replay(r)) { | |
522 | return true; | |
523 | } | |
524 | } | |
525 | } | |
526 | return false; | |
7c673cae FG |
527 | } |
528 | ||
529 | bool is_laggy_gid(mds_gid_t gid) const { | |
11fdf7f2 TL |
530 | auto it = mds_info.find(gid); |
531 | return it == mds_info.end() ? false : it->second.laggy(); | |
7c673cae FG |
532 | } |
533 | ||
534 | // degraded = some recovery in process. fixes active membership and | |
535 | // recovery_set. | |
9f95a23c | 536 | bool is_degraded() const; |
7c673cae | 537 | bool is_any_failed() const { |
f67539c2 TL |
538 | return !failed.empty(); |
539 | } | |
540 | bool is_any_damaged() const { | |
541 | return !damaged.empty(); | |
7c673cae FG |
542 | } |
543 | bool is_resolving() const { | |
544 | return | |
545 | get_num_mds(STATE_RESOLVE) > 0 && | |
546 | get_num_mds(STATE_REPLAY) == 0 && | |
547 | failed.empty() && damaged.empty(); | |
548 | } | |
549 | bool is_rejoining() const { | |
550 | // nodes are rejoining cache state | |
551 | return | |
552 | get_num_mds(STATE_REJOIN) > 0 && | |
553 | get_num_mds(STATE_REPLAY) == 0 && | |
554 | get_num_mds(STATE_RECONNECT) == 0 && | |
555 | get_num_mds(STATE_RESOLVE) == 0 && | |
556 | failed.empty() && damaged.empty(); | |
557 | } | |
558 | bool is_stopped() const { | |
559 | return up.empty(); | |
560 | } | |
561 | ||
562 | /** | |
563 | * Get whether a rank is 'up', i.e. has | |
564 | * an MDS daemon's entity_inst_t associated | |
565 | * with it. | |
566 | */ | |
567 | bool have_inst(mds_rank_t m) const { | |
568 | return up.count(m); | |
569 | } | |
570 | ||
571 | /** | |
572 | * Get the MDS daemon addresses (entity_addrvec_t) for a rank | |
573 | * known to be up. | |
574 | */ | |
11fdf7f2 TL |
575 | entity_addrvec_t get_addrs(mds_rank_t m) const { |
576 | return mds_info.at(up.at(m)).get_addrs(); | |
7c673cae FG |
577 | } |
578 | ||
7c673cae FG |
579 | mds_rank_t get_rank_gid(mds_gid_t gid) const { |
580 | if (mds_info.count(gid)) { | |
581 | return mds_info.at(gid).rank; | |
582 | } else { | |
583 | return MDS_RANK_NONE; | |
584 | } | |
585 | } | |
586 | ||
f64942e4 AA |
587 | /** |
588 | * Get MDS rank incarnation if the rank is up, else -1 | |
589 | */ | |
590 | mds_gid_t get_incarnation(mds_rank_t m) const { | |
a4b75251 TL |
591 | auto it = up.find(m); |
592 | if (it == up.end()) | |
f64942e4 | 593 | return MDS_GID_NONE; |
a4b75251 | 594 | return (mds_gid_t)get_inc_gid(it->second); |
f64942e4 AA |
595 | } |
596 | ||
7c673cae FG |
597 | int get_inc_gid(mds_gid_t gid) const { |
598 | auto mds_info_entry = mds_info.find(gid); | |
599 | if (mds_info_entry != mds_info.end()) | |
600 | return mds_info_entry->second.inc; | |
601 | return -1; | |
602 | } | |
f67539c2 TL |
603 | void encode(ceph::buffer::list& bl, uint64_t features) const; |
604 | void decode(ceph::buffer::list::const_iterator& p); | |
605 | void decode(const ceph::buffer::list& bl) { | |
11fdf7f2 | 606 | auto p = bl.cbegin(); |
7c673cae FG |
607 | decode(p); |
608 | } | |
11fdf7f2 | 609 | void sanitize(const std::function<bool(int64_t pool)>& pool_exists); |
7c673cae | 610 | |
f67539c2 TL |
611 | void print(std::ostream& out) const; |
612 | void print_summary(ceph::Formatter *f, std::ostream *out) const; | |
20effc67 | 613 | void print_flags(std::ostream& out) const; |
7c673cae | 614 | |
f67539c2 | 615 | void dump(ceph::Formatter *f) const; |
20effc67 | 616 | void dump_flags_state(Formatter *f) const; |
9f95a23c | 617 | static void generate_test_instances(std::list<MDSMap*>& ls); |
7c673cae FG |
618 | |
619 | static bool state_transition_valid(DaemonState prev, DaemonState next); | |
9f95a23c TL |
620 | |
621 | CompatSet compat; | |
622 | protected: | |
623 | // base map | |
624 | epoch_t epoch = 0; | |
625 | bool enabled = false; | |
626 | std::string fs_name = MDS_FS_NAME_DEFAULT; | |
627 | uint32_t flags = CEPH_MDSMAP_DEFAULTS; // flags | |
628 | epoch_t last_failure = 0; // mds epoch of last failure | |
629 | epoch_t last_failure_osd_epoch = 0; // osd epoch of last failure; any mds entering replay needs | |
f67539c2 | 630 | // at least this osdmap to ensure the blocklist propagates. |
9f95a23c TL |
631 | utime_t created; |
632 | utime_t modified; | |
633 | ||
634 | mds_rank_t tableserver = 0; // which MDS has snaptable | |
635 | mds_rank_t root = 0; // which MDS has root directory | |
636 | ||
637 | __u32 session_timeout = 60; | |
638 | __u32 session_autoclose = 300; | |
639 | uint64_t max_file_size = 1ULL<<40; /* 1TB */ | |
640 | ||
f38dd50b TL |
641 | uint64_t max_xattr_size = MDS_MAX_XATTR_SIZE; |
642 | ||
f67539c2 | 643 | feature_bitset_t required_client_features; |
9f95a23c TL |
644 | |
645 | std::vector<int64_t> data_pools; // file data pools available to clients (via an ioctl). first is the default. | |
646 | int64_t cas_pool = -1; // where CAS objects go | |
647 | int64_t metadata_pool = -1; // where fs metadata objects go | |
648 | ||
649 | /* | |
650 | * in: the set of logical mds #'s that define the cluster. this is the set | |
651 | * of mds's the metadata may be distributed over. | |
652 | * up: map from logical mds #'s to the addrs filling those roles. | |
653 | * failed: subset of @in that are failed. | |
654 | * stopped: set of nodes that have been initialized, but are not active. | |
655 | * | |
656 | * @up + @failed = @in. @in * @stopped = {}. | |
657 | */ | |
658 | ||
659 | mds_rank_t max_mds = 1; /* The maximum number of active MDSes. Also, the maximum rank. */ | |
660 | mds_rank_t old_max_mds = 0; /* Value to restore when MDS cluster is marked up */ | |
661 | mds_rank_t standby_count_wanted = -1; | |
f67539c2 | 662 | std::string balancer; /* The name/version of the mantle balancer (i.e. the rados obj name) */ |
9f95a23c | 663 | |
1e59de90 TL |
664 | std::string bal_rank_mask = "-1"; |
665 | std::bitset<MAX_MDS> bal_rank_mask_bitset; | |
// Value returned by get_num_mdss_in_rank_mask_bitset(). Zero-initialised so
// a default-constructed MDSMap never exposes an indeterminate value.
uint32_t num_mdss_in_rank_mask_bitset = 0;
667 | ||
9f95a23c TL |
668 | std::set<mds_rank_t> in; // currently defined cluster |
669 | ||
670 | // which ranks are failed, stopped, damaged (i.e. not held by a daemon) | |
671 | std::set<mds_rank_t> failed, stopped, damaged; | |
672 | std::map<mds_rank_t, mds_gid_t> up; // who is in those roles | |
673 | std::map<mds_gid_t, mds_info_t> mds_info; | |
674 | ||
675 | uint8_t ever_allowed_features = 0; //< bitmap of features the cluster has allowed | |
676 | uint8_t explicitly_allowed_features = 0; //< bitmap of features explicitly enabled | |
677 | ||
678 | bool inline_data_enabled = false; | |
679 | ||
680 | uint64_t cached_up_features = 0; | |
20effc67 TL |
681 | private: |
682 | inline static const std::map<int, std::string> flag_display = { | |
683 | {CEPH_MDSMAP_NOT_JOINABLE, "joinable"}, //inverse for user display | |
684 | {CEPH_MDSMAP_ALLOW_SNAPS, "allow_snaps"}, | |
685 | {CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS, "allow_multimds_snaps"}, | |
1e59de90 | 686 | {CEPH_MDSMAP_ALLOW_STANDBY_REPLAY, "allow_standby_replay"}, |
f38dd50b TL |
687 | {CEPH_MDSMAP_REFUSE_CLIENT_SESSION, "refuse_client_session"}, |
688 | {CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS, "refuse_standby_for_another_fs"}, | |
689 | {CEPH_MDSMAP_BALANCE_AUTOMATE, "balance_automate"} | |
20effc67 | 690 | }; |
7c673cae FG |
691 | }; |
692 | WRITE_CLASS_ENCODER_FEATURES(MDSMap::mds_info_t) | |
693 | WRITE_CLASS_ENCODER_FEATURES(MDSMap) | |
694 | ||
f67539c2 | 695 | inline std::ostream& operator<<(std::ostream &out, const MDSMap &m) { |
7c673cae FG |
696 | m.print_summary(NULL, &out); |
697 | return out; | |
698 | } | |
699 | ||
9f95a23c TL |
700 | inline std::ostream& operator<<(std::ostream& o, const MDSMap::mds_info_t& info) { |
701 | info.dump(o); | |
702 | return o; | |
703 | } | |
7c673cae | 704 | #endif |