1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #include "common/debug.h"
16 #include "mon/health_check.h"
21 using std::stringstream
;
23 #define dout_context g_ceph_context
24 #define dout_subsys ceph_subsys_
27 CompatSet
MDSMap::get_compat_set_all() {
28 CompatSet::FeatureSet feature_compat
;
29 CompatSet::FeatureSet feature_ro_compat
;
30 CompatSet::FeatureSet feature_incompat
;
31 feature_incompat
.insert(MDS_FEATURE_INCOMPAT_BASE
);
32 feature_incompat
.insert(MDS_FEATURE_INCOMPAT_CLIENTRANGES
);
33 feature_incompat
.insert(MDS_FEATURE_INCOMPAT_FILELAYOUT
);
34 feature_incompat
.insert(MDS_FEATURE_INCOMPAT_DIRINODE
);
35 feature_incompat
.insert(MDS_FEATURE_INCOMPAT_ENCODING
);
36 feature_incompat
.insert(MDS_FEATURE_INCOMPAT_OMAPDIRFRAG
);
37 feature_incompat
.insert(MDS_FEATURE_INCOMPAT_INLINE
);
38 feature_incompat
.insert(MDS_FEATURE_INCOMPAT_NOANCHOR
);
39 feature_incompat
.insert(MDS_FEATURE_INCOMPAT_FILE_LAYOUT_V2
);
40 feature_incompat
.insert(MDS_FEATURE_INCOMPAT_SNAPREALM_V2
);
42 return CompatSet(feature_compat
, feature_ro_compat
, feature_incompat
);
45 CompatSet
MDSMap::get_compat_set_default() {
46 CompatSet::FeatureSet feature_compat
;
47 CompatSet::FeatureSet feature_ro_compat
;
48 CompatSet::FeatureSet feature_incompat
;
49 feature_incompat
.insert(MDS_FEATURE_INCOMPAT_BASE
);
50 feature_incompat
.insert(MDS_FEATURE_INCOMPAT_CLIENTRANGES
);
51 feature_incompat
.insert(MDS_FEATURE_INCOMPAT_FILELAYOUT
);
52 feature_incompat
.insert(MDS_FEATURE_INCOMPAT_DIRINODE
);
53 feature_incompat
.insert(MDS_FEATURE_INCOMPAT_ENCODING
);
54 feature_incompat
.insert(MDS_FEATURE_INCOMPAT_OMAPDIRFRAG
);
55 feature_incompat
.insert(MDS_FEATURE_INCOMPAT_NOANCHOR
);
56 feature_incompat
.insert(MDS_FEATURE_INCOMPAT_FILE_LAYOUT_V2
);
57 feature_incompat
.insert(MDS_FEATURE_INCOMPAT_SNAPREALM_V2
);
59 return CompatSet(feature_compat
, feature_ro_compat
, feature_incompat
);
63 CompatSet
MDSMap::get_compat_set_base() {
64 CompatSet::FeatureSet feature_compat_base
;
65 CompatSet::FeatureSet feature_incompat_base
;
66 feature_incompat_base
.insert(MDS_FEATURE_INCOMPAT_BASE
);
67 CompatSet::FeatureSet feature_ro_compat_base
;
69 return CompatSet(feature_compat_base
, feature_ro_compat_base
, feature_incompat_base
);
// Dump this mds_info_t through Formatter `f`: gid, name, rank, incarnation,
// state name, state_seq, the legacy address string plus the full addrs
// object, join_fscid, laggy_since (only when it differs from a
// default-constructed utime_t), each export target rank inside an
// "export_targets" array section, and finally features and flags.
// NOTE(review): the embedded line numbers skip (73, 85, 90-91, 94), so braces
// and possibly statements of this definition are not visible in this listing.
72 void MDSMap::mds_info_t::dump(Formatter
*f
) const
74 f
->dump_unsigned("gid", global_id
);
75 f
->dump_string("name", name
);
76 f
->dump_int("rank", rank
);
77 f
->dump_int("incarnation", inc
);
78 f
->dump_stream("state") << ceph_mds_state_name(state
);
79 f
->dump_int("state_seq", state_seq
);
80 f
->dump_stream("addr") << addrs
.get_legacy_str();
81 f
->dump_object("addrs", addrs
);
82 f
->dump_int("join_fscid", join_fscid
);
83 if (laggy_since
!= utime_t())
84 f
->dump_stream("laggy_since") << laggy_since
;
86 f
->open_array_section("export_targets");
87 for (set
<mds_rank_t
>::iterator p
= export_targets
.begin();
88 p
!= export_targets
.end(); ++p
) {
89 f
->dump_int("mds", *p
);
92 f
->dump_unsigned("features", mds_features
);
93 f
->dump_unsigned("flags", flags
);
// Write a bracketed, single-line description of this daemon to stream `o`:
// "[mds.<name>{<rank>:<gid>} state <state name> seq <state_seq>", then
// optional " laggy since", " export targets" (only when non-empty) and
// " join_fscid=" (only when not FS_CLUSTER_ID_NONE) parts, ending with
// " addr <addrs>]".
// NOTE(review): the condition guarding the " laggy since " output is on a
// line that is not visible here (embedded numbering skips 101, 103, 106-109).
96 void MDSMap::mds_info_t::dump(std::ostream
& o
) const
98 o
<< "[mds." << name
<< "{" << rank
<< ":" << global_id
<< "}"
99 << " state " << ceph_mds_state_name(state
)
100 << " seq " << state_seq
;
102 o
<< " laggy since " << laggy_since
;
104 if (!export_targets
.empty()) {
105 o
<< " export targets " << export_targets
;
110 if (join_fscid
!= FS_CLUSTER_ID_NONE
) {
111 o
<< " join_fscid=" << join_fscid
;
113 o
<< " addr " << addrs
<< "]";
// Append sample mds_info_t objects to `ls`: first a default-constructed one,
// then one with global_id 1 and name "test_instance".
// The objects are allocated with `new` and stored as raw pointers; nothing in
// this function frees them, so the consumer of `ls` presumably owns them.
// NOTE(review): embedded line 123 is not visible, so the second sample may
// set an additional field.
116 void MDSMap::mds_info_t::generate_test_instances(std::list
<mds_info_t
*>& ls
)
118 mds_info_t
*sample
= new mds_info_t();
119 ls
.push_back(sample
);
120 sample
= new mds_info_t();
121 sample
->global_id
= 1;
122 sample
->name
= "test_instance";
124 ls
.push_back(sample
);
// Dump the whole MDSMap through Formatter `f`.
// Scalars are dumped first (epoch, flags, allowed-feature masks, timestamps,
// tableserver, root, session timeouts, min_compat_client as
// "<int> (<release>)", max_file_size, last_failure values, max_mds).
// Sections opened here: "compat" (object), "in" (array of ranks), "up"
// (object keyed "mds_<rank>" -> gid), "failed" / "damaged" / "stopped"
// (arrays of ranks), "info" (object keyed "gid_<gid>"), "data_pools" (array).
// Then metadata_pool, enabled, fs_name, balancer and standby_count_wanted
// (clamped to be non-negative via std::max(0, ...)).
// NOTE(review): many lines (section closes, the "compat"/"info" payloads, the
// declaration of the "mds_%d" buffer) are not visible in this listing.
127 void MDSMap::dump(Formatter
*f
) const
129 f
->dump_int("epoch", epoch
);
130 f
->dump_unsigned("flags", flags
);
131 f
->dump_unsigned("ever_allowed_features", ever_allowed_features
);
132 f
->dump_unsigned("explicitly_allowed_features", explicitly_allowed_features
);
133 f
->dump_stream("created") << created
;
134 f
->dump_stream("modified") << modified
;
135 f
->dump_int("tableserver", tableserver
);
136 f
->dump_int("root", root
);
137 f
->dump_int("session_timeout", session_timeout
);
138 f
->dump_int("session_autoclose", session_autoclose
);
139 f
->dump_stream("min_compat_client") << ceph::to_integer
<int>(min_compat_client
) << " ("
140 << min_compat_client
<< ")";
141 f
->dump_int("max_file_size", max_file_size
);
142 f
->dump_int("last_failure", last_failure
);
143 f
->dump_int("last_failure_osd_epoch", last_failure_osd_epoch
);
144 f
->open_object_section("compat");
147 f
->dump_int("max_mds", max_mds
);
148 f
->open_array_section("in");
149 for (set
<mds_rank_t
>::const_iterator p
= in
.begin(); p
!= in
.end(); ++p
)
150 f
->dump_int("mds", *p
);
152 f
->open_object_section("up");
153 for (map
<mds_rank_t
,mds_gid_t
>::const_iterator p
= up
.begin(); p
!= up
.end(); ++p
) {
155 sprintf(s
, "mds_%d", int(p
->first
));
156 f
->dump_int(s
, p
->second
);
159 f
->open_array_section("failed");
160 for (set
<mds_rank_t
>::const_iterator p
= failed
.begin(); p
!= failed
.end(); ++p
)
161 f
->dump_int("mds", *p
);
163 f
->open_array_section("damaged");
164 for (set
<mds_rank_t
>::const_iterator p
= damaged
.begin(); p
!= damaged
.end(); ++p
)
165 f
->dump_int("mds", *p
);
167 f
->open_array_section("stopped");
168 for (set
<mds_rank_t
>::const_iterator p
= stopped
.begin(); p
!= stopped
.end(); ++p
)
169 f
->dump_int("mds", *p
);
171 f
->open_object_section("info");
172 for (const auto& [gid
, info
] : mds_info
) {
173 char s
[25]; // 'gid_' + len(str(ULLONG_MAX)) + '\0'
174 sprintf(s
, "gid_%llu", (long long unsigned)gid
);
175 f
->open_object_section(s
);
180 f
->open_array_section("data_pools");
181 for (const auto& p
: data_pools
)
182 f
->dump_int("pool", p
);
184 f
->dump_int("metadata_pool", metadata_pool
);
185 f
->dump_bool("enabled", enabled
);
186 f
->dump_string("fs_name", fs_name
);
187 f
->dump_string("balancer", balancer
);
188 f
->dump_int("standby_count_wanted", std::max(0, standby_count_wanted
));
// Build a sample MDSMap for tests: one data pool (0), metadata pool 1, the
// full compat set from get_compat_set_all(), and deliberately non-default
// session_timeout (61), session_autoclose (301) and max_file_size (1<<24).
// The map is allocated with `new`.
// NOTE(review): the line that stores `m` into `ls` is not visible in this
// listing (embedded numbering skips 192, 194, 197, 199, 204-206).
191 void MDSMap::generate_test_instances(std::list
<MDSMap
*>& ls
)
193 MDSMap
*m
= new MDSMap();
195 m
->data_pools
.push_back(0);
196 m
->metadata_pool
= 1;
198 m
->compat
= get_compat_set_all();
200 // these aren't the defaults, just in case anybody gets confused
201 m
->session_timeout
= 61;
202 m
->session_autoclose
= 301;
203 m
->max_file_size
= 1<<24;
// Print the map to `out` as "key<TAB>value" lines, one field per line
// (flags are printed in hex, then the stream is switched back to dec;
// standby_count_wanted is clamped to be non-negative).
// After the scalar fields, the daemons in mds_info are printed one per line,
// ordered through a temporary multimap keyed by (rank, inc - 1) so the
// output is grouped by rank rather than by gid.
// NOTE(review): the key's second member is `unsigned`, so `inc - 1` is
// converted to unsigned on insertion.
207 void MDSMap::print(ostream
& out
) const
209 out
<< "fs_name\t" << fs_name
<< "\n";
210 out
<< "epoch\t" << epoch
<< "\n";
211 out
<< "flags\t" << hex
<< flags
<< dec
<< "\n";
212 out
<< "created\t" << created
<< "\n";
213 out
<< "modified\t" << modified
<< "\n";
214 out
<< "tableserver\t" << tableserver
<< "\n";
215 out
<< "root\t" << root
<< "\n";
216 out
<< "session_timeout\t" << session_timeout
<< "\n"
217 << "session_autoclose\t" << session_autoclose
<< "\n";
218 out
<< "max_file_size\t" << max_file_size
<< "\n";
219 out
<< "min_compat_client\t" << ceph::to_integer
<int>(min_compat_client
) << " ("
220 << min_compat_client
<< ")\n";
221 out
<< "last_failure\t" << last_failure
<< "\n"
222 << "last_failure_osd_epoch\t" << last_failure_osd_epoch
<< "\n";
223 out
<< "compat\t" << compat
<< "\n";
224 out
<< "max_mds\t" << max_mds
<< "\n";
225 out
<< "in\t" << in
<< "\n"
226 << "up\t" << up
<< "\n"
227 << "failed\t" << failed
<< "\n"
228 << "damaged\t" << damaged
<< "\n"
229 << "stopped\t" << stopped
<< "\n";
230 out
<< "data_pools\t" << data_pools
<< "\n";
231 out
<< "metadata_pool\t" << metadata_pool
<< "\n";
232 out
<< "inline_data\t" << (inline_data_enabled
? "enabled" : "disabled") << "\n";
233 out
<< "balancer\t" << balancer
<< "\n";
234 out
<< "standby_count_wanted\t" << std::max(0, standby_count_wanted
) << "\n";
236 multimap
< pair
<mds_rank_t
, unsigned>, mds_gid_t
> foo
;
237 for (const auto &p
: mds_info
) {
238 foo
.insert(std::make_pair(
239 std::make_pair(p
.second
.rank
, p
.second
.inc
-1), p
.first
));
242 for (const auto &p
: foo
) {
243 out
<< mds_info
.at(p
.second
) << "\n";
// Emit a short summary of the map either through Formatter `f` or as text on
// `*out`: epoch and the up / in / max_mds counts, a per-rank status for
// daemons that hold a rank (rank >= 0) and are not in STATE_STANDBY_REPLAY,
// per-state counts from `by_state` (iterated in reverse key order), and the
// number of failed and damaged ranks when those sets are non-empty.
// A daemon reported as laggy() gets "(laggy or crashed)" appended to its
// state name.
// NOTE(review): the lines that choose between `f` and `out`, and those that
// fill `by_state`, are not visible in this listing.
247 void MDSMap::print_summary(Formatter
*f
, ostream
*out
) const
249 map
<mds_rank_t
,string
> by_rank
;
250 map
<string
,int> by_state
;
253 f
->dump_unsigned("epoch", get_epoch());
254 f
->dump_unsigned("up", up
.size());
255 f
->dump_unsigned("in", in
.size());
256 f
->dump_unsigned("max", max_mds
);
258 *out
<< "e" << get_epoch() << ": " << up
.size() << "/" << in
.size() << "/" << max_mds
<< " up";
262 f
->open_array_section("by_rank");
263 for (const auto &p
: mds_info
) {
264 string s
= ceph_mds_state_name(p
.second
.state
);
265 if (p
.second
.laggy())
266 s
+= "(laggy or crashed)";
268 if (p
.second
.rank
>= 0 && p
.second
.state
!= MDSMap::STATE_STANDBY_REPLAY
) {
270 f
->open_object_section("mds");
271 f
->dump_unsigned("rank", p
.second
.rank
);
272 f
->dump_string("name", p
.second
.name
);
273 f
->dump_string("status", s
);
276 by_rank
[p
.second
.rank
] = p
.second
.name
+ "=" + s
;
285 if (!by_rank
.empty())
286 *out
<< " " << by_rank
;
289 for (map
<string
,int>::reverse_iterator p
= by_state
.rbegin(); p
!= by_state
.rend(); ++p
) {
291 f
->dump_unsigned(p
->first
.c_str(), p
->second
);
293 *out
<< ", " << p
->second
<< " " << p
->first
;
297 if (!failed
.empty()) {
299 f
->dump_unsigned("failed", failed
.size());
301 *out
<< ", " << failed
.size() << " failed";
305 if (!damaged
.empty()) {
307 f
->dump_unsigned("damaged", damaged
.size());
309 *out
<< ", " << damaged
.size() << " damaged";
312 //if (stopped.size())
313 //out << ", " << stopped.size() << " stopped";
// Legacy health report: appends (status, message) pairs to `summary` and,
// through the `detail` pointer, finer-grained per-item messages.
// Visible conditions and severities:
//   - failed ranks non-empty   -> HEALTH_ERR summary, one detail per rank
//   - damaged ranks non-empty  -> HEALTH_ERR summary, one detail per rank
//   - degraded cluster         -> HEALTH_WARN "mds cluster is degraded", plus
//     a detail line per rank that is resolving / replaying / rejoining /
//     reconnecting
//   - up.size() < max_mds      -> HEALTH_WARN
//   - laggy daemons among `up` -> HEALTH_WARN summary and per-daemon detail
//   - max_mds > 1 with snaps ever allowed but multimds snaps not allowed
//                              -> HEALTH_WARN
// NOTE(review): `detail` is dereferenced on several visible lines; any null
// checks guarding those uses are on lines not visible in this listing, as
// are the state tests selecting each "is resolving"/... message.
// NOTE(review): `up.find(i)->second` is dereferenced without a visible
// end() check — confirm the missing guard covers ranks that are not up.
316 void MDSMap::get_health(list
<pair
<health_status_t
,string
> >& summary
,
317 list
<pair
<health_status_t
,string
> > *detail
) const
319 if (!failed
.empty()) {
320 std::ostringstream oss
;
322 << ((failed
.size() > 1) ? "s ":" ")
324 << ((failed
.size() > 1) ? " have":" has")
326 summary
.push_back(make_pair(HEALTH_ERR
, oss
.str()));
328 for (set
<mds_rank_t
>::const_iterator p
= failed
.begin(); p
!= failed
.end(); ++p
) {
329 std::ostringstream oss
;
330 oss
<< "mds." << *p
<< " has failed";
331 detail
->push_back(make_pair(HEALTH_ERR
, oss
.str()));
336 if (!damaged
.empty()) {
337 std::ostringstream oss
;
339 << ((damaged
.size() > 1) ? "s ":" ")
341 << ((damaged
.size() > 1) ? " are":" is")
343 summary
.push_back(make_pair(HEALTH_ERR
, oss
.str()));
345 for (set
<mds_rank_t
>::const_iterator p
= damaged
.begin(); p
!= damaged
.end(); ++p
) {
346 std::ostringstream oss
;
347 oss
<< "mds." << *p
<< " is damaged";
348 detail
->push_back(make_pair(HEALTH_ERR
, oss
.str()));
354 summary
.push_back(make_pair(HEALTH_WARN
, "mds cluster is degraded"));
356 detail
->push_back(make_pair(HEALTH_WARN
, "mds cluster is degraded"));
357 for (mds_rank_t i
= mds_rank_t(0); i
< get_max_mds(); i
++) {
360 mds_gid_t gid
= up
.find(i
)->second
;
361 const auto& info
= mds_info
.at(gid
);
364 ss
<< "mds." << info
.name
<< " at " << info
.addrs
365 << " rank " << i
<< " is resolving";
367 ss
<< "mds." << info
.name
<< " at " << info
.addrs
368 << " rank " << i
<< " is replaying journal";
370 ss
<< "mds." << info
.name
<< " at " << info
.addrs
371 << " rank " << i
<< " is rejoining";
373 ss
<< "mds." << info
.name
<< " at " << info
.addrs
374 << " rank " << i
<< " is reconnecting to clients";
375 if (ss
.str().length())
376 detail
->push_back(make_pair(HEALTH_WARN
, ss
.str()));
383 ss
<< fs_name
<< " max_mds " << max_mds
;
384 summary
.push_back(make_pair(HEALTH_WARN
, ss
.str()));
387 if ((mds_rank_t
)up
.size() < max_mds
) {
389 ss
<< fs_name
<< " has " << up
.size()
390 << " active MDS(s), but has max_mds of " << max_mds
;
391 summary
.push_back(make_pair(HEALTH_WARN
, ss
.str()));
395 for (const auto &u
: up
) {
396 const auto& info
= mds_info
.at(u
.second
);
398 laggy
.insert(info
.name
);
400 std::ostringstream oss
;
401 oss
<< "mds." << info
.name
<< " at " << info
.addrs
402 << " is laggy/unresponsive";
403 detail
->push_back(make_pair(HEALTH_WARN
, oss
.str()));
408 if (!laggy
.empty()) {
409 std::ostringstream oss
;
410 oss
<< "mds " << laggy
411 << ((laggy
.size() > 1) ? " are":" is")
413 summary
.push_back(make_pair(HEALTH_WARN
, oss
.str()));
416 if (get_max_mds() > 1 &&
417 was_snaps_ever_allowed() && !allows_multimds_snaps()) {
418 std::ostringstream oss
;
419 oss
<< "multi-active mds while there are snapshots possibly created by pre-mimic MDS";
420 summary
.push_back(make_pair(HEALTH_WARN
, oss
.str()));
// Populate `checks` with structured health checks for this filesystem.
// Checks raised on the visible lines:
//   MDS_DAMAGE (HEALTH_ERR)               - damaged rank set is non-empty
//   FS_DEGRADED (HEALTH_WARN)             - with per-rank detail for ranks
//                                           resolving / replaying /
//                                           rejoining / reconnecting
//   MDS_UP_LESS_THAN_MAX (HEALTH_WARN)    - get_num_in_mds() < get_max_mds()
//   MDS_ALL_DOWN (HEALTH_ERR)             - no up MDS while max_mds > 0
//   MULTIMDS_WITH_OLDSNAPS (HEALTH_ERR)   - max_mds > 1, snaps were ever
//                                           allowed, multimds snaps are not
//   FS_INLINE_DATA_DEPRECATED (HEALTH_WARN) - inline data is enabled
// MDS_DAMAGE and FS_DEGRADED use get_or_add(); the others use add().
// NOTE(review): the condition guarding FS_DEGRADED and the per-state tests
// are on lines not visible in this listing.
// NOTE(review): `up.find(i)->second` is dereferenced without a visible
// end() check — confirm the missing guard covers ranks that are not up.
424 void MDSMap::get_health_checks(health_check_map_t
*checks
) const
427 if (!damaged
.empty()) {
428 health_check_t
& check
= checks
->get_or_add("MDS_DAMAGE", HEALTH_ERR
,
429 "%num% mds daemon%plurals% damaged",
431 for (auto p
: damaged
) {
432 std::ostringstream oss
;
433 oss
<< "fs " << fs_name
<< " mds." << p
<< " is damaged";
434 check
.detail
.push_back(oss
.str());
440 health_check_t
& fscheck
= checks
->get_or_add(
441 "FS_DEGRADED", HEALTH_WARN
,
442 "%num% filesystem%plurals% %isorare% degraded", 1);
444 ss
<< "fs " << fs_name
<< " is degraded";
445 fscheck
.detail
.push_back(ss
.str());
448 for (mds_rank_t i
= mds_rank_t(0); i
< get_max_mds(); i
++) {
451 mds_gid_t gid
= up
.find(i
)->second
;
452 const auto& info
= mds_info
.at(gid
);
454 ss
<< "fs " << fs_name
<< " mds." << info
.name
<< " at "
455 << info
.addrs
<< " rank " << i
;
457 ss
<< " is resolving";
459 ss
<< " is replaying journal";
461 ss
<< " is rejoining";
463 ss
<< " is reconnecting to clients";
464 if (ss
.str().length())
465 detail
.push_back(ss
.str());
469 // MDS_UP_LESS_THAN_MAX
470 if ((mds_rank_t
)get_num_in_mds() < get_max_mds()) {
471 health_check_t
& check
= checks
->add(
472 "MDS_UP_LESS_THAN_MAX", HEALTH_WARN
,
473 "%num% filesystem%plurals% %isorare% online with fewer MDS than max_mds", 1);
475 ss
<< "fs " << fs_name
<< " has " << get_num_in_mds()
476 << " MDS online, but wants " << get_max_mds();
477 check
.detail
.push_back(ss
.str());
481 if ((mds_rank_t
)get_num_up_mds() == 0 && get_max_mds() > 0) {
482 health_check_t
&check
= checks
->add(
483 "MDS_ALL_DOWN", HEALTH_ERR
,
484 "%num% filesystem%plurals% %isorare% offline", 1);
486 ss
<< "fs " << fs_name
<< " is offline because no MDS is active for it.";
487 check
.detail
.push_back(ss
.str());
490 if (get_max_mds() > 1 &&
491 was_snaps_ever_allowed() && !allows_multimds_snaps()) {
492 health_check_t
&check
= checks
->add(
493 "MULTIMDS_WITH_OLDSNAPS", HEALTH_ERR
,
494 "%num% filesystem%plurals% %isorare% multi-active mds with old snapshots", 1);
496 ss
<< "multi-active mds while there are snapshots possibly created by pre-mimic MDS";
497 check
.detail
.push_back(ss
.str());
500 if (get_inline_data_enabled()) {
501 health_check_t
&check
= checks
->add(
502 "FS_INLINE_DATA_DEPRECATED", HEALTH_WARN
,
503 "%num% filesystem%plurals% with deprecated feature inline_data", 1);
505 ss
<< "fs " << fs_name
<< " has deprecated feature inline_data enabled.";
506 check
.detail
.push_back(ss
.str());
// Versioned encoding of this mds_info_t into `bl`, wrapped in
// ENCODE_START(v, 4, bl). `v` is chosen on lines not visible here; the
// visible test checks whether `features` includes SERVER_NAUTILUS.
// The address is written either as addrs.legacy_addr() or as the full addrs
// object (the selecting condition is not visible in this listing).
// The former standby_for_rank / standby_for_name slots are still written, as
// MDS_RANK_NONE and an empty string, and join_fscid occupies the slot
// formerly used by standby_for_fscid.
510 void MDSMap::mds_info_t::encode_versioned(bufferlist
& bl
, uint64_t features
) const
513 if (!HAVE_FEATURE(features
, SERVER_NAUTILUS
)) {
516 ENCODE_START(v
, 4, bl
);
517 encode(global_id
, bl
);
521 encode((int32_t)state
, bl
);
522 encode(state_seq
, bl
);
524 encode(addrs
.legacy_addr(), bl
, features
);
526 encode(addrs
, bl
, features
);
528 encode(laggy_since
, bl
);
529 encode(MDS_RANK_NONE
, bl
); /* standby_for_rank */
530 encode(std::string(), bl
); /* standby_for_name */
531 encode(export_targets
, bl
);
532 encode(mds_features
, bl
);
533 encode(join_fscid
, bl
); /* formerly: standby_for_fscid */
// Unversioned encoding of this mds_info_t into `bl`: a bare struct_v
// followed by the fields, with no ENCODE_START wrapper on the visible lines.
// The address is always written as addrs.legacy_addr() with feature mask 0,
// and MDS_RANK_NONE plus an empty string are written in the positions that
// encode_versioned() labels standby_for_rank / standby_for_name.
// NOTE(review): the definition of struct_v and the fields on embedded lines
// 547-549 are not visible in this listing.
541 void MDSMap::mds_info_t::encode_unversioned(bufferlist
& bl
) const
545 encode(struct_v
, bl
);
546 encode(global_id
, bl
);
550 encode((int32_t)state
, bl
);
551 encode(state_seq
, bl
);
552 encode(addrs
.legacy_addr(), bl
, 0);
553 encode(laggy_since
, bl
);
554 encode(MDS_RANK_NONE
, bl
);
555 encode(std::string(), bl
);
556 encode(export_targets
, bl
);
// Decode an mds_info_t from `bl` using
// DECODE_START_LEGACY_COMPAT_LEN(9, 4, 4, bl).
// The state is read as a raw integer and cast to MDSMap::DaemonState.
// standby_for_rank and standby_for_name are decoded into locals only; no
// visible line stores them into the object.
// NOTE(review): the struct_v conditions that guard the later fields
// (export_targets, mds_features, join_fscid, standby_replay) and the address
// decoding are on lines not visible in this listing.
559 void MDSMap::mds_info_t::decode(bufferlist::const_iterator
& bl
)
561 DECODE_START_LEGACY_COMPAT_LEN(9, 4, 4, bl
);
562 decode(global_id
, bl
);
567 decode(raw_state
, bl
);
568 state
= (MDSMap::DaemonState
)raw_state
;
569 decode(state_seq
, bl
);
571 decode(laggy_since
, bl
);
573 mds_rank_t standby_for_rank
;
574 decode(standby_for_rank
, bl
);
577 std::string standby_for_name
;
578 decode(standby_for_name
, bl
);
581 decode(export_targets
, bl
);
583 decode(mds_features
, bl
);
585 decode(join_fscid
, bl
);
589 decode(standby_replay
, bl
);
// Build a human-readable label for this daemon, beginning with
// "daemon mds.<name>", for use in log/status messages.
// NOTE(review): the rest of the body, including the return statement, is not
// visible in this listing.
597 std::string
MDSMap::mds_info_t::human_name() const
599 // Like "daemon mds.myhost restarted", "Activating daemon mds.myhost"
600 std::ostringstream out
;
601 out
<< "daemon mds." << name
;
// Encode the MDSMap into `bl`, choosing the wire format from `features`:
//   1. CEPH_FEATURE_PGID64 absent - oldest format; mds_info is written
//      entry by entry and data_pools element by element (cas_pool is copied
//      into an int32_t first).
//   2. CEPH_FEATURE_MDSENC absent - intermediate format; mds_info is still
//      written entry by entry, data_pools and cas_pool directly.
//   3. otherwise - current format inside ENCODE_START(5, 4, bl), which also
//      carries the allowed-feature masks, inline_data_enabled, balancer,
//      standby_count_wanted, old_max_mds and min_compat_client.
// `inc` is a legacy per-rank field; it is filled with the current epoch for
// every rank in `in` so that old peers receive a sane value.
// NOTE(review): many lines of each branch are not visible in this listing.
605 void MDSMap::encode(bufferlist
& bl
, uint64_t features
) const
607 std::map
<mds_rank_t
,int32_t> inc
; // Legacy field, fake it so that
608 // old-mon peers have something sane
610 for (const auto rank
: in
) {
611 inc
.insert(std::make_pair(rank
, epoch
));
615 if ((features
& CEPH_FEATURE_PGID64
) == 0) {
620 encode(last_failure
, bl
);
622 encode(session_timeout
, bl
);
623 encode(session_autoclose
, bl
);
624 encode(max_file_size
, bl
);
626 __u32 n
= mds_info
.size();
628 for (map
<mds_gid_t
, mds_info_t
>::const_iterator i
= mds_info
.begin();
629 i
!= mds_info
.end(); ++i
) {
630 encode(i
->first
, bl
);
631 encode(i
->second
, bl
, features
);
633 n
= data_pools
.size();
635 for (const auto p
: data_pools
) {
640 int32_t m
= cas_pool
;
643 } else if ((features
& CEPH_FEATURE_MDSENC
) == 0) {
648 encode(last_failure
, bl
);
650 encode(session_timeout
, bl
);
651 encode(session_autoclose
, bl
);
652 encode(max_file_size
, bl
);
654 __u32 n
= mds_info
.size();
656 for (map
<mds_gid_t
, mds_info_t
>::const_iterator i
= mds_info
.begin();
657 i
!= mds_info
.end(); ++i
) {
658 encode(i
->first
, bl
);
659 encode(i
->second
, bl
, features
);
661 encode(data_pools
, bl
);
662 encode(cas_pool
, bl
);
667 encode(metadata_pool
, bl
);
669 encode(modified
, bl
);
670 encode(tableserver
, bl
);
676 encode(last_failure_osd_epoch
, bl
);
680 ENCODE_START(5, 4, bl
);
683 encode(last_failure
, bl
);
685 encode(session_timeout
, bl
);
686 encode(session_autoclose
, bl
);
687 encode(max_file_size
, bl
);
689 encode(mds_info
, bl
, features
);
690 encode(data_pools
, bl
);
691 encode(cas_pool
, bl
);
696 encode(metadata_pool
, bl
);
698 encode(modified
, bl
);
699 encode(tableserver
, bl
);
705 encode(last_failure_osd_epoch
, bl
);
706 encode(ever_allowed_features
, bl
);
707 encode(explicitly_allowed_features
, bl
);
708 encode(inline_data_enabled
, bl
);
712 encode(balancer
, bl
);
713 encode(standby_count_wanted
, bl
);
714 encode(old_max_mds
, bl
);
715 encode(min_compat_client
, bl
);
// Drop entries from data_pools for which `pool_exists(pool)` returns false,
// logging each removal at debug level 0.
// The loop has no increment in its header and continues from the iterator
// returned by erase(), so erasing does not invalidate the traversal.
// NOTE(review): the branch that advances `it` when the pool does exist is on
// lines not visible in this listing.
719 void MDSMap::sanitize(const std::function
<bool(int64_t pool
)>& pool_exists
)
721 /* Before we did stricter checking, it was possible to remove a data pool
722 * without also deleting it from the MDSMap. Check for that here after
723 * decoding the data pools.
726 for (auto it
= data_pools
.begin(); it
!= data_pools
.end();) {
727 if (!pool_exists(*it
)) {
728 dout(0) << "removed non-existant data pool " << *it
<< " from MDSMap" << dendl
;
729 it
= data_pools
.erase(it
);
// Decode an MDSMap from iterator `p` using
// DECODE_START_LEGACY_COMPAT_LEN_16(5, 4, 4, p).
// cached_up_features is reset to 0 first, so get_up_features() recomputes it
// after a decode. `inc` is a legacy field that is parsed and discarded.
// Handling of older encodings visible here:
//   - compat falls back to get_compat_set_base()
//   - a single boolean snapshot flag is mapped to CEPH_MDSMAP_ALLOW_SNAPS in
//     both allowed-feature masks, or both masks are zeroed
//   - min_compat_client is either ceph_release_t::unknown, converted from a
//     raw value `r`, or decoded directly when `ev` > 14
// NOTE(review): the version tests selecting each path are on lines not
// visible in this listing.
736 void MDSMap::decode(bufferlist::const_iterator
& p
)
738 std::map
<mds_rank_t
,int32_t> inc
; // Legacy field, parse and drop
740 cached_up_features
= 0;
741 DECODE_START_LEGACY_COMPAT_LEN_16(5, 4, 4, p
);
744 decode(last_failure
, p
);
746 decode(session_timeout
, p
);
747 decode(session_autoclose
, p
);
748 decode(max_file_size
, p
);
757 data_pools
.push_back(m
);
763 decode(data_pools
, p
);
767 // kclient ignores everything from here
774 compat
= get_compat_set_base();
780 decode(metadata_pool
, p
);
784 decode(tableserver
, p
);
791 decode(last_failure_osd_epoch
, p
);
794 // previously this was a bool about snaps, not a flag map
797 ever_allowed_features
= flag
? CEPH_MDSMAP_ALLOW_SNAPS
: 0;
799 explicitly_allowed_features
= flag
? CEPH_MDSMAP_ALLOW_SNAPS
: 0;
801 decode(ever_allowed_features
, p
);
802 decode(explicitly_allowed_features
, p
);
805 ever_allowed_features
= 0;
806 explicitly_allowed_features
= 0;
809 decode(inline_data_enabled
, p
);
812 ceph_assert(struct_v
>= 5);
817 // If an MDS has ever been started, epoch will be greater than 1,
818 // assume filesystem is enabled.
821 // Upgrading from a cluster that never used an MDS, switch off
822 // filesystem until it's explicitly enabled.
836 decode(standby_count_wanted
, p
);
840 decode(old_max_mds
, p
);
847 min_compat_client
= ceph_release_t::unknown
;
849 min_compat_client
= ceph_release_t
{static_cast<uint8_t>(r
)};
851 } else if (ev
> 14) {
852 decode(min_compat_client
, p
);
// Classify, from a client's point of view, whether this filesystem can be
// used right now.
// Visible outcomes:
//   - TRANSIENT_UNAVAILABLE for a map never initialized from the mons
//   - STUCK_UNAVAILABLE when any rank is damaged, when no ranks exist, or
//     when an `in` rank that is up is reported laggy()
//   - STUCK_UNAVAILABLE as the final fallback when nobody is active, because
//     standbys are not visible in the map and transient vs. stuck cannot be
//     told apart
// NOTE(review): the value returned when at least one MDS is
// CEPH_MDS_STATE_ACTIVE is on a line not visible in this listing.
858 MDSMap::availability_t
MDSMap::is_cluster_available() const
861 // If I'm a client, this means I'm looking at an MDSMap instance
862 // that was never actually initialized from the mons. Client should
864 return TRANSIENT_UNAVAILABLE
;
867 // If a rank is marked damage (unavailable until operator intervenes)
868 if (damaged
.size()) {
869 return STUCK_UNAVAILABLE
;
872 // If no ranks are created (filesystem not initialized)
874 return STUCK_UNAVAILABLE
;
877 for (const auto rank
: in
) {
878 if (up
.count(rank
) && mds_info
.at(up
.at(rank
)).laggy()) {
879 // This might only be transient, but because we can't see
880 // standbys, we have no way of knowing whether there is a
881 // standby available to replace the laggy guy.
882 return STUCK_UNAVAILABLE
;
886 if (get_num_mds(CEPH_MDS_STATE_ACTIVE
) > 0) {
887 // Nobody looks stuck, so indicate to client they should go ahead
888 // and try mounting if anybody is active. This may include e.g.
889 // one MDS failing over and another active: the client should
890 // proceed to start talking to the active one and let the
891 // transiently-unavailable guy catch up later.
894 // Nothing indicating we were stuck, but nobody active (yet)
895 //return TRANSIENT_UNAVAILABLE;
897 // Because we don't have standbys in the MDSMap any more, we can't
898 // reliably indicate transient vs. stuck, so always say stuck so
899 // that the client doesn't block.
900 return STUCK_UNAVAILABLE
;
// Check whether a daemon may move from state `prev` to state `next`.
// Rules tested on the visible lines:
//   - from STATE_REPLAY, `next` is compared against STATE_RESOLVE and
//     STATE_RECONNECT
//   - from STATE_REJOIN, `next` is compared against STATE_ACTIVE,
//     STATE_CLIENTREPLAY and STATE_STOPPED
//   - from any state in [STATE_RESOLVE, STATE_ACTIVE), `next` is compared
//     against prev + 1, i.e. the following state in the sequence
// state_valid starts as true.
// NOTE(review): the statements executed when a test fails (presumably
// clearing state_valid) and the return are not visible in this listing.
904 bool MDSMap::state_transition_valid(DaemonState prev
, DaemonState next
)
906 bool state_valid
= true;
908 if (prev
== MDSMap::STATE_REPLAY
) {
909 if (next
!= MDSMap::STATE_RESOLVE
&& next
!= MDSMap::STATE_RECONNECT
) {
912 } else if (prev
== MDSMap::STATE_REJOIN
) {
913 if (next
!= MDSMap::STATE_ACTIVE
&&
914 next
!= MDSMap::STATE_CLIENTREPLAY
&&
915 next
!= MDSMap::STATE_STOPPED
) {
918 } else if (prev
>= MDSMap::STATE_RESOLVE
&& prev
< MDSMap::STATE_ACTIVE
) {
919 // Once I have entered replay, the only allowable transitions are to
920 // the next next along in the sequence.
921 if (next
!= prev
+ 1) {
// Adjust standby_count_wanted based on currently available standbys.
// standbys_avail is the number of standby-replay daemons in this map plus
// `standby_daemon_count` supplied by the caller. When standby_count_wanted
// is still -1 (unset), there is at least one active MDS, and at least one
// standby is available, it is set to 1.
// NOTE(review): the return statements are not visible in this listing, so
// the meaning of the bool result cannot be confirmed from here.
930 bool MDSMap::check_health(mds_rank_t standby_daemon_count
)
932 std::set
<mds_rank_t
> standbys
;
933 get_standby_replay_mds_set(standbys
);
934 std::set
<mds_rank_t
> actives
;
935 get_active_mds_set(actives
);
936 mds_rank_t standbys_avail
= (mds_rank_t
)standbys
.size()+standby_daemon_count
;
938 /* If there are standby daemons available/replaying and
939 * standby_count_wanted is unset (default), then we set it to 1. This will
940 * happen during health checks by the mons. Also, during initial creation
941 * of the FS we will have no actives so we don't want to change the default
944 if (standby_count_wanted
== -1 && actives
.size() > 0 && standbys_avail
> 0) {
945 set_standby_count_wanted(1);
// Linear search of mds_info for a daemon whose name equals `s`.
// NOTE(review): the return statements (match and no-match) are on lines not
// visible in this listing.
951 mds_gid_t
MDSMap::find_mds_gid_by_name(std::string_view s
) const {
952 for (const auto& [gid
, info
] : mds_info
) {
953 if (info
.name
== s
) {
// Count the daemons in mds_info whose state equals `state`.
// NOTE(review): the declaration of the counter `n`, the loop condition and
// the return are on lines not visible in this listing.
960 unsigned MDSMap::get_num_mds(int state
) const {
962 for (std::map
<mds_gid_t
,mds_info_t
>::const_iterator p
= mds_info
.begin();
965 if (p
->second
.state
== state
) ++n
;
// Iterates over the `up` map (rank -> gid) with output parameter `s`.
// NOTE(review): only the start of the loop header is visible in this
// listing; presumably each up rank is inserted into `s` — confirm against
// the full source.
969 void MDSMap::get_up_mds_set(std::set
<mds_rank_t
>& s
) const {
970 for (std::map
<mds_rank_t
, mds_gid_t
>::const_iterator p
= up
.begin();
// Return the cached feature mask for the up MDS daemons, computing it on
// demand. When cached_up_features is zero, every entry of `up` is looked up
// in mds_info (asserting that it exists); the visible lines either assign
// the daemon's mds_features to the cache or AND it into the cache, which
// yields the features common to all up daemons.
// A cached value of 0 is treated as "not computed", so a genuinely empty
// intersection is recomputed on every call.
// NOTE(review): the condition choosing between the assignment and the AND
// is on a line not visible in this listing.
976 uint64_t MDSMap::get_up_features() {
977 if (!cached_up_features
) {
979 for (std::map
<mds_rank_t
, mds_gid_t
>::const_iterator p
= up
.begin();
982 std::map
<mds_gid_t
, mds_info_t
>::const_iterator q
=
983 mds_info
.find(p
->second
);
984 ceph_assert(q
!= mds_info
.end());
986 cached_up_features
= q
->second
.mds_features
;
989 cached_up_features
&= q
->second
.mds_features
;
993 return cached_up_features
;
// Collect into `s` the ranks that take part in recovery. The visible lines
// iterate over `damaged` and add the rank of every daemon in mds_info whose
// state lies in [STATE_REPLAY, STATE_STOPPING].
// NOTE(review): the statements on embedded lines 997 and 999 (what is done
// with failed/damaged ranks) are not visible in this listing.
996 void MDSMap::get_recovery_mds_set(std::set
<mds_rank_t
>& s
) const {
998 for (const auto& p
: damaged
)
1000 for (const auto& p
: mds_info
)
1001 if (p
.second
.state
>= STATE_REPLAY
&& p
.second
.state
<= STATE_STOPPING
)
1002 s
.insert(p
.second
.rank
);
// Insert into `s` the rank of every daemon in mds_info whose state lies in
// the inclusive range [first, STATE_STOPPING].
// `s` is only added to; nothing visible here clears it first.
1005 void MDSMap::get_mds_set_lower_bound(std::set
<mds_rank_t
>& s
, DaemonState first
) const {
1006 for (std::map
<mds_gid_t
, mds_info_t
>::const_iterator p
= mds_info
.begin();
1007 p
!= mds_info
.end();
1009 if (p
->second
.state
>= first
&& p
->second
.state
<= STATE_STOPPING
)
1010 s
.insert(p
->second
.rank
);
// Insert into `s` the rank of every daemon in mds_info whose state equals
// `state` exactly.
// `s` is only added to; nothing visible here clears it first.
1013 void MDSMap::get_mds_set(std::set
<mds_rank_t
>& s
, DaemonState state
) const {
1014 for (std::map
<mds_gid_t
, mds_info_t
>::const_iterator p
= mds_info
.begin();
1015 p
!= mds_info
.end();
1017 if (p
->second
.state
== state
)
1018 s
.insert(p
->second
.rank
);
// Look for a daemon in mds_info that is in STATE_STANDBY_REPLAY and follows
// rank `r`. Returns MDS_GID_NONE when no such daemon exists.
// NOTE(review): the return inside the match branch is on a line not visible
// in this listing; presumably it returns the matching gid.
1021 mds_gid_t
MDSMap::get_standby_replay(mds_rank_t r
) const {
1022 for (auto& [gid
,info
] : mds_info
) {
1023 if (info
.rank
== r
&& info
.state
== STATE_STANDBY_REPLAY
) {
1027 return MDS_GID_NONE
;
1030 bool MDSMap::is_degraded() const {
1031 if (!failed
.empty() || !damaged
.empty())
1033 for (const auto& p
: mds_info
) {
1034 if (p
.second
.is_degraded())