]> git.proxmox.com Git - ceph.git/blob - ceph/src/mds/MDSMap.cc
b397eb089e9fa038401efc1af202aa5beb33d758
[ceph.git] / ceph / src / mds / MDSMap.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16 #include "MDSMap.h"
17
18 #include <sstream>
19 using std::stringstream;
20
21
22 // features
23 CompatSet get_mdsmap_compat_set_all() {
24 CompatSet::FeatureSet feature_compat;
25 CompatSet::FeatureSet feature_ro_compat;
26 CompatSet::FeatureSet feature_incompat;
27 feature_incompat.insert(MDS_FEATURE_INCOMPAT_BASE);
28 feature_incompat.insert(MDS_FEATURE_INCOMPAT_CLIENTRANGES);
29 feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILELAYOUT);
30 feature_incompat.insert(MDS_FEATURE_INCOMPAT_DIRINODE);
31 feature_incompat.insert(MDS_FEATURE_INCOMPAT_ENCODING);
32 feature_incompat.insert(MDS_FEATURE_INCOMPAT_OMAPDIRFRAG);
33 feature_incompat.insert(MDS_FEATURE_INCOMPAT_INLINE);
34 feature_incompat.insert(MDS_FEATURE_INCOMPAT_NOANCHOR);
35 feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILE_LAYOUT_V2);
36
37 return CompatSet(feature_compat, feature_ro_compat, feature_incompat);
38 }
39
40 CompatSet get_mdsmap_compat_set_default() {
41 CompatSet::FeatureSet feature_compat;
42 CompatSet::FeatureSet feature_ro_compat;
43 CompatSet::FeatureSet feature_incompat;
44 feature_incompat.insert(MDS_FEATURE_INCOMPAT_BASE);
45 feature_incompat.insert(MDS_FEATURE_INCOMPAT_CLIENTRANGES);
46 feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILELAYOUT);
47 feature_incompat.insert(MDS_FEATURE_INCOMPAT_DIRINODE);
48 feature_incompat.insert(MDS_FEATURE_INCOMPAT_ENCODING);
49 feature_incompat.insert(MDS_FEATURE_INCOMPAT_OMAPDIRFRAG);
50 feature_incompat.insert(MDS_FEATURE_INCOMPAT_NOANCHOR);
51 feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILE_LAYOUT_V2);
52
53 return CompatSet(feature_compat, feature_ro_compat, feature_incompat);
54 }
55
56 // base (pre v0.20)
57 CompatSet get_mdsmap_compat_set_base() {
58 CompatSet::FeatureSet feature_compat_base;
59 CompatSet::FeatureSet feature_incompat_base;
60 feature_incompat_base.insert(MDS_FEATURE_INCOMPAT_BASE);
61 CompatSet::FeatureSet feature_ro_compat_base;
62
63 return CompatSet(feature_compat_base, feature_ro_compat_base, feature_incompat_base);
64 }
65
66 void MDSMap::mds_info_t::dump(Formatter *f) const
67 {
68 f->dump_unsigned("gid", global_id);
69 f->dump_string("name", name);
70 f->dump_int("rank", rank);
71 f->dump_int("incarnation", inc);
72 f->dump_stream("state") << ceph_mds_state_name(state);
73 f->dump_int("state_seq", state_seq);
74 f->dump_stream("addr") << addr;
75 if (laggy_since != utime_t())
76 f->dump_stream("laggy_since") << laggy_since;
77
78 f->dump_int("standby_for_rank", standby_for_rank);
79 f->dump_int("standby_for_fscid", standby_for_fscid);
80 f->dump_string("standby_for_name", standby_for_name);
81 f->dump_bool("standby_replay", standby_replay);
82 f->open_array_section("export_targets");
83 for (set<mds_rank_t>::iterator p = export_targets.begin();
84 p != export_targets.end(); ++p) {
85 f->dump_int("mds", *p);
86 }
87 f->close_section();
88 f->dump_unsigned("features", mds_features);
89 }
90
91 void MDSMap::mds_info_t::print_summary(ostream &out) const
92 {
93 out << global_id << ":\t"
94 << addr
95 << " '" << name << "'"
96 << " mds." << rank
97 << "." << inc
98 << " " << ceph_mds_state_name(state)
99 << " seq " << state_seq;
100 if (laggy()) {
101 out << " laggy since " << laggy_since;
102 }
103 if (standby_for_rank != -1 ||
104 !standby_for_name.empty()) {
105 out << " (standby for";
106 //if (standby_for_rank >= 0)
107 out << " rank " << standby_for_rank;
108 if (!standby_for_name.empty()) {
109 out << " '" << standby_for_name << "'";
110 }
111 out << ")";
112 }
113 if (!export_targets.empty()) {
114 out << " export_targets=" << export_targets;
115 }
116 }
117
118 void MDSMap::mds_info_t::generate_test_instances(list<mds_info_t*>& ls)
119 {
120 mds_info_t *sample = new mds_info_t();
121 ls.push_back(sample);
122 sample = new mds_info_t();
123 sample->global_id = 1;
124 sample->name = "test_instance";
125 sample->rank = 0;
126 ls.push_back(sample);
127 }
128
129 void MDSMap::dump(Formatter *f) const
130 {
131 f->dump_int("epoch", epoch);
132 f->dump_unsigned("flags", flags);
133 f->dump_unsigned("ever_allowed_features", ever_allowed_features);
134 f->dump_unsigned("explicitly_allowed_features", explicitly_allowed_features);
135 f->dump_stream("created") << created;
136 f->dump_stream("modified") << modified;
137 f->dump_int("tableserver", tableserver);
138 f->dump_int("root", root);
139 f->dump_int("session_timeout", session_timeout);
140 f->dump_int("session_autoclose", session_autoclose);
141 f->dump_int("max_file_size", max_file_size);
142 f->dump_int("last_failure", last_failure);
143 f->dump_int("last_failure_osd_epoch", last_failure_osd_epoch);
144 f->open_object_section("compat");
145 compat.dump(f);
146 f->close_section();
147 f->dump_int("max_mds", max_mds);
148 f->open_array_section("in");
149 for (set<mds_rank_t>::const_iterator p = in.begin(); p != in.end(); ++p)
150 f->dump_int("mds", *p);
151 f->close_section();
152 f->open_object_section("up");
153 for (map<mds_rank_t,mds_gid_t>::const_iterator p = up.begin(); p != up.end(); ++p) {
154 char s[14];
155 sprintf(s, "mds_%d", int(p->first));
156 f->dump_int(s, p->second);
157 }
158 f->close_section();
159 f->open_array_section("failed");
160 for (set<mds_rank_t>::const_iterator p = failed.begin(); p != failed.end(); ++p)
161 f->dump_int("mds", *p);
162 f->close_section();
163 f->open_array_section("damaged");
164 for (set<mds_rank_t>::const_iterator p = damaged.begin(); p != damaged.end(); ++p)
165 f->dump_int("mds", *p);
166 f->close_section();
167 f->open_array_section("stopped");
168 for (set<mds_rank_t>::const_iterator p = stopped.begin(); p != stopped.end(); ++p)
169 f->dump_int("mds", *p);
170 f->close_section();
171 f->open_object_section("info");
172 for (map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin(); p != mds_info.end(); ++p) {
173 char s[25]; // 'gid_' + len(str(ULLONG_MAX)) + '\0'
174 sprintf(s, "gid_%llu", (long long unsigned)p->first);
175 f->open_object_section(s);
176 p->second.dump(f);
177 f->close_section();
178 }
179 f->close_section();
180 f->open_array_section("data_pools");
181 for (const auto p: data_pools)
182 f->dump_int("pool", p);
183 f->close_section();
184 f->dump_int("metadata_pool", metadata_pool);
185 f->dump_bool("enabled", enabled);
186 f->dump_string("fs_name", fs_name);
187 f->dump_string("balancer", balancer);
188 f->dump_int("standby_count_wanted", std::max(0, standby_count_wanted));
189 }
190
191 void MDSMap::generate_test_instances(list<MDSMap*>& ls)
192 {
193 MDSMap *m = new MDSMap();
194 m->max_mds = 1;
195 m->data_pools.push_back(0);
196 m->metadata_pool = 1;
197 m->cas_pool = 2;
198 m->compat = get_mdsmap_compat_set_all();
199
200 // these aren't the defaults, just in case anybody gets confused
201 m->session_timeout = 61;
202 m->session_autoclose = 301;
203 m->max_file_size = 1<<24;
204 ls.push_back(m);
205 }
206
207 void MDSMap::print(ostream& out) const
208 {
209 out << "fs_name\t" << fs_name << "\n";
210 out << "epoch\t" << epoch << "\n";
211 out << "flags\t" << hex << flags << dec << "\n";
212 out << "created\t" << created << "\n";
213 out << "modified\t" << modified << "\n";
214 out << "tableserver\t" << tableserver << "\n";
215 out << "root\t" << root << "\n";
216 out << "session_timeout\t" << session_timeout << "\n"
217 << "session_autoclose\t" << session_autoclose << "\n";
218 out << "max_file_size\t" << max_file_size << "\n";
219 out << "last_failure\t" << last_failure << "\n"
220 << "last_failure_osd_epoch\t" << last_failure_osd_epoch << "\n";
221 out << "compat\t" << compat << "\n";
222 out << "max_mds\t" << max_mds << "\n";
223 out << "in\t" << in << "\n"
224 << "up\t" << up << "\n"
225 << "failed\t" << failed << "\n"
226 << "damaged\t" << damaged << "\n"
227 << "stopped\t" << stopped << "\n";
228 out << "data_pools\t" << data_pools << "\n";
229 out << "metadata_pool\t" << metadata_pool << "\n";
230 out << "inline_data\t" << (inline_data_enabled ? "enabled" : "disabled") << "\n";
231 out << "balancer\t" << balancer << "\n";
232 out << "standby_count_wanted\t" << std::max(0, standby_count_wanted) << "\n";
233
234 multimap< pair<mds_rank_t, unsigned>, mds_gid_t > foo;
235 for (const auto &p : mds_info) {
236 foo.insert(std::make_pair(
237 std::make_pair(p.second.rank, p.second.inc-1), p.first));
238 }
239
240 for (const auto &p : foo) {
241 const mds_info_t& info = mds_info.at(p.second);
242 info.print_summary(out);
243 out << "\n";
244 }
245 }
246
247
248
249 void MDSMap::print_summary(Formatter *f, ostream *out) const
250 {
251 map<mds_rank_t,string> by_rank;
252 map<string,int> by_state;
253
254 if (f) {
255 f->dump_unsigned("epoch", get_epoch());
256 f->dump_unsigned("up", up.size());
257 f->dump_unsigned("in", in.size());
258 f->dump_unsigned("max", max_mds);
259 } else {
260 *out << "e" << get_epoch() << ": " << up.size() << "/" << in.size() << "/" << max_mds << " up";
261 }
262
263 if (f)
264 f->open_array_section("by_rank");
265 for (const auto &p : mds_info) {
266 string s = ceph_mds_state_name(p.second.state);
267 if (p.second.laggy())
268 s += "(laggy or crashed)";
269
270 if (p.second.rank >= 0 && p.second.state != MDSMap::STATE_STANDBY_REPLAY) {
271 if (f) {
272 f->open_object_section("mds");
273 f->dump_unsigned("rank", p.second.rank);
274 f->dump_string("name", p.second.name);
275 f->dump_string("status", s);
276 f->close_section();
277 } else {
278 by_rank[p.second.rank] = p.second.name + "=" + s;
279 }
280 } else {
281 by_state[s]++;
282 }
283 }
284 if (f) {
285 f->close_section();
286 } else {
287 if (!by_rank.empty())
288 *out << " " << by_rank;
289 }
290
291 for (map<string,int>::reverse_iterator p = by_state.rbegin(); p != by_state.rend(); ++p) {
292 if (f) {
293 f->dump_unsigned(p->first.c_str(), p->second);
294 } else {
295 *out << ", " << p->second << " " << p->first;
296 }
297 }
298
299 if (!failed.empty()) {
300 if (f) {
301 f->dump_unsigned("failed", failed.size());
302 } else {
303 *out << ", " << failed.size() << " failed";
304 }
305 }
306
307 if (!damaged.empty()) {
308 if (f) {
309 f->dump_unsigned("damaged", damaged.size());
310 } else {
311 *out << ", " << damaged.size() << " damaged";
312 }
313 }
314 //if (stopped.size())
315 //out << ", " << stopped.size() << " stopped";
316 }
317
318 void MDSMap::get_health(list<pair<health_status_t,string> >& summary,
319 list<pair<health_status_t,string> > *detail) const
320 {
321 if (!failed.empty()) {
322 std::ostringstream oss;
323 oss << "mds rank"
324 << ((failed.size() > 1) ? "s ":" ")
325 << failed
326 << ((failed.size() > 1) ? " have":" has")
327 << " failed";
328 summary.push_back(make_pair(HEALTH_ERR, oss.str()));
329 if (detail) {
330 for (set<mds_rank_t>::const_iterator p = failed.begin(); p != failed.end(); ++p) {
331 std::ostringstream oss;
332 oss << "mds." << *p << " has failed";
333 detail->push_back(make_pair(HEALTH_ERR, oss.str()));
334 }
335 }
336 }
337
338 if (!damaged.empty()) {
339 std::ostringstream oss;
340 oss << "mds rank"
341 << ((damaged.size() > 1) ? "s ":" ")
342 << damaged
343 << ((damaged.size() > 1) ? " are":" is")
344 << " damaged";
345 summary.push_back(make_pair(HEALTH_ERR, oss.str()));
346 if (detail) {
347 for (set<mds_rank_t>::const_iterator p = damaged.begin(); p != damaged.end(); ++p) {
348 std::ostringstream oss;
349 oss << "mds." << *p << " is damaged";
350 detail->push_back(make_pair(HEALTH_ERR, oss.str()));
351 }
352 }
353 }
354
355 if (is_degraded()) {
356 summary.push_back(make_pair(HEALTH_WARN, "mds cluster is degraded"));
357 if (detail) {
358 detail->push_back(make_pair(HEALTH_WARN, "mds cluster is degraded"));
359 for (mds_rank_t i = mds_rank_t(0); i< get_max_mds(); i++) {
360 if (!is_up(i))
361 continue;
362 mds_gid_t gid = up.find(i)->second;
363 map<mds_gid_t,mds_info_t>::const_iterator info = mds_info.find(gid);
364 stringstream ss;
365 if (is_resolve(i))
366 ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is resolving";
367 if (is_replay(i))
368 ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is replaying journal";
369 if (is_rejoin(i))
370 ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is rejoining";
371 if (is_reconnect(i))
372 ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is reconnecting to clients";
373 if (ss.str().length())
374 detail->push_back(make_pair(HEALTH_WARN, ss.str()));
375 }
376 }
377 }
378
379 map<mds_gid_t, mds_info_t>::const_iterator m_end = mds_info.end();
380 set<string> laggy;
381 for (const auto &u : up) {
382 map<mds_gid_t, mds_info_t>::const_iterator m = mds_info.find(u.second);
383 if (m == m_end) {
384 std::cerr << "Up rank " << u.first << " GID " << u.second << " not found!" << std::endl;
385 }
386 assert(m != m_end);
387 const mds_info_t &mds_info(m->second);
388 if (mds_info.laggy()) {
389 laggy.insert(mds_info.name);
390 if (detail) {
391 std::ostringstream oss;
392 oss << "mds." << mds_info.name << " at " << mds_info.addr << " is laggy/unresponsive";
393 detail->push_back(make_pair(HEALTH_WARN, oss.str()));
394 }
395 }
396 }
397
398 if (!laggy.empty()) {
399 std::ostringstream oss;
400 oss << "mds " << laggy
401 << ((laggy.size() > 1) ? " are":" is")
402 << " laggy";
403 summary.push_back(make_pair(HEALTH_WARN, oss.str()));
404 }
405 }
406
407 void MDSMap::mds_info_t::encode_versioned(bufferlist& bl, uint64_t features) const
408 {
409 ENCODE_START(7, 4, bl);
410 ::encode(global_id, bl);
411 ::encode(name, bl);
412 ::encode(rank, bl);
413 ::encode(inc, bl);
414 ::encode((int32_t)state, bl);
415 ::encode(state_seq, bl);
416 ::encode(addr, bl, features);
417 ::encode(laggy_since, bl);
418 ::encode(standby_for_rank, bl);
419 ::encode(standby_for_name, bl);
420 ::encode(export_targets, bl);
421 ::encode(mds_features, bl);
422 ::encode(standby_for_fscid, bl);
423 ::encode(standby_replay, bl);
424 ENCODE_FINISH(bl);
425 }
426
427 void MDSMap::mds_info_t::encode_unversioned(bufferlist& bl) const
428 {
429 __u8 struct_v = 3;
430 ::encode(struct_v, bl);
431 ::encode(global_id, bl);
432 ::encode(name, bl);
433 ::encode(rank, bl);
434 ::encode(inc, bl);
435 ::encode((int32_t)state, bl);
436 ::encode(state_seq, bl);
437 ::encode(addr, bl, 0);
438 ::encode(laggy_since, bl);
439 ::encode(standby_for_rank, bl);
440 ::encode(standby_for_name, bl);
441 ::encode(export_targets, bl);
442 }
443
444 void MDSMap::mds_info_t::decode(bufferlist::iterator& bl)
445 {
446 DECODE_START_LEGACY_COMPAT_LEN(7, 4, 4, bl);
447 ::decode(global_id, bl);
448 ::decode(name, bl);
449 ::decode(rank, bl);
450 ::decode(inc, bl);
451 ::decode((int32_t&)(state), bl);
452 ::decode(state_seq, bl);
453 ::decode(addr, bl);
454 ::decode(laggy_since, bl);
455 ::decode(standby_for_rank, bl);
456 ::decode(standby_for_name, bl);
457 if (struct_v >= 2)
458 ::decode(export_targets, bl);
459 if (struct_v >= 5)
460 ::decode(mds_features, bl);
461 if (struct_v >= 6) {
462 ::decode(standby_for_fscid, bl);
463 }
464 if (struct_v >= 7) {
465 ::decode(standby_replay, bl);
466 }
467 DECODE_FINISH(bl);
468 }
469
470
471
472 void MDSMap::encode(bufferlist& bl, uint64_t features) const
473 {
474 std::map<mds_rank_t,int32_t> inc; // Legacy field, fake it so that
475 // old-mon peers have something sane
476 // during upgrade
477 for (const auto rank : in) {
478 inc.insert(std::make_pair(rank, epoch));
479 }
480
481 if ((features & CEPH_FEATURE_PGID64) == 0) {
482 __u16 v = 2;
483 ::encode(v, bl);
484 ::encode(epoch, bl);
485 ::encode(flags, bl);
486 ::encode(last_failure, bl);
487 ::encode(root, bl);
488 ::encode(session_timeout, bl);
489 ::encode(session_autoclose, bl);
490 ::encode(max_file_size, bl);
491 ::encode(max_mds, bl);
492 __u32 n = mds_info.size();
493 ::encode(n, bl);
494 for (map<mds_gid_t, mds_info_t>::const_iterator i = mds_info.begin();
495 i != mds_info.end(); ++i) {
496 ::encode(i->first, bl);
497 ::encode(i->second, bl, features);
498 }
499 n = data_pools.size();
500 ::encode(n, bl);
501 for (const auto p: data_pools) {
502 n = p;
503 ::encode(n, bl);
504 }
505
506 int32_t m = cas_pool;
507 ::encode(m, bl);
508 return;
509 } else if ((features & CEPH_FEATURE_MDSENC) == 0) {
510 __u16 v = 3;
511 ::encode(v, bl);
512 ::encode(epoch, bl);
513 ::encode(flags, bl);
514 ::encode(last_failure, bl);
515 ::encode(root, bl);
516 ::encode(session_timeout, bl);
517 ::encode(session_autoclose, bl);
518 ::encode(max_file_size, bl);
519 ::encode(max_mds, bl);
520 __u32 n = mds_info.size();
521 ::encode(n, bl);
522 for (map<mds_gid_t, mds_info_t>::const_iterator i = mds_info.begin();
523 i != mds_info.end(); ++i) {
524 ::encode(i->first, bl);
525 ::encode(i->second, bl, features);
526 }
527 ::encode(data_pools, bl);
528 ::encode(cas_pool, bl);
529
530 // kclient ignores everything from here
531 __u16 ev = 5;
532 ::encode(ev, bl);
533 ::encode(compat, bl);
534 ::encode(metadata_pool, bl);
535 ::encode(created, bl);
536 ::encode(modified, bl);
537 ::encode(tableserver, bl);
538 ::encode(in, bl);
539 ::encode(inc, bl);
540 ::encode(up, bl);
541 ::encode(failed, bl);
542 ::encode(stopped, bl);
543 ::encode(last_failure_osd_epoch, bl);
544 return;
545 }
546
547 ENCODE_START(5, 4, bl);
548 ::encode(epoch, bl);
549 ::encode(flags, bl);
550 ::encode(last_failure, bl);
551 ::encode(root, bl);
552 ::encode(session_timeout, bl);
553 ::encode(session_autoclose, bl);
554 ::encode(max_file_size, bl);
555 ::encode(max_mds, bl);
556 ::encode(mds_info, bl, features);
557 ::encode(data_pools, bl);
558 ::encode(cas_pool, bl);
559
560 // kclient ignores everything from here
561 __u16 ev = 12;
562 ::encode(ev, bl);
563 ::encode(compat, bl);
564 ::encode(metadata_pool, bl);
565 ::encode(created, bl);
566 ::encode(modified, bl);
567 ::encode(tableserver, bl);
568 ::encode(in, bl);
569 ::encode(inc, bl);
570 ::encode(up, bl);
571 ::encode(failed, bl);
572 ::encode(stopped, bl);
573 ::encode(last_failure_osd_epoch, bl);
574 ::encode(ever_allowed_features, bl);
575 ::encode(explicitly_allowed_features, bl);
576 ::encode(inline_data_enabled, bl);
577 ::encode(enabled, bl);
578 ::encode(fs_name, bl);
579 ::encode(damaged, bl);
580 ::encode(balancer, bl);
581 ::encode(standby_count_wanted, bl);
582 ENCODE_FINISH(bl);
583 }
584
585 void MDSMap::decode(bufferlist::iterator& p)
586 {
587 std::map<mds_rank_t,int32_t> inc; // Legacy field, parse and drop
588
589 cached_up_features = 0;
590 DECODE_START_LEGACY_COMPAT_LEN_16(5, 4, 4, p);
591 ::decode(epoch, p);
592 ::decode(flags, p);
593 ::decode(last_failure, p);
594 ::decode(root, p);
595 ::decode(session_timeout, p);
596 ::decode(session_autoclose, p);
597 ::decode(max_file_size, p);
598 ::decode(max_mds, p);
599 ::decode(mds_info, p);
600 if (struct_v < 3) {
601 __u32 n;
602 ::decode(n, p);
603 while (n--) {
604 __u32 m;
605 ::decode(m, p);
606 data_pools.push_back(m);
607 }
608 __s32 s;
609 ::decode(s, p);
610 cas_pool = s;
611 } else {
612 ::decode(data_pools, p);
613 ::decode(cas_pool, p);
614 }
615
616 // kclient ignores everything from here
617 __u16 ev = 1;
618 if (struct_v >= 2)
619 ::decode(ev, p);
620 if (ev >= 3)
621 ::decode(compat, p);
622 else
623 compat = get_mdsmap_compat_set_base();
624 if (ev < 5) {
625 __u32 n;
626 ::decode(n, p);
627 metadata_pool = n;
628 } else {
629 ::decode(metadata_pool, p);
630 }
631 ::decode(created, p);
632 ::decode(modified, p);
633 ::decode(tableserver, p);
634 ::decode(in, p);
635 ::decode(inc, p);
636 ::decode(up, p);
637 ::decode(failed, p);
638 ::decode(stopped, p);
639 if (ev >= 4)
640 ::decode(last_failure_osd_epoch, p);
641 if (ev >= 6) {
642 if (ev < 10) {
643 // previously this was a bool about snaps, not a flag map
644 bool flag;
645 ::decode(flag, p);
646 ever_allowed_features = flag ? CEPH_MDSMAP_ALLOW_SNAPS : 0;
647 ever_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS|CEPH_MDSMAP_ALLOW_DIRFRAGS;
648 ::decode(flag, p);
649 explicitly_allowed_features = flag ? CEPH_MDSMAP_ALLOW_SNAPS : 0;
650 if (max_mds > 1) {
651 set_multimds_allowed();
652 }
653 } else {
654 ::decode(ever_allowed_features, p);
655 ::decode(explicitly_allowed_features, p);
656 }
657 } else {
658 ever_allowed_features = CEPH_MDSMAP_ALLOW_CLASSICS;
659 explicitly_allowed_features = 0;
660 if (max_mds > 1) {
661 set_multimds_allowed();
662 }
663 }
664 if (ev >= 7)
665 ::decode(inline_data_enabled, p);
666
667 if (ev >= 8) {
668 assert(struct_v >= 5);
669 ::decode(enabled, p);
670 ::decode(fs_name, p);
671 } else {
672 if (epoch > 1) {
673 // If an MDS has ever been started, epoch will be greater than 1,
674 // assume filesystem is enabled.
675 enabled = true;
676 } else {
677 // Upgrading from a cluster that never used an MDS, switch off
678 // filesystem until it's explicitly enabled.
679 enabled = false;
680 }
681 }
682
683 if (ev >= 9) {
684 ::decode(damaged, p);
685 }
686
687 if (ev >= 11) {
688 ::decode(balancer, p);
689 }
690
691 if (ev >= 12) {
692 ::decode(standby_count_wanted, p);
693 }
694
695 DECODE_FINISH(p);
696 }
697
698 MDSMap::availability_t MDSMap::is_cluster_available() const
699 {
700 if (epoch == 0) {
701 // If I'm a client, this means I'm looking at an MDSMap instance
702 // that was never actually initialized from the mons. Client should
703 // wait.
704 return TRANSIENT_UNAVAILABLE;
705 }
706
707 // If a rank is marked damage (unavailable until operator intervenes)
708 if (damaged.size()) {
709 return STUCK_UNAVAILABLE;
710 }
711
712 // If no ranks are created (filesystem not initialized)
713 if (in.empty()) {
714 return STUCK_UNAVAILABLE;
715 }
716
717 for (const auto rank : in) {
718 if (up.count(rank) && mds_info.at(up.at(rank)).laggy()) {
719 // This might only be transient, but because we can't see
720 // standbys, we have no way of knowing whether there is a
721 // standby available to replace the laggy guy.
722 return STUCK_UNAVAILABLE;
723 }
724 }
725
726 if (get_num_mds(CEPH_MDS_STATE_ACTIVE) > 0) {
727 // Nobody looks stuck, so indicate to client they should go ahead
728 // and try mounting if anybody is active. This may include e.g.
729 // one MDS failing over and another active: the client should
730 // proceed to start talking to the active one and let the
731 // transiently-unavailable guy catch up later.
732 return AVAILABLE;
733 } else {
734 // Nothing indicating we were stuck, but nobody active (yet)
735 //return TRANSIENT_UNAVAILABLE;
736
737 // Because we don't have standbys in the MDSMap any more, we can't
738 // reliably indicate transient vs. stuck, so always say stuck so
739 // that the client doesn't block.
740 return STUCK_UNAVAILABLE;
741 }
742 }
743
744 bool MDSMap::state_transition_valid(DaemonState prev, DaemonState next)
745 {
746 bool state_valid = true;
747 if (next != prev) {
748 if (prev == MDSMap::STATE_REPLAY) {
749 if (next != MDSMap::STATE_RESOLVE && next != MDSMap::STATE_RECONNECT) {
750 state_valid = false;
751 }
752 } else if (prev == MDSMap::STATE_REJOIN) {
753 if (next != MDSMap::STATE_ACTIVE
754 && next != MDSMap::STATE_CLIENTREPLAY
755 && next != MDSMap::STATE_STOPPED) {
756 state_valid = false;
757 }
758 } else if (prev >= MDSMap::STATE_RECONNECT && prev < MDSMap::STATE_ACTIVE) {
759 // Once I have entered replay, the only allowable transitions are to
760 // the next next along in the sequence.
761 if (next != prev + 1) {
762 state_valid = false;
763 }
764 }
765 }
766
767 return state_valid;
768 }
769
770 bool MDSMap::check_health(mds_rank_t standby_daemon_count)
771 {
772 std::set<mds_rank_t> standbys;
773 get_standby_replay_mds_set(standbys);
774 std::set<mds_rank_t> actives;
775 get_active_mds_set(actives);
776 mds_rank_t standbys_avail = (mds_rank_t)standbys.size()+standby_daemon_count;
777
778 /* If there are standby daemons available/replaying and
779 * standby_count_wanted is unset (default), then we set it to 1. This will
780 * happen during health checks by the mons. Also, during initial creation
781 * of the FS we will have no actives so we don't want to change the default
782 * yet.
783 */
784 if (standby_count_wanted == -1 && actives.size() > 0 && standbys_avail > 0) {
785 set_standby_count_wanted(1);
786 return true;
787 }
788 return false;
789 }