]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/MDSMap.cc
update sources to v12.1.1
[ceph.git] / ceph / src / mds / MDSMap.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16#include "MDSMap.h"
17
18#include <sstream>
19using std::stringstream;
20
224ce89b
WB
21#include "mon/health_check.h"
22
7c673cae
FG
23
24// features
25CompatSet get_mdsmap_compat_set_all() {
26 CompatSet::FeatureSet feature_compat;
27 CompatSet::FeatureSet feature_ro_compat;
28 CompatSet::FeatureSet feature_incompat;
29 feature_incompat.insert(MDS_FEATURE_INCOMPAT_BASE);
30 feature_incompat.insert(MDS_FEATURE_INCOMPAT_CLIENTRANGES);
31 feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILELAYOUT);
32 feature_incompat.insert(MDS_FEATURE_INCOMPAT_DIRINODE);
33 feature_incompat.insert(MDS_FEATURE_INCOMPAT_ENCODING);
34 feature_incompat.insert(MDS_FEATURE_INCOMPAT_OMAPDIRFRAG);
35 feature_incompat.insert(MDS_FEATURE_INCOMPAT_INLINE);
36 feature_incompat.insert(MDS_FEATURE_INCOMPAT_NOANCHOR);
37 feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILE_LAYOUT_V2);
38
39 return CompatSet(feature_compat, feature_ro_compat, feature_incompat);
40}
41
42CompatSet get_mdsmap_compat_set_default() {
43 CompatSet::FeatureSet feature_compat;
44 CompatSet::FeatureSet feature_ro_compat;
45 CompatSet::FeatureSet feature_incompat;
46 feature_incompat.insert(MDS_FEATURE_INCOMPAT_BASE);
47 feature_incompat.insert(MDS_FEATURE_INCOMPAT_CLIENTRANGES);
48 feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILELAYOUT);
49 feature_incompat.insert(MDS_FEATURE_INCOMPAT_DIRINODE);
50 feature_incompat.insert(MDS_FEATURE_INCOMPAT_ENCODING);
51 feature_incompat.insert(MDS_FEATURE_INCOMPAT_OMAPDIRFRAG);
52 feature_incompat.insert(MDS_FEATURE_INCOMPAT_NOANCHOR);
53 feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILE_LAYOUT_V2);
54
55 return CompatSet(feature_compat, feature_ro_compat, feature_incompat);
56}
57
58// base (pre v0.20)
59CompatSet get_mdsmap_compat_set_base() {
60 CompatSet::FeatureSet feature_compat_base;
61 CompatSet::FeatureSet feature_incompat_base;
62 feature_incompat_base.insert(MDS_FEATURE_INCOMPAT_BASE);
63 CompatSet::FeatureSet feature_ro_compat_base;
64
65 return CompatSet(feature_compat_base, feature_ro_compat_base, feature_incompat_base);
66}
67
68void MDSMap::mds_info_t::dump(Formatter *f) const
69{
70 f->dump_unsigned("gid", global_id);
71 f->dump_string("name", name);
72 f->dump_int("rank", rank);
73 f->dump_int("incarnation", inc);
74 f->dump_stream("state") << ceph_mds_state_name(state);
75 f->dump_int("state_seq", state_seq);
76 f->dump_stream("addr") << addr;
77 if (laggy_since != utime_t())
78 f->dump_stream("laggy_since") << laggy_since;
79
80 f->dump_int("standby_for_rank", standby_for_rank);
81 f->dump_int("standby_for_fscid", standby_for_fscid);
82 f->dump_string("standby_for_name", standby_for_name);
83 f->dump_bool("standby_replay", standby_replay);
84 f->open_array_section("export_targets");
85 for (set<mds_rank_t>::iterator p = export_targets.begin();
86 p != export_targets.end(); ++p) {
87 f->dump_int("mds", *p);
88 }
89 f->close_section();
90 f->dump_unsigned("features", mds_features);
91}
92
93void MDSMap::mds_info_t::print_summary(ostream &out) const
94{
95 out << global_id << ":\t"
96 << addr
97 << " '" << name << "'"
98 << " mds." << rank
99 << "." << inc
100 << " " << ceph_mds_state_name(state)
101 << " seq " << state_seq;
102 if (laggy()) {
103 out << " laggy since " << laggy_since;
104 }
105 if (standby_for_rank != -1 ||
106 !standby_for_name.empty()) {
107 out << " (standby for";
108 //if (standby_for_rank >= 0)
109 out << " rank " << standby_for_rank;
110 if (!standby_for_name.empty()) {
111 out << " '" << standby_for_name << "'";
112 }
113 out << ")";
114 }
115 if (!export_targets.empty()) {
116 out << " export_targets=" << export_targets;
117 }
118}
119
120void MDSMap::mds_info_t::generate_test_instances(list<mds_info_t*>& ls)
121{
122 mds_info_t *sample = new mds_info_t();
123 ls.push_back(sample);
124 sample = new mds_info_t();
125 sample->global_id = 1;
126 sample->name = "test_instance";
127 sample->rank = 0;
128 ls.push_back(sample);
129}
130
131void MDSMap::dump(Formatter *f) const
132{
133 f->dump_int("epoch", epoch);
134 f->dump_unsigned("flags", flags);
135 f->dump_unsigned("ever_allowed_features", ever_allowed_features);
136 f->dump_unsigned("explicitly_allowed_features", explicitly_allowed_features);
137 f->dump_stream("created") << created;
138 f->dump_stream("modified") << modified;
139 f->dump_int("tableserver", tableserver);
140 f->dump_int("root", root);
141 f->dump_int("session_timeout", session_timeout);
142 f->dump_int("session_autoclose", session_autoclose);
143 f->dump_int("max_file_size", max_file_size);
144 f->dump_int("last_failure", last_failure);
145 f->dump_int("last_failure_osd_epoch", last_failure_osd_epoch);
146 f->open_object_section("compat");
147 compat.dump(f);
148 f->close_section();
149 f->dump_int("max_mds", max_mds);
150 f->open_array_section("in");
151 for (set<mds_rank_t>::const_iterator p = in.begin(); p != in.end(); ++p)
152 f->dump_int("mds", *p);
153 f->close_section();
154 f->open_object_section("up");
155 for (map<mds_rank_t,mds_gid_t>::const_iterator p = up.begin(); p != up.end(); ++p) {
156 char s[14];
157 sprintf(s, "mds_%d", int(p->first));
158 f->dump_int(s, p->second);
159 }
160 f->close_section();
161 f->open_array_section("failed");
162 for (set<mds_rank_t>::const_iterator p = failed.begin(); p != failed.end(); ++p)
163 f->dump_int("mds", *p);
164 f->close_section();
165 f->open_array_section("damaged");
166 for (set<mds_rank_t>::const_iterator p = damaged.begin(); p != damaged.end(); ++p)
167 f->dump_int("mds", *p);
168 f->close_section();
169 f->open_array_section("stopped");
170 for (set<mds_rank_t>::const_iterator p = stopped.begin(); p != stopped.end(); ++p)
171 f->dump_int("mds", *p);
172 f->close_section();
173 f->open_object_section("info");
174 for (map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin(); p != mds_info.end(); ++p) {
175 char s[25]; // 'gid_' + len(str(ULLONG_MAX)) + '\0'
176 sprintf(s, "gid_%llu", (long long unsigned)p->first);
177 f->open_object_section(s);
178 p->second.dump(f);
179 f->close_section();
180 }
181 f->close_section();
182 f->open_array_section("data_pools");
31f18b77
FG
183 for (const auto p: data_pools)
184 f->dump_int("pool", p);
7c673cae
FG
185 f->close_section();
186 f->dump_int("metadata_pool", metadata_pool);
187 f->dump_bool("enabled", enabled);
188 f->dump_string("fs_name", fs_name);
189 f->dump_string("balancer", balancer);
190 f->dump_int("standby_count_wanted", std::max(0, standby_count_wanted));
191}
192
193void MDSMap::generate_test_instances(list<MDSMap*>& ls)
194{
195 MDSMap *m = new MDSMap();
196 m->max_mds = 1;
31f18b77 197 m->data_pools.push_back(0);
7c673cae
FG
198 m->metadata_pool = 1;
199 m->cas_pool = 2;
200 m->compat = get_mdsmap_compat_set_all();
201
202 // these aren't the defaults, just in case anybody gets confused
203 m->session_timeout = 61;
204 m->session_autoclose = 301;
205 m->max_file_size = 1<<24;
206 ls.push_back(m);
207}
208
209void MDSMap::print(ostream& out) const
210{
211 out << "fs_name\t" << fs_name << "\n";
212 out << "epoch\t" << epoch << "\n";
213 out << "flags\t" << hex << flags << dec << "\n";
214 out << "created\t" << created << "\n";
215 out << "modified\t" << modified << "\n";
216 out << "tableserver\t" << tableserver << "\n";
217 out << "root\t" << root << "\n";
218 out << "session_timeout\t" << session_timeout << "\n"
219 << "session_autoclose\t" << session_autoclose << "\n";
220 out << "max_file_size\t" << max_file_size << "\n";
221 out << "last_failure\t" << last_failure << "\n"
222 << "last_failure_osd_epoch\t" << last_failure_osd_epoch << "\n";
223 out << "compat\t" << compat << "\n";
224 out << "max_mds\t" << max_mds << "\n";
225 out << "in\t" << in << "\n"
226 << "up\t" << up << "\n"
227 << "failed\t" << failed << "\n"
228 << "damaged\t" << damaged << "\n"
229 << "stopped\t" << stopped << "\n";
230 out << "data_pools\t" << data_pools << "\n";
231 out << "metadata_pool\t" << metadata_pool << "\n";
232 out << "inline_data\t" << (inline_data_enabled ? "enabled" : "disabled") << "\n";
233 out << "balancer\t" << balancer << "\n";
234 out << "standby_count_wanted\t" << std::max(0, standby_count_wanted) << "\n";
235
236 multimap< pair<mds_rank_t, unsigned>, mds_gid_t > foo;
237 for (const auto &p : mds_info) {
238 foo.insert(std::make_pair(
239 std::make_pair(p.second.rank, p.second.inc-1), p.first));
240 }
241
242 for (const auto &p : foo) {
243 const mds_info_t& info = mds_info.at(p.second);
244 info.print_summary(out);
245 out << "\n";
246 }
247}
248
249
250
251void MDSMap::print_summary(Formatter *f, ostream *out) const
252{
253 map<mds_rank_t,string> by_rank;
254 map<string,int> by_state;
255
256 if (f) {
257 f->dump_unsigned("epoch", get_epoch());
258 f->dump_unsigned("up", up.size());
259 f->dump_unsigned("in", in.size());
260 f->dump_unsigned("max", max_mds);
261 } else {
262 *out << "e" << get_epoch() << ": " << up.size() << "/" << in.size() << "/" << max_mds << " up";
263 }
264
265 if (f)
266 f->open_array_section("by_rank");
267 for (const auto &p : mds_info) {
268 string s = ceph_mds_state_name(p.second.state);
269 if (p.second.laggy())
270 s += "(laggy or crashed)";
271
272 if (p.second.rank >= 0 && p.second.state != MDSMap::STATE_STANDBY_REPLAY) {
273 if (f) {
274 f->open_object_section("mds");
275 f->dump_unsigned("rank", p.second.rank);
276 f->dump_string("name", p.second.name);
277 f->dump_string("status", s);
278 f->close_section();
279 } else {
280 by_rank[p.second.rank] = p.second.name + "=" + s;
281 }
282 } else {
283 by_state[s]++;
284 }
285 }
286 if (f) {
287 f->close_section();
288 } else {
289 if (!by_rank.empty())
290 *out << " " << by_rank;
291 }
292
293 for (map<string,int>::reverse_iterator p = by_state.rbegin(); p != by_state.rend(); ++p) {
294 if (f) {
295 f->dump_unsigned(p->first.c_str(), p->second);
296 } else {
297 *out << ", " << p->second << " " << p->first;
298 }
299 }
300
301 if (!failed.empty()) {
302 if (f) {
303 f->dump_unsigned("failed", failed.size());
304 } else {
305 *out << ", " << failed.size() << " failed";
306 }
307 }
308
309 if (!damaged.empty()) {
310 if (f) {
311 f->dump_unsigned("damaged", damaged.size());
312 } else {
313 *out << ", " << damaged.size() << " damaged";
314 }
315 }
316 //if (stopped.size())
317 //out << ", " << stopped.size() << " stopped";
318}
319
320void MDSMap::get_health(list<pair<health_status_t,string> >& summary,
321 list<pair<health_status_t,string> > *detail) const
322{
323 if (!failed.empty()) {
324 std::ostringstream oss;
325 oss << "mds rank"
326 << ((failed.size() > 1) ? "s ":" ")
327 << failed
328 << ((failed.size() > 1) ? " have":" has")
329 << " failed";
330 summary.push_back(make_pair(HEALTH_ERR, oss.str()));
331 if (detail) {
332 for (set<mds_rank_t>::const_iterator p = failed.begin(); p != failed.end(); ++p) {
333 std::ostringstream oss;
334 oss << "mds." << *p << " has failed";
335 detail->push_back(make_pair(HEALTH_ERR, oss.str()));
336 }
337 }
338 }
339
340 if (!damaged.empty()) {
341 std::ostringstream oss;
342 oss << "mds rank"
343 << ((damaged.size() > 1) ? "s ":" ")
344 << damaged
345 << ((damaged.size() > 1) ? " are":" is")
346 << " damaged";
347 summary.push_back(make_pair(HEALTH_ERR, oss.str()));
348 if (detail) {
349 for (set<mds_rank_t>::const_iterator p = damaged.begin(); p != damaged.end(); ++p) {
350 std::ostringstream oss;
351 oss << "mds." << *p << " is damaged";
352 detail->push_back(make_pair(HEALTH_ERR, oss.str()));
353 }
354 }
355 }
356
357 if (is_degraded()) {
358 summary.push_back(make_pair(HEALTH_WARN, "mds cluster is degraded"));
359 if (detail) {
360 detail->push_back(make_pair(HEALTH_WARN, "mds cluster is degraded"));
361 for (mds_rank_t i = mds_rank_t(0); i< get_max_mds(); i++) {
362 if (!is_up(i))
363 continue;
364 mds_gid_t gid = up.find(i)->second;
365 map<mds_gid_t,mds_info_t>::const_iterator info = mds_info.find(gid);
366 stringstream ss;
367 if (is_resolve(i))
368 ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is resolving";
369 if (is_replay(i))
370 ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is replaying journal";
371 if (is_rejoin(i))
372 ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is rejoining";
373 if (is_reconnect(i))
374 ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is reconnecting to clients";
375 if (ss.str().length())
376 detail->push_back(make_pair(HEALTH_WARN, ss.str()));
377 }
378 }
379 }
380
381 map<mds_gid_t, mds_info_t>::const_iterator m_end = mds_info.end();
382 set<string> laggy;
383 for (const auto &u : up) {
384 map<mds_gid_t, mds_info_t>::const_iterator m = mds_info.find(u.second);
385 if (m == m_end) {
386 std::cerr << "Up rank " << u.first << " GID " << u.second << " not found!" << std::endl;
387 }
388 assert(m != m_end);
389 const mds_info_t &mds_info(m->second);
390 if (mds_info.laggy()) {
391 laggy.insert(mds_info.name);
392 if (detail) {
393 std::ostringstream oss;
394 oss << "mds." << mds_info.name << " at " << mds_info.addr << " is laggy/unresponsive";
395 detail->push_back(make_pair(HEALTH_WARN, oss.str()));
396 }
397 }
398 }
399
400 if (!laggy.empty()) {
401 std::ostringstream oss;
402 oss << "mds " << laggy
403 << ((laggy.size() > 1) ? " are":" is")
404 << " laggy";
405 summary.push_back(make_pair(HEALTH_WARN, oss.str()));
406 }
407}
408
224ce89b
WB
409void MDSMap::get_health_checks(health_check_map_t *checks) const
410{
411 // FS_WITH_FAILED_MDS
412 // MDS_FAILED
413 if (!failed.empty()) {
414 health_check_t& fscheck = checks->add(
415 "FS_WITH_FAILED_MDS", HEALTH_WARN,
416 "%num% filesystem%plurals% %isorare% have a failed mds daemon");
417 ostringstream ss;
418 ss << "fs " << fs_name << " has " << failed.size() << " failed mds"
419 << (failed.size() > 1 ? "s" : "");
420 fscheck.detail.push_back(ss.str());
421
422 health_check_t& check = checks->add("MDS_FAILED", HEALTH_ERR,
423 "%num% mds daemon%plurals% down");
424 for (auto p : failed) {
425 std::ostringstream oss;
426 oss << "fs " << fs_name << " mds." << p << " has failed";
427 check.detail.push_back(oss.str());
428 }
429 }
430
431 // MDS_DAMAGED
432 if (!damaged.empty()) {
433 health_check_t& check = checks->add("MDS_DAMAGED", HEALTH_ERR,
434 "%num% mds daemon%plurals% damaged");
435 for (auto p : damaged) {
436 std::ostringstream oss;
437 oss << "fs " << fs_name << " mds." << p << " is damaged";
438 check.detail.push_back(oss.str());
439 }
440 }
441
442 // FS_DEGRADED
443 // MDS_DEGRADED
444 if (is_degraded()) {
445 health_check_t& fscheck = checks->add(
446 "FS_DEGRADED", HEALTH_WARN,
447 "%num% filesystem%plurals% %isorare% degraded");
448 ostringstream ss;
449 ss << "fs " << fs_name << " is degraded";
450 fscheck.detail.push_back(ss.str());
451
452 list<string> detail;
453 for (mds_rank_t i = mds_rank_t(0); i< get_max_mds(); i++) {
454 if (!is_up(i))
455 continue;
456 mds_gid_t gid = up.find(i)->second;
457 map<mds_gid_t,mds_info_t>::const_iterator info = mds_info.find(gid);
458 stringstream ss;
459 ss << "fs " << fs_name << " mds." << info->second.name << " at "
460 << info->second.addr << " rank " << i;
461 if (is_resolve(i))
462 ss << " is resolving";
463 if (is_replay(i))
464 ss << " is replaying journal";
465 if (is_rejoin(i))
466 ss << " is rejoining";
467 if (is_reconnect(i))
468 ss << " is reconnecting to clients";
469 if (ss.str().length())
470 detail.push_back(ss.str());
471 }
472 if (!detail.empty()) {
473 health_check_t& check = checks->add(
474 "MDS_DEGRADED", HEALTH_WARN,
475 "%num% mds daemon%plurals% %isorare% degraded");
476 check.detail.insert(check.detail.end(), detail.begin(), detail.end());
477 }
478 }
479}
480
7c673cae
FG
481void MDSMap::mds_info_t::encode_versioned(bufferlist& bl, uint64_t features) const
482{
483 ENCODE_START(7, 4, bl);
484 ::encode(global_id, bl);
485 ::encode(name, bl);
486 ::encode(rank, bl);
487 ::encode(inc, bl);
488 ::encode((int32_t)state, bl);
489 ::encode(state_seq, bl);
490 ::encode(addr, bl, features);
491 ::encode(laggy_since, bl);
492 ::encode(standby_for_rank, bl);
493 ::encode(standby_for_name, bl);
494 ::encode(export_targets, bl);
495 ::encode(mds_features, bl);
496 ::encode(standby_for_fscid, bl);
497 ::encode(standby_replay, bl);
498 ENCODE_FINISH(bl);
499}
500
501void MDSMap::mds_info_t::encode_unversioned(bufferlist& bl) const
502{
503 __u8 struct_v = 3;
504 ::encode(struct_v, bl);
505 ::encode(global_id, bl);
506 ::encode(name, bl);
507 ::encode(rank, bl);
508 ::encode(inc, bl);
509 ::encode((int32_t)state, bl);
510 ::encode(state_seq, bl);
511 ::encode(addr, bl, 0);
512 ::encode(laggy_since, bl);
513 ::encode(standby_for_rank, bl);
514 ::encode(standby_for_name, bl);
515 ::encode(export_targets, bl);
516}
517
518void MDSMap::mds_info_t::decode(bufferlist::iterator& bl)
519{
520 DECODE_START_LEGACY_COMPAT_LEN(7, 4, 4, bl);
521 ::decode(global_id, bl);
522 ::decode(name, bl);
523 ::decode(rank, bl);
524 ::decode(inc, bl);
525 ::decode((int32_t&)(state), bl);
526 ::decode(state_seq, bl);
527 ::decode(addr, bl);
528 ::decode(laggy_since, bl);
529 ::decode(standby_for_rank, bl);
530 ::decode(standby_for_name, bl);
531 if (struct_v >= 2)
532 ::decode(export_targets, bl);
533 if (struct_v >= 5)
534 ::decode(mds_features, bl);
535 if (struct_v >= 6) {
536 ::decode(standby_for_fscid, bl);
537 }
538 if (struct_v >= 7) {
539 ::decode(standby_replay, bl);
540 }
541 DECODE_FINISH(bl);
542}
543
544
545
546void MDSMap::encode(bufferlist& bl, uint64_t features) const
547{
548 std::map<mds_rank_t,int32_t> inc; // Legacy field, fake it so that
549 // old-mon peers have something sane
550 // during upgrade
551 for (const auto rank : in) {
552 inc.insert(std::make_pair(rank, epoch));
553 }
554
555 if ((features & CEPH_FEATURE_PGID64) == 0) {
556 __u16 v = 2;
557 ::encode(v, bl);
558 ::encode(epoch, bl);
559 ::encode(flags, bl);
560 ::encode(last_failure, bl);
561 ::encode(root, bl);
562 ::encode(session_timeout, bl);
563 ::encode(session_autoclose, bl);
564 ::encode(max_file_size, bl);
565 ::encode(max_mds, bl);
566 __u32 n = mds_info.size();
567 ::encode(n, bl);
568 for (map<mds_gid_t, mds_info_t>::const_iterator i = mds_info.begin();
569 i != mds_info.end(); ++i) {
570 ::encode(i->first, bl);
571 ::encode(i->second, bl, features);
572 }
573 n = data_pools.size();
574 ::encode(n, bl);
31f18b77
FG
575 for (const auto p: data_pools) {
576 n = p;
7c673cae
FG
577 ::encode(n, bl);
578 }
579
580 int32_t m = cas_pool;
581 ::encode(m, bl);
582 return;
583 } else if ((features & CEPH_FEATURE_MDSENC) == 0) {
584 __u16 v = 3;
585 ::encode(v, bl);
586 ::encode(epoch, bl);
587 ::encode(flags, bl);
588 ::encode(last_failure, bl);
589 ::encode(root, bl);
590 ::encode(session_timeout, bl);
591 ::encode(session_autoclose, bl);
592 ::encode(max_file_size, bl);
593 ::encode(max_mds, bl);
594 __u32 n = mds_info.size();
595 ::encode(n, bl);
596 for (map<mds_gid_t, mds_info_t>::const_iterator i = mds_info.begin();
597 i != mds_info.end(); ++i) {
598 ::encode(i->first, bl);
599 ::encode(i->second, bl, features);
600 }
601 ::encode(data_pools, bl);
602 ::encode(cas_pool, bl);
603
604 // kclient ignores everything from here
605 __u16 ev = 5;
606 ::encode(ev, bl);
607 ::encode(compat, bl);
608 ::encode(metadata_pool, bl);
609 ::encode(created, bl);
610 ::encode(modified, bl);
611 ::encode(tableserver, bl);
612 ::encode(in, bl);
613 ::encode(inc, bl);
614 ::encode(up, bl);
615 ::encode(failed, bl);
616 ::encode(stopped, bl);
617 ::encode(last_failure_osd_epoch, bl);
618 return;
619 }
620
621 ENCODE_START(5, 4, bl);
622 ::encode(epoch, bl);
623 ::encode(flags, bl);
624 ::encode(last_failure, bl);
625 ::encode(root, bl);
626 ::encode(session_timeout, bl);
627 ::encode(session_autoclose, bl);
628 ::encode(max_file_size, bl);
629 ::encode(max_mds, bl);
630 ::encode(mds_info, bl, features);
631 ::encode(data_pools, bl);
632 ::encode(cas_pool, bl);
633
634 // kclient ignores everything from here
635 __u16 ev = 12;
636 ::encode(ev, bl);
637 ::encode(compat, bl);
638 ::encode(metadata_pool, bl);
639 ::encode(created, bl);
640 ::encode(modified, bl);
641 ::encode(tableserver, bl);
642 ::encode(in, bl);
643 ::encode(inc, bl);
644 ::encode(up, bl);
645 ::encode(failed, bl);
646 ::encode(stopped, bl);
647 ::encode(last_failure_osd_epoch, bl);
648 ::encode(ever_allowed_features, bl);
649 ::encode(explicitly_allowed_features, bl);
650 ::encode(inline_data_enabled, bl);
651 ::encode(enabled, bl);
652 ::encode(fs_name, bl);
653 ::encode(damaged, bl);
654 ::encode(balancer, bl);
655 ::encode(standby_count_wanted, bl);
656 ENCODE_FINISH(bl);
657}
658
659void MDSMap::decode(bufferlist::iterator& p)
660{
661 std::map<mds_rank_t,int32_t> inc; // Legacy field, parse and drop
662
663 cached_up_features = 0;
664 DECODE_START_LEGACY_COMPAT_LEN_16(5, 4, 4, p);
665 ::decode(epoch, p);
666 ::decode(flags, p);
667 ::decode(last_failure, p);
668 ::decode(root, p);
669 ::decode(session_timeout, p);
670 ::decode(session_autoclose, p);
671 ::decode(max_file_size, p);
672 ::decode(max_mds, p);
673 ::decode(mds_info, p);
674 if (struct_v < 3) {
675 __u32 n;
676 ::decode(n, p);
677 while (n--) {
678 __u32 m;
679 ::decode(m, p);
31f18b77 680 data_pools.push_back(m);
7c673cae
FG
681 }
682 __s32 s;
683 ::decode(s, p);
684 cas_pool = s;
685 } else {
686 ::decode(data_pools, p);
687 ::decode(cas_pool, p);
688 }
689
690 // kclient ignores everything from here
691 __u16 ev = 1;
692 if (struct_v >= 2)
693 ::decode(ev, p);
694 if (ev >= 3)
695 ::decode(compat, p);
696 else
697 compat = get_mdsmap_compat_set_base();
698 if (ev < 5) {
699 __u32 n;
700 ::decode(n, p);
701 metadata_pool = n;
702 } else {
703 ::decode(metadata_pool, p);
704 }
705 ::decode(created, p);
706 ::decode(modified, p);
707 ::decode(tableserver, p);
708 ::decode(in, p);
709 ::decode(inc, p);
710 ::decode(up, p);
711 ::decode(failed, p);
712 ::decode(stopped, p);
713 if (ev >= 4)
714 ::decode(last_failure_osd_epoch, p);
715 if (ev >= 6) {
716 if (ev < 10) {
717 // previously this was a bool about snaps, not a flag map
718 bool flag;
719 ::decode(flag, p);
720 ever_allowed_features = flag ? CEPH_MDSMAP_ALLOW_SNAPS : 0;
721 ever_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS|CEPH_MDSMAP_ALLOW_DIRFRAGS;
722 ::decode(flag, p);
723 explicitly_allowed_features = flag ? CEPH_MDSMAP_ALLOW_SNAPS : 0;
724 if (max_mds > 1) {
725 set_multimds_allowed();
726 }
727 } else {
728 ::decode(ever_allowed_features, p);
729 ::decode(explicitly_allowed_features, p);
730 }
731 } else {
732 ever_allowed_features = CEPH_MDSMAP_ALLOW_CLASSICS;
733 explicitly_allowed_features = 0;
734 if (max_mds > 1) {
735 set_multimds_allowed();
736 }
737 }
738 if (ev >= 7)
739 ::decode(inline_data_enabled, p);
740
741 if (ev >= 8) {
742 assert(struct_v >= 5);
743 ::decode(enabled, p);
744 ::decode(fs_name, p);
745 } else {
746 if (epoch > 1) {
747 // If an MDS has ever been started, epoch will be greater than 1,
748 // assume filesystem is enabled.
749 enabled = true;
750 } else {
751 // Upgrading from a cluster that never used an MDS, switch off
752 // filesystem until it's explicitly enabled.
753 enabled = false;
754 }
755 }
756
757 if (ev >= 9) {
758 ::decode(damaged, p);
759 }
760
761 if (ev >= 11) {
762 ::decode(balancer, p);
763 }
764
765 if (ev >= 12) {
766 ::decode(standby_count_wanted, p);
767 }
768
769 DECODE_FINISH(p);
770}
771
772MDSMap::availability_t MDSMap::is_cluster_available() const
773{
774 if (epoch == 0) {
775 // If I'm a client, this means I'm looking at an MDSMap instance
776 // that was never actually initialized from the mons. Client should
777 // wait.
778 return TRANSIENT_UNAVAILABLE;
779 }
780
781 // If a rank is marked damage (unavailable until operator intervenes)
782 if (damaged.size()) {
783 return STUCK_UNAVAILABLE;
784 }
785
786 // If no ranks are created (filesystem not initialized)
787 if (in.empty()) {
788 return STUCK_UNAVAILABLE;
789 }
790
791 for (const auto rank : in) {
792 if (up.count(rank) && mds_info.at(up.at(rank)).laggy()) {
793 // This might only be transient, but because we can't see
794 // standbys, we have no way of knowing whether there is a
795 // standby available to replace the laggy guy.
796 return STUCK_UNAVAILABLE;
797 }
798 }
799
800 if (get_num_mds(CEPH_MDS_STATE_ACTIVE) > 0) {
801 // Nobody looks stuck, so indicate to client they should go ahead
802 // and try mounting if anybody is active. This may include e.g.
803 // one MDS failing over and another active: the client should
804 // proceed to start talking to the active one and let the
805 // transiently-unavailable guy catch up later.
806 return AVAILABLE;
807 } else {
808 // Nothing indicating we were stuck, but nobody active (yet)
809 //return TRANSIENT_UNAVAILABLE;
810
811 // Because we don't have standbys in the MDSMap any more, we can't
812 // reliably indicate transient vs. stuck, so always say stuck so
813 // that the client doesn't block.
814 return STUCK_UNAVAILABLE;
815 }
816}
817
818bool MDSMap::state_transition_valid(DaemonState prev, DaemonState next)
819{
820 bool state_valid = true;
821 if (next != prev) {
822 if (prev == MDSMap::STATE_REPLAY) {
823 if (next != MDSMap::STATE_RESOLVE && next != MDSMap::STATE_RECONNECT) {
824 state_valid = false;
825 }
826 } else if (prev == MDSMap::STATE_REJOIN) {
827 if (next != MDSMap::STATE_ACTIVE
828 && next != MDSMap::STATE_CLIENTREPLAY
829 && next != MDSMap::STATE_STOPPED) {
830 state_valid = false;
831 }
832 } else if (prev >= MDSMap::STATE_RECONNECT && prev < MDSMap::STATE_ACTIVE) {
833 // Once I have entered replay, the only allowable transitions are to
834 // the next next along in the sequence.
835 if (next != prev + 1) {
836 state_valid = false;
837 }
838 }
839 }
840
841 return state_valid;
842}
843
844bool MDSMap::check_health(mds_rank_t standby_daemon_count)
845{
846 std::set<mds_rank_t> standbys;
847 get_standby_replay_mds_set(standbys);
848 std::set<mds_rank_t> actives;
849 get_active_mds_set(actives);
850 mds_rank_t standbys_avail = (mds_rank_t)standbys.size()+standby_daemon_count;
851
852 /* If there are standby daemons available/replaying and
853 * standby_count_wanted is unset (default), then we set it to 1. This will
854 * happen during health checks by the mons. Also, during initial creation
855 * of the FS we will have no actives so we don't want to change the default
856 * yet.
857 */
858 if (standby_count_wanted == -1 && actives.size() > 0 && standbys_avail > 0) {
859 set_standby_count_wanted(1);
860 return true;
861 }
862 return false;
863}