]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | #include "MDSMap.h" | |
17 | ||
18 | #include <sstream> | |
19 | using std::stringstream; | |
20 | ||
224ce89b WB |
21 | #include "mon/health_check.h" |
22 | ||
7c673cae FG |
23 | |
24 | // features | |
25 | CompatSet get_mdsmap_compat_set_all() { | |
26 | CompatSet::FeatureSet feature_compat; | |
27 | CompatSet::FeatureSet feature_ro_compat; | |
28 | CompatSet::FeatureSet feature_incompat; | |
29 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_BASE); | |
30 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_CLIENTRANGES); | |
31 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILELAYOUT); | |
32 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_DIRINODE); | |
33 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_ENCODING); | |
34 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_OMAPDIRFRAG); | |
35 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_INLINE); | |
36 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_NOANCHOR); | |
37 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILE_LAYOUT_V2); | |
38 | ||
39 | return CompatSet(feature_compat, feature_ro_compat, feature_incompat); | |
40 | } | |
41 | ||
42 | CompatSet get_mdsmap_compat_set_default() { | |
43 | CompatSet::FeatureSet feature_compat; | |
44 | CompatSet::FeatureSet feature_ro_compat; | |
45 | CompatSet::FeatureSet feature_incompat; | |
46 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_BASE); | |
47 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_CLIENTRANGES); | |
48 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILELAYOUT); | |
49 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_DIRINODE); | |
50 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_ENCODING); | |
51 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_OMAPDIRFRAG); | |
52 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_NOANCHOR); | |
53 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILE_LAYOUT_V2); | |
54 | ||
55 | return CompatSet(feature_compat, feature_ro_compat, feature_incompat); | |
56 | } | |
57 | ||
58 | // base (pre v0.20) | |
59 | CompatSet get_mdsmap_compat_set_base() { | |
60 | CompatSet::FeatureSet feature_compat_base; | |
61 | CompatSet::FeatureSet feature_incompat_base; | |
62 | feature_incompat_base.insert(MDS_FEATURE_INCOMPAT_BASE); | |
63 | CompatSet::FeatureSet feature_ro_compat_base; | |
64 | ||
65 | return CompatSet(feature_compat_base, feature_ro_compat_base, feature_incompat_base); | |
66 | } | |
67 | ||
68 | void MDSMap::mds_info_t::dump(Formatter *f) const | |
69 | { | |
70 | f->dump_unsigned("gid", global_id); | |
71 | f->dump_string("name", name); | |
72 | f->dump_int("rank", rank); | |
73 | f->dump_int("incarnation", inc); | |
74 | f->dump_stream("state") << ceph_mds_state_name(state); | |
75 | f->dump_int("state_seq", state_seq); | |
76 | f->dump_stream("addr") << addr; | |
77 | if (laggy_since != utime_t()) | |
78 | f->dump_stream("laggy_since") << laggy_since; | |
79 | ||
80 | f->dump_int("standby_for_rank", standby_for_rank); | |
81 | f->dump_int("standby_for_fscid", standby_for_fscid); | |
82 | f->dump_string("standby_for_name", standby_for_name); | |
83 | f->dump_bool("standby_replay", standby_replay); | |
84 | f->open_array_section("export_targets"); | |
85 | for (set<mds_rank_t>::iterator p = export_targets.begin(); | |
86 | p != export_targets.end(); ++p) { | |
87 | f->dump_int("mds", *p); | |
88 | } | |
89 | f->close_section(); | |
90 | f->dump_unsigned("features", mds_features); | |
91 | } | |
92 | ||
93 | void MDSMap::mds_info_t::print_summary(ostream &out) const | |
94 | { | |
95 | out << global_id << ":\t" | |
96 | << addr | |
97 | << " '" << name << "'" | |
98 | << " mds." << rank | |
99 | << "." << inc | |
100 | << " " << ceph_mds_state_name(state) | |
101 | << " seq " << state_seq; | |
102 | if (laggy()) { | |
103 | out << " laggy since " << laggy_since; | |
104 | } | |
105 | if (standby_for_rank != -1 || | |
106 | !standby_for_name.empty()) { | |
107 | out << " (standby for"; | |
108 | //if (standby_for_rank >= 0) | |
109 | out << " rank " << standby_for_rank; | |
110 | if (!standby_for_name.empty()) { | |
111 | out << " '" << standby_for_name << "'"; | |
112 | } | |
113 | out << ")"; | |
114 | } | |
115 | if (!export_targets.empty()) { | |
116 | out << " export_targets=" << export_targets; | |
117 | } | |
118 | } | |
119 | ||
120 | void MDSMap::mds_info_t::generate_test_instances(list<mds_info_t*>& ls) | |
121 | { | |
122 | mds_info_t *sample = new mds_info_t(); | |
123 | ls.push_back(sample); | |
124 | sample = new mds_info_t(); | |
125 | sample->global_id = 1; | |
126 | sample->name = "test_instance"; | |
127 | sample->rank = 0; | |
128 | ls.push_back(sample); | |
129 | } | |
130 | ||
131 | void MDSMap::dump(Formatter *f) const | |
132 | { | |
133 | f->dump_int("epoch", epoch); | |
134 | f->dump_unsigned("flags", flags); | |
135 | f->dump_unsigned("ever_allowed_features", ever_allowed_features); | |
136 | f->dump_unsigned("explicitly_allowed_features", explicitly_allowed_features); | |
137 | f->dump_stream("created") << created; | |
138 | f->dump_stream("modified") << modified; | |
139 | f->dump_int("tableserver", tableserver); | |
140 | f->dump_int("root", root); | |
141 | f->dump_int("session_timeout", session_timeout); | |
142 | f->dump_int("session_autoclose", session_autoclose); | |
143 | f->dump_int("max_file_size", max_file_size); | |
144 | f->dump_int("last_failure", last_failure); | |
145 | f->dump_int("last_failure_osd_epoch", last_failure_osd_epoch); | |
146 | f->open_object_section("compat"); | |
147 | compat.dump(f); | |
148 | f->close_section(); | |
149 | f->dump_int("max_mds", max_mds); | |
150 | f->open_array_section("in"); | |
151 | for (set<mds_rank_t>::const_iterator p = in.begin(); p != in.end(); ++p) | |
152 | f->dump_int("mds", *p); | |
153 | f->close_section(); | |
154 | f->open_object_section("up"); | |
155 | for (map<mds_rank_t,mds_gid_t>::const_iterator p = up.begin(); p != up.end(); ++p) { | |
156 | char s[14]; | |
157 | sprintf(s, "mds_%d", int(p->first)); | |
158 | f->dump_int(s, p->second); | |
159 | } | |
160 | f->close_section(); | |
161 | f->open_array_section("failed"); | |
162 | for (set<mds_rank_t>::const_iterator p = failed.begin(); p != failed.end(); ++p) | |
163 | f->dump_int("mds", *p); | |
164 | f->close_section(); | |
165 | f->open_array_section("damaged"); | |
166 | for (set<mds_rank_t>::const_iterator p = damaged.begin(); p != damaged.end(); ++p) | |
167 | f->dump_int("mds", *p); | |
168 | f->close_section(); | |
169 | f->open_array_section("stopped"); | |
170 | for (set<mds_rank_t>::const_iterator p = stopped.begin(); p != stopped.end(); ++p) | |
171 | f->dump_int("mds", *p); | |
172 | f->close_section(); | |
173 | f->open_object_section("info"); | |
174 | for (map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin(); p != mds_info.end(); ++p) { | |
175 | char s[25]; // 'gid_' + len(str(ULLONG_MAX)) + '\0' | |
176 | sprintf(s, "gid_%llu", (long long unsigned)p->first); | |
177 | f->open_object_section(s); | |
178 | p->second.dump(f); | |
179 | f->close_section(); | |
180 | } | |
181 | f->close_section(); | |
182 | f->open_array_section("data_pools"); | |
31f18b77 FG |
183 | for (const auto p: data_pools) |
184 | f->dump_int("pool", p); | |
7c673cae FG |
185 | f->close_section(); |
186 | f->dump_int("metadata_pool", metadata_pool); | |
187 | f->dump_bool("enabled", enabled); | |
188 | f->dump_string("fs_name", fs_name); | |
189 | f->dump_string("balancer", balancer); | |
190 | f->dump_int("standby_count_wanted", std::max(0, standby_count_wanted)); | |
191 | } | |
192 | ||
193 | void MDSMap::generate_test_instances(list<MDSMap*>& ls) | |
194 | { | |
195 | MDSMap *m = new MDSMap(); | |
196 | m->max_mds = 1; | |
31f18b77 | 197 | m->data_pools.push_back(0); |
7c673cae FG |
198 | m->metadata_pool = 1; |
199 | m->cas_pool = 2; | |
200 | m->compat = get_mdsmap_compat_set_all(); | |
201 | ||
202 | // these aren't the defaults, just in case anybody gets confused | |
203 | m->session_timeout = 61; | |
204 | m->session_autoclose = 301; | |
205 | m->max_file_size = 1<<24; | |
206 | ls.push_back(m); | |
207 | } | |
208 | ||
209 | void MDSMap::print(ostream& out) const | |
210 | { | |
211 | out << "fs_name\t" << fs_name << "\n"; | |
212 | out << "epoch\t" << epoch << "\n"; | |
213 | out << "flags\t" << hex << flags << dec << "\n"; | |
214 | out << "created\t" << created << "\n"; | |
215 | out << "modified\t" << modified << "\n"; | |
216 | out << "tableserver\t" << tableserver << "\n"; | |
217 | out << "root\t" << root << "\n"; | |
218 | out << "session_timeout\t" << session_timeout << "\n" | |
219 | << "session_autoclose\t" << session_autoclose << "\n"; | |
220 | out << "max_file_size\t" << max_file_size << "\n"; | |
221 | out << "last_failure\t" << last_failure << "\n" | |
222 | << "last_failure_osd_epoch\t" << last_failure_osd_epoch << "\n"; | |
223 | out << "compat\t" << compat << "\n"; | |
224 | out << "max_mds\t" << max_mds << "\n"; | |
225 | out << "in\t" << in << "\n" | |
226 | << "up\t" << up << "\n" | |
227 | << "failed\t" << failed << "\n" | |
228 | << "damaged\t" << damaged << "\n" | |
229 | << "stopped\t" << stopped << "\n"; | |
230 | out << "data_pools\t" << data_pools << "\n"; | |
231 | out << "metadata_pool\t" << metadata_pool << "\n"; | |
232 | out << "inline_data\t" << (inline_data_enabled ? "enabled" : "disabled") << "\n"; | |
233 | out << "balancer\t" << balancer << "\n"; | |
234 | out << "standby_count_wanted\t" << std::max(0, standby_count_wanted) << "\n"; | |
235 | ||
236 | multimap< pair<mds_rank_t, unsigned>, mds_gid_t > foo; | |
237 | for (const auto &p : mds_info) { | |
238 | foo.insert(std::make_pair( | |
239 | std::make_pair(p.second.rank, p.second.inc-1), p.first)); | |
240 | } | |
241 | ||
242 | for (const auto &p : foo) { | |
243 | const mds_info_t& info = mds_info.at(p.second); | |
244 | info.print_summary(out); | |
245 | out << "\n"; | |
246 | } | |
247 | } | |
248 | ||
249 | ||
250 | ||
251 | void MDSMap::print_summary(Formatter *f, ostream *out) const | |
252 | { | |
253 | map<mds_rank_t,string> by_rank; | |
254 | map<string,int> by_state; | |
255 | ||
256 | if (f) { | |
257 | f->dump_unsigned("epoch", get_epoch()); | |
258 | f->dump_unsigned("up", up.size()); | |
259 | f->dump_unsigned("in", in.size()); | |
260 | f->dump_unsigned("max", max_mds); | |
261 | } else { | |
262 | *out << "e" << get_epoch() << ": " << up.size() << "/" << in.size() << "/" << max_mds << " up"; | |
263 | } | |
264 | ||
265 | if (f) | |
266 | f->open_array_section("by_rank"); | |
267 | for (const auto &p : mds_info) { | |
268 | string s = ceph_mds_state_name(p.second.state); | |
269 | if (p.second.laggy()) | |
270 | s += "(laggy or crashed)"; | |
271 | ||
272 | if (p.second.rank >= 0 && p.second.state != MDSMap::STATE_STANDBY_REPLAY) { | |
273 | if (f) { | |
274 | f->open_object_section("mds"); | |
275 | f->dump_unsigned("rank", p.second.rank); | |
276 | f->dump_string("name", p.second.name); | |
277 | f->dump_string("status", s); | |
278 | f->close_section(); | |
279 | } else { | |
280 | by_rank[p.second.rank] = p.second.name + "=" + s; | |
281 | } | |
282 | } else { | |
283 | by_state[s]++; | |
284 | } | |
285 | } | |
286 | if (f) { | |
287 | f->close_section(); | |
288 | } else { | |
289 | if (!by_rank.empty()) | |
290 | *out << " " << by_rank; | |
291 | } | |
292 | ||
293 | for (map<string,int>::reverse_iterator p = by_state.rbegin(); p != by_state.rend(); ++p) { | |
294 | if (f) { | |
295 | f->dump_unsigned(p->first.c_str(), p->second); | |
296 | } else { | |
297 | *out << ", " << p->second << " " << p->first; | |
298 | } | |
299 | } | |
300 | ||
301 | if (!failed.empty()) { | |
302 | if (f) { | |
303 | f->dump_unsigned("failed", failed.size()); | |
304 | } else { | |
305 | *out << ", " << failed.size() << " failed"; | |
306 | } | |
307 | } | |
308 | ||
309 | if (!damaged.empty()) { | |
310 | if (f) { | |
311 | f->dump_unsigned("damaged", damaged.size()); | |
312 | } else { | |
313 | *out << ", " << damaged.size() << " damaged"; | |
314 | } | |
315 | } | |
316 | //if (stopped.size()) | |
317 | //out << ", " << stopped.size() << " stopped"; | |
318 | } | |
319 | ||
320 | void MDSMap::get_health(list<pair<health_status_t,string> >& summary, | |
321 | list<pair<health_status_t,string> > *detail) const | |
322 | { | |
323 | if (!failed.empty()) { | |
324 | std::ostringstream oss; | |
325 | oss << "mds rank" | |
326 | << ((failed.size() > 1) ? "s ":" ") | |
327 | << failed | |
328 | << ((failed.size() > 1) ? " have":" has") | |
329 | << " failed"; | |
330 | summary.push_back(make_pair(HEALTH_ERR, oss.str())); | |
331 | if (detail) { | |
332 | for (set<mds_rank_t>::const_iterator p = failed.begin(); p != failed.end(); ++p) { | |
333 | std::ostringstream oss; | |
334 | oss << "mds." << *p << " has failed"; | |
335 | detail->push_back(make_pair(HEALTH_ERR, oss.str())); | |
336 | } | |
337 | } | |
338 | } | |
339 | ||
340 | if (!damaged.empty()) { | |
341 | std::ostringstream oss; | |
342 | oss << "mds rank" | |
343 | << ((damaged.size() > 1) ? "s ":" ") | |
344 | << damaged | |
345 | << ((damaged.size() > 1) ? " are":" is") | |
346 | << " damaged"; | |
347 | summary.push_back(make_pair(HEALTH_ERR, oss.str())); | |
348 | if (detail) { | |
349 | for (set<mds_rank_t>::const_iterator p = damaged.begin(); p != damaged.end(); ++p) { | |
350 | std::ostringstream oss; | |
351 | oss << "mds." << *p << " is damaged"; | |
352 | detail->push_back(make_pair(HEALTH_ERR, oss.str())); | |
353 | } | |
354 | } | |
355 | } | |
356 | ||
357 | if (is_degraded()) { | |
358 | summary.push_back(make_pair(HEALTH_WARN, "mds cluster is degraded")); | |
359 | if (detail) { | |
360 | detail->push_back(make_pair(HEALTH_WARN, "mds cluster is degraded")); | |
361 | for (mds_rank_t i = mds_rank_t(0); i< get_max_mds(); i++) { | |
362 | if (!is_up(i)) | |
363 | continue; | |
364 | mds_gid_t gid = up.find(i)->second; | |
365 | map<mds_gid_t,mds_info_t>::const_iterator info = mds_info.find(gid); | |
366 | stringstream ss; | |
367 | if (is_resolve(i)) | |
368 | ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is resolving"; | |
369 | if (is_replay(i)) | |
370 | ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is replaying journal"; | |
371 | if (is_rejoin(i)) | |
372 | ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is rejoining"; | |
373 | if (is_reconnect(i)) | |
374 | ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is reconnecting to clients"; | |
375 | if (ss.str().length()) | |
376 | detail->push_back(make_pair(HEALTH_WARN, ss.str())); | |
377 | } | |
378 | } | |
379 | } | |
380 | ||
381 | map<mds_gid_t, mds_info_t>::const_iterator m_end = mds_info.end(); | |
382 | set<string> laggy; | |
383 | for (const auto &u : up) { | |
384 | map<mds_gid_t, mds_info_t>::const_iterator m = mds_info.find(u.second); | |
385 | if (m == m_end) { | |
386 | std::cerr << "Up rank " << u.first << " GID " << u.second << " not found!" << std::endl; | |
387 | } | |
388 | assert(m != m_end); | |
389 | const mds_info_t &mds_info(m->second); | |
390 | if (mds_info.laggy()) { | |
391 | laggy.insert(mds_info.name); | |
392 | if (detail) { | |
393 | std::ostringstream oss; | |
394 | oss << "mds." << mds_info.name << " at " << mds_info.addr << " is laggy/unresponsive"; | |
395 | detail->push_back(make_pair(HEALTH_WARN, oss.str())); | |
396 | } | |
397 | } | |
398 | } | |
399 | ||
400 | if (!laggy.empty()) { | |
401 | std::ostringstream oss; | |
402 | oss << "mds " << laggy | |
403 | << ((laggy.size() > 1) ? " are":" is") | |
404 | << " laggy"; | |
405 | summary.push_back(make_pair(HEALTH_WARN, oss.str())); | |
406 | } | |
407 | } | |
408 | ||
224ce89b WB |
409 | void MDSMap::get_health_checks(health_check_map_t *checks) const |
410 | { | |
d2e6a577 | 411 | // MDS_DAMAGE |
224ce89b | 412 | if (!damaged.empty()) { |
d2e6a577 | 413 | health_check_t& check = checks->get_or_add("MDS_DAMAGE", HEALTH_ERR, |
224ce89b WB |
414 | "%num% mds daemon%plurals% damaged"); |
415 | for (auto p : damaged) { | |
416 | std::ostringstream oss; | |
417 | oss << "fs " << fs_name << " mds." << p << " is damaged"; | |
418 | check.detail.push_back(oss.str()); | |
419 | } | |
420 | } | |
421 | ||
422 | // FS_DEGRADED | |
224ce89b | 423 | if (is_degraded()) { |
d2e6a577 | 424 | health_check_t& fscheck = checks->get_or_add( |
224ce89b WB |
425 | "FS_DEGRADED", HEALTH_WARN, |
426 | "%num% filesystem%plurals% %isorare% degraded"); | |
427 | ostringstream ss; | |
428 | ss << "fs " << fs_name << " is degraded"; | |
429 | fscheck.detail.push_back(ss.str()); | |
430 | ||
431 | list<string> detail; | |
432 | for (mds_rank_t i = mds_rank_t(0); i< get_max_mds(); i++) { | |
433 | if (!is_up(i)) | |
434 | continue; | |
435 | mds_gid_t gid = up.find(i)->second; | |
436 | map<mds_gid_t,mds_info_t>::const_iterator info = mds_info.find(gid); | |
437 | stringstream ss; | |
438 | ss << "fs " << fs_name << " mds." << info->second.name << " at " | |
439 | << info->second.addr << " rank " << i; | |
440 | if (is_resolve(i)) | |
441 | ss << " is resolving"; | |
442 | if (is_replay(i)) | |
443 | ss << " is replaying journal"; | |
444 | if (is_rejoin(i)) | |
445 | ss << " is rejoining"; | |
446 | if (is_reconnect(i)) | |
447 | ss << " is reconnecting to clients"; | |
448 | if (ss.str().length()) | |
449 | detail.push_back(ss.str()); | |
450 | } | |
224ce89b WB |
451 | } |
452 | } | |
453 | ||
7c673cae FG |
454 | void MDSMap::mds_info_t::encode_versioned(bufferlist& bl, uint64_t features) const |
455 | { | |
456 | ENCODE_START(7, 4, bl); | |
457 | ::encode(global_id, bl); | |
458 | ::encode(name, bl); | |
459 | ::encode(rank, bl); | |
460 | ::encode(inc, bl); | |
461 | ::encode((int32_t)state, bl); | |
462 | ::encode(state_seq, bl); | |
463 | ::encode(addr, bl, features); | |
464 | ::encode(laggy_since, bl); | |
465 | ::encode(standby_for_rank, bl); | |
466 | ::encode(standby_for_name, bl); | |
467 | ::encode(export_targets, bl); | |
468 | ::encode(mds_features, bl); | |
469 | ::encode(standby_for_fscid, bl); | |
470 | ::encode(standby_replay, bl); | |
471 | ENCODE_FINISH(bl); | |
472 | } | |
473 | ||
474 | void MDSMap::mds_info_t::encode_unversioned(bufferlist& bl) const | |
475 | { | |
476 | __u8 struct_v = 3; | |
477 | ::encode(struct_v, bl); | |
478 | ::encode(global_id, bl); | |
479 | ::encode(name, bl); | |
480 | ::encode(rank, bl); | |
481 | ::encode(inc, bl); | |
482 | ::encode((int32_t)state, bl); | |
483 | ::encode(state_seq, bl); | |
484 | ::encode(addr, bl, 0); | |
485 | ::encode(laggy_since, bl); | |
486 | ::encode(standby_for_rank, bl); | |
487 | ::encode(standby_for_name, bl); | |
488 | ::encode(export_targets, bl); | |
489 | } | |
490 | ||
491 | void MDSMap::mds_info_t::decode(bufferlist::iterator& bl) | |
492 | { | |
493 | DECODE_START_LEGACY_COMPAT_LEN(7, 4, 4, bl); | |
494 | ::decode(global_id, bl); | |
495 | ::decode(name, bl); | |
496 | ::decode(rank, bl); | |
497 | ::decode(inc, bl); | |
498 | ::decode((int32_t&)(state), bl); | |
499 | ::decode(state_seq, bl); | |
500 | ::decode(addr, bl); | |
501 | ::decode(laggy_since, bl); | |
502 | ::decode(standby_for_rank, bl); | |
503 | ::decode(standby_for_name, bl); | |
504 | if (struct_v >= 2) | |
505 | ::decode(export_targets, bl); | |
506 | if (struct_v >= 5) | |
507 | ::decode(mds_features, bl); | |
508 | if (struct_v >= 6) { | |
509 | ::decode(standby_for_fscid, bl); | |
510 | } | |
511 | if (struct_v >= 7) { | |
512 | ::decode(standby_replay, bl); | |
513 | } | |
514 | DECODE_FINISH(bl); | |
515 | } | |
516 | ||
d2e6a577 FG |
517 | std::string MDSMap::mds_info_t::human_name() const |
518 | { | |
519 | // Like "daemon mds.myhost restarted", "Activating daemon mds.myhost" | |
520 | std::ostringstream out; | |
521 | out << "daemon mds." << name; | |
522 | return out.str(); | |
523 | } | |
7c673cae FG |
524 | |
525 | void MDSMap::encode(bufferlist& bl, uint64_t features) const | |
526 | { | |
527 | std::map<mds_rank_t,int32_t> inc; // Legacy field, fake it so that | |
528 | // old-mon peers have something sane | |
529 | // during upgrade | |
530 | for (const auto rank : in) { | |
531 | inc.insert(std::make_pair(rank, epoch)); | |
532 | } | |
533 | ||
534 | if ((features & CEPH_FEATURE_PGID64) == 0) { | |
535 | __u16 v = 2; | |
536 | ::encode(v, bl); | |
537 | ::encode(epoch, bl); | |
538 | ::encode(flags, bl); | |
539 | ::encode(last_failure, bl); | |
540 | ::encode(root, bl); | |
541 | ::encode(session_timeout, bl); | |
542 | ::encode(session_autoclose, bl); | |
543 | ::encode(max_file_size, bl); | |
544 | ::encode(max_mds, bl); | |
545 | __u32 n = mds_info.size(); | |
546 | ::encode(n, bl); | |
547 | for (map<mds_gid_t, mds_info_t>::const_iterator i = mds_info.begin(); | |
548 | i != mds_info.end(); ++i) { | |
549 | ::encode(i->first, bl); | |
550 | ::encode(i->second, bl, features); | |
551 | } | |
552 | n = data_pools.size(); | |
553 | ::encode(n, bl); | |
31f18b77 FG |
554 | for (const auto p: data_pools) { |
555 | n = p; | |
7c673cae FG |
556 | ::encode(n, bl); |
557 | } | |
558 | ||
559 | int32_t m = cas_pool; | |
560 | ::encode(m, bl); | |
561 | return; | |
562 | } else if ((features & CEPH_FEATURE_MDSENC) == 0) { | |
563 | __u16 v = 3; | |
564 | ::encode(v, bl); | |
565 | ::encode(epoch, bl); | |
566 | ::encode(flags, bl); | |
567 | ::encode(last_failure, bl); | |
568 | ::encode(root, bl); | |
569 | ::encode(session_timeout, bl); | |
570 | ::encode(session_autoclose, bl); | |
571 | ::encode(max_file_size, bl); | |
572 | ::encode(max_mds, bl); | |
573 | __u32 n = mds_info.size(); | |
574 | ::encode(n, bl); | |
575 | for (map<mds_gid_t, mds_info_t>::const_iterator i = mds_info.begin(); | |
576 | i != mds_info.end(); ++i) { | |
577 | ::encode(i->first, bl); | |
578 | ::encode(i->second, bl, features); | |
579 | } | |
580 | ::encode(data_pools, bl); | |
581 | ::encode(cas_pool, bl); | |
582 | ||
583 | // kclient ignores everything from here | |
584 | __u16 ev = 5; | |
585 | ::encode(ev, bl); | |
586 | ::encode(compat, bl); | |
587 | ::encode(metadata_pool, bl); | |
588 | ::encode(created, bl); | |
589 | ::encode(modified, bl); | |
590 | ::encode(tableserver, bl); | |
591 | ::encode(in, bl); | |
592 | ::encode(inc, bl); | |
593 | ::encode(up, bl); | |
594 | ::encode(failed, bl); | |
595 | ::encode(stopped, bl); | |
596 | ::encode(last_failure_osd_epoch, bl); | |
597 | return; | |
598 | } | |
599 | ||
600 | ENCODE_START(5, 4, bl); | |
601 | ::encode(epoch, bl); | |
602 | ::encode(flags, bl); | |
603 | ::encode(last_failure, bl); | |
604 | ::encode(root, bl); | |
605 | ::encode(session_timeout, bl); | |
606 | ::encode(session_autoclose, bl); | |
607 | ::encode(max_file_size, bl); | |
608 | ::encode(max_mds, bl); | |
609 | ::encode(mds_info, bl, features); | |
610 | ::encode(data_pools, bl); | |
611 | ::encode(cas_pool, bl); | |
612 | ||
613 | // kclient ignores everything from here | |
614 | __u16 ev = 12; | |
615 | ::encode(ev, bl); | |
616 | ::encode(compat, bl); | |
617 | ::encode(metadata_pool, bl); | |
618 | ::encode(created, bl); | |
619 | ::encode(modified, bl); | |
620 | ::encode(tableserver, bl); | |
621 | ::encode(in, bl); | |
622 | ::encode(inc, bl); | |
623 | ::encode(up, bl); | |
624 | ::encode(failed, bl); | |
625 | ::encode(stopped, bl); | |
626 | ::encode(last_failure_osd_epoch, bl); | |
627 | ::encode(ever_allowed_features, bl); | |
628 | ::encode(explicitly_allowed_features, bl); | |
629 | ::encode(inline_data_enabled, bl); | |
630 | ::encode(enabled, bl); | |
631 | ::encode(fs_name, bl); | |
632 | ::encode(damaged, bl); | |
633 | ::encode(balancer, bl); | |
634 | ::encode(standby_count_wanted, bl); | |
635 | ENCODE_FINISH(bl); | |
636 | } | |
637 | ||
638 | void MDSMap::decode(bufferlist::iterator& p) | |
639 | { | |
640 | std::map<mds_rank_t,int32_t> inc; // Legacy field, parse and drop | |
641 | ||
642 | cached_up_features = 0; | |
643 | DECODE_START_LEGACY_COMPAT_LEN_16(5, 4, 4, p); | |
644 | ::decode(epoch, p); | |
645 | ::decode(flags, p); | |
646 | ::decode(last_failure, p); | |
647 | ::decode(root, p); | |
648 | ::decode(session_timeout, p); | |
649 | ::decode(session_autoclose, p); | |
650 | ::decode(max_file_size, p); | |
651 | ::decode(max_mds, p); | |
652 | ::decode(mds_info, p); | |
653 | if (struct_v < 3) { | |
654 | __u32 n; | |
655 | ::decode(n, p); | |
656 | while (n--) { | |
657 | __u32 m; | |
658 | ::decode(m, p); | |
31f18b77 | 659 | data_pools.push_back(m); |
7c673cae FG |
660 | } |
661 | __s32 s; | |
662 | ::decode(s, p); | |
663 | cas_pool = s; | |
664 | } else { | |
665 | ::decode(data_pools, p); | |
666 | ::decode(cas_pool, p); | |
667 | } | |
668 | ||
669 | // kclient ignores everything from here | |
670 | __u16 ev = 1; | |
671 | if (struct_v >= 2) | |
672 | ::decode(ev, p); | |
673 | if (ev >= 3) | |
674 | ::decode(compat, p); | |
675 | else | |
676 | compat = get_mdsmap_compat_set_base(); | |
677 | if (ev < 5) { | |
678 | __u32 n; | |
679 | ::decode(n, p); | |
680 | metadata_pool = n; | |
681 | } else { | |
682 | ::decode(metadata_pool, p); | |
683 | } | |
684 | ::decode(created, p); | |
685 | ::decode(modified, p); | |
686 | ::decode(tableserver, p); | |
687 | ::decode(in, p); | |
688 | ::decode(inc, p); | |
689 | ::decode(up, p); | |
690 | ::decode(failed, p); | |
691 | ::decode(stopped, p); | |
692 | if (ev >= 4) | |
693 | ::decode(last_failure_osd_epoch, p); | |
694 | if (ev >= 6) { | |
695 | if (ev < 10) { | |
696 | // previously this was a bool about snaps, not a flag map | |
697 | bool flag; | |
698 | ::decode(flag, p); | |
699 | ever_allowed_features = flag ? CEPH_MDSMAP_ALLOW_SNAPS : 0; | |
700 | ever_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS|CEPH_MDSMAP_ALLOW_DIRFRAGS; | |
701 | ::decode(flag, p); | |
702 | explicitly_allowed_features = flag ? CEPH_MDSMAP_ALLOW_SNAPS : 0; | |
703 | if (max_mds > 1) { | |
704 | set_multimds_allowed(); | |
705 | } | |
706 | } else { | |
707 | ::decode(ever_allowed_features, p); | |
708 | ::decode(explicitly_allowed_features, p); | |
709 | } | |
710 | } else { | |
711 | ever_allowed_features = CEPH_MDSMAP_ALLOW_CLASSICS; | |
712 | explicitly_allowed_features = 0; | |
713 | if (max_mds > 1) { | |
714 | set_multimds_allowed(); | |
715 | } | |
716 | } | |
717 | if (ev >= 7) | |
718 | ::decode(inline_data_enabled, p); | |
719 | ||
720 | if (ev >= 8) { | |
721 | assert(struct_v >= 5); | |
722 | ::decode(enabled, p); | |
723 | ::decode(fs_name, p); | |
724 | } else { | |
725 | if (epoch > 1) { | |
726 | // If an MDS has ever been started, epoch will be greater than 1, | |
727 | // assume filesystem is enabled. | |
728 | enabled = true; | |
729 | } else { | |
730 | // Upgrading from a cluster that never used an MDS, switch off | |
731 | // filesystem until it's explicitly enabled. | |
732 | enabled = false; | |
733 | } | |
734 | } | |
735 | ||
736 | if (ev >= 9) { | |
737 | ::decode(damaged, p); | |
738 | } | |
739 | ||
740 | if (ev >= 11) { | |
741 | ::decode(balancer, p); | |
742 | } | |
743 | ||
744 | if (ev >= 12) { | |
745 | ::decode(standby_count_wanted, p); | |
746 | } | |
747 | ||
748 | DECODE_FINISH(p); | |
749 | } | |
750 | ||
751 | MDSMap::availability_t MDSMap::is_cluster_available() const | |
752 | { | |
753 | if (epoch == 0) { | |
754 | // If I'm a client, this means I'm looking at an MDSMap instance | |
755 | // that was never actually initialized from the mons. Client should | |
756 | // wait. | |
757 | return TRANSIENT_UNAVAILABLE; | |
758 | } | |
759 | ||
760 | // If a rank is marked damage (unavailable until operator intervenes) | |
761 | if (damaged.size()) { | |
762 | return STUCK_UNAVAILABLE; | |
763 | } | |
764 | ||
765 | // If no ranks are created (filesystem not initialized) | |
766 | if (in.empty()) { | |
767 | return STUCK_UNAVAILABLE; | |
768 | } | |
769 | ||
770 | for (const auto rank : in) { | |
771 | if (up.count(rank) && mds_info.at(up.at(rank)).laggy()) { | |
772 | // This might only be transient, but because we can't see | |
773 | // standbys, we have no way of knowing whether there is a | |
774 | // standby available to replace the laggy guy. | |
775 | return STUCK_UNAVAILABLE; | |
776 | } | |
777 | } | |
778 | ||
779 | if (get_num_mds(CEPH_MDS_STATE_ACTIVE) > 0) { | |
780 | // Nobody looks stuck, so indicate to client they should go ahead | |
781 | // and try mounting if anybody is active. This may include e.g. | |
782 | // one MDS failing over and another active: the client should | |
783 | // proceed to start talking to the active one and let the | |
784 | // transiently-unavailable guy catch up later. | |
785 | return AVAILABLE; | |
786 | } else { | |
787 | // Nothing indicating we were stuck, but nobody active (yet) | |
788 | //return TRANSIENT_UNAVAILABLE; | |
789 | ||
790 | // Because we don't have standbys in the MDSMap any more, we can't | |
791 | // reliably indicate transient vs. stuck, so always say stuck so | |
792 | // that the client doesn't block. | |
793 | return STUCK_UNAVAILABLE; | |
794 | } | |
795 | } | |
796 | ||
797 | bool MDSMap::state_transition_valid(DaemonState prev, DaemonState next) | |
798 | { | |
799 | bool state_valid = true; | |
800 | if (next != prev) { | |
801 | if (prev == MDSMap::STATE_REPLAY) { | |
802 | if (next != MDSMap::STATE_RESOLVE && next != MDSMap::STATE_RECONNECT) { | |
803 | state_valid = false; | |
804 | } | |
805 | } else if (prev == MDSMap::STATE_REJOIN) { | |
806 | if (next != MDSMap::STATE_ACTIVE | |
807 | && next != MDSMap::STATE_CLIENTREPLAY | |
808 | && next != MDSMap::STATE_STOPPED) { | |
809 | state_valid = false; | |
810 | } | |
811 | } else if (prev >= MDSMap::STATE_RECONNECT && prev < MDSMap::STATE_ACTIVE) { | |
812 | // Once I have entered replay, the only allowable transitions are to | |
813 | // the next next along in the sequence. | |
814 | if (next != prev + 1) { | |
815 | state_valid = false; | |
816 | } | |
817 | } | |
818 | } | |
819 | ||
820 | return state_valid; | |
821 | } | |
822 | ||
823 | bool MDSMap::check_health(mds_rank_t standby_daemon_count) | |
824 | { | |
825 | std::set<mds_rank_t> standbys; | |
826 | get_standby_replay_mds_set(standbys); | |
827 | std::set<mds_rank_t> actives; | |
828 | get_active_mds_set(actives); | |
829 | mds_rank_t standbys_avail = (mds_rank_t)standbys.size()+standby_daemon_count; | |
830 | ||
831 | /* If there are standby daemons available/replaying and | |
832 | * standby_count_wanted is unset (default), then we set it to 1. This will | |
833 | * happen during health checks by the mons. Also, during initial creation | |
834 | * of the FS we will have no actives so we don't want to change the default | |
835 | * yet. | |
836 | */ | |
837 | if (standby_count_wanted == -1 && actives.size() > 0 && standbys_avail > 0) { | |
838 | set_standby_count_wanted(1); | |
839 | return true; | |
840 | } | |
841 | return false; | |
842 | } |