]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | #include "MDSMap.h" | |
17 | ||
18 | #include <sstream> | |
19 | using std::stringstream; | |
20 | ||
21 | ||
22 | // features | |
23 | CompatSet get_mdsmap_compat_set_all() { | |
24 | CompatSet::FeatureSet feature_compat; | |
25 | CompatSet::FeatureSet feature_ro_compat; | |
26 | CompatSet::FeatureSet feature_incompat; | |
27 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_BASE); | |
28 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_CLIENTRANGES); | |
29 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILELAYOUT); | |
30 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_DIRINODE); | |
31 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_ENCODING); | |
32 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_OMAPDIRFRAG); | |
33 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_INLINE); | |
34 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_NOANCHOR); | |
35 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILE_LAYOUT_V2); | |
36 | ||
37 | return CompatSet(feature_compat, feature_ro_compat, feature_incompat); | |
38 | } | |
39 | ||
40 | CompatSet get_mdsmap_compat_set_default() { | |
41 | CompatSet::FeatureSet feature_compat; | |
42 | CompatSet::FeatureSet feature_ro_compat; | |
43 | CompatSet::FeatureSet feature_incompat; | |
44 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_BASE); | |
45 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_CLIENTRANGES); | |
46 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILELAYOUT); | |
47 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_DIRINODE); | |
48 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_ENCODING); | |
49 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_OMAPDIRFRAG); | |
50 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_NOANCHOR); | |
51 | feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILE_LAYOUT_V2); | |
52 | ||
53 | return CompatSet(feature_compat, feature_ro_compat, feature_incompat); | |
54 | } | |
55 | ||
56 | // base (pre v0.20) | |
57 | CompatSet get_mdsmap_compat_set_base() { | |
58 | CompatSet::FeatureSet feature_compat_base; | |
59 | CompatSet::FeatureSet feature_incompat_base; | |
60 | feature_incompat_base.insert(MDS_FEATURE_INCOMPAT_BASE); | |
61 | CompatSet::FeatureSet feature_ro_compat_base; | |
62 | ||
63 | return CompatSet(feature_compat_base, feature_ro_compat_base, feature_incompat_base); | |
64 | } | |
65 | ||
66 | void MDSMap::mds_info_t::dump(Formatter *f) const | |
67 | { | |
68 | f->dump_unsigned("gid", global_id); | |
69 | f->dump_string("name", name); | |
70 | f->dump_int("rank", rank); | |
71 | f->dump_int("incarnation", inc); | |
72 | f->dump_stream("state") << ceph_mds_state_name(state); | |
73 | f->dump_int("state_seq", state_seq); | |
74 | f->dump_stream("addr") << addr; | |
75 | if (laggy_since != utime_t()) | |
76 | f->dump_stream("laggy_since") << laggy_since; | |
77 | ||
78 | f->dump_int("standby_for_rank", standby_for_rank); | |
79 | f->dump_int("standby_for_fscid", standby_for_fscid); | |
80 | f->dump_string("standby_for_name", standby_for_name); | |
81 | f->dump_bool("standby_replay", standby_replay); | |
82 | f->open_array_section("export_targets"); | |
83 | for (set<mds_rank_t>::iterator p = export_targets.begin(); | |
84 | p != export_targets.end(); ++p) { | |
85 | f->dump_int("mds", *p); | |
86 | } | |
87 | f->close_section(); | |
88 | f->dump_unsigned("features", mds_features); | |
89 | } | |
90 | ||
91 | void MDSMap::mds_info_t::print_summary(ostream &out) const | |
92 | { | |
93 | out << global_id << ":\t" | |
94 | << addr | |
95 | << " '" << name << "'" | |
96 | << " mds." << rank | |
97 | << "." << inc | |
98 | << " " << ceph_mds_state_name(state) | |
99 | << " seq " << state_seq; | |
100 | if (laggy()) { | |
101 | out << " laggy since " << laggy_since; | |
102 | } | |
103 | if (standby_for_rank != -1 || | |
104 | !standby_for_name.empty()) { | |
105 | out << " (standby for"; | |
106 | //if (standby_for_rank >= 0) | |
107 | out << " rank " << standby_for_rank; | |
108 | if (!standby_for_name.empty()) { | |
109 | out << " '" << standby_for_name << "'"; | |
110 | } | |
111 | out << ")"; | |
112 | } | |
113 | if (!export_targets.empty()) { | |
114 | out << " export_targets=" << export_targets; | |
115 | } | |
116 | } | |
117 | ||
118 | void MDSMap::mds_info_t::generate_test_instances(list<mds_info_t*>& ls) | |
119 | { | |
120 | mds_info_t *sample = new mds_info_t(); | |
121 | ls.push_back(sample); | |
122 | sample = new mds_info_t(); | |
123 | sample->global_id = 1; | |
124 | sample->name = "test_instance"; | |
125 | sample->rank = 0; | |
126 | ls.push_back(sample); | |
127 | } | |
128 | ||
129 | void MDSMap::dump(Formatter *f) const | |
130 | { | |
131 | f->dump_int("epoch", epoch); | |
132 | f->dump_unsigned("flags", flags); | |
133 | f->dump_unsigned("ever_allowed_features", ever_allowed_features); | |
134 | f->dump_unsigned("explicitly_allowed_features", explicitly_allowed_features); | |
135 | f->dump_stream("created") << created; | |
136 | f->dump_stream("modified") << modified; | |
137 | f->dump_int("tableserver", tableserver); | |
138 | f->dump_int("root", root); | |
139 | f->dump_int("session_timeout", session_timeout); | |
140 | f->dump_int("session_autoclose", session_autoclose); | |
141 | f->dump_int("max_file_size", max_file_size); | |
142 | f->dump_int("last_failure", last_failure); | |
143 | f->dump_int("last_failure_osd_epoch", last_failure_osd_epoch); | |
144 | f->open_object_section("compat"); | |
145 | compat.dump(f); | |
146 | f->close_section(); | |
147 | f->dump_int("max_mds", max_mds); | |
148 | f->open_array_section("in"); | |
149 | for (set<mds_rank_t>::const_iterator p = in.begin(); p != in.end(); ++p) | |
150 | f->dump_int("mds", *p); | |
151 | f->close_section(); | |
152 | f->open_object_section("up"); | |
153 | for (map<mds_rank_t,mds_gid_t>::const_iterator p = up.begin(); p != up.end(); ++p) { | |
154 | char s[14]; | |
155 | sprintf(s, "mds_%d", int(p->first)); | |
156 | f->dump_int(s, p->second); | |
157 | } | |
158 | f->close_section(); | |
159 | f->open_array_section("failed"); | |
160 | for (set<mds_rank_t>::const_iterator p = failed.begin(); p != failed.end(); ++p) | |
161 | f->dump_int("mds", *p); | |
162 | f->close_section(); | |
163 | f->open_array_section("damaged"); | |
164 | for (set<mds_rank_t>::const_iterator p = damaged.begin(); p != damaged.end(); ++p) | |
165 | f->dump_int("mds", *p); | |
166 | f->close_section(); | |
167 | f->open_array_section("stopped"); | |
168 | for (set<mds_rank_t>::const_iterator p = stopped.begin(); p != stopped.end(); ++p) | |
169 | f->dump_int("mds", *p); | |
170 | f->close_section(); | |
171 | f->open_object_section("info"); | |
172 | for (map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin(); p != mds_info.end(); ++p) { | |
173 | char s[25]; // 'gid_' + len(str(ULLONG_MAX)) + '\0' | |
174 | sprintf(s, "gid_%llu", (long long unsigned)p->first); | |
175 | f->open_object_section(s); | |
176 | p->second.dump(f); | |
177 | f->close_section(); | |
178 | } | |
179 | f->close_section(); | |
180 | f->open_array_section("data_pools"); | |
181 | for (set<int64_t>::const_iterator p = data_pools.begin(); p != data_pools.end(); ++p) | |
182 | f->dump_int("pool", *p); | |
183 | f->close_section(); | |
184 | f->dump_int("metadata_pool", metadata_pool); | |
185 | f->dump_bool("enabled", enabled); | |
186 | f->dump_string("fs_name", fs_name); | |
187 | f->dump_string("balancer", balancer); | |
188 | f->dump_int("standby_count_wanted", std::max(0, standby_count_wanted)); | |
189 | } | |
190 | ||
191 | void MDSMap::generate_test_instances(list<MDSMap*>& ls) | |
192 | { | |
193 | MDSMap *m = new MDSMap(); | |
194 | m->max_mds = 1; | |
195 | m->data_pools.insert(0); | |
196 | m->metadata_pool = 1; | |
197 | m->cas_pool = 2; | |
198 | m->compat = get_mdsmap_compat_set_all(); | |
199 | ||
200 | // these aren't the defaults, just in case anybody gets confused | |
201 | m->session_timeout = 61; | |
202 | m->session_autoclose = 301; | |
203 | m->max_file_size = 1<<24; | |
204 | ls.push_back(m); | |
205 | } | |
206 | ||
207 | void MDSMap::print(ostream& out) const | |
208 | { | |
209 | out << "fs_name\t" << fs_name << "\n"; | |
210 | out << "epoch\t" << epoch << "\n"; | |
211 | out << "flags\t" << hex << flags << dec << "\n"; | |
212 | out << "created\t" << created << "\n"; | |
213 | out << "modified\t" << modified << "\n"; | |
214 | out << "tableserver\t" << tableserver << "\n"; | |
215 | out << "root\t" << root << "\n"; | |
216 | out << "session_timeout\t" << session_timeout << "\n" | |
217 | << "session_autoclose\t" << session_autoclose << "\n"; | |
218 | out << "max_file_size\t" << max_file_size << "\n"; | |
219 | out << "last_failure\t" << last_failure << "\n" | |
220 | << "last_failure_osd_epoch\t" << last_failure_osd_epoch << "\n"; | |
221 | out << "compat\t" << compat << "\n"; | |
222 | out << "max_mds\t" << max_mds << "\n"; | |
223 | out << "in\t" << in << "\n" | |
224 | << "up\t" << up << "\n" | |
225 | << "failed\t" << failed << "\n" | |
226 | << "damaged\t" << damaged << "\n" | |
227 | << "stopped\t" << stopped << "\n"; | |
228 | out << "data_pools\t" << data_pools << "\n"; | |
229 | out << "metadata_pool\t" << metadata_pool << "\n"; | |
230 | out << "inline_data\t" << (inline_data_enabled ? "enabled" : "disabled") << "\n"; | |
231 | out << "balancer\t" << balancer << "\n"; | |
232 | out << "standby_count_wanted\t" << std::max(0, standby_count_wanted) << "\n"; | |
233 | ||
234 | multimap< pair<mds_rank_t, unsigned>, mds_gid_t > foo; | |
235 | for (const auto &p : mds_info) { | |
236 | foo.insert(std::make_pair( | |
237 | std::make_pair(p.second.rank, p.second.inc-1), p.first)); | |
238 | } | |
239 | ||
240 | for (const auto &p : foo) { | |
241 | const mds_info_t& info = mds_info.at(p.second); | |
242 | info.print_summary(out); | |
243 | out << "\n"; | |
244 | } | |
245 | } | |
246 | ||
247 | ||
248 | ||
249 | void MDSMap::print_summary(Formatter *f, ostream *out) const | |
250 | { | |
251 | map<mds_rank_t,string> by_rank; | |
252 | map<string,int> by_state; | |
253 | ||
254 | if (f) { | |
255 | f->dump_unsigned("epoch", get_epoch()); | |
256 | f->dump_unsigned("up", up.size()); | |
257 | f->dump_unsigned("in", in.size()); | |
258 | f->dump_unsigned("max", max_mds); | |
259 | } else { | |
260 | *out << "e" << get_epoch() << ": " << up.size() << "/" << in.size() << "/" << max_mds << " up"; | |
261 | } | |
262 | ||
263 | if (f) | |
264 | f->open_array_section("by_rank"); | |
265 | for (const auto &p : mds_info) { | |
266 | string s = ceph_mds_state_name(p.second.state); | |
267 | if (p.second.laggy()) | |
268 | s += "(laggy or crashed)"; | |
269 | ||
270 | if (p.second.rank >= 0 && p.second.state != MDSMap::STATE_STANDBY_REPLAY) { | |
271 | if (f) { | |
272 | f->open_object_section("mds"); | |
273 | f->dump_unsigned("rank", p.second.rank); | |
274 | f->dump_string("name", p.second.name); | |
275 | f->dump_string("status", s); | |
276 | f->close_section(); | |
277 | } else { | |
278 | by_rank[p.second.rank] = p.second.name + "=" + s; | |
279 | } | |
280 | } else { | |
281 | by_state[s]++; | |
282 | } | |
283 | } | |
284 | if (f) { | |
285 | f->close_section(); | |
286 | } else { | |
287 | if (!by_rank.empty()) | |
288 | *out << " " << by_rank; | |
289 | } | |
290 | ||
291 | for (map<string,int>::reverse_iterator p = by_state.rbegin(); p != by_state.rend(); ++p) { | |
292 | if (f) { | |
293 | f->dump_unsigned(p->first.c_str(), p->second); | |
294 | } else { | |
295 | *out << ", " << p->second << " " << p->first; | |
296 | } | |
297 | } | |
298 | ||
299 | if (!failed.empty()) { | |
300 | if (f) { | |
301 | f->dump_unsigned("failed", failed.size()); | |
302 | } else { | |
303 | *out << ", " << failed.size() << " failed"; | |
304 | } | |
305 | } | |
306 | ||
307 | if (!damaged.empty()) { | |
308 | if (f) { | |
309 | f->dump_unsigned("damaged", damaged.size()); | |
310 | } else { | |
311 | *out << ", " << damaged.size() << " damaged"; | |
312 | } | |
313 | } | |
314 | //if (stopped.size()) | |
315 | //out << ", " << stopped.size() << " stopped"; | |
316 | } | |
317 | ||
318 | void MDSMap::get_health(list<pair<health_status_t,string> >& summary, | |
319 | list<pair<health_status_t,string> > *detail) const | |
320 | { | |
321 | if (!failed.empty()) { | |
322 | std::ostringstream oss; | |
323 | oss << "mds rank" | |
324 | << ((failed.size() > 1) ? "s ":" ") | |
325 | << failed | |
326 | << ((failed.size() > 1) ? " have":" has") | |
327 | << " failed"; | |
328 | summary.push_back(make_pair(HEALTH_ERR, oss.str())); | |
329 | if (detail) { | |
330 | for (set<mds_rank_t>::const_iterator p = failed.begin(); p != failed.end(); ++p) { | |
331 | std::ostringstream oss; | |
332 | oss << "mds." << *p << " has failed"; | |
333 | detail->push_back(make_pair(HEALTH_ERR, oss.str())); | |
334 | } | |
335 | } | |
336 | } | |
337 | ||
338 | if (!damaged.empty()) { | |
339 | std::ostringstream oss; | |
340 | oss << "mds rank" | |
341 | << ((damaged.size() > 1) ? "s ":" ") | |
342 | << damaged | |
343 | << ((damaged.size() > 1) ? " are":" is") | |
344 | << " damaged"; | |
345 | summary.push_back(make_pair(HEALTH_ERR, oss.str())); | |
346 | if (detail) { | |
347 | for (set<mds_rank_t>::const_iterator p = damaged.begin(); p != damaged.end(); ++p) { | |
348 | std::ostringstream oss; | |
349 | oss << "mds." << *p << " is damaged"; | |
350 | detail->push_back(make_pair(HEALTH_ERR, oss.str())); | |
351 | } | |
352 | } | |
353 | } | |
354 | ||
355 | if (is_degraded()) { | |
356 | summary.push_back(make_pair(HEALTH_WARN, "mds cluster is degraded")); | |
357 | if (detail) { | |
358 | detail->push_back(make_pair(HEALTH_WARN, "mds cluster is degraded")); | |
359 | for (mds_rank_t i = mds_rank_t(0); i< get_max_mds(); i++) { | |
360 | if (!is_up(i)) | |
361 | continue; | |
362 | mds_gid_t gid = up.find(i)->second; | |
363 | map<mds_gid_t,mds_info_t>::const_iterator info = mds_info.find(gid); | |
364 | stringstream ss; | |
365 | if (is_resolve(i)) | |
366 | ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is resolving"; | |
367 | if (is_replay(i)) | |
368 | ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is replaying journal"; | |
369 | if (is_rejoin(i)) | |
370 | ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is rejoining"; | |
371 | if (is_reconnect(i)) | |
372 | ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is reconnecting to clients"; | |
373 | if (ss.str().length()) | |
374 | detail->push_back(make_pair(HEALTH_WARN, ss.str())); | |
375 | } | |
376 | } | |
377 | } | |
378 | ||
379 | map<mds_gid_t, mds_info_t>::const_iterator m_end = mds_info.end(); | |
380 | set<string> laggy; | |
381 | for (const auto &u : up) { | |
382 | map<mds_gid_t, mds_info_t>::const_iterator m = mds_info.find(u.second); | |
383 | if (m == m_end) { | |
384 | std::cerr << "Up rank " << u.first << " GID " << u.second << " not found!" << std::endl; | |
385 | } | |
386 | assert(m != m_end); | |
387 | const mds_info_t &mds_info(m->second); | |
388 | if (mds_info.laggy()) { | |
389 | laggy.insert(mds_info.name); | |
390 | if (detail) { | |
391 | std::ostringstream oss; | |
392 | oss << "mds." << mds_info.name << " at " << mds_info.addr << " is laggy/unresponsive"; | |
393 | detail->push_back(make_pair(HEALTH_WARN, oss.str())); | |
394 | } | |
395 | } | |
396 | } | |
397 | ||
398 | if (!laggy.empty()) { | |
399 | std::ostringstream oss; | |
400 | oss << "mds " << laggy | |
401 | << ((laggy.size() > 1) ? " are":" is") | |
402 | << " laggy"; | |
403 | summary.push_back(make_pair(HEALTH_WARN, oss.str())); | |
404 | } | |
405 | } | |
406 | ||
// Serialize this mds_info_t into `bl`, bracketed by
// ENCODE_START(7, 4, bl) / ENCODE_FINISH(bl).
// `features` is used only for the encoding of `addr`.
// The field order here mirrors the read order in mds_info_t::decode().
407 | void MDSMap::mds_info_t::encode_versioned(bufferlist& bl, uint64_t features) const | |
408 | { | |
409 | ENCODE_START(7, 4, bl); | |
410 | ::encode(global_id, bl); | |
411 | ::encode(name, bl); | |
412 | ::encode(rank, bl); | |
413 | ::encode(inc, bl); | |
// state is written as a 32-bit signed integer.
414 | ::encode((int32_t)state, bl); | |
415 | ::encode(state_seq, bl); | |
416 | ::encode(addr, bl, features); | |
417 | ::encode(laggy_since, bl); | |
418 | ::encode(standby_for_rank, bl); | |
419 | ::encode(standby_for_name, bl); | |
420 | ::encode(export_targets, bl); | |
// The three fields below are read back by decode() only when struct_v is
// >= 5, >= 6 and >= 7 respectively.
421 | ::encode(mds_features, bl); | |
422 | ::encode(standby_for_fscid, bl); | |
423 | ::encode(standby_replay, bl); | |
424 | ENCODE_FINISH(bl); | |
425 | } | |
426 | ||
// Serialize this mds_info_t into `bl` without the ENCODE_START/ENCODE_FINISH
// bracket: a single __u8 struct_v of 3 is written, followed by the fields
// up to and including export_targets.
// mds_features, standby_for_fscid and standby_replay are not written here.
427 | void MDSMap::mds_info_t::encode_unversioned(bufferlist& bl) const | |
428 | { | |
429 | __u8 struct_v = 3; | |
430 | ::encode(struct_v, bl); | |
431 | ::encode(global_id, bl); | |
432 | ::encode(name, bl); | |
433 | ::encode(rank, bl); | |
434 | ::encode(inc, bl); | |
435 | ::encode((int32_t)state, bl); | |
436 | ::encode(state_seq, bl); | |
// addr is encoded with a feature mask of 0 in this layout.
437 | ::encode(addr, bl, 0); | |
438 | ::encode(laggy_since, bl); | |
439 | ::encode(standby_for_rank, bl); | |
440 | ::encode(standby_for_name, bl); | |
441 | ::encode(export_targets, bl); | |
442 | } | |
443 | ||
// Deserialize an mds_info_t from `bl`.
// Fields up to standby_for_name are always read; later fields are read only
// when the decoded struct_v is new enough, and are otherwise left untouched.
// NOTE(review): struct_v is presumably declared by the DECODE_START_* macro
// — confirm against the macro definition.
444 | void MDSMap::mds_info_t::decode(bufferlist::iterator& bl) | |
445 | { | |
446 | DECODE_START_LEGACY_COMPAT_LEN(7, 4, 4, bl); | |
447 | ::decode(global_id, bl); | |
448 | ::decode(name, bl); | |
449 | ::decode(rank, bl); | |
450 | ::decode(inc, bl); | |
// state is read through an int32_t reference, matching the int32_t cast
// used by the encoders.
451 | ::decode((int32_t&)(state), bl); | |
452 | ::decode(state_seq, bl); | |
453 | ::decode(addr, bl); | |
454 | ::decode(laggy_since, bl); | |
455 | ::decode(standby_for_rank, bl); | |
456 | ::decode(standby_for_name, bl); | |
// Version-gated tail: each field exists only from the given struct_v on.
457 | if (struct_v >= 2) | |
458 | ::decode(export_targets, bl); | |
459 | if (struct_v >= 5) | |
460 | ::decode(mds_features, bl); | |
461 | if (struct_v >= 6) { | |
462 | ::decode(standby_for_fscid, bl); | |
463 | } | |
464 | if (struct_v >= 7) { | |
465 | ::decode(standby_replay, bl); | |
466 | } | |
467 | DECODE_FINISH(bl); | |
468 | } | |
469 | ||
470 | ||
471 | ||
// Serialize the map into `bl`, choosing one of three layouts from the
// `features` bits:
//   1. CEPH_FEATURE_PGID64 absent: a bare __u16 version of 2, ending after
//      cas_pool.
//   2. CEPH_FEATURE_PGID64 present but CEPH_FEATURE_MDSENC absent: a bare
//      __u16 version of 3, plus an extended section tagged ev = 5.
//   3. Otherwise: ENCODE_START(5, 4, bl) ... ENCODE_FINISH(bl), plus an
//      extended section tagged ev = 12.
// The statement order in each branch defines the byte layout; do not reorder.
472 | void MDSMap::encode(bufferlist& bl, uint64_t features) const | |
473 | { | |
474 | std::map<mds_rank_t,int32_t> inc; // Legacy field, fake it so that | |
475 | // old-mon peers have something sane | |
476 | // during upgrade | |
// Every rank in `in` is given the current epoch as its fake inc value.
477 | for (const auto rank : in) { | |
478 | inc.insert(std::make_pair(rank, epoch)); | |
479 | } | |
480 | ||
// Layout 1: peers without CEPH_FEATURE_PGID64.
481 | if ((features & CEPH_FEATURE_PGID64) == 0) { | |
482 | __u16 v = 2; | |
483 | ::encode(v, bl); | |
484 | ::encode(epoch, bl); | |
485 | ::encode(flags, bl); | |
486 | ::encode(last_failure, bl); | |
487 | ::encode(root, bl); | |
488 | ::encode(session_timeout, bl); | |
489 | ::encode(session_autoclose, bl); | |
490 | ::encode(max_file_size, bl); | |
491 | ::encode(max_mds, bl); | |
// mds_info is written by hand: a __u32 count, then (gid, info) pairs.
492 | __u32 n = mds_info.size(); | |
493 | ::encode(n, bl); | |
494 | for (map<mds_gid_t, mds_info_t>::const_iterator i = mds_info.begin(); | |
495 | i != mds_info.end(); ++i) { | |
496 | ::encode(i->first, bl); | |
497 | ::encode(i->second, bl, features); | |
498 | } | |
// data_pools is written as a __u32 count followed by __u32 ids: each
// int64_t pool id is assigned to the __u32 `n` before encoding, so it is
// narrowed to 32 bits in this layout.
499 | n = data_pools.size(); | |
500 | ::encode(n, bl); | |
501 | for (set<int64_t>::const_iterator p = data_pools.begin(); p != data_pools.end(); ++p) { | |
502 | n = *p; | |
503 | ::encode(n, bl); | |
504 | } | |
505 | ||
// cas_pool is likewise written through an int32_t.
506 | int32_t m = cas_pool; | |
507 | ::encode(m, bl); | |
508 | return; | |
// Layout 2: peers with CEPH_FEATURE_PGID64 but without CEPH_FEATURE_MDSENC.
509 | } else if ((features & CEPH_FEATURE_MDSENC) == 0) { | |
510 | __u16 v = 3; | |
511 | ::encode(v, bl); | |
512 | ::encode(epoch, bl); | |
513 | ::encode(flags, bl); | |
514 | ::encode(last_failure, bl); | |
515 | ::encode(root, bl); | |
516 | ::encode(session_timeout, bl); | |
517 | ::encode(session_autoclose, bl); | |
518 | ::encode(max_file_size, bl); | |
519 | ::encode(max_mds, bl); | |
520 | __u32 n = mds_info.size(); | |
521 | ::encode(n, bl); | |
522 | for (map<mds_gid_t, mds_info_t>::const_iterator i = mds_info.begin(); | |
523 | i != mds_info.end(); ++i) { | |
524 | ::encode(i->first, bl); | |
525 | ::encode(i->second, bl, features); | |
526 | } | |
527 | ::encode(data_pools, bl); | |
528 | ::encode(cas_pool, bl); | |
529 | ||
530 | // kclient ignores everything from here | |
// Extended section, tagged ev = 5; it ends at last_failure_osd_epoch.
531 | __u16 ev = 5; | |
532 | ::encode(ev, bl); | |
533 | ::encode(compat, bl); | |
534 | ::encode(metadata_pool, bl); | |
535 | ::encode(created, bl); | |
536 | ::encode(modified, bl); | |
537 | ::encode(tableserver, bl); | |
538 | ::encode(in, bl); | |
539 | ::encode(inc, bl); | |
540 | ::encode(up, bl); | |
541 | ::encode(failed, bl); | |
542 | ::encode(stopped, bl); | |
543 | ::encode(last_failure_osd_epoch, bl); | |
544 | return; | |
545 | } | |
546 | ||
// Layout 3: current layout inside the ENCODE_START/ENCODE_FINISH bracket.
547 | ENCODE_START(5, 4, bl); | |
548 | ::encode(epoch, bl); | |
549 | ::encode(flags, bl); | |
550 | ::encode(last_failure, bl); | |
551 | ::encode(root, bl); | |
552 | ::encode(session_timeout, bl); | |
553 | ::encode(session_autoclose, bl); | |
554 | ::encode(max_file_size, bl); | |
555 | ::encode(max_mds, bl); | |
556 | ::encode(mds_info, bl, features); | |
557 | ::encode(data_pools, bl); | |
558 | ::encode(cas_pool, bl); | |
559 | ||
560 | // kclient ignores everything from here | |
// Extended section, tagged ev = 12; MDSMap::decode() gates each trailing
// field on this value.
561 | __u16 ev = 12; | |
562 | ::encode(ev, bl); | |
563 | ::encode(compat, bl); | |
564 | ::encode(metadata_pool, bl); | |
565 | ::encode(created, bl); | |
566 | ::encode(modified, bl); | |
567 | ::encode(tableserver, bl); | |
568 | ::encode(in, bl); | |
569 | ::encode(inc, bl); | |
570 | ::encode(up, bl); | |
571 | ::encode(failed, bl); | |
572 | ::encode(stopped, bl); | |
573 | ::encode(last_failure_osd_epoch, bl); | |
574 | ::encode(ever_allowed_features, bl); | |
575 | ::encode(explicitly_allowed_features, bl); | |
576 | ::encode(inline_data_enabled, bl); | |
577 | ::encode(enabled, bl); | |
578 | ::encode(fs_name, bl); | |
579 | ::encode(damaged, bl); | |
580 | ::encode(balancer, bl); | |
581 | ::encode(standby_count_wanted, bl); | |
582 | ENCODE_FINISH(bl); | |
583 | } | |
584 | ||
// Deserialize the map from `p`.
// Two version numbers drive the parsing: struct_v for the leading section
// and `ev` for the extended section that follows cas_pool. Fields missing
// from older encodings are either given fallback values below or left
// untouched.
// NOTE(review): struct_v is presumably declared by the DECODE_START_* macro
// — confirm against the macro definition.
585 | void MDSMap::decode(bufferlist::iterator& p) | |
586 | { | |
587 | std::map<mds_rank_t,int32_t> inc; // Legacy field, parse and drop | |
588 | ||
589 | cached_up_features = 0; | |
590 | DECODE_START_LEGACY_COMPAT_LEN_16(5, 4, 4, p); | |
591 | ::decode(epoch, p); | |
592 | ::decode(flags, p); | |
593 | ::decode(last_failure, p); | |
594 | ::decode(root, p); | |
595 | ::decode(session_timeout, p); | |
596 | ::decode(session_autoclose, p); | |
597 | ::decode(max_file_size, p); | |
598 | ::decode(max_mds, p); | |
599 | ::decode(mds_info, p); | |
// Before struct_v 3 the pools are stored as a __u32 count followed by
// __u32 ids, with cas_pool as a __s32.
600 | if (struct_v < 3) { | |
601 | __u32 n; | |
602 | ::decode(n, p); | |
603 | while (n--) { | |
604 | __u32 m; | |
605 | ::decode(m, p); | |
606 | data_pools.insert(m); | |
607 | } | |
608 | __s32 s; | |
609 | ::decode(s, p); | |
610 | cas_pool = s; | |
611 | } else { | |
612 | ::decode(data_pools, p); | |
613 | ::decode(cas_pool, p); | |
614 | } | |
615 | ||
616 | // kclient ignores everything from here | |
// ev stays at 1 when struct_v < 2, since no ev is read in that case.
617 | __u16 ev = 1; | |
618 | if (struct_v >= 2) | |
619 | ::decode(ev, p); | |
620 | if (ev >= 3) | |
621 | ::decode(compat, p); | |
622 | else | |
623 | compat = get_mdsmap_compat_set_base(); | |
// Before ev 5 metadata_pool is stored as a __u32.
624 | if (ev < 5) { | |
625 | __u32 n; | |
626 | ::decode(n, p); | |
627 | metadata_pool = n; | |
628 | } else { | |
629 | ::decode(metadata_pool, p); | |
630 | } | |
631 | ::decode(created, p); | |
632 | ::decode(modified, p); | |
633 | ::decode(tableserver, p); | |
634 | ::decode(in, p); | |
// `inc` is read only to advance the iterator; the local is then discarded.
635 | ::decode(inc, p); | |
636 | ::decode(up, p); | |
637 | ::decode(failed, p); | |
638 | ::decode(stopped, p); | |
639 | if (ev >= 4) | |
640 | ::decode(last_failure_osd_epoch, p); | |
641 | if (ev >= 6) { | |
642 | if (ev < 10) { | |
643 | // previously this was a bool about snaps, not a flag map | |
// Two bools are read: the first feeds ever_allowed_features, the second
// explicitly_allowed_features.
644 | bool flag; | |
645 | ::decode(flag, p); | |
646 | ever_allowed_features = flag ? CEPH_MDSMAP_ALLOW_SNAPS : 0; | |
647 | ever_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS|CEPH_MDSMAP_ALLOW_DIRFRAGS; | |
648 | ::decode(flag, p); | |
649 | explicitly_allowed_features = flag ? CEPH_MDSMAP_ALLOW_SNAPS : 0; | |
650 | if (max_mds > 1) { | |
651 | set_multimds_allowed(); | |
652 | } | |
653 | } else { | |
654 | ::decode(ever_allowed_features, p); | |
655 | ::decode(explicitly_allowed_features, p); | |
656 | } | |
657 | } else { | |
// Encodings older than ev 6 carry no feature flags; use fixed defaults.
658 | ever_allowed_features = CEPH_MDSMAP_ALLOW_CLASSICS; | |
659 | explicitly_allowed_features = 0; | |
660 | if (max_mds > 1) { | |
661 | set_multimds_allowed(); | |
662 | } | |
663 | } | |
664 | if (ev >= 7) | |
665 | ::decode(inline_data_enabled, p); | |
666 | ||
667 | if (ev >= 8) { | |
668 | assert(struct_v >= 5); | |
669 | ::decode(enabled, p); | |
670 | ::decode(fs_name, p); | |
671 | } else { | |
// Before ev 8 there is no `enabled` field; it is inferred from the epoch
// and fs_name is left unchanged.
672 | if (epoch > 1) { | |
673 | // If an MDS has ever been started, epoch will be greater than 1, | |
674 | // assume filesystem is enabled. | |
675 | enabled = true; | |
676 | } else { | |
677 | // Upgrading from a cluster that never used an MDS, switch off | |
678 | // filesystem until it's explicitly enabled. | |
679 | enabled = false; | |
680 | } | |
681 | } | |
682 | ||
683 | if (ev >= 9) { | |
684 | ::decode(damaged, p); | |
685 | } | |
686 | ||
687 | if (ev >= 11) { | |
688 | ::decode(balancer, p); | |
689 | } | |
690 | ||
691 | if (ev >= 12) { | |
692 | ::decode(standby_count_wanted, p); | |
693 | } | |
694 | ||
695 | DECODE_FINISH(p); | |
696 | } | |
697 | ||
698 | MDSMap::availability_t MDSMap::is_cluster_available() const | |
699 | { | |
700 | if (epoch == 0) { | |
701 | // If I'm a client, this means I'm looking at an MDSMap instance | |
702 | // that was never actually initialized from the mons. Client should | |
703 | // wait. | |
704 | return TRANSIENT_UNAVAILABLE; | |
705 | } | |
706 | ||
707 | // If a rank is marked damage (unavailable until operator intervenes) | |
708 | if (damaged.size()) { | |
709 | return STUCK_UNAVAILABLE; | |
710 | } | |
711 | ||
712 | // If no ranks are created (filesystem not initialized) | |
713 | if (in.empty()) { | |
714 | return STUCK_UNAVAILABLE; | |
715 | } | |
716 | ||
717 | for (const auto rank : in) { | |
718 | if (up.count(rank) && mds_info.at(up.at(rank)).laggy()) { | |
719 | // This might only be transient, but because we can't see | |
720 | // standbys, we have no way of knowing whether there is a | |
721 | // standby available to replace the laggy guy. | |
722 | return STUCK_UNAVAILABLE; | |
723 | } | |
724 | } | |
725 | ||
726 | if (get_num_mds(CEPH_MDS_STATE_ACTIVE) > 0) { | |
727 | // Nobody looks stuck, so indicate to client they should go ahead | |
728 | // and try mounting if anybody is active. This may include e.g. | |
729 | // one MDS failing over and another active: the client should | |
730 | // proceed to start talking to the active one and let the | |
731 | // transiently-unavailable guy catch up later. | |
732 | return AVAILABLE; | |
733 | } else { | |
734 | // Nothing indicating we were stuck, but nobody active (yet) | |
735 | //return TRANSIENT_UNAVAILABLE; | |
736 | ||
737 | // Because we don't have standbys in the MDSMap any more, we can't | |
738 | // reliably indicate transient vs. stuck, so always say stuck so | |
739 | // that the client doesn't block. | |
740 | return STUCK_UNAVAILABLE; | |
741 | } | |
742 | } | |
743 | ||
744 | bool MDSMap::state_transition_valid(DaemonState prev, DaemonState next) | |
745 | { | |
746 | bool state_valid = true; | |
747 | if (next != prev) { | |
748 | if (prev == MDSMap::STATE_REPLAY) { | |
749 | if (next != MDSMap::STATE_RESOLVE && next != MDSMap::STATE_RECONNECT) { | |
750 | state_valid = false; | |
751 | } | |
752 | } else if (prev == MDSMap::STATE_REJOIN) { | |
753 | if (next != MDSMap::STATE_ACTIVE | |
754 | && next != MDSMap::STATE_CLIENTREPLAY | |
755 | && next != MDSMap::STATE_STOPPED) { | |
756 | state_valid = false; | |
757 | } | |
758 | } else if (prev >= MDSMap::STATE_RECONNECT && prev < MDSMap::STATE_ACTIVE) { | |
759 | // Once I have entered replay, the only allowable transitions are to | |
760 | // the next next along in the sequence. | |
761 | if (next != prev + 1) { | |
762 | state_valid = false; | |
763 | } | |
764 | } | |
765 | } | |
766 | ||
767 | return state_valid; | |
768 | } | |
769 | ||
770 | bool MDSMap::check_health(mds_rank_t standby_daemon_count) | |
771 | { | |
772 | std::set<mds_rank_t> standbys; | |
773 | get_standby_replay_mds_set(standbys); | |
774 | std::set<mds_rank_t> actives; | |
775 | get_active_mds_set(actives); | |
776 | mds_rank_t standbys_avail = (mds_rank_t)standbys.size()+standby_daemon_count; | |
777 | ||
778 | /* If there are standby daemons available/replaying and | |
779 | * standby_count_wanted is unset (default), then we set it to 1. This will | |
780 | * happen during health checks by the mons. Also, during initial creation | |
781 | * of the FS we will have no actives so we don't want to change the default | |
782 | * yet. | |
783 | */ | |
784 | if (standby_count_wanted == -1 && actives.size() > 0 && standbys_avail > 0) { | |
785 | set_standby_count_wanted(1); | |
786 | return true; | |
787 | } | |
788 | return false; | |
789 | } |