]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | #include "FSMap.h" | |
17 | ||
11fdf7f2 | 18 | #include "common/StackStringStream.h" |
7c673cae | 19 | |
11fdf7f2 TL |
20 | #include <sstream> |
21 | #ifdef WITH_SEASTAR | |
22 | #include "crimson/common/config_proxy.h" | |
23 | #else | |
24 | #include "common/config_proxy.h" | |
25 | #endif | |
26 | #include "global/global_context.h" | |
224ce89b WB |
27 | #include "mon/health_check.h" |
28 | ||
11fdf7f2 | 29 | using std::stringstream; |
7c673cae FG |
30 | |
31 | void Filesystem::dump(Formatter *f) const | |
32 | { | |
33 | f->open_object_section("mdsmap"); | |
34 | mds_map.dump(f); | |
35 | f->close_section(); | |
36 | f->dump_int("id", fscid); | |
37 | } | |
38 | ||
39 | void FSMap::dump(Formatter *f) const | |
40 | { | |
41 | f->dump_int("epoch", epoch); | |
11fdf7f2 TL |
42 | // Use 'default' naming to match 'set-default' CLI |
43 | f->dump_int("default_fscid", legacy_client_fscid); | |
7c673cae FG |
44 | |
45 | f->open_object_section("compat"); | |
46 | compat.dump(f); | |
47 | f->close_section(); | |
48 | ||
49 | f->open_object_section("feature_flags"); | |
50 | f->dump_bool("enable_multiple", enable_multiple); | |
51 | f->dump_bool("ever_enabled_multiple", ever_enabled_multiple); | |
52 | f->close_section(); | |
53 | ||
54 | f->open_array_section("standbys"); | |
9f95a23c | 55 | for (const auto& [gid, info] : standby_daemons) { |
7c673cae | 56 | f->open_object_section("info"); |
9f95a23c TL |
57 | info.dump(f); |
58 | f->dump_int("epoch", standby_epochs.at(gid)); | |
7c673cae FG |
59 | f->close_section(); |
60 | } | |
61 | f->close_section(); | |
62 | ||
63 | f->open_array_section("filesystems"); | |
64 | for (const auto &fs : filesystems) { | |
65 | f->open_object_section("filesystem"); | |
66 | fs.second->dump(f); | |
67 | f->close_section(); | |
68 | } | |
69 | f->close_section(); | |
70 | } | |
71 | ||
9f95a23c TL |
72 | FSMap &FSMap::operator=(const FSMap &rhs) |
73 | { | |
74 | epoch = rhs.epoch; | |
75 | next_filesystem_id = rhs.next_filesystem_id; | |
76 | legacy_client_fscid = rhs.legacy_client_fscid; | |
77 | compat = rhs.compat; | |
78 | enable_multiple = rhs.enable_multiple; | |
79 | mds_roles = rhs.mds_roles; | |
80 | standby_daemons = rhs.standby_daemons; | |
81 | standby_epochs = rhs.standby_epochs; | |
82 | ||
83 | filesystems.clear(); | |
84 | for (const auto &i : rhs.filesystems) { | |
85 | const auto &fs = i.second; | |
86 | filesystems[fs->fscid] = std::make_shared<Filesystem>(*fs); | |
87 | } | |
88 | ||
89 | return *this; | |
90 | } | |
91 | ||
92 | void FSMap::generate_test_instances(std::list<FSMap*>& ls) | |
7c673cae FG |
93 | { |
94 | FSMap *m = new FSMap(); | |
95 | ||
96 | std::list<MDSMap*> mds_map_instances; | |
97 | MDSMap::generate_test_instances(mds_map_instances); | |
98 | ||
99 | int k = 20; | |
100 | for (auto i : mds_map_instances) { | |
11fdf7f2 | 101 | auto fs = Filesystem::create(); |
7c673cae FG |
102 | fs->fscid = k++; |
103 | fs->mds_map = *i; | |
104 | delete i; | |
105 | m->filesystems[fs->fscid] = fs; | |
106 | } | |
107 | mds_map_instances.clear(); | |
108 | ||
109 | ls.push_back(m); | |
110 | } | |
111 | ||
112 | void FSMap::print(ostream& out) const | |
113 | { | |
114 | out << "e" << epoch << std::endl; | |
115 | out << "enable_multiple, ever_enabled_multiple: " << enable_multiple << "," | |
116 | << ever_enabled_multiple << std::endl; | |
117 | out << "compat: " << compat << std::endl; | |
118 | out << "legacy client fscid: " << legacy_client_fscid << std::endl; | |
119 | out << " " << std::endl; | |
120 | ||
121 | if (filesystems.empty()) { | |
122 | out << "No filesystems configured" << std::endl; | |
7c673cae FG |
123 | } |
124 | ||
11fdf7f2 TL |
125 | for (const auto& p : filesystems) { |
126 | p.second->print(out); | |
7c673cae FG |
127 | out << " " << std::endl << " " << std::endl; // Space out a bit |
128 | } | |
129 | ||
130 | if (!standby_daemons.empty()) { | |
131 | out << "Standby daemons:" << std::endl << " " << std::endl; | |
132 | } | |
133 | ||
9f95a23c TL |
134 | for (const auto& p : standby_daemons) { |
135 | out << p.second << std::endl; | |
7c673cae FG |
136 | } |
137 | } | |
138 | ||
7c673cae FG |
139 | void FSMap::print_summary(Formatter *f, ostream *out) const |
140 | { | |
7c673cae FG |
141 | if (f) { |
142 | f->dump_unsigned("epoch", get_epoch()); | |
11fdf7f2 TL |
143 | for (const auto &p : filesystems) { |
144 | auto& fs = p.second; | |
7c673cae FG |
145 | f->dump_unsigned("id", fs->fscid); |
146 | f->dump_unsigned("up", fs->mds_map.up.size()); | |
147 | f->dump_unsigned("in", fs->mds_map.in.size()); | |
148 | f->dump_unsigned("max", fs->mds_map.max_mds); | |
149 | } | |
150 | } else { | |
11fdf7f2 TL |
151 | auto count = filesystems.size(); |
152 | if (count <= 3) { | |
153 | bool first = true; | |
154 | for (const auto& p : filesystems) { | |
155 | const auto& fs = p.second; | |
156 | if (!first) { | |
157 | *out << " "; | |
158 | } | |
159 | if (fs->mds_map.is_degraded()) { | |
160 | *out << fs->mds_map.fs_name << ":" << fs->mds_map.up.size() << "/" << fs->mds_map.in.size(); | |
161 | } else { | |
162 | *out << fs->mds_map.fs_name << ":" << fs->mds_map.in.size(); | |
163 | } | |
164 | first = false; | |
165 | } | |
166 | } else { | |
167 | *out << count << " fs"; | |
168 | unsigned degraded = 0; | |
169 | CachedStackStringStream css; | |
170 | *css << " (degraded: "; | |
171 | for (const auto& p : filesystems) { | |
172 | const auto& fs = p.second; | |
173 | if (fs->mds_map.is_degraded()) { | |
174 | degraded++; | |
175 | if (degraded <= 3) { | |
176 | *css << fs->mds_map.fs_name << ":" << fs->mds_map.up.size() << "/" << fs->mds_map.in.size(); | |
177 | } | |
178 | } | |
179 | } | |
180 | if (degraded > 0) { | |
181 | if (degraded <= 3) { | |
182 | *css << ")"; | |
183 | *out << css->strv(); | |
184 | } else { | |
185 | *out << " (degraded: " << degraded << " fs)"; | |
186 | } | |
187 | } | |
7c673cae FG |
188 | } |
189 | } | |
190 | ||
191 | if (f) { | |
192 | f->open_array_section("by_rank"); | |
193 | } | |
194 | ||
11fdf7f2 TL |
195 | std::map<MDSMap::DaemonState,unsigned> by_state; |
196 | std::map<mds_role_t, std::pair<MDSMap::DaemonState, std::string>> by_rank; | |
197 | by_state[MDSMap::DaemonState::STATE_STANDBY] = standby_daemons.size(); | |
198 | for (const auto& [gid, fscid] : mds_roles) { | |
199 | if (fscid == FS_CLUSTER_ID_NONE) | |
200 | continue; | |
201 | ||
202 | const auto& info = filesystems.at(fscid)->mds_map.get_info_gid(gid); | |
203 | auto s = std::string(ceph_mds_state_name(info.state)); | |
7c673cae FG |
204 | if (info.laggy()) { |
205 | s += "(laggy or crashed)"; | |
206 | } | |
207 | ||
11fdf7f2 TL |
208 | if (f) { |
209 | f->open_object_section("mds"); | |
210 | f->dump_unsigned("filesystem_id", fscid); | |
211 | f->dump_unsigned("rank", info.rank); | |
212 | f->dump_string("name", info.name); | |
213 | f->dump_string("status", s); | |
214 | f->dump_unsigned("gid", gid); | |
215 | f->close_section(); | |
216 | } else if (info.state != MDSMap::DaemonState::STATE_STANDBY_REPLAY) { | |
217 | by_rank[mds_role_t(fscid, info.rank)] = std::make_pair(info.state, info.name + "=" + s); | |
7c673cae | 218 | } |
11fdf7f2 | 219 | by_state[info.state]++; |
7c673cae FG |
220 | } |
221 | ||
222 | if (f) { | |
223 | f->close_section(); | |
224 | } else { | |
11fdf7f2 | 225 | if (0 < by_rank.size() && by_rank.size() < 5) { |
7c673cae FG |
226 | if (filesystems.size() > 1) { |
227 | // Disambiguate filesystems | |
228 | std::map<std::string, std::string> pretty; | |
11fdf7f2 TL |
229 | for (const auto& [role,status] : by_rank) { |
230 | const auto &fs_name = filesystems.at(role.fscid)->mds_map.fs_name; | |
231 | CachedStackStringStream css; | |
232 | *css << fs_name << ":" << role.rank; | |
233 | pretty.emplace(std::piecewise_construct, std::forward_as_tuple(css->strv()), std::forward_as_tuple(status.second)); | |
234 | --by_state[status.first]; /* already printed! */ | |
7c673cae FG |
235 | } |
236 | *out << " " << pretty; | |
237 | } else { | |
238 | // Omit FSCID in output when only one filesystem exists | |
239 | std::map<mds_rank_t, std::string> shortened; | |
11fdf7f2 TL |
240 | for (const auto& [role,status] : by_rank) { |
241 | shortened[role.rank] = status.second; | |
242 | --by_state[status.first]; /* already printed! */ | |
7c673cae FG |
243 | } |
244 | *out << " " << shortened; | |
245 | } | |
246 | } | |
11fdf7f2 TL |
247 | for (const auto& [state, count] : by_state) { |
248 | if (count > 0) { | |
249 | auto s = std::string_view(ceph_mds_state_name(state)); | |
250 | *out << " " << count << " " << s; | |
251 | } | |
252 | } | |
7c673cae FG |
253 | } |
254 | ||
11fdf7f2 TL |
255 | if (f) { |
256 | const auto state = MDSMap::DaemonState::STATE_STANDBY; | |
257 | auto&& name = ceph_mds_state_name(state); | |
258 | auto count = standby_daemons.size(); | |
259 | f->dump_unsigned(name, count); | |
7c673cae FG |
260 | } |
261 | ||
262 | size_t failed = 0; | |
263 | size_t damaged = 0; | |
11fdf7f2 TL |
264 | for (const auto& p : filesystems) { |
265 | auto& fs = p.second; | |
7c673cae FG |
266 | failed += fs->mds_map.failed.size(); |
267 | damaged += fs->mds_map.damaged.size(); | |
268 | } | |
269 | ||
270 | if (failed > 0) { | |
271 | if (f) { | |
272 | f->dump_unsigned("failed", failed); | |
273 | } else { | |
274 | *out << ", " << failed << " failed"; | |
275 | } | |
276 | } | |
277 | ||
278 | if (damaged > 0) { | |
279 | if (f) { | |
280 | f->dump_unsigned("damaged", damaged); | |
281 | } else { | |
282 | *out << ", " << damaged << " damaged"; | |
283 | } | |
284 | } | |
285 | //if (stopped.size()) | |
286 | //out << ", " << stopped.size() << " stopped"; | |
287 | } | |
288 | ||
9f95a23c TL |
289 | mds_gid_t Filesystem::get_standby_replay(mds_gid_t who) const |
290 | { | |
291 | for (const auto &i : mds_map.mds_info) { | |
292 | const auto &info = i.second; | |
293 | if (info.state == MDSMap::STATE_STANDBY_REPLAY | |
294 | && info.rank == mds_map.mds_info.at(who).rank) { | |
295 | return info.global_id; | |
296 | } | |
297 | } | |
298 | return MDS_GID_NONE; | |
299 | } | |
7c673cae | 300 | |
11fdf7f2 TL |
301 | Filesystem::ref FSMap::create_filesystem(std::string_view name, |
302 | int64_t metadata_pool, int64_t data_pool, uint64_t features) | |
7c673cae | 303 | { |
11fdf7f2 | 304 | auto fs = Filesystem::create(); |
28e407b8 | 305 | fs->mds_map.epoch = epoch; |
11fdf7f2 | 306 | fs->mds_map.fs_name = name; |
31f18b77 | 307 | fs->mds_map.data_pools.push_back(data_pool); |
7c673cae FG |
308 | fs->mds_map.metadata_pool = metadata_pool; |
309 | fs->mds_map.cas_pool = -1; | |
7c673cae FG |
310 | fs->mds_map.compat = compat; |
311 | fs->mds_map.created = ceph_clock_now(); | |
312 | fs->mds_map.modified = ceph_clock_now(); | |
7c673cae | 313 | fs->mds_map.enabled = true; |
9f95a23c TL |
314 | fs->fscid = next_filesystem_id++; |
315 | // ANONYMOUS is only for upgrades from legacy mdsmaps, we should | |
316 | // have initialized next_filesystem_id such that it's never used here. | |
317 | ceph_assert(fs->fscid != FS_CLUSTER_ID_ANONYMOUS); | |
7c673cae FG |
318 | filesystems[fs->fscid] = fs; |
319 | ||
320 | // Created first filesystem? Set it as the one | |
321 | // for legacy clients to use | |
322 | if (filesystems.size() == 1) { | |
323 | legacy_client_fscid = fs->fscid; | |
324 | } | |
11fdf7f2 TL |
325 | |
326 | return fs; | |
7c673cae FG |
327 | } |
328 | ||
9f95a23c TL |
329 | Filesystem::const_ref FSMap::get_filesystem(std::string_view name) const |
330 | { | |
331 | for (const auto& p : filesystems) { | |
332 | if (p.second->mds_map.fs_name == name) { | |
333 | return p.second; | |
334 | } | |
335 | } | |
336 | return nullptr; | |
337 | } | |
338 | ||
339 | std::vector<Filesystem::const_ref> FSMap::get_filesystems(void) const | |
340 | { | |
341 | std::vector<Filesystem::const_ref> ret; | |
342 | for (const auto& p : filesystems) { | |
343 | ret.push_back(p.second); | |
344 | } | |
345 | return ret; | |
346 | } | |
347 | ||
7c673cae FG |
348 | void FSMap::reset_filesystem(fs_cluster_id_t fscid) |
349 | { | |
350 | auto fs = get_filesystem(fscid); | |
11fdf7f2 | 351 | auto new_fs = Filesystem::create(); |
7c673cae FG |
352 | |
353 | // Populate rank 0 as existing (so don't go into CREATING) | |
354 | // but failed (so that next available MDS is assigned the rank) | |
355 | new_fs->mds_map.in.insert(mds_rank_t(0)); | |
356 | new_fs->mds_map.failed.insert(mds_rank_t(0)); | |
357 | ||
358 | // Carry forward what makes sense | |
359 | new_fs->fscid = fs->fscid; | |
360 | new_fs->mds_map.inline_data_enabled = fs->mds_map.inline_data_enabled; | |
7c673cae FG |
361 | new_fs->mds_map.data_pools = fs->mds_map.data_pools; |
362 | new_fs->mds_map.metadata_pool = fs->mds_map.metadata_pool; | |
363 | new_fs->mds_map.cas_pool = fs->mds_map.cas_pool; | |
364 | new_fs->mds_map.fs_name = fs->mds_map.fs_name; | |
7c673cae FG |
365 | new_fs->mds_map.compat = compat; |
366 | new_fs->mds_map.created = ceph_clock_now(); | |
367 | new_fs->mds_map.modified = ceph_clock_now(); | |
7c673cae FG |
368 | new_fs->mds_map.standby_count_wanted = fs->mds_map.standby_count_wanted; |
369 | new_fs->mds_map.enabled = true; | |
370 | ||
c07f9fc5 FG |
371 | // Remember mds ranks that have ever started. (They should load old inotable |
372 | // instead of creating new one if they start again.) | |
373 | new_fs->mds_map.stopped.insert(fs->mds_map.in.begin(), fs->mds_map.in.end()); | |
374 | new_fs->mds_map.stopped.insert(fs->mds_map.stopped.begin(), fs->mds_map.stopped.end()); | |
375 | new_fs->mds_map.stopped.erase(mds_rank_t(0)); | |
376 | ||
7c673cae FG |
377 | // Persist the new FSMap |
378 | filesystems[new_fs->fscid] = new_fs; | |
379 | } | |
380 | ||
381 | void FSMap::get_health(list<pair<health_status_t,string> >& summary, | |
382 | list<pair<health_status_t,string> > *detail) const | |
383 | { | |
384 | mds_rank_t standby_count_wanted = 0; | |
385 | for (const auto &i : filesystems) { | |
386 | const auto &fs = i.second; | |
387 | ||
388 | // TODO: move get_health up into here so that we can qualify | |
389 | // all the messages with what filesystem they're talking about | |
390 | fs->mds_map.get_health(summary, detail); | |
391 | ||
392 | standby_count_wanted = std::max(standby_count_wanted, fs->mds_map.get_standby_count_wanted((mds_rank_t)standby_daemons.size())); | |
393 | } | |
394 | ||
395 | if (standby_count_wanted) { | |
396 | std::ostringstream oss; | |
397 | oss << "insufficient standby daemons available: have " << standby_daemons.size() << "; want " << standby_count_wanted << " more"; | |
398 | summary.push_back(make_pair(HEALTH_WARN, oss.str())); | |
399 | } | |
400 | } | |
401 | ||
402 | bool FSMap::check_health(void) | |
403 | { | |
404 | bool changed = false; | |
405 | for (auto &i : filesystems) { | |
406 | changed |= i.second->mds_map.check_health((mds_rank_t)standby_daemons.size()); | |
407 | } | |
408 | return changed; | |
409 | } | |
410 | ||
224ce89b WB |
411 | void FSMap::get_health_checks(health_check_map_t *checks) const |
412 | { | |
413 | mds_rank_t standby_count_wanted = 0; | |
414 | for (const auto &i : filesystems) { | |
415 | const auto &fs = i.second; | |
416 | health_check_map_t fschecks; | |
d2e6a577 | 417 | |
224ce89b | 418 | fs->mds_map.get_health_checks(&fschecks); |
d2e6a577 FG |
419 | |
420 | // Some of the failed ranks might be transient (i.e. there are standbys | |
421 | // ready to replace them). We will report only on "stuck" failed, i.e. | |
422 | // ranks which are failed and have no standby replacement available. | |
423 | std::set<mds_rank_t> stuck_failed; | |
424 | ||
425 | for (const auto &rank : fs->mds_map.failed) { | |
9f95a23c TL |
426 | auto rep_info = find_replacement_for({fs->fscid, rank}); |
427 | if (!rep_info) { | |
d2e6a577 FG |
428 | stuck_failed.insert(rank); |
429 | } | |
430 | } | |
431 | ||
432 | // FS_WITH_FAILED_MDS | |
433 | if (!stuck_failed.empty()) { | |
434 | health_check_t& fscheck = checks->get_or_add( | |
435 | "FS_WITH_FAILED_MDS", HEALTH_WARN, | |
9f95a23c | 436 | "%num% filesystem%plurals% %hasorhave% a failed mds daemon", 1); |
d2e6a577 FG |
437 | ostringstream ss; |
438 | ss << "fs " << fs->mds_map.fs_name << " has " << stuck_failed.size() | |
439 | << " failed mds" << (stuck_failed.size() > 1 ? "s" : ""); | |
440 | fscheck.detail.push_back(ss.str()); } | |
441 | ||
224ce89b WB |
442 | checks->merge(fschecks); |
443 | standby_count_wanted = std::max( | |
444 | standby_count_wanted, | |
445 | fs->mds_map.get_standby_count_wanted((mds_rank_t)standby_daemons.size())); | |
446 | } | |
447 | ||
448 | // MDS_INSUFFICIENT_STANDBY | |
449 | if (standby_count_wanted) { | |
450 | std::ostringstream oss, dss; | |
d2e6a577 | 451 | oss << "insufficient standby MDS daemons available"; |
9f95a23c | 452 | auto& d = checks->get_or_add("MDS_INSUFFICIENT_STANDBY", HEALTH_WARN, oss.str(), 1); |
224ce89b WB |
453 | dss << "have " << standby_daemons.size() << "; want " << standby_count_wanted |
454 | << " more"; | |
455 | d.detail.push_back(dss.str()); | |
456 | } | |
457 | } | |
458 | ||
9f95a23c | 459 | void FSMap::update_compat(const CompatSet &c) |
7c673cae | 460 | { |
9f95a23c TL |
461 | // We could do something more complicated here to enable |
462 | // different filesystems to be served by different MDS versions, | |
463 | // but this is a lot simpler because it doesn't require us to | |
464 | // track the compat versions for standby daemons. | |
465 | compat = c; | |
466 | for (const auto &i : filesystems) { | |
467 | MDSMap &mds_map = i.second->mds_map; | |
468 | mds_map.compat = c; | |
469 | mds_map.epoch = epoch; | |
470 | } | |
471 | } | |
7c673cae | 472 | |
9f95a23c TL |
473 | void FSMap::encode(bufferlist& bl, uint64_t features) const |
474 | { | |
475 | ENCODE_START(7, 6, bl); | |
476 | encode(epoch, bl); | |
477 | encode(next_filesystem_id, bl); | |
478 | encode(legacy_client_fscid, bl); | |
479 | encode(compat, bl); | |
480 | encode(enable_multiple, bl); | |
481 | { | |
482 | std::vector<Filesystem::ref> v; | |
483 | v.reserve(filesystems.size()); | |
484 | for (auto& p : filesystems) v.emplace_back(p.second); | |
485 | encode(v, bl, features); | |
7c673cae | 486 | } |
9f95a23c TL |
487 | encode(mds_roles, bl); |
488 | encode(standby_daemons, bl, features); | |
489 | encode(standby_epochs, bl); | |
490 | encode(ever_enabled_multiple, bl); | |
491 | ENCODE_FINISH(bl); | |
7c673cae FG |
492 | } |
493 | ||
// Decode an FSMap from the buffer iterator `p`.
//
// Two layouts are handled, selected by the decoded struct_v:
//  - struct_v < 6: the data is a legacy MDSMap. Its fields are read into a
//    temporary MDSMap and, if that map is enabled, converted into a single
//    Filesystem with id FS_CLUSTER_ID_ANONYMOUS.
//  - otherwise: the data is an FSMap and is read field by field.
// The order of the decode() calls is the wire format.
11fdf7f2 | 494 | void FSMap::decode(bufferlist::const_iterator& p) |
7c673cae | 495 | {
7c673cae FG |
496 | // The highest MDSMap encoding version before we changed the |
497 | // MDSMonitor to store an FSMap instead of an MDSMap was | |
498 | // 5, so anything older than 6 is decoded as an MDSMap, | |
499 | // and anything newer is decoded as an FSMap. | |
f91f0fd5 | 500 | DECODE_START_LEGACY_COMPAT_LEN_16(7, 4, 4, p); |
7c673cae | 501 | if (struct_v < 6) {
3efd9988 FG |
502 | // Because the mon used to store an MDSMap where we now |
503 | // store an FSMap, FSMap knows how to decode the legacy | |
504 | // MDSMap format (it never needs to encode it though). | |
505 | MDSMap legacy_mds_map; | |
506 | ||
7c673cae | 507 | // Decoding an MDSMap (upgrade) |
11fdf7f2 TL |
508 | decode(epoch, p); |
509 | decode(legacy_mds_map.flags, p); | |
510 | decode(legacy_mds_map.last_failure, p); | |
511 | decode(legacy_mds_map.root, p); | |
512 | decode(legacy_mds_map.session_timeout, p); | |
513 | decode(legacy_mds_map.session_autoclose, p); | |
514 | decode(legacy_mds_map.max_file_size, p); | |
515 | decode(legacy_mds_map.max_mds, p); | |
516 | decode(legacy_mds_map.mds_info, p); | |
// For struct_v < 3 the data pools are read one by one as __u32 values and
// cas_pool as an __s32.
7c673cae FG |
517 | if (struct_v < 3) {
518 | __u32 n; | |
11fdf7f2 | 519 | decode(n, p); |
7c673cae FG |
520 | while (n--) {
521 | __u32 m; | |
11fdf7f2 | 522 | decode(m, p); |
31f18b77 | 523 | legacy_mds_map.data_pools.push_back(m); |
7c673cae FG |
524 | }
525 | __s32 s; | |
11fdf7f2 | 526 | decode(s, p); |
7c673cae FG |
527 | legacy_mds_map.cas_pool = s; |
528 | } else {
11fdf7f2 TL |
529 | decode(legacy_mds_map.data_pools, p); |
530 | decode(legacy_mds_map.cas_pool, p); | |
7c673cae FG |
531 | }
532 | ||
533 | // kclient ignores everything from here | |
// `ev` stays at 1 unless struct_v >= 2; it gates the remaining fields.
534 | __u16 ev = 1; | |
535 | if (struct_v >= 2) | |
11fdf7f2 | 536 | decode(ev, p); |
7c673cae | 537 | if (ev >= 3) |
11fdf7f2 | 538 | decode(legacy_mds_map.compat, p); |
7c673cae | 539 | else |
1adf2230 | 540 | legacy_mds_map.compat = MDSMap::get_compat_set_base(); |
7c673cae FG |
541 | if (ev < 5) {
542 | __u32 n; | |
11fdf7f2 | 543 | decode(n, p); |
7c673cae FG |
544 | legacy_mds_map.metadata_pool = n; |
545 | } else {
11fdf7f2 | 546 | decode(legacy_mds_map.metadata_pool, p); |
7c673cae | 547 | }
11fdf7f2 TL |
548 | decode(legacy_mds_map.created, p); |
549 | decode(legacy_mds_map.modified, p); | |
550 | decode(legacy_mds_map.tableserver, p); | |
551 | decode(legacy_mds_map.in, p); | |
7c673cae | 552 | std::map<mds_rank_t,int32_t> inc; // Legacy field, parse and drop
11fdf7f2 TL |
553 | decode(inc, p); |
554 | decode(legacy_mds_map.up, p); | |
555 | decode(legacy_mds_map.failed, p); | |
556 | decode(legacy_mds_map.stopped, p); | |
7c673cae | 557 | if (ev >= 4) |
11fdf7f2 | 558 | decode(legacy_mds_map.last_failure_osd_epoch, p); |
7c673cae FG |
559 | if (ev >= 6) {
560 | if (ev < 10) {
561 | // previously this was a bool about snaps, not a flag map | |
562 | bool flag; | |
11fdf7f2 | 563 | decode(flag, p); |
7c673cae FG |
564 | legacy_mds_map.ever_allowed_features = flag ? |
565 | CEPH_MDSMAP_ALLOW_SNAPS : 0; | |
11fdf7f2 | 566 | decode(flag, p); |
7c673cae FG |
567 | legacy_mds_map.explicitly_allowed_features = flag ? |
568 | CEPH_MDSMAP_ALLOW_SNAPS : 0; | |
7c673cae | 569 | } else {
11fdf7f2 TL |
570 | decode(legacy_mds_map.ever_allowed_features, p); |
571 | decode(legacy_mds_map.explicitly_allowed_features, p); | |
7c673cae FG |
572 | }
573 | } else {
11fdf7f2 | 574 | legacy_mds_map.ever_allowed_features = 0; |
7c673cae | 575 | legacy_mds_map.explicitly_allowed_features = 0; |
7c673cae FG |
576 | }
577 | if (ev >= 7) | |
11fdf7f2 | 578 | decode(legacy_mds_map.inline_data_enabled, p); |
7c673cae FG |
579 |
580 | if (ev >= 8) {
11fdf7f2 TL |
581 | ceph_assert(struct_v >= 5); |
582 | decode(legacy_mds_map.enabled, p); | |
583 | decode(legacy_mds_map.fs_name, p); | |
7c673cae FG |
584 | } else {
585 | legacy_mds_map.fs_name = "default"; | |
586 | if (epoch > 1) {
587 | // If an MDS has ever been started, epoch will be greater than 1, | |
588 | // assume filesystem is enabled. | |
589 | legacy_mds_map.enabled = true; | |
590 | } else {
591 | // Upgrading from a cluster that never used an MDS, switch off | |
592 | // filesystem until it's explicitly enabled. | |
593 | legacy_mds_map.enabled = false; | |
594 | }
595 | }
596 | ||
597 | if (ev >= 9) {
11fdf7f2 | 598 | decode(legacy_mds_map.damaged, p); |
7c673cae FG |
599 | }
600 | ||
601 | // We're upgrading, populate filesystems from the legacy fields | |
602 | filesystems.clear(); | |
603 | standby_daemons.clear(); | |
604 | standby_epochs.clear(); | |
605 | mds_roles.clear(); | |
606 | compat = legacy_mds_map.compat; | |
607 | enable_multiple = false; | |
608 | ||
609 | // Synthesise a Filesystem from legacy_mds_map, if enabled | |
610 | if (legacy_mds_map.enabled) {
611 | // Construct a Filesystem from the legacy MDSMap | |
11fdf7f2 | 612 | auto migrate_fs = Filesystem::create(); |
7c673cae FG |
613 | migrate_fs->fscid = FS_CLUSTER_ID_ANONYMOUS; |
614 | migrate_fs->mds_map = legacy_mds_map; | |
615 | migrate_fs->mds_map.epoch = epoch; | |
616 | filesystems[migrate_fs->fscid] = migrate_fs; | |
617 | ||
618 | // List of GIDs that had invalid states | |
619 | std::set<mds_gid_t> drop_gids; | |
620 | ||
621 | // Construct mds_roles, standby_daemons, and remove | |
622 | // standbys from the MDSMap in the Filesystem. | |
// Erasing is deferred to the two loops below, so mds_info is not modified
// while this loop iterates over it.
11fdf7f2 TL |
623 | for (const auto& [gid, info] : migrate_fs->mds_map.mds_info) {
624 | if (info.state == MDSMap::STATE_STANDBY_REPLAY) {
625 | /* drop any legacy standby-replay daemons */ | |
626 | drop_gids.insert(gid); | |
627 | } else if (info.rank == MDS_RANK_NONE) {
628 | if (info.state != MDSMap::STATE_STANDBY) {
7c673cae FG |
629 | // Old MDSMaps can have down:dne here, which |
630 | // is invalid in an FSMap (#17837) | |
11fdf7f2 | 631 | drop_gids.insert(gid); |
7c673cae | 632 | } else {
11fdf7f2 | 633 | insert(info); // into standby_daemons
7c673cae FG |
634 | }
635 | } else {
11fdf7f2 | 636 | mds_roles[gid] = migrate_fs->fscid; |
7c673cae FG |
637 | }
638 | }
639 | for (const auto &p : standby_daemons) {
640 | // Erase from this Filesystem's MDSMap, because it has | |
641 | // been copied into FSMap::Standby_daemons above | |
642 | migrate_fs->mds_map.mds_info.erase(p.first); | |
643 | }
644 | for (const auto &gid : drop_gids) {
645 | // Throw away all info for this MDS because it was identified | |
646 | // as having invalid state above. | |
647 | migrate_fs->mds_map.mds_info.erase(gid); | |
648 | }
649 | ||
650 | legacy_client_fscid = migrate_fs->fscid; | |
651 | } else {
652 | legacy_client_fscid = FS_CLUSTER_ID_NONE; | |
653 | }
654 | } else {
// FSMap layout: fields are read in the order shown here.
11fdf7f2 TL |
655 | decode(epoch, p); |
656 | decode(next_filesystem_id, p); | |
657 | decode(legacy_client_fscid, p); | |
658 | decode(compat, p); | |
659 | decode(enable_multiple, p); | |
660 | {
661 | std::vector<Filesystem::ref> v; | |
662 | decode(v, p); | |
663 | filesystems.clear(); | |
664 | for (auto& ref : v) {
665 | auto em = filesystems.emplace(std::piecewise_construct, std::forward_as_tuple(ref->fscid), std::forward_as_tuple(std::move(ref))); | |
// The assert fires if two decoded filesystems carry the same fscid.
666 | ceph_assert(em.second); | |
667 | }
7c673cae | 668 | }
11fdf7f2 TL |
669 | decode(mds_roles, p); |
670 | decode(standby_daemons, p); | |
671 | decode(standby_epochs, p); | |
// ever_enabled_multiple is only read for struct_v >= 7; for older
// encodings the member is left as it was.
7c673cae | 672 | if (struct_v >= 7) {
11fdf7f2 | 673 | decode(ever_enabled_multiple, p); |
7c673cae FG |
674 | }
675 | }
676 | ||
677 | DECODE_FINISH(p); | |
678 | }
679 | ||
11fdf7f2 | 680 | void FSMap::sanitize(const std::function<bool(int64_t pool)>& pool_exists) |
3efd9988 FG |
681 | { |
682 | for (auto &fs : filesystems) { | |
683 | fs.second->mds_map.sanitize(pool_exists); | |
684 | } | |
685 | } | |
7c673cae FG |
686 | |
687 | void Filesystem::encode(bufferlist& bl, uint64_t features) const | |
688 | { | |
689 | ENCODE_START(1, 1, bl); | |
11fdf7f2 | 690 | encode(fscid, bl); |
7c673cae FG |
691 | bufferlist mdsmap_bl; |
692 | mds_map.encode(mdsmap_bl, features); | |
11fdf7f2 | 693 | encode(mdsmap_bl, bl); |
7c673cae FG |
694 | ENCODE_FINISH(bl); |
695 | } | |
696 | ||
11fdf7f2 | 697 | void Filesystem::decode(bufferlist::const_iterator& p) |
7c673cae FG |
698 | { |
699 | DECODE_START(1, p); | |
11fdf7f2 | 700 | decode(fscid, p); |
7c673cae | 701 | bufferlist mdsmap_bl; |
11fdf7f2 TL |
702 | decode(mdsmap_bl, p); |
703 | auto mdsmap_bl_iter = mdsmap_bl.cbegin(); | |
7c673cae FG |
704 | mds_map.decode(mdsmap_bl_iter); |
705 | DECODE_FINISH(p); | |
706 | } | |
707 | ||
708 | int FSMap::parse_filesystem( | |
11fdf7f2 TL |
709 | std::string_view ns_str, |
710 | Filesystem::const_ref* result | |
7c673cae FG |
711 | ) const |
712 | { | |
713 | std::string ns_err; | |
94b18763 FG |
714 | std::string s(ns_str); |
715 | fs_cluster_id_t fscid = strict_strtol(s.c_str(), 10, &ns_err); | |
7c673cae FG |
716 | if (!ns_err.empty() || filesystems.count(fscid) == 0) { |
717 | for (auto &fs : filesystems) { | |
94b18763 | 718 | if (fs.second->mds_map.fs_name == s) { |
7c673cae FG |
719 | *result = std::const_pointer_cast<const Filesystem>(fs.second); |
720 | return 0; | |
721 | } | |
722 | } | |
723 | return -ENOENT; | |
724 | } else { | |
725 | *result = get_filesystem(fscid); | |
726 | return 0; | |
727 | } | |
728 | } | |
729 | ||
730 | void Filesystem::print(std::ostream &out) const | |
731 | { | |
732 | out << "Filesystem '" << mds_map.fs_name | |
733 | << "' (" << fscid << ")" << std::endl; | |
734 | mds_map.print(out); | |
735 | } | |
736 | ||
9f95a23c | 737 | bool FSMap::is_any_degraded() const |
7c673cae | 738 | { |
9f95a23c TL |
739 | for (auto& i : filesystems) { |
740 | if (i.second->mds_map.is_degraded()) { | |
741 | return true; | |
742 | } | |
743 | } | |
744 | return false; | |
745 | } | |
746 | ||
747 | std::map<mds_gid_t, MDSMap::mds_info_t> FSMap::get_mds_info() const | |
748 | { | |
749 | std::map<mds_gid_t, mds_info_t> result; | |
750 | for (const auto &i : standby_daemons) { | |
751 | result[i.first] = i.second; | |
752 | } | |
753 | ||
754 | for (const auto &i : filesystems) { | |
755 | const auto &fs_info = i.second->mds_map.get_mds_info(); | |
756 | for (const auto &j : fs_info) { | |
757 | result[j.first] = j.second; | |
758 | } | |
759 | } | |
760 | ||
761 | return result; | |
762 | } | |
763 | ||
764 | const MDSMap::mds_info_t* FSMap::get_available_standby(fs_cluster_id_t fscid) const | |
765 | { | |
766 | const mds_info_t* who = nullptr; | |
11fdf7f2 TL |
767 | for (const auto& [gid, info] : standby_daemons) { |
768 | ceph_assert(info.rank == MDS_RANK_NONE); | |
769 | ceph_assert(info.state == MDSMap::STATE_STANDBY); | |
7c673cae | 770 | |
11fdf7f2 | 771 | if (info.laggy() || info.is_frozen()) { |
7c673cae FG |
772 | continue; |
773 | } | |
774 | ||
9f95a23c TL |
775 | if (info.join_fscid == fscid) { |
776 | who = &info; | |
777 | break; | |
778 | } else if (info.join_fscid == FS_CLUSTER_ID_NONE) { | |
779 | who = &info; /* vanilla standby */ | |
780 | } else if (who == nullptr) { | |
781 | who = &info; /* standby for another fs, last resort */ | |
782 | } | |
783 | } | |
784 | return who; | |
785 | } | |
786 | ||
787 | mds_gid_t FSMap::find_mds_gid_by_name(std::string_view s) const | |
788 | { | |
789 | const auto info = get_mds_info(); | |
790 | for (const auto &p : info) { | |
791 | if (p.second.name == s) { | |
792 | return p.first; | |
793 | } | |
7c673cae | 794 | } |
11fdf7f2 | 795 | return MDS_GID_NONE; |
7c673cae FG |
796 | } |
797 | ||
9f95a23c TL |
798 | const MDSMap::mds_info_t* FSMap::find_by_name(std::string_view name) const |
799 | { | |
800 | std::map<mds_gid_t, mds_info_t> result; | |
801 | for (const auto &i : standby_daemons) { | |
802 | if (i.second.name == name) { | |
803 | return &(i.second); | |
804 | } | |
805 | } | |
806 | ||
807 | for (const auto &i : filesystems) { | |
808 | const auto &fs_info = i.second->mds_map.get_mds_info(); | |
809 | for (const auto &j : fs_info) { | |
810 | if (j.second.name == name) { | |
811 | return &(j.second); | |
812 | } | |
813 | } | |
814 | } | |
815 | ||
816 | return nullptr; | |
817 | } | |
818 | ||
819 | const MDSMap::mds_info_t* FSMap::find_replacement_for(mds_role_t role) const | |
11fdf7f2 TL |
820 | { |
821 | auto&& fs = get_filesystem(role.fscid); | |
7c673cae | 822 | |
11fdf7f2 TL |
823 | // First see if we have a STANDBY_REPLAY |
824 | for (const auto& [gid, info] : fs->mds_map.mds_info) { | |
825 | if (info.rank == role.rank && info.state == MDSMap::STATE_STANDBY_REPLAY) { | |
826 | if (info.is_frozen()) { | |
827 | /* the standby-replay is frozen, do nothing! */ | |
9f95a23c | 828 | return nullptr; |
11fdf7f2 | 829 | } else { |
9f95a23c | 830 | return &info; |
11fdf7f2 | 831 | } |
7c673cae FG |
832 | } |
833 | } | |
7c673cae | 834 | |
9f95a23c | 835 | return get_available_standby(role.fscid); |
7c673cae FG |
836 | } |
837 | ||
838 | void FSMap::sanity() const | |
839 | { | |
840 | if (legacy_client_fscid != FS_CLUSTER_ID_NONE) { | |
11fdf7f2 | 841 | ceph_assert(filesystems.count(legacy_client_fscid) == 1); |
7c673cae FG |
842 | } |
843 | ||
844 | for (const auto &i : filesystems) { | |
845 | auto fs = i.second; | |
11fdf7f2 TL |
846 | ceph_assert(fs->mds_map.compat.compare(compat) == 0); |
847 | ceph_assert(fs->fscid == i.first); | |
7c673cae | 848 | for (const auto &j : fs->mds_map.mds_info) { |
11fdf7f2 TL |
849 | ceph_assert(j.second.rank != MDS_RANK_NONE); |
850 | ceph_assert(mds_roles.count(j.first) == 1); | |
851 | ceph_assert(standby_daemons.count(j.first) == 0); | |
852 | ceph_assert(standby_epochs.count(j.first) == 0); | |
853 | ceph_assert(mds_roles.at(j.first) == i.first); | |
7c673cae | 854 | if (j.second.state != MDSMap::STATE_STANDBY_REPLAY) { |
11fdf7f2 TL |
855 | ceph_assert(fs->mds_map.up.at(j.second.rank) == j.first); |
856 | ceph_assert(fs->mds_map.failed.count(j.second.rank) == 0); | |
857 | ceph_assert(fs->mds_map.damaged.count(j.second.rank) == 0); | |
7c673cae FG |
858 | } |
859 | } | |
860 | ||
861 | for (const auto &j : fs->mds_map.up) { | |
862 | mds_rank_t rank = j.first; | |
11fdf7f2 | 863 | ceph_assert(fs->mds_map.in.count(rank) == 1); |
7c673cae | 864 | mds_gid_t gid = j.second; |
11fdf7f2 | 865 | ceph_assert(fs->mds_map.mds_info.count(gid) == 1); |
7c673cae FG |
866 | } |
867 | } | |
868 | ||
869 | for (const auto &i : standby_daemons) { | |
11fdf7f2 TL |
870 | ceph_assert(i.second.state == MDSMap::STATE_STANDBY); |
871 | ceph_assert(i.second.rank == MDS_RANK_NONE); | |
872 | ceph_assert(i.second.global_id == i.first); | |
873 | ceph_assert(standby_epochs.count(i.first) == 1); | |
874 | ceph_assert(mds_roles.count(i.first) == 1); | |
875 | ceph_assert(mds_roles.at(i.first) == FS_CLUSTER_ID_NONE); | |
7c673cae FG |
876 | } |
877 | ||
878 | for (const auto &i : standby_epochs) { | |
11fdf7f2 | 879 | ceph_assert(standby_daemons.count(i.first) == 1); |
7c673cae FG |
880 | } |
881 | ||
882 | for (const auto &i : mds_roles) { | |
883 | if (i.second == FS_CLUSTER_ID_NONE) { | |
11fdf7f2 | 884 | ceph_assert(standby_daemons.count(i.first) == 1); |
7c673cae | 885 | } else { |
11fdf7f2 TL |
886 | ceph_assert(filesystems.count(i.second) == 1); |
887 | ceph_assert(filesystems.at(i.second)->mds_map.mds_info.count(i.first) == 1); | |
7c673cae FG |
888 | } |
889 | } | |
890 | } | |
891 | ||
892 | void FSMap::promote( | |
893 | mds_gid_t standby_gid, | |
11fdf7f2 | 894 | Filesystem& filesystem, |
7c673cae FG |
895 | mds_rank_t assigned_rank) |
896 | { | |
11fdf7f2 | 897 | ceph_assert(gid_exists(standby_gid)); |
7c673cae FG |
898 | bool is_standby_replay = mds_roles.at(standby_gid) != FS_CLUSTER_ID_NONE; |
899 | if (!is_standby_replay) { | |
11fdf7f2 TL |
900 | ceph_assert(standby_daemons.count(standby_gid)); |
901 | ceph_assert(standby_daemons.at(standby_gid).state == MDSMap::STATE_STANDBY); | |
7c673cae FG |
902 | } |
903 | ||
11fdf7f2 | 904 | MDSMap &mds_map = filesystem.mds_map; |
7c673cae FG |
905 | |
906 | // Insert daemon state to Filesystem | |
907 | if (!is_standby_replay) { | |
908 | mds_map.mds_info[standby_gid] = standby_daemons.at(standby_gid); | |
909 | } else { | |
11fdf7f2 TL |
910 | ceph_assert(mds_map.mds_info.count(standby_gid)); |
911 | ceph_assert(mds_map.mds_info.at(standby_gid).state == MDSMap::STATE_STANDBY_REPLAY); | |
912 | ceph_assert(mds_map.mds_info.at(standby_gid).rank == assigned_rank); | |
7c673cae | 913 | } |
9f95a23c | 914 | auto& info = mds_map.mds_info[standby_gid]; |
7c673cae FG |
915 | |
916 | if (mds_map.stopped.erase(assigned_rank)) { | |
917 | // The cluster is being expanded with a stopped rank | |
918 | info.state = MDSMap::STATE_STARTING; | |
919 | } else if (!mds_map.is_in(assigned_rank)) { | |
920 | // The cluster is being expanded with a new rank | |
921 | info.state = MDSMap::STATE_CREATING; | |
922 | } else { | |
923 | // An existing rank is being assigned to a replacement | |
924 | info.state = MDSMap::STATE_REPLAY; | |
925 | mds_map.failed.erase(assigned_rank); | |
926 | } | |
927 | info.rank = assigned_rank; | |
928 | info.inc = epoch; | |
11fdf7f2 | 929 | mds_roles[standby_gid] = filesystem.fscid; |
7c673cae FG |
930 | |
931 | // Update the rank state in Filesystem | |
932 | mds_map.in.insert(assigned_rank); | |
933 | mds_map.up[assigned_rank] = standby_gid; | |
934 | ||
935 | // Remove from the list of standbys | |
936 | if (!is_standby_replay) { | |
937 | standby_daemons.erase(standby_gid); | |
938 | standby_epochs.erase(standby_gid); | |
939 | } | |
940 | ||
941 | // Indicate that Filesystem has been modified | |
942 | mds_map.epoch = epoch; | |
943 | } | |
944 | ||
945 | void FSMap::assign_standby_replay( | |
946 | const mds_gid_t standby_gid, | |
947 | const fs_cluster_id_t leader_ns, | |
948 | const mds_rank_t leader_rank) | |
949 | { | |
11fdf7f2 TL |
950 | ceph_assert(mds_roles.at(standby_gid) == FS_CLUSTER_ID_NONE); |
951 | ceph_assert(gid_exists(standby_gid)); | |
952 | ceph_assert(!gid_has_rank(standby_gid)); | |
953 | ceph_assert(standby_daemons.count(standby_gid)); | |
7c673cae FG |
954 | |
955 | // Insert to the filesystem | |
956 | auto fs = filesystems.at(leader_ns); | |
957 | fs->mds_map.mds_info[standby_gid] = standby_daemons.at(standby_gid); | |
958 | fs->mds_map.mds_info[standby_gid].rank = leader_rank; | |
959 | fs->mds_map.mds_info[standby_gid].state = MDSMap::STATE_STANDBY_REPLAY; | |
960 | mds_roles[standby_gid] = leader_ns; | |
961 | ||
962 | // Remove from the list of standbys | |
963 | standby_daemons.erase(standby_gid); | |
964 | standby_epochs.erase(standby_gid); | |
965 | ||
966 | // Indicate that Filesystem has been modified | |
967 | fs->mds_map.epoch = epoch; | |
968 | } | |
969 | ||
970 | void FSMap::erase(mds_gid_t who, epoch_t blacklist_epoch) | |
971 | { | |
972 | if (mds_roles.at(who) == FS_CLUSTER_ID_NONE) { | |
973 | standby_daemons.erase(who); | |
974 | standby_epochs.erase(who); | |
975 | } else { | |
976 | auto &fs = filesystems.at(mds_roles.at(who)); | |
977 | const auto &info = fs->mds_map.mds_info.at(who); | |
978 | if (info.state != MDSMap::STATE_STANDBY_REPLAY) { | |
979 | if (info.state == MDSMap::STATE_CREATING) { | |
980 | // If this gid didn't make it past CREATING, then forget | |
981 | // the rank ever existed so that next time it's handed out | |
982 | // to a gid it'll go back into CREATING. | |
983 | fs->mds_map.in.erase(info.rank); | |
984 | } else { | |
985 | // Put this rank into the failed list so that the next available | |
986 | // STANDBY will pick it up. | |
987 | fs->mds_map.failed.insert(info.rank); | |
988 | } | |
11fdf7f2 | 989 | ceph_assert(fs->mds_map.up.at(info.rank) == info.global_id); |
7c673cae FG |
990 | fs->mds_map.up.erase(info.rank); |
991 | } | |
992 | fs->mds_map.mds_info.erase(who); | |
993 | fs->mds_map.last_failure_osd_epoch = blacklist_epoch; | |
994 | fs->mds_map.epoch = epoch; | |
995 | } | |
996 | ||
997 | mds_roles.erase(who); | |
998 | } | |
999 | ||
1000 | void FSMap::damaged(mds_gid_t who, epoch_t blacklist_epoch) | |
1001 | { | |
11fdf7f2 | 1002 | ceph_assert(mds_roles.at(who) != FS_CLUSTER_ID_NONE); |
7c673cae FG |
1003 | auto fs = filesystems.at(mds_roles.at(who)); |
1004 | mds_rank_t rank = fs->mds_map.mds_info[who].rank; | |
1005 | ||
1006 | erase(who, blacklist_epoch); | |
1007 | fs->mds_map.failed.erase(rank); | |
1008 | fs->mds_map.damaged.insert(rank); | |
1009 | ||
11fdf7f2 | 1010 | ceph_assert(fs->mds_map.epoch == epoch); |
7c673cae FG |
1011 | } |
1012 | ||
1013 | /** | |
1014 | * Update to indicate that the rank `rank` is to be removed | |
1015 | * from the damaged list of the filesystem `fscid` | |
1016 | */ | |
1017 | bool FSMap::undamaged(const fs_cluster_id_t fscid, const mds_rank_t rank) | |
1018 | { | |
1019 | auto fs = filesystems.at(fscid); | |
1020 | ||
1021 | if (fs->mds_map.damaged.erase(rank)) { | |
1022 | fs->mds_map.failed.insert(rank); | |
1023 | fs->mds_map.epoch = epoch; | |
1024 | return true; | |
1025 | } else { | |
1026 | return false; | |
1027 | } | |
1028 | } | |
1029 | ||
1030 | void FSMap::insert(const MDSMap::mds_info_t &new_info) | |
1031 | { | |
11fdf7f2 TL |
1032 | ceph_assert(new_info.state == MDSMap::STATE_STANDBY); |
1033 | ceph_assert(new_info.rank == MDS_RANK_NONE); | |
7c673cae FG |
1034 | mds_roles[new_info.global_id] = FS_CLUSTER_ID_NONE; |
1035 | standby_daemons[new_info.global_id] = new_info; | |
1036 | standby_epochs[new_info.global_id] = epoch; | |
1037 | } | |
1038 | ||
9f95a23c | 1039 | std::vector<mds_gid_t> FSMap::stop(mds_gid_t who) |
7c673cae | 1040 | { |
11fdf7f2 | 1041 | ceph_assert(mds_roles.at(who) != FS_CLUSTER_ID_NONE); |
7c673cae FG |
1042 | auto fs = filesystems.at(mds_roles.at(who)); |
1043 | const auto &info = fs->mds_map.mds_info.at(who); | |
1044 | fs->mds_map.up.erase(info.rank); | |
1045 | fs->mds_map.in.erase(info.rank); | |
1046 | fs->mds_map.stopped.insert(info.rank); | |
1047 | ||
1048 | // Also drop any standby replays that were following this rank | |
9f95a23c | 1049 | std::vector<mds_gid_t> standbys; |
7c673cae FG |
1050 | for (const auto &i : fs->mds_map.mds_info) { |
1051 | const auto &other_gid = i.first; | |
1052 | const auto &other_info = i.second; | |
1053 | if (other_info.rank == info.rank | |
1054 | && other_info.state == MDSMap::STATE_STANDBY_REPLAY) { | |
1055 | standbys.push_back(other_gid); | |
1056 | erase(other_gid, 0); | |
1057 | } | |
1058 | } | |
1059 | ||
1060 | fs->mds_map.mds_info.erase(who); | |
1061 | mds_roles.erase(who); | |
1062 | ||
1063 | fs->mds_map.epoch = epoch; | |
1064 | ||
1065 | return standbys; | |
1066 | } | |
1067 | ||
1068 | ||
1069 | /** | |
1070 | * Given one of the following forms: | |
1071 | * <fs name>:<rank> | |
1072 | * <fs id>:<rank> | |
1073 | * <rank> | |
1074 | * | |
1075 | * Parse into a mds_role_t. The rank-only form is only valid | |
1076 | * if legacy_client_ns is set. | |
1077 | */ | |
1078 | int FSMap::parse_role( | |
11fdf7f2 | 1079 | std::string_view role_str, |
7c673cae FG |
1080 | mds_role_t *role, |
1081 | std::ostream &ss) const | |
1082 | { | |
1083 | size_t colon_pos = role_str.find(":"); | |
1084 | size_t rank_pos; | |
11fdf7f2 | 1085 | Filesystem::const_ref fs; |
7c673cae FG |
1086 | if (colon_pos == std::string::npos) { |
1087 | if (legacy_client_fscid == FS_CLUSTER_ID_NONE) { | |
1088 | ss << "No filesystem selected"; | |
1089 | return -ENOENT; | |
1090 | } | |
1091 | fs = get_filesystem(legacy_client_fscid); | |
1092 | rank_pos = 0; | |
1093 | } else { | |
1094 | if (parse_filesystem(role_str.substr(0, colon_pos), &fs) < 0) { | |
1095 | ss << "Invalid filesystem"; | |
1096 | return -ENOENT; | |
1097 | } | |
1098 | rank_pos = colon_pos+1; | |
1099 | } | |
1100 | ||
1101 | mds_rank_t rank; | |
1102 | std::string err; | |
94b18763 | 1103 | std::string rank_str(role_str.substr(rank_pos)); |
7c673cae FG |
1104 | long rank_i = strict_strtol(rank_str.c_str(), 10, &err); |
1105 | if (rank_i < 0 || !err.empty()) { | |
1106 | ss << "Invalid rank '" << rank_str << "'"; | |
1107 | return -EINVAL; | |
1108 | } else { | |
1109 | rank = rank_i; | |
1110 | } | |
1111 | ||
1112 | if (fs->mds_map.in.count(rank) == 0) { | |
1113 | ss << "Rank '" << rank << "' not found"; | |
1114 | return -ENOENT; | |
1115 | } | |
1116 | ||
1117 | *role = {fs->fscid, rank}; | |
1118 | ||
1119 | return 0; | |
1120 | } | |
9f95a23c TL |
1121 | |
1122 | bool FSMap::pool_in_use(int64_t poolid) const | |
1123 | { | |
1124 | for (auto const &i : filesystems) { | |
1125 | if (i.second->mds_map.is_data_pool(poolid) | |
1126 | || i.second->mds_map.metadata_pool == poolid) { | |
1127 | return true; | |
1128 | } | |
1129 | } | |
1130 | return false; | |
1131 | } | |
1132 | ||
1133 | void FSMap::erase_filesystem(fs_cluster_id_t fscid) | |
1134 | { | |
1135 | filesystems.erase(fscid); | |
1136 | for (auto& [gid, info] : standby_daemons) { | |
1137 | if (info.join_fscid == fscid) { | |
1138 | modify_daemon(gid, [](auto& info) { | |
1139 | info.join_fscid = FS_CLUSTER_ID_NONE; | |
1140 | }); | |
1141 | } | |
1142 | } | |
1143 | for (auto& p : filesystems) { | |
1144 | for (auto& [gid, info] : p.second->mds_map.get_mds_info()) { | |
1145 | if (info.join_fscid == fscid) { | |
1146 | modify_daemon(gid, [](auto& info) { | |
1147 | info.join_fscid = FS_CLUSTER_ID_NONE; | |
1148 | }); | |
1149 | } | |
1150 | } | |
1151 | } | |
1152 | } |