]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
f67539c2 | 15 | #include <ostream> |
7c673cae FG |
16 | |
17 | #include "FSMap.h" | |
18 | ||
11fdf7f2 | 19 | #include "common/StackStringStream.h" |
7c673cae | 20 | |
11fdf7f2 TL |
21 | #ifdef WITH_SEASTAR |
22 | #include "crimson/common/config_proxy.h" | |
23 | #else | |
24 | #include "common/config_proxy.h" | |
25 | #endif | |
26 | #include "global/global_context.h" | |
224ce89b WB |
27 | #include "mon/health_check.h" |
28 | ||
f67539c2 TL |
29 | using std::list; |
30 | using std::pair; | |
31 | using std::ostream; | |
32 | using std::string; | |
20effc67 | 33 | using std::string_view; |
f67539c2 TL |
34 | |
35 | using ceph::bufferlist; | |
36 | using ceph::Formatter; | |
37 | ||
38 | void ClusterInfo::encode(ceph::buffer::list &bl) const { | |
39 | ENCODE_START(1, 1, bl); | |
40 | encode(client_name, bl); | |
41 | encode(cluster_name, bl); | |
42 | encode(fs_name, bl); | |
43 | ENCODE_FINISH(bl); | |
44 | } | |
45 | ||
46 | void ClusterInfo::decode(ceph::buffer::list::const_iterator &iter) { | |
47 | DECODE_START(1, iter); | |
48 | decode(client_name, iter); | |
49 | decode(cluster_name, iter); | |
50 | decode(fs_name, iter); | |
51 | DECODE_FINISH(iter); | |
52 | } | |
53 | ||
54 | void ClusterInfo::dump(ceph::Formatter *f) const { | |
55 | f->dump_string("client_name", client_name); | |
56 | f->dump_string("cluster_name", cluster_name); | |
57 | f->dump_string("fs_name", fs_name); | |
58 | } | |
59 | ||
60 | void ClusterInfo::print(std::ostream& out) const { | |
61 | out << "[client_name=" << client_name << ", cluster_name=" << cluster_name | |
62 | << ", fs_name=" << fs_name << "]" << std::endl; | |
63 | } | |
64 | ||
65 | void Peer::encode(ceph::buffer::list &bl) const { | |
66 | ENCODE_START(1, 1, bl); | |
67 | encode(uuid, bl); | |
68 | encode(remote, bl); | |
69 | ENCODE_FINISH(bl); | |
70 | } | |
71 | ||
72 | void Peer::decode(ceph::buffer::list::const_iterator &iter) { | |
73 | DECODE_START(1, iter); | |
74 | decode(uuid, iter); | |
75 | decode(remote, iter); | |
76 | DECODE_FINISH(iter); | |
77 | } | |
78 | ||
79 | void Peer::dump(ceph::Formatter *f) const { | |
80 | f->open_object_section(uuid); | |
81 | f->dump_object("remote", remote); | |
82 | f->close_section(); | |
83 | } | |
84 | ||
85 | void Peer::print(std::ostream& out) const { | |
86 | out << "[uuid=" << uuid << ", remote=" << remote << "]" << std::endl; | |
87 | } | |
88 | ||
89 | void MirrorInfo::encode(ceph::buffer::list &bl) const { | |
90 | ENCODE_START(1, 1, bl); | |
91 | encode(mirrored, bl); | |
92 | encode(peers, bl); | |
93 | ENCODE_FINISH(bl); | |
94 | } | |
95 | ||
96 | void MirrorInfo::decode(ceph::buffer::list::const_iterator &iter) { | |
97 | DECODE_START(1, iter); | |
98 | decode(mirrored, iter); | |
99 | decode(peers, iter); | |
100 | DECODE_FINISH(iter); | |
101 | } | |
102 | ||
103 | void MirrorInfo::dump(ceph::Formatter *f) const { | |
104 | f->open_object_section("peers"); | |
105 | for (auto &peer : peers) { | |
106 | peer.dump(f); | |
107 | } | |
108 | f->close_section(); // peers | |
109 | } | |
110 | ||
111 | void MirrorInfo::print(std::ostream& out) const { | |
112 | out << "[peers=" << peers << "]" << std::endl; | |
113 | } | |
7c673cae FG |
114 | |
115 | void Filesystem::dump(Formatter *f) const | |
116 | { | |
117 | f->open_object_section("mdsmap"); | |
118 | mds_map.dump(f); | |
119 | f->close_section(); | |
120 | f->dump_int("id", fscid); | |
f67539c2 TL |
121 | if (mirror_info.is_mirrored()) { |
122 | f->open_object_section("mirror_info"); | |
123 | mirror_info.dump(f); | |
124 | f->close_section(); // mirror_info | |
125 | } | |
7c673cae FG |
126 | } |
127 | ||
128 | void FSMap::dump(Formatter *f) const | |
129 | { | |
130 | f->dump_int("epoch", epoch); | |
11fdf7f2 TL |
131 | // Use 'default' naming to match 'set-default' CLI |
132 | f->dump_int("default_fscid", legacy_client_fscid); | |
7c673cae FG |
133 | |
134 | f->open_object_section("compat"); | |
522d829b | 135 | default_compat.dump(f); |
7c673cae FG |
136 | f->close_section(); |
137 | ||
138 | f->open_object_section("feature_flags"); | |
139 | f->dump_bool("enable_multiple", enable_multiple); | |
140 | f->dump_bool("ever_enabled_multiple", ever_enabled_multiple); | |
141 | f->close_section(); | |
142 | ||
143 | f->open_array_section("standbys"); | |
9f95a23c | 144 | for (const auto& [gid, info] : standby_daemons) { |
7c673cae | 145 | f->open_object_section("info"); |
9f95a23c TL |
146 | info.dump(f); |
147 | f->dump_int("epoch", standby_epochs.at(gid)); | |
7c673cae FG |
148 | f->close_section(); |
149 | } | |
150 | f->close_section(); | |
151 | ||
152 | f->open_array_section("filesystems"); | |
153 | for (const auto &fs : filesystems) { | |
154 | f->open_object_section("filesystem"); | |
155 | fs.second->dump(f); | |
156 | f->close_section(); | |
157 | } | |
158 | f->close_section(); | |
159 | } | |
160 | ||
9f95a23c TL |
161 | FSMap &FSMap::operator=(const FSMap &rhs) |
162 | { | |
163 | epoch = rhs.epoch; | |
164 | next_filesystem_id = rhs.next_filesystem_id; | |
165 | legacy_client_fscid = rhs.legacy_client_fscid; | |
522d829b | 166 | default_compat = rhs.default_compat; |
9f95a23c TL |
167 | enable_multiple = rhs.enable_multiple; |
168 | mds_roles = rhs.mds_roles; | |
169 | standby_daemons = rhs.standby_daemons; | |
170 | standby_epochs = rhs.standby_epochs; | |
171 | ||
172 | filesystems.clear(); | |
173 | for (const auto &i : rhs.filesystems) { | |
174 | const auto &fs = i.second; | |
175 | filesystems[fs->fscid] = std::make_shared<Filesystem>(*fs); | |
176 | } | |
177 | ||
178 | return *this; | |
179 | } | |
180 | ||
181 | void FSMap::generate_test_instances(std::list<FSMap*>& ls) | |
7c673cae FG |
182 | { |
183 | FSMap *m = new FSMap(); | |
184 | ||
185 | std::list<MDSMap*> mds_map_instances; | |
186 | MDSMap::generate_test_instances(mds_map_instances); | |
187 | ||
188 | int k = 20; | |
189 | for (auto i : mds_map_instances) { | |
11fdf7f2 | 190 | auto fs = Filesystem::create(); |
7c673cae FG |
191 | fs->fscid = k++; |
192 | fs->mds_map = *i; | |
193 | delete i; | |
194 | m->filesystems[fs->fscid] = fs; | |
195 | } | |
196 | mds_map_instances.clear(); | |
197 | ||
198 | ls.push_back(m); | |
199 | } | |
200 | ||
201 | void FSMap::print(ostream& out) const | |
202 | { | |
203 | out << "e" << epoch << std::endl; | |
204 | out << "enable_multiple, ever_enabled_multiple: " << enable_multiple << "," | |
205 | << ever_enabled_multiple << std::endl; | |
522d829b | 206 | out << "default compat: " << default_compat << std::endl; |
7c673cae FG |
207 | out << "legacy client fscid: " << legacy_client_fscid << std::endl; |
208 | out << " " << std::endl; | |
209 | ||
210 | if (filesystems.empty()) { | |
211 | out << "No filesystems configured" << std::endl; | |
7c673cae FG |
212 | } |
213 | ||
11fdf7f2 TL |
214 | for (const auto& p : filesystems) { |
215 | p.second->print(out); | |
7c673cae FG |
216 | out << " " << std::endl << " " << std::endl; // Space out a bit |
217 | } | |
218 | ||
219 | if (!standby_daemons.empty()) { | |
220 | out << "Standby daemons:" << std::endl << " " << std::endl; | |
221 | } | |
222 | ||
9f95a23c TL |
223 | for (const auto& p : standby_daemons) { |
224 | out << p.second << std::endl; | |
7c673cae FG |
225 | } |
226 | } | |
227 | ||
f67539c2 TL |
228 | void FSMap::print_daemon_summary(ostream& out) const |
229 | { | |
230 | // this appears in the "services:" section of "ceph status" | |
231 | int num_up = 0, num_in = 0, num_failed = 0; | |
232 | int num_standby_replay = 0; | |
233 | for (auto& [fscid, fs] : filesystems) { | |
234 | num_up += fs->mds_map.get_num_up_mds(); | |
235 | num_in += fs->mds_map.get_num_in_mds(); | |
236 | num_failed += fs->mds_map.get_num_failed_mds(); | |
237 | num_standby_replay += fs->mds_map.get_num_standby_replay_mds(); | |
238 | } | |
239 | int num_standby = standby_daemons.size(); | |
240 | out << num_up << "/" << num_in << " daemons up"; | |
241 | if (num_failed) { | |
242 | out << " (" << num_failed << " failed)"; | |
243 | } | |
244 | if (num_standby) { | |
245 | out << ", " << num_standby << " standby"; | |
246 | } | |
247 | if (num_standby_replay) { | |
248 | out << ", " << num_standby_replay << " hot standby"; | |
249 | } | |
250 | } | |
251 | ||
252 | void FSMap::print_fs_summary(ostream& out) const | |
253 | { | |
254 | // this appears in the "data:" section of "ceph status" | |
255 | if (!filesystems.empty()) { | |
256 | int num_failed = 0, num_recovering = 0, num_stopped = 0, num_healthy = 0; | |
257 | int num_damaged = 0; | |
258 | for (auto& [fscid, fs] : filesystems) { | |
259 | if (fs->mds_map.is_any_damaged()) { | |
260 | ++num_damaged; | |
261 | } | |
262 | if (fs->mds_map.is_any_failed()) { | |
263 | ++num_failed; | |
264 | } else if (fs->mds_map.is_degraded()) { | |
265 | ++num_recovering; | |
266 | } else if (fs->mds_map.get_max_mds() == 0) { | |
267 | ++num_stopped; | |
268 | } else { | |
269 | ++num_healthy; | |
270 | } | |
271 | } | |
272 | out << " volumes: " | |
273 | << num_healthy << "/" << filesystems.size() << " healthy"; | |
274 | if (num_recovering) { | |
275 | out << ", " << num_recovering << " recovering"; | |
276 | } | |
277 | if (num_failed) { | |
278 | out << ", " << num_failed << " failed"; | |
279 | } | |
280 | if (num_stopped) { | |
281 | out << ", " << num_stopped << " stopped"; | |
282 | } | |
283 | if (num_damaged) { | |
284 | out << "; " << num_damaged << " damaged"; | |
285 | } | |
286 | out << "\n"; | |
287 | } | |
288 | } | |
289 | ||
7c673cae FG |
290 | void FSMap::print_summary(Formatter *f, ostream *out) const |
291 | { | |
7c673cae FG |
292 | if (f) { |
293 | f->dump_unsigned("epoch", get_epoch()); | |
11fdf7f2 TL |
294 | for (const auto &p : filesystems) { |
295 | auto& fs = p.second; | |
7c673cae FG |
296 | f->dump_unsigned("id", fs->fscid); |
297 | f->dump_unsigned("up", fs->mds_map.up.size()); | |
298 | f->dump_unsigned("in", fs->mds_map.in.size()); | |
299 | f->dump_unsigned("max", fs->mds_map.max_mds); | |
300 | } | |
301 | } else { | |
11fdf7f2 TL |
302 | auto count = filesystems.size(); |
303 | if (count <= 3) { | |
304 | bool first = true; | |
305 | for (const auto& p : filesystems) { | |
306 | const auto& fs = p.second; | |
307 | if (!first) { | |
308 | *out << " "; | |
309 | } | |
310 | if (fs->mds_map.is_degraded()) { | |
311 | *out << fs->mds_map.fs_name << ":" << fs->mds_map.up.size() << "/" << fs->mds_map.in.size(); | |
312 | } else { | |
313 | *out << fs->mds_map.fs_name << ":" << fs->mds_map.in.size(); | |
314 | } | |
315 | first = false; | |
316 | } | |
317 | } else { | |
318 | *out << count << " fs"; | |
319 | unsigned degraded = 0; | |
320 | CachedStackStringStream css; | |
321 | *css << " (degraded: "; | |
322 | for (const auto& p : filesystems) { | |
323 | const auto& fs = p.second; | |
324 | if (fs->mds_map.is_degraded()) { | |
325 | degraded++; | |
326 | if (degraded <= 3) { | |
327 | *css << fs->mds_map.fs_name << ":" << fs->mds_map.up.size() << "/" << fs->mds_map.in.size(); | |
328 | } | |
329 | } | |
330 | } | |
331 | if (degraded > 0) { | |
332 | if (degraded <= 3) { | |
333 | *css << ")"; | |
334 | *out << css->strv(); | |
335 | } else { | |
336 | *out << " (degraded: " << degraded << " fs)"; | |
337 | } | |
338 | } | |
7c673cae FG |
339 | } |
340 | } | |
341 | ||
342 | if (f) { | |
343 | f->open_array_section("by_rank"); | |
344 | } | |
345 | ||
11fdf7f2 TL |
346 | std::map<MDSMap::DaemonState,unsigned> by_state; |
347 | std::map<mds_role_t, std::pair<MDSMap::DaemonState, std::string>> by_rank; | |
348 | by_state[MDSMap::DaemonState::STATE_STANDBY] = standby_daemons.size(); | |
349 | for (const auto& [gid, fscid] : mds_roles) { | |
350 | if (fscid == FS_CLUSTER_ID_NONE) | |
351 | continue; | |
352 | ||
353 | const auto& info = filesystems.at(fscid)->mds_map.get_info_gid(gid); | |
354 | auto s = std::string(ceph_mds_state_name(info.state)); | |
7c673cae FG |
355 | if (info.laggy()) { |
356 | s += "(laggy or crashed)"; | |
357 | } | |
358 | ||
11fdf7f2 TL |
359 | if (f) { |
360 | f->open_object_section("mds"); | |
361 | f->dump_unsigned("filesystem_id", fscid); | |
362 | f->dump_unsigned("rank", info.rank); | |
363 | f->dump_string("name", info.name); | |
364 | f->dump_string("status", s); | |
365 | f->dump_unsigned("gid", gid); | |
366 | f->close_section(); | |
367 | } else if (info.state != MDSMap::DaemonState::STATE_STANDBY_REPLAY) { | |
368 | by_rank[mds_role_t(fscid, info.rank)] = std::make_pair(info.state, info.name + "=" + s); | |
7c673cae | 369 | } |
11fdf7f2 | 370 | by_state[info.state]++; |
7c673cae FG |
371 | } |
372 | ||
373 | if (f) { | |
374 | f->close_section(); | |
375 | } else { | |
11fdf7f2 | 376 | if (0 < by_rank.size() && by_rank.size() < 5) { |
7c673cae FG |
377 | if (filesystems.size() > 1) { |
378 | // Disambiguate filesystems | |
379 | std::map<std::string, std::string> pretty; | |
11fdf7f2 TL |
380 | for (const auto& [role,status] : by_rank) { |
381 | const auto &fs_name = filesystems.at(role.fscid)->mds_map.fs_name; | |
382 | CachedStackStringStream css; | |
383 | *css << fs_name << ":" << role.rank; | |
384 | pretty.emplace(std::piecewise_construct, std::forward_as_tuple(css->strv()), std::forward_as_tuple(status.second)); | |
385 | --by_state[status.first]; /* already printed! */ | |
7c673cae FG |
386 | } |
387 | *out << " " << pretty; | |
388 | } else { | |
389 | // Omit FSCID in output when only one filesystem exists | |
390 | std::map<mds_rank_t, std::string> shortened; | |
11fdf7f2 TL |
391 | for (const auto& [role,status] : by_rank) { |
392 | shortened[role.rank] = status.second; | |
393 | --by_state[status.first]; /* already printed! */ | |
7c673cae FG |
394 | } |
395 | *out << " " << shortened; | |
396 | } | |
397 | } | |
11fdf7f2 TL |
398 | for (const auto& [state, count] : by_state) { |
399 | if (count > 0) { | |
400 | auto s = std::string_view(ceph_mds_state_name(state)); | |
401 | *out << " " << count << " " << s; | |
402 | } | |
403 | } | |
7c673cae FG |
404 | } |
405 | ||
11fdf7f2 TL |
406 | if (f) { |
407 | const auto state = MDSMap::DaemonState::STATE_STANDBY; | |
408 | auto&& name = ceph_mds_state_name(state); | |
409 | auto count = standby_daemons.size(); | |
410 | f->dump_unsigned(name, count); | |
7c673cae FG |
411 | } |
412 | ||
413 | size_t failed = 0; | |
414 | size_t damaged = 0; | |
11fdf7f2 TL |
415 | for (const auto& p : filesystems) { |
416 | auto& fs = p.second; | |
7c673cae FG |
417 | failed += fs->mds_map.failed.size(); |
418 | damaged += fs->mds_map.damaged.size(); | |
419 | } | |
420 | ||
421 | if (failed > 0) { | |
422 | if (f) { | |
423 | f->dump_unsigned("failed", failed); | |
424 | } else { | |
425 | *out << ", " << failed << " failed"; | |
426 | } | |
427 | } | |
428 | ||
429 | if (damaged > 0) { | |
430 | if (f) { | |
431 | f->dump_unsigned("damaged", damaged); | |
432 | } else { | |
433 | *out << ", " << damaged << " damaged"; | |
434 | } | |
435 | } | |
436 | //if (stopped.size()) | |
437 | //out << ", " << stopped.size() << " stopped"; | |
438 | } | |
439 | ||
9f95a23c TL |
440 | mds_gid_t Filesystem::get_standby_replay(mds_gid_t who) const |
441 | { | |
442 | for (const auto &i : mds_map.mds_info) { | |
443 | const auto &info = i.second; | |
444 | if (info.state == MDSMap::STATE_STANDBY_REPLAY | |
445 | && info.rank == mds_map.mds_info.at(who).rank) { | |
446 | return info.global_id; | |
447 | } | |
448 | } | |
449 | return MDS_GID_NONE; | |
450 | } | |
7c673cae | 451 | |
11fdf7f2 | 452 | Filesystem::ref FSMap::create_filesystem(std::string_view name, |
522d829b | 453 | int64_t metadata_pool, int64_t data_pool, uint64_t features, |
20effc67 | 454 | fs_cluster_id_t fscid, bool recover) |
7c673cae | 455 | { |
11fdf7f2 | 456 | auto fs = Filesystem::create(); |
28e407b8 | 457 | fs->mds_map.epoch = epoch; |
11fdf7f2 | 458 | fs->mds_map.fs_name = name; |
31f18b77 | 459 | fs->mds_map.data_pools.push_back(data_pool); |
7c673cae FG |
460 | fs->mds_map.metadata_pool = metadata_pool; |
461 | fs->mds_map.cas_pool = -1; | |
522d829b | 462 | fs->mds_map.compat = default_compat; |
7c673cae FG |
463 | fs->mds_map.created = ceph_clock_now(); |
464 | fs->mds_map.modified = ceph_clock_now(); | |
7c673cae | 465 | fs->mds_map.enabled = true; |
522d829b TL |
466 | if (fscid == FS_CLUSTER_ID_NONE) { |
467 | fs->fscid = next_filesystem_id++; | |
468 | } else { | |
469 | fs->fscid = fscid; | |
470 | next_filesystem_id = std::max(fscid, (fs_cluster_id_t)next_filesystem_id) + 1; | |
471 | } | |
472 | ||
20effc67 TL |
473 | if (recover) { |
474 | // Populate rank 0 as existing (so don't go into CREATING) | |
475 | // but failed (so that next available MDS is assigned the rank) | |
476 | fs->mds_map.in.insert(mds_rank_t(0)); | |
477 | fs->mds_map.failed.insert(mds_rank_t(0)); | |
478 | ||
479 | fs->mds_map.set_flag(CEPH_MDSMAP_NOT_JOINABLE); | |
480 | } | |
481 | ||
522d829b TL |
482 | // File system's ID can be FS_CLUSTER_ID_ANONYMOUS if we're recovering |
483 | // a legacy file system by passing FS_CLUSTER_ID_ANONYMOUS as the desired | |
484 | // file system ID | |
485 | if (fscid != FS_CLUSTER_ID_ANONYMOUS) { | |
486 | // ANONYMOUS is only for upgrades from legacy mdsmaps, we should | |
487 | // have initialized next_filesystem_id such that it's never used here. | |
488 | ceph_assert(fs->fscid != FS_CLUSTER_ID_ANONYMOUS); | |
489 | } | |
7c673cae FG |
490 | filesystems[fs->fscid] = fs; |
491 | ||
492 | // Created first filesystem? Set it as the one | |
493 | // for legacy clients to use | |
494 | if (filesystems.size() == 1) { | |
495 | legacy_client_fscid = fs->fscid; | |
496 | } | |
11fdf7f2 TL |
497 | |
498 | return fs; | |
7c673cae FG |
499 | } |
500 | ||
9f95a23c TL |
501 | Filesystem::const_ref FSMap::get_filesystem(std::string_view name) const |
502 | { | |
503 | for (const auto& p : filesystems) { | |
504 | if (p.second->mds_map.fs_name == name) { | |
505 | return p.second; | |
506 | } | |
507 | } | |
508 | return nullptr; | |
509 | } | |
510 | ||
511 | std::vector<Filesystem::const_ref> FSMap::get_filesystems(void) const | |
512 | { | |
513 | std::vector<Filesystem::const_ref> ret; | |
514 | for (const auto& p : filesystems) { | |
515 | ret.push_back(p.second); | |
516 | } | |
517 | return ret; | |
518 | } | |
519 | ||
7c673cae FG |
520 | void FSMap::reset_filesystem(fs_cluster_id_t fscid) |
521 | { | |
522 | auto fs = get_filesystem(fscid); | |
11fdf7f2 | 523 | auto new_fs = Filesystem::create(); |
7c673cae FG |
524 | |
525 | // Populate rank 0 as existing (so don't go into CREATING) | |
526 | // but failed (so that next available MDS is assigned the rank) | |
527 | new_fs->mds_map.in.insert(mds_rank_t(0)); | |
528 | new_fs->mds_map.failed.insert(mds_rank_t(0)); | |
529 | ||
530 | // Carry forward what makes sense | |
531 | new_fs->fscid = fs->fscid; | |
532 | new_fs->mds_map.inline_data_enabled = fs->mds_map.inline_data_enabled; | |
7c673cae FG |
533 | new_fs->mds_map.data_pools = fs->mds_map.data_pools; |
534 | new_fs->mds_map.metadata_pool = fs->mds_map.metadata_pool; | |
535 | new_fs->mds_map.cas_pool = fs->mds_map.cas_pool; | |
536 | new_fs->mds_map.fs_name = fs->mds_map.fs_name; | |
522d829b | 537 | new_fs->mds_map.compat = default_compat; |
7c673cae FG |
538 | new_fs->mds_map.created = ceph_clock_now(); |
539 | new_fs->mds_map.modified = ceph_clock_now(); | |
7c673cae FG |
540 | new_fs->mds_map.standby_count_wanted = fs->mds_map.standby_count_wanted; |
541 | new_fs->mds_map.enabled = true; | |
542 | ||
c07f9fc5 FG |
543 | // Remember mds ranks that have ever started. (They should load old inotable |
544 | // instead of creating new one if they start again.) | |
545 | new_fs->mds_map.stopped.insert(fs->mds_map.in.begin(), fs->mds_map.in.end()); | |
546 | new_fs->mds_map.stopped.insert(fs->mds_map.stopped.begin(), fs->mds_map.stopped.end()); | |
547 | new_fs->mds_map.stopped.erase(mds_rank_t(0)); | |
548 | ||
7c673cae FG |
549 | // Persist the new FSMap |
550 | filesystems[new_fs->fscid] = new_fs; | |
551 | } | |
552 | ||
553 | void FSMap::get_health(list<pair<health_status_t,string> >& summary, | |
554 | list<pair<health_status_t,string> > *detail) const | |
555 | { | |
556 | mds_rank_t standby_count_wanted = 0; | |
557 | for (const auto &i : filesystems) { | |
558 | const auto &fs = i.second; | |
559 | ||
560 | // TODO: move get_health up into here so that we can qualify | |
561 | // all the messages with what filesystem they're talking about | |
562 | fs->mds_map.get_health(summary, detail); | |
563 | ||
564 | standby_count_wanted = std::max(standby_count_wanted, fs->mds_map.get_standby_count_wanted((mds_rank_t)standby_daemons.size())); | |
565 | } | |
566 | ||
567 | if (standby_count_wanted) { | |
f67539c2 TL |
568 | CachedStackStringStream css; |
569 | *css << "insufficient standby daemons available: have " << standby_daemons.size() << "; want " << standby_count_wanted << " more"; | |
570 | summary.push_back(make_pair(HEALTH_WARN, css->str())); | |
7c673cae FG |
571 | } |
572 | } | |
573 | ||
574 | bool FSMap::check_health(void) | |
575 | { | |
576 | bool changed = false; | |
577 | for (auto &i : filesystems) { | |
578 | changed |= i.second->mds_map.check_health((mds_rank_t)standby_daemons.size()); | |
579 | } | |
580 | return changed; | |
581 | } | |
582 | ||
224ce89b WB |
583 | void FSMap::get_health_checks(health_check_map_t *checks) const |
584 | { | |
585 | mds_rank_t standby_count_wanted = 0; | |
586 | for (const auto &i : filesystems) { | |
587 | const auto &fs = i.second; | |
588 | health_check_map_t fschecks; | |
d2e6a577 | 589 | |
224ce89b | 590 | fs->mds_map.get_health_checks(&fschecks); |
d2e6a577 FG |
591 | |
592 | // Some of the failed ranks might be transient (i.e. there are standbys | |
593 | // ready to replace them). We will report only on "stuck" failed, i.e. | |
594 | // ranks which are failed and have no standby replacement available. | |
595 | std::set<mds_rank_t> stuck_failed; | |
596 | ||
597 | for (const auto &rank : fs->mds_map.failed) { | |
9f95a23c TL |
598 | auto rep_info = find_replacement_for({fs->fscid, rank}); |
599 | if (!rep_info) { | |
d2e6a577 FG |
600 | stuck_failed.insert(rank); |
601 | } | |
602 | } | |
603 | ||
604 | // FS_WITH_FAILED_MDS | |
605 | if (!stuck_failed.empty()) { | |
606 | health_check_t& fscheck = checks->get_or_add( | |
607 | "FS_WITH_FAILED_MDS", HEALTH_WARN, | |
9f95a23c | 608 | "%num% filesystem%plurals% %hasorhave% a failed mds daemon", 1); |
f67539c2 TL |
609 | CachedStackStringStream css; |
610 | *css << "fs " << fs->mds_map.fs_name << " has " << stuck_failed.size() | |
d2e6a577 | 611 | << " failed mds" << (stuck_failed.size() > 1 ? "s" : ""); |
f67539c2 | 612 | fscheck.detail.push_back(css->str()); } |
d2e6a577 | 613 | |
224ce89b WB |
614 | checks->merge(fschecks); |
615 | standby_count_wanted = std::max( | |
616 | standby_count_wanted, | |
617 | fs->mds_map.get_standby_count_wanted((mds_rank_t)standby_daemons.size())); | |
618 | } | |
619 | ||
620 | // MDS_INSUFFICIENT_STANDBY | |
621 | if (standby_count_wanted) { | |
f67539c2 TL |
622 | CachedStackStringStream css1, css2; |
623 | *css1 << "insufficient standby MDS daemons available"; | |
624 | auto& d = checks->get_or_add("MDS_INSUFFICIENT_STANDBY", HEALTH_WARN, css1->str(), 1); | |
625 | *css2 << "have " << standby_daemons.size() << "; want " << standby_count_wanted | |
626 | << " more"; | |
627 | d.detail.push_back(css2->str()); | |
224ce89b WB |
628 | } |
629 | } | |
630 | ||
9f95a23c TL |
631 | void FSMap::encode(bufferlist& bl, uint64_t features) const |
632 | { | |
522d829b | 633 | ENCODE_START(STRUCT_VERSION, 6, bl); |
9f95a23c TL |
634 | encode(epoch, bl); |
635 | encode(next_filesystem_id, bl); | |
636 | encode(legacy_client_fscid, bl); | |
522d829b | 637 | encode(default_compat, bl); |
9f95a23c TL |
638 | encode(enable_multiple, bl); |
639 | { | |
640 | std::vector<Filesystem::ref> v; | |
641 | v.reserve(filesystems.size()); | |
642 | for (auto& p : filesystems) v.emplace_back(p.second); | |
643 | encode(v, bl, features); | |
7c673cae | 644 | } |
9f95a23c TL |
645 | encode(mds_roles, bl); |
646 | encode(standby_daemons, bl, features); | |
647 | encode(standby_epochs, bl); | |
648 | encode(ever_enabled_multiple, bl); | |
649 | ENCODE_FINISH(bl); | |
7c673cae FG |
650 | } |
651 | ||
11fdf7f2 | 652 | void FSMap::decode(bufferlist::const_iterator& p) |
7c673cae | 653 | { |
a4b75251 | 654 | struct_version = 0; |
522d829b TL |
655 | DECODE_START(STRUCT_VERSION, p); |
656 | DECODE_OLDEST(7); | |
657 | struct_version = struct_v; | |
f67539c2 TL |
658 | decode(epoch, p); |
659 | decode(next_filesystem_id, p); | |
660 | decode(legacy_client_fscid, p); | |
522d829b | 661 | decode(default_compat, p); |
f67539c2 TL |
662 | decode(enable_multiple, p); |
663 | { | |
664 | std::vector<Filesystem::ref> v; | |
665 | decode(v, p); | |
7c673cae | 666 | filesystems.clear(); |
f67539c2 TL |
667 | for (auto& ref : v) { |
668 | auto em = filesystems.emplace(std::piecewise_construct, std::forward_as_tuple(ref->fscid), std::forward_as_tuple(std::move(ref))); | |
669 | ceph_assert(em.second); | |
7c673cae FG |
670 | } |
671 | } | |
f67539c2 TL |
672 | decode(mds_roles, p); |
673 | decode(standby_daemons, p); | |
674 | decode(standby_epochs, p); | |
675 | if (struct_v >= 7) { | |
676 | decode(ever_enabled_multiple, p); | |
677 | } | |
7c673cae FG |
678 | DECODE_FINISH(p); |
679 | } | |
680 | ||
11fdf7f2 | 681 | void FSMap::sanitize(const std::function<bool(int64_t pool)>& pool_exists) |
3efd9988 FG |
682 | { |
683 | for (auto &fs : filesystems) { | |
684 | fs.second->mds_map.sanitize(pool_exists); | |
685 | } | |
686 | } | |
7c673cae FG |
687 | |
688 | void Filesystem::encode(bufferlist& bl, uint64_t features) const | |
689 | { | |
f67539c2 | 690 | ENCODE_START(2, 1, bl); |
11fdf7f2 | 691 | encode(fscid, bl); |
7c673cae FG |
692 | bufferlist mdsmap_bl; |
693 | mds_map.encode(mdsmap_bl, features); | |
11fdf7f2 | 694 | encode(mdsmap_bl, bl); |
f67539c2 | 695 | encode(mirror_info, bl); |
7c673cae FG |
696 | ENCODE_FINISH(bl); |
697 | } | |
698 | ||
11fdf7f2 | 699 | void Filesystem::decode(bufferlist::const_iterator& p) |
7c673cae | 700 | { |
f67539c2 | 701 | DECODE_START(2, p); |
11fdf7f2 | 702 | decode(fscid, p); |
7c673cae | 703 | bufferlist mdsmap_bl; |
11fdf7f2 TL |
704 | decode(mdsmap_bl, p); |
705 | auto mdsmap_bl_iter = mdsmap_bl.cbegin(); | |
7c673cae | 706 | mds_map.decode(mdsmap_bl_iter); |
f67539c2 TL |
707 | if (struct_v >= 2) { |
708 | decode(mirror_info, p); | |
709 | } | |
7c673cae FG |
710 | DECODE_FINISH(p); |
711 | } | |
712 | ||
713 | int FSMap::parse_filesystem( | |
11fdf7f2 TL |
714 | std::string_view ns_str, |
715 | Filesystem::const_ref* result | |
7c673cae FG |
716 | ) const |
717 | { | |
718 | std::string ns_err; | |
94b18763 FG |
719 | std::string s(ns_str); |
720 | fs_cluster_id_t fscid = strict_strtol(s.c_str(), 10, &ns_err); | |
7c673cae FG |
721 | if (!ns_err.empty() || filesystems.count(fscid) == 0) { |
722 | for (auto &fs : filesystems) { | |
94b18763 | 723 | if (fs.second->mds_map.fs_name == s) { |
7c673cae FG |
724 | *result = std::const_pointer_cast<const Filesystem>(fs.second); |
725 | return 0; | |
726 | } | |
727 | } | |
f67539c2 | 728 | return -CEPHFS_ENOENT; |
7c673cae FG |
729 | } else { |
730 | *result = get_filesystem(fscid); | |
731 | return 0; | |
732 | } | |
733 | } | |
734 | ||
735 | void Filesystem::print(std::ostream &out) const | |
736 | { | |
737 | out << "Filesystem '" << mds_map.fs_name | |
738 | << "' (" << fscid << ")" << std::endl; | |
739 | mds_map.print(out); | |
f67539c2 TL |
740 | if (mirror_info.is_mirrored()) { |
741 | mirror_info.print(out); | |
742 | } | |
7c673cae FG |
743 | } |
744 | ||
9f95a23c | 745 | bool FSMap::is_any_degraded() const |
7c673cae | 746 | { |
9f95a23c TL |
747 | for (auto& i : filesystems) { |
748 | if (i.second->mds_map.is_degraded()) { | |
749 | return true; | |
750 | } | |
751 | } | |
752 | return false; | |
753 | } | |
754 | ||
755 | std::map<mds_gid_t, MDSMap::mds_info_t> FSMap::get_mds_info() const | |
756 | { | |
757 | std::map<mds_gid_t, mds_info_t> result; | |
758 | for (const auto &i : standby_daemons) { | |
759 | result[i.first] = i.second; | |
760 | } | |
761 | ||
762 | for (const auto &i : filesystems) { | |
763 | const auto &fs_info = i.second->mds_map.get_mds_info(); | |
764 | for (const auto &j : fs_info) { | |
765 | result[j.first] = j.second; | |
766 | } | |
767 | } | |
768 | ||
769 | return result; | |
770 | } | |
771 | ||
522d829b | 772 | const MDSMap::mds_info_t* FSMap::get_available_standby(const Filesystem& fs) const |
9f95a23c | 773 | { |
522d829b | 774 | const bool upgradeable = fs.is_upgradeable(); |
9f95a23c | 775 | const mds_info_t* who = nullptr; |
11fdf7f2 TL |
776 | for (const auto& [gid, info] : standby_daemons) { |
777 | ceph_assert(info.rank == MDS_RANK_NONE); | |
778 | ceph_assert(info.state == MDSMap::STATE_STANDBY); | |
7c673cae | 779 | |
11fdf7f2 | 780 | if (info.laggy() || info.is_frozen()) { |
7c673cae | 781 | continue; |
522d829b TL |
782 | } else if (!info.compat.writeable(fs.mds_map.compat)) { |
783 | /* standby is not compatible with this fs */ | |
784 | continue; | |
785 | } else if (!upgradeable && !fs.mds_map.compat.writeable(info.compat)) { | |
786 | /* promotion would change fs.mds_map.compat and we're not upgradeable */ | |
787 | continue; | |
7c673cae FG |
788 | } |
789 | ||
522d829b | 790 | if (info.join_fscid == fs.fscid) { |
9f95a23c TL |
791 | who = &info; |
792 | break; | |
793 | } else if (info.join_fscid == FS_CLUSTER_ID_NONE) { | |
794 | who = &info; /* vanilla standby */ | |
795 | } else if (who == nullptr) { | |
796 | who = &info; /* standby for another fs, last resort */ | |
797 | } | |
798 | } | |
799 | return who; | |
800 | } | |
801 | ||
802 | mds_gid_t FSMap::find_mds_gid_by_name(std::string_view s) const | |
803 | { | |
804 | const auto info = get_mds_info(); | |
805 | for (const auto &p : info) { | |
806 | if (p.second.name == s) { | |
807 | return p.first; | |
808 | } | |
7c673cae | 809 | } |
11fdf7f2 | 810 | return MDS_GID_NONE; |
7c673cae FG |
811 | } |
812 | ||
9f95a23c TL |
813 | const MDSMap::mds_info_t* FSMap::find_by_name(std::string_view name) const |
814 | { | |
815 | std::map<mds_gid_t, mds_info_t> result; | |
816 | for (const auto &i : standby_daemons) { | |
817 | if (i.second.name == name) { | |
818 | return &(i.second); | |
819 | } | |
820 | } | |
821 | ||
822 | for (const auto &i : filesystems) { | |
823 | const auto &fs_info = i.second->mds_map.get_mds_info(); | |
824 | for (const auto &j : fs_info) { | |
825 | if (j.second.name == name) { | |
826 | return &(j.second); | |
827 | } | |
828 | } | |
829 | } | |
830 | ||
831 | return nullptr; | |
832 | } | |
833 | ||
834 | const MDSMap::mds_info_t* FSMap::find_replacement_for(mds_role_t role) const | |
11fdf7f2 TL |
835 | { |
836 | auto&& fs = get_filesystem(role.fscid); | |
7c673cae | 837 | |
11fdf7f2 TL |
838 | // First see if we have a STANDBY_REPLAY |
839 | for (const auto& [gid, info] : fs->mds_map.mds_info) { | |
840 | if (info.rank == role.rank && info.state == MDSMap::STATE_STANDBY_REPLAY) { | |
841 | if (info.is_frozen()) { | |
842 | /* the standby-replay is frozen, do nothing! */ | |
9f95a23c | 843 | return nullptr; |
11fdf7f2 | 844 | } else { |
522d829b | 845 | ceph_assert(info.compat.writeable(fs->mds_map.compat)); |
9f95a23c | 846 | return &info; |
11fdf7f2 | 847 | } |
7c673cae FG |
848 | } |
849 | } | |
7c673cae | 850 | |
522d829b | 851 | return get_available_standby(*fs); |
7c673cae FG |
852 | } |
853 | ||
a4b75251 | 854 | void FSMap::sanity(bool pending) const |
7c673cae | 855 | { |
a4b75251 TL |
856 | /* Only do some sanity checks on **new** FSMaps. Older versions may not be |
857 | * compliant. | |
858 | */ | |
859 | ||
7c673cae | 860 | if (legacy_client_fscid != FS_CLUSTER_ID_NONE) { |
11fdf7f2 | 861 | ceph_assert(filesystems.count(legacy_client_fscid) == 1); |
7c673cae FG |
862 | } |
863 | ||
522d829b TL |
864 | for (const auto& [fscid, fs] : filesystems) { |
865 | ceph_assert(fscid == fs->fscid); | |
866 | for (const auto& [gid, info] : fs->mds_map.mds_info) { | |
867 | ceph_assert(info.rank != MDS_RANK_NONE); | |
868 | ceph_assert(mds_roles.at(gid) == fscid); | |
869 | ceph_assert(standby_daemons.count(gid) == 0); | |
870 | ceph_assert(standby_epochs.count(gid) == 0); | |
871 | if (info.state != MDSMap::STATE_STANDBY_REPLAY) { | |
872 | ceph_assert(fs->mds_map.up.at(info.rank) == gid); | |
873 | ceph_assert(fs->mds_map.failed.count(info.rank) == 0); | |
874 | ceph_assert(fs->mds_map.damaged.count(info.rank) == 0); | |
875 | } else { | |
a4b75251 | 876 | ceph_assert(!pending || fs->mds_map.allows_standby_replay()); |
7c673cae | 877 | } |
522d829b | 878 | ceph_assert(info.compat.writeable(fs->mds_map.compat)); |
7c673cae FG |
879 | } |
880 | ||
881 | for (const auto &j : fs->mds_map.up) { | |
882 | mds_rank_t rank = j.first; | |
11fdf7f2 | 883 | ceph_assert(fs->mds_map.in.count(rank) == 1); |
7c673cae | 884 | mds_gid_t gid = j.second; |
11fdf7f2 | 885 | ceph_assert(fs->mds_map.mds_info.count(gid) == 1); |
7c673cae FG |
886 | } |
887 | } | |
888 | ||
889 | for (const auto &i : standby_daemons) { | |
11fdf7f2 TL |
890 | ceph_assert(i.second.state == MDSMap::STATE_STANDBY); |
891 | ceph_assert(i.second.rank == MDS_RANK_NONE); | |
892 | ceph_assert(i.second.global_id == i.first); | |
893 | ceph_assert(standby_epochs.count(i.first) == 1); | |
894 | ceph_assert(mds_roles.count(i.first) == 1); | |
895 | ceph_assert(mds_roles.at(i.first) == FS_CLUSTER_ID_NONE); | |
7c673cae FG |
896 | } |
897 | ||
898 | for (const auto &i : standby_epochs) { | |
11fdf7f2 | 899 | ceph_assert(standby_daemons.count(i.first) == 1); |
7c673cae FG |
900 | } |
901 | ||
902 | for (const auto &i : mds_roles) { | |
903 | if (i.second == FS_CLUSTER_ID_NONE) { | |
11fdf7f2 | 904 | ceph_assert(standby_daemons.count(i.first) == 1); |
7c673cae | 905 | } else { |
11fdf7f2 TL |
906 | ceph_assert(filesystems.count(i.second) == 1); |
907 | ceph_assert(filesystems.at(i.second)->mds_map.mds_info.count(i.first) == 1); | |
7c673cae FG |
908 | } |
909 | } | |
910 | } | |
911 | ||
912 | void FSMap::promote( | |
913 | mds_gid_t standby_gid, | |
11fdf7f2 | 914 | Filesystem& filesystem, |
7c673cae FG |
915 | mds_rank_t assigned_rank) |
916 | { | |
11fdf7f2 | 917 | ceph_assert(gid_exists(standby_gid)); |
7c673cae FG |
918 | bool is_standby_replay = mds_roles.at(standby_gid) != FS_CLUSTER_ID_NONE; |
919 | if (!is_standby_replay) { | |
11fdf7f2 TL |
920 | ceph_assert(standby_daemons.count(standby_gid)); |
921 | ceph_assert(standby_daemons.at(standby_gid).state == MDSMap::STATE_STANDBY); | |
7c673cae FG |
922 | } |
923 | ||
11fdf7f2 | 924 | MDSMap &mds_map = filesystem.mds_map; |
7c673cae FG |
925 | |
926 | // Insert daemon state to Filesystem | |
927 | if (!is_standby_replay) { | |
928 | mds_map.mds_info[standby_gid] = standby_daemons.at(standby_gid); | |
929 | } else { | |
11fdf7f2 TL |
930 | ceph_assert(mds_map.mds_info.count(standby_gid)); |
931 | ceph_assert(mds_map.mds_info.at(standby_gid).state == MDSMap::STATE_STANDBY_REPLAY); | |
932 | ceph_assert(mds_map.mds_info.at(standby_gid).rank == assigned_rank); | |
7c673cae | 933 | } |
522d829b | 934 | auto& info = mds_map.mds_info.at(standby_gid); |
7c673cae | 935 | |
a4b75251 TL |
936 | if (!filesystem.mds_map.compat.writeable(info.compat)) { |
937 | ceph_assert(filesystem.is_upgradeable()); | |
938 | filesystem.mds_map.compat.merge(info.compat); | |
939 | } | |
940 | ||
7c673cae FG |
941 | if (mds_map.stopped.erase(assigned_rank)) { |
942 | // The cluster is being expanded with a stopped rank | |
943 | info.state = MDSMap::STATE_STARTING; | |
944 | } else if (!mds_map.is_in(assigned_rank)) { | |
945 | // The cluster is being expanded with a new rank | |
946 | info.state = MDSMap::STATE_CREATING; | |
947 | } else { | |
948 | // An existing rank is being assigned to a replacement | |
949 | info.state = MDSMap::STATE_REPLAY; | |
950 | mds_map.failed.erase(assigned_rank); | |
951 | } | |
952 | info.rank = assigned_rank; | |
953 | info.inc = epoch; | |
522d829b | 954 | mds_roles.at(standby_gid) = filesystem.fscid; |
7c673cae FG |
955 | |
956 | // Update the rank state in Filesystem | |
957 | mds_map.in.insert(assigned_rank); | |
958 | mds_map.up[assigned_rank] = standby_gid; | |
959 | ||
960 | // Remove from the list of standbys | |
961 | if (!is_standby_replay) { | |
962 | standby_daemons.erase(standby_gid); | |
963 | standby_epochs.erase(standby_gid); | |
964 | } | |
965 | ||
966 | // Indicate that Filesystem has been modified | |
967 | mds_map.epoch = epoch; | |
968 | } | |
969 | ||
970 | void FSMap::assign_standby_replay( | |
971 | const mds_gid_t standby_gid, | |
972 | const fs_cluster_id_t leader_ns, | |
973 | const mds_rank_t leader_rank) | |
974 | { | |
11fdf7f2 TL |
975 | ceph_assert(mds_roles.at(standby_gid) == FS_CLUSTER_ID_NONE); |
976 | ceph_assert(gid_exists(standby_gid)); | |
977 | ceph_assert(!gid_has_rank(standby_gid)); | |
978 | ceph_assert(standby_daemons.count(standby_gid)); | |
7c673cae FG |
979 | |
980 | // Insert to the filesystem | |
981 | auto fs = filesystems.at(leader_ns); | |
982 | fs->mds_map.mds_info[standby_gid] = standby_daemons.at(standby_gid); | |
983 | fs->mds_map.mds_info[standby_gid].rank = leader_rank; | |
984 | fs->mds_map.mds_info[standby_gid].state = MDSMap::STATE_STANDBY_REPLAY; | |
985 | mds_roles[standby_gid] = leader_ns; | |
986 | ||
987 | // Remove from the list of standbys | |
988 | standby_daemons.erase(standby_gid); | |
989 | standby_epochs.erase(standby_gid); | |
990 | ||
991 | // Indicate that Filesystem has been modified | |
992 | fs->mds_map.epoch = epoch; | |
993 | } | |
994 | ||
f67539c2 | 995 | void FSMap::erase(mds_gid_t who, epoch_t blocklist_epoch) |
7c673cae FG |
996 | { |
997 | if (mds_roles.at(who) == FS_CLUSTER_ID_NONE) { | |
998 | standby_daemons.erase(who); | |
999 | standby_epochs.erase(who); | |
1000 | } else { | |
1001 | auto &fs = filesystems.at(mds_roles.at(who)); | |
1002 | const auto &info = fs->mds_map.mds_info.at(who); | |
1003 | if (info.state != MDSMap::STATE_STANDBY_REPLAY) { | |
1004 | if (info.state == MDSMap::STATE_CREATING) { | |
1005 | // If this gid didn't make it past CREATING, then forget | |
1006 | // the rank ever existed so that next time it's handed out | |
1007 | // to a gid it'll go back into CREATING. | |
1008 | fs->mds_map.in.erase(info.rank); | |
1009 | } else { | |
1010 | // Put this rank into the failed list so that the next available | |
1011 | // STANDBY will pick it up. | |
1012 | fs->mds_map.failed.insert(info.rank); | |
1013 | } | |
11fdf7f2 | 1014 | ceph_assert(fs->mds_map.up.at(info.rank) == info.global_id); |
7c673cae FG |
1015 | fs->mds_map.up.erase(info.rank); |
1016 | } | |
1017 | fs->mds_map.mds_info.erase(who); | |
f67539c2 | 1018 | fs->mds_map.last_failure_osd_epoch = blocklist_epoch; |
7c673cae FG |
1019 | fs->mds_map.epoch = epoch; |
1020 | } | |
1021 | ||
1022 | mds_roles.erase(who); | |
1023 | } | |
1024 | ||
f67539c2 | 1025 | void FSMap::damaged(mds_gid_t who, epoch_t blocklist_epoch) |
7c673cae | 1026 | { |
11fdf7f2 | 1027 | ceph_assert(mds_roles.at(who) != FS_CLUSTER_ID_NONE); |
7c673cae | 1028 | auto fs = filesystems.at(mds_roles.at(who)); |
a4b75251 | 1029 | mds_rank_t rank = fs->mds_map.mds_info.at(who).rank; |
7c673cae | 1030 | |
f67539c2 | 1031 | erase(who, blocklist_epoch); |
7c673cae FG |
1032 | fs->mds_map.failed.erase(rank); |
1033 | fs->mds_map.damaged.insert(rank); | |
1034 | ||
11fdf7f2 | 1035 | ceph_assert(fs->mds_map.epoch == epoch); |
7c673cae FG |
1036 | } |
1037 | ||
1038 | /** | |
1039 | * Update to indicate that the rank `rank` is to be removed | |
1040 | * from the damaged list of the filesystem `fscid` | |
1041 | */ | |
1042 | bool FSMap::undamaged(const fs_cluster_id_t fscid, const mds_rank_t rank) | |
1043 | { | |
1044 | auto fs = filesystems.at(fscid); | |
1045 | ||
1046 | if (fs->mds_map.damaged.erase(rank)) { | |
1047 | fs->mds_map.failed.insert(rank); | |
1048 | fs->mds_map.epoch = epoch; | |
1049 | return true; | |
1050 | } else { | |
1051 | return false; | |
1052 | } | |
1053 | } | |
1054 | ||
1055 | void FSMap::insert(const MDSMap::mds_info_t &new_info) | |
1056 | { | |
a4b75251 TL |
1057 | static const CompatSet empty; |
1058 | ||
11fdf7f2 TL |
1059 | ceph_assert(new_info.state == MDSMap::STATE_STANDBY); |
1060 | ceph_assert(new_info.rank == MDS_RANK_NONE); | |
7c673cae | 1061 | mds_roles[new_info.global_id] = FS_CLUSTER_ID_NONE; |
a4b75251 TL |
1062 | auto& info = standby_daemons[new_info.global_id]; |
1063 | info = new_info; | |
1064 | if (empty.compare(info.compat) == 0) { | |
1065 | // bootstrap old compat: boot beacon contains empty compat on old (v16.2.4 | |
1066 | // or older) MDS. | |
1067 | info.compat = MDSMap::get_compat_set_v16_2_4(); | |
1068 | } | |
20effc67 TL |
1069 | /* TODO remove after R is released |
1070 | * Insert INLINE; see comment in MDSMap::decode. | |
1071 | */ | |
1072 | info.compat.incompat.insert(MDS_FEATURE_INCOMPAT_INLINE); | |
7c673cae FG |
1073 | standby_epochs[new_info.global_id] = epoch; |
1074 | } | |
1075 | ||
9f95a23c | 1076 | std::vector<mds_gid_t> FSMap::stop(mds_gid_t who) |
7c673cae | 1077 | { |
11fdf7f2 | 1078 | ceph_assert(mds_roles.at(who) != FS_CLUSTER_ID_NONE); |
7c673cae FG |
1079 | auto fs = filesystems.at(mds_roles.at(who)); |
1080 | const auto &info = fs->mds_map.mds_info.at(who); | |
1081 | fs->mds_map.up.erase(info.rank); | |
1082 | fs->mds_map.in.erase(info.rank); | |
1083 | fs->mds_map.stopped.insert(info.rank); | |
1084 | ||
1085 | // Also drop any standby replays that were following this rank | |
9f95a23c | 1086 | std::vector<mds_gid_t> standbys; |
7c673cae FG |
1087 | for (const auto &i : fs->mds_map.mds_info) { |
1088 | const auto &other_gid = i.first; | |
1089 | const auto &other_info = i.second; | |
1090 | if (other_info.rank == info.rank | |
1091 | && other_info.state == MDSMap::STATE_STANDBY_REPLAY) { | |
1092 | standbys.push_back(other_gid); | |
1093 | erase(other_gid, 0); | |
1094 | } | |
1095 | } | |
1096 | ||
1097 | fs->mds_map.mds_info.erase(who); | |
1098 | mds_roles.erase(who); | |
1099 | ||
1100 | fs->mds_map.epoch = epoch; | |
1101 | ||
1102 | return standbys; | |
1103 | } | |
1104 | ||
1105 | ||
1106 | /** | |
1107 | * Given one of the following forms: | |
1108 | * <fs name>:<rank> | |
1109 | * <fs id>:<rank> | |
1110 | * <rank> | |
1111 | * | |
1112 | * Parse into a mds_role_t. The rank-only form is only valid | |
1113 | * if legacy_client_ns is set. | |
1114 | */ | |
f67539c2 TL |
1115 | |
1116 | int FSMap::parse_role( | |
1117 | std::string_view role_str, | |
1118 | mds_role_t *role, | |
1119 | std::ostream &ss, | |
1120 | const std::vector<string> &filter) const | |
1121 | { | |
1122 | int r = parse_role(role_str, role, ss); | |
1123 | if (r < 0) return r; | |
1124 | ||
1125 | string_view fs_name = get_filesystem(role->fscid)->mds_map.get_fs_name(); | |
1126 | ||
1127 | if (!filter.empty() && | |
1128 | std::find(filter.begin(), filter.end(), fs_name) == filter.end()) { | |
1129 | if (r >= 0) { | |
1130 | ss << "Invalid file system"; | |
1131 | } | |
1132 | return -CEPHFS_ENOENT; | |
1133 | } | |
1134 | ||
1135 | return r; | |
1136 | } | |
1137 | ||
7c673cae | 1138 | int FSMap::parse_role( |
11fdf7f2 | 1139 | std::string_view role_str, |
7c673cae FG |
1140 | mds_role_t *role, |
1141 | std::ostream &ss) const | |
1142 | { | |
1143 | size_t colon_pos = role_str.find(":"); | |
1144 | size_t rank_pos; | |
11fdf7f2 | 1145 | Filesystem::const_ref fs; |
7c673cae FG |
1146 | if (colon_pos == std::string::npos) { |
1147 | if (legacy_client_fscid == FS_CLUSTER_ID_NONE) { | |
1148 | ss << "No filesystem selected"; | |
f67539c2 | 1149 | return -CEPHFS_ENOENT; |
7c673cae FG |
1150 | } |
1151 | fs = get_filesystem(legacy_client_fscid); | |
1152 | rank_pos = 0; | |
1153 | } else { | |
1154 | if (parse_filesystem(role_str.substr(0, colon_pos), &fs) < 0) { | |
1155 | ss << "Invalid filesystem"; | |
f67539c2 | 1156 | return -CEPHFS_ENOENT; |
7c673cae FG |
1157 | } |
1158 | rank_pos = colon_pos+1; | |
1159 | } | |
1160 | ||
1161 | mds_rank_t rank; | |
1162 | std::string err; | |
94b18763 | 1163 | std::string rank_str(role_str.substr(rank_pos)); |
7c673cae FG |
1164 | long rank_i = strict_strtol(rank_str.c_str(), 10, &err); |
1165 | if (rank_i < 0 || !err.empty()) { | |
1166 | ss << "Invalid rank '" << rank_str << "'"; | |
f67539c2 | 1167 | return -CEPHFS_EINVAL; |
7c673cae FG |
1168 | } else { |
1169 | rank = rank_i; | |
1170 | } | |
1171 | ||
1172 | if (fs->mds_map.in.count(rank) == 0) { | |
1173 | ss << "Rank '" << rank << "' not found"; | |
f67539c2 | 1174 | return -CEPHFS_ENOENT; |
7c673cae FG |
1175 | } |
1176 | ||
1177 | *role = {fs->fscid, rank}; | |
1178 | ||
1179 | return 0; | |
1180 | } | |
9f95a23c TL |
1181 | |
1182 | bool FSMap::pool_in_use(int64_t poolid) const | |
1183 | { | |
1184 | for (auto const &i : filesystems) { | |
1185 | if (i.second->mds_map.is_data_pool(poolid) | |
1186 | || i.second->mds_map.metadata_pool == poolid) { | |
1187 | return true; | |
1188 | } | |
1189 | } | |
1190 | return false; | |
1191 | } | |
1192 | ||
1193 | void FSMap::erase_filesystem(fs_cluster_id_t fscid) | |
1194 | { | |
1195 | filesystems.erase(fscid); | |
1196 | for (auto& [gid, info] : standby_daemons) { | |
1197 | if (info.join_fscid == fscid) { | |
1198 | modify_daemon(gid, [](auto& info) { | |
1199 | info.join_fscid = FS_CLUSTER_ID_NONE; | |
1200 | }); | |
1201 | } | |
1202 | } | |
1203 | for (auto& p : filesystems) { | |
1204 | for (auto& [gid, info] : p.second->mds_map.get_mds_info()) { | |
1205 | if (info.join_fscid == fscid) { | |
1206 | modify_daemon(gid, [](auto& info) { | |
1207 | info.join_fscid = FS_CLUSTER_ID_NONE; | |
1208 | }); | |
1209 | } | |
1210 | } | |
1211 | } | |
1212 | } |