ceph.git / ceph/src/mds/FSMap.h (reef 18.2.1)
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 *
 */


#ifndef CEPH_FSMAP_H
#define CEPH_FSMAP_H

#include <map>
#include <memory>
#include <set>
#include <string>
#include <string_view>

#include <errno.h>

#include "include/types.h"
#include "common/Clock.h"
#include "mds/MDSMap.h"

#include "include/CompatSet.h"
#include "include/ceph_features.h"
#include "include/common_fwd.h"
#include "common/Formatter.h"
#include "mds/mdstypes.h"

#if __cplusplus <= 201703L
template<class Key, class T, class Compare, class Alloc, class Pred>
typename std::map<Key, T, Compare, Alloc>::size_type
erase_if(std::map<Key, T, Compare, Alloc>& c, Pred pred) {
  auto old_size = c.size();
  for (auto i = c.begin(), last = c.end(); i != last; ) {
    if (pred(*i)) {
      i = c.erase(i);
    } else {
      ++i;
    }
  }
  return old_size - c.size();
}
#endif
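
// The pre-C++20 block above backfills the std::erase_if overload for
// std::map. A minimal usage sketch (illustrative only):
//
//   std::map<int, std::string> m{{1, "keep"}, {2, "drop"}};
//   auto n = erase_if(m, [](const auto& p) { return p.first == 2; });
//   // n == 1; m now holds only {1, "keep"}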

class health_check_map_t;

struct ClusterInfo {
  ClusterInfo() = default;
  ClusterInfo(std::string_view client_name, std::string_view cluster_name,
              std::string_view fs_name)
    : client_name(client_name),
      cluster_name(cluster_name),
      fs_name(fs_name) {
  }

  std::string client_name;
  std::string cluster_name;
  std::string fs_name;

  bool operator==(const ClusterInfo &cluster_info) const {
    return client_name == cluster_info.client_name &&
           cluster_name == cluster_info.cluster_name &&
           fs_name == cluster_info.fs_name;
  }

  void dump(ceph::Formatter *f) const;
  void print(std::ostream& out) const;

  void encode(ceph::buffer::list &bl) const;
  void decode(ceph::buffer::list::const_iterator &iter);
};

inline std::ostream& operator<<(std::ostream& out, const ClusterInfo &cluster_info) {
  out << "{client_name=" << cluster_info.client_name << ", cluster_name="
      << cluster_info.cluster_name << ", fs_name=" << cluster_info.fs_name << "}";
  return out;
}

struct Peer {
  Peer() = default;
  Peer(std::string_view uuid)
    : uuid(uuid) {
  }
  Peer(std::string_view uuid,
       const ClusterInfo &remote)
    : uuid(uuid),
      remote(remote) {
  }

  std::string uuid;
  ClusterInfo remote;

  bool operator==(const Peer &rhs) const {
    return uuid == rhs.uuid;
  }

  bool operator<(const Peer &rhs) const {
    return uuid < rhs.uuid;
  }

  void dump(ceph::Formatter *f) const;
  void print(std::ostream& out) const;

  void encode(ceph::buffer::list &bl) const;
  void decode(ceph::buffer::list::const_iterator &iter);
};

typedef std::set<Peer> Peers;
inline std::ostream& operator<<(std::ostream& out, const Peer &peer) {
  out << "{uuid=" << peer.uuid << ", remote_cluster=" << peer.remote << "}";
  return out;
}

struct MirrorInfo {
  MirrorInfo() = default;

  bool is_mirrored() const {
    return mirrored;
  }
  void enable_mirroring() {
    mirrored = true;
  }
  void disable_mirroring() {
    peers.clear();
    mirrored = false;
  }

  // uuid variant check
  bool has_peer(std::string_view uuid) const {
    return peers.find(Peer(uuid)) != peers.end();
  }
  // client_name/cluster_name/fs_name variant check
  bool has_peer(std::string_view client_name,
                std::string_view cluster_name,
                std::string_view fs_name) const {
    ClusterInfo cluster_info(client_name, cluster_name, fs_name);
    for (auto &peer : peers) {
      if (peer.remote == cluster_info) {
        return true;
      }
    }
    return false;
  }
  bool has_peers() const {
    return !peers.empty();
  }

  void peer_add(std::string_view uuid,
                std::string_view client_name,
                std::string_view cluster_name,
                std::string_view fs_name) {
    peers.emplace(Peer(uuid, ClusterInfo(client_name, cluster_name, fs_name)));
  }
  void peer_remove(std::string_view uuid) {
    peers.erase(uuid);
  }

  bool mirrored = false;
  Peers peers;

  void dump(ceph::Formatter *f) const;
  void print(std::ostream& out) const;

  void encode(ceph::buffer::list &bl) const;
  void decode(ceph::buffer::list::const_iterator &iter);
};

inline std::ostream& operator<<(std::ostream& out, const MirrorInfo &mirror_info) {
  out << "{peers=" << mirror_info.peers << "}";
  return out;
}

WRITE_CLASS_ENCODER(ClusterInfo)
WRITE_CLASS_ENCODER(Peer)
WRITE_CLASS_ENCODER(MirrorInfo)
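
// Usage sketch for the mirroring bookkeeping above (illustrative only; the
// peer UUID and remote names are made up):
//
//   MirrorInfo mi;
//   mi.enable_mirroring();
//   mi.peer_add("<peer-uuid>", "client.mirror_remote", "remote_cluster", "remote_fs");
//   if (mi.has_peer("<peer-uuid>")) {
//     mi.peer_remove("<peer-uuid>");
//   }
//   mi.disable_mirroring();  // also clears any remaining peers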

/**
 * The MDSMap and any additional fields describing a particular
 * filesystem (a unique fs_cluster_id_t).
 */
class Filesystem
{
public:
  using ref = std::shared_ptr<Filesystem>;
  using const_ref = std::shared_ptr<Filesystem const>;

  template<typename... Args>
  static ref create(Args&&... args)
  {
    return std::make_shared<Filesystem>(std::forward<Args>(args)...);
  }

  void encode(ceph::buffer::list& bl, uint64_t features) const;
  void decode(ceph::buffer::list::const_iterator& p);

  void dump(ceph::Formatter *f) const;
  void print(std::ostream& out) const;

  bool is_upgradeable() const {
    bool asr = mds_map.allows_standby_replay();
    auto in_mds = mds_map.get_num_in_mds();
    auto up_mds = mds_map.get_num_up_mds();
    return
      /* fs was "down" */
      (in_mds == 0)
      /* max_mds was set to 1; asr must be disabled */
      || (!asr && in_mds == 1)
      /* max_mds any value and all MDS were failed; asr must be disabled */
      || (!asr && up_mds == 0);
  }
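  // Put differently (summary derived from the expression above): the
  // filesystem is considered safe to upgrade when it is down (no ranks "in"),
  // or when standby-replay is disabled and either exactly one rank is "in" or
  // no MDS is "up".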

  /**
   * Return true if a daemon is already assigned as
   * STANDBY_REPLAY for the gid `who`
   */
  bool has_standby_replay(mds_gid_t who) const
  {
    return get_standby_replay(who) != MDS_GID_NONE;
  }
  mds_gid_t get_standby_replay(mds_gid_t who) const;
  bool is_standby_replay(mds_gid_t who) const
  {
    auto p = mds_map.mds_info.find(who);
    if (p != mds_map.mds_info.end() &&
        p->second.state == MDSMap::STATE_STANDBY_REPLAY) {
      return true;
    }
    return false;
  }

  fs_cluster_id_t fscid = FS_CLUSTER_ID_NONE;
  MDSMap mds_map;
  MirrorInfo mirror_info;
};
WRITE_CLASS_ENCODER_FEATURES(Filesystem)

class FSMap {
public:
  friend class MDSMonitor;
  friend class PaxosFSMap;
  using mds_info_t = MDSMap::mds_info_t;

  static const version_t STRUCT_VERSION = 7;
  static const version_t STRUCT_VERSION_TRIM_TO = 7;

  FSMap() : default_compat(MDSMap::get_compat_set_default()) {}

  FSMap(const FSMap &rhs)
    :
      epoch(rhs.epoch),
      next_filesystem_id(rhs.next_filesystem_id),
      legacy_client_fscid(rhs.legacy_client_fscid),
      default_compat(rhs.default_compat),
      enable_multiple(rhs.enable_multiple),
      ever_enabled_multiple(rhs.ever_enabled_multiple),
      mds_roles(rhs.mds_roles),
      standby_daemons(rhs.standby_daemons),
      standby_epochs(rhs.standby_epochs),
      struct_version(rhs.struct_version)
  {
    filesystems.clear();
    for (const auto &i : rhs.filesystems) {
      const auto &fs = i.second;
      filesystems[fs->fscid] = std::make_shared<Filesystem>(*fs);
    }
  }

  FSMap &operator=(const FSMap &rhs);

  const CompatSet &get_default_compat() const {return default_compat;}

  void filter(const std::vector<std::string>& allowed)
  {
    if (allowed.empty()) {
      return;
    }

    erase_if(filesystems, [&](const auto& f) {
      return std::find(allowed.begin(), allowed.end(), f.second->mds_map.get_fs_name()) == allowed.end();
    });

    erase_if(mds_roles, [&](const auto& r) {
      return std::find(allowed.begin(), allowed.end(), fs_name_from_gid(r.first)) == allowed.end();
    });
  }

  void set_enable_multiple(const bool v)
  {
    enable_multiple = v;
    if (true == v) {
      ever_enabled_multiple = true;
    }
  }

  bool get_enable_multiple() const
  {
    return enable_multiple;
  }

  void set_legacy_client_fscid(fs_cluster_id_t fscid)
  {
    ceph_assert(fscid == FS_CLUSTER_ID_NONE || filesystems.count(fscid));
    legacy_client_fscid = fscid;
  }

  fs_cluster_id_t get_legacy_client_fscid() const
  {
    return legacy_client_fscid;
  }

  size_t get_num_standby() const {
    return standby_daemons.size();
  }

  bool is_any_degraded() const;

  /**
   * Get state of all daemons (for all filesystems, including all standbys)
   */
  std::map<mds_gid_t, mds_info_t> get_mds_info() const;

  const mds_info_t* get_available_standby(const Filesystem& fs) const;

  /**
   * Resolve daemon name to GID
   */
  mds_gid_t find_mds_gid_by_name(std::string_view s) const;

  /**
   * Resolve daemon name to status
   */
  const mds_info_t* find_by_name(std::string_view name) const;

  /**
   * Does a daemon exist with this GID?
   */
  bool gid_exists(mds_gid_t gid,
                  const std::vector<std::string>& in = {}) const
  {
    try {
      std::string_view m = fs_name_from_gid(gid);
      return in.empty() || std::find(in.begin(), in.end(), m) != in.end();
    } catch (const std::out_of_range&) {
      return false;
    }
  }

  /**
   * Does a daemon with this GID exist, *and* have an MDS rank assigned?
   */
  bool gid_has_rank(mds_gid_t gid) const
  {
    return gid_exists(gid) && mds_roles.at(gid) != FS_CLUSTER_ID_NONE;
  }

  /**
   * Which filesystem owns this GID?
   */
  fs_cluster_id_t fscid_from_gid(mds_gid_t gid) const {
    if (!gid_exists(gid)) {
      return FS_CLUSTER_ID_NONE;
    }
    return mds_roles.at(gid);
  }

  /**
   * Insert a new MDS daemon, as a standby
   */
  void insert(const MDSMap::mds_info_t &new_info);

  /**
   * Assign an MDS cluster standby replay rank to a standby daemon
   */
  void assign_standby_replay(
      const mds_gid_t standby_gid,
      const fs_cluster_id_t leader_ns,
      const mds_rank_t leader_rank);

  /**
   * Assign an MDS cluster rank to a standby daemon
   */
  void promote(
      mds_gid_t standby_gid,
      Filesystem& filesystem,
      mds_rank_t assigned_rank);

  /**
   * A daemon reports that it is STATE_STOPPED: remove it,
   * and the rank it held.
   *
   * @returns a list of any additional GIDs that were removed from the map
   * as a side effect (like standby replays)
   */
  std::vector<mds_gid_t> stop(mds_gid_t who);

  /**
   * The rank held by 'who', if any, is to be relinquished, and
   * the state for the daemon GID is to be forgotten.
   */
  void erase(mds_gid_t who, epoch_t blocklist_epoch);

  /**
   * Update to indicate that the rank held by 'who' is damaged
   */
  void damaged(mds_gid_t who, epoch_t blocklist_epoch);

  /**
   * Update to indicate that the rank `rank` is to be removed
   * from the damaged list of the filesystem `fscid`
   */
  bool undamaged(const fs_cluster_id_t fscid, const mds_rank_t rank);

  /**
   * Initialize a Filesystem and assign a fscid. Update legacy_client_fscid
   * to point to the new filesystem if it's the only one.
   *
   * Caller must already have validated all arguments vs. the existing
   * FSMap and OSDMap contents.
   */
  Filesystem::ref create_filesystem(
      std::string_view name, int64_t metadata_pool,
      int64_t data_pool, uint64_t features,
      fs_cluster_id_t fscid, bool recover);

  /**
   * Remove the filesystem (it must exist). Caller should already
   * have failed out any MDSs that were assigned to the filesystem.
   */
  void erase_filesystem(fs_cluster_id_t fscid);

  /**
   * Reset all the state information (not configuration information)
   * in a particular filesystem. Caller must have verified that
   * the filesystem already exists.
   */
  void reset_filesystem(fs_cluster_id_t fscid);

  /**
   * Mutator helper for Filesystem objects: expose a non-const
   * Filesystem pointer to `fn` and update epochs appropriately.
   */
  template<typename T>
  void modify_filesystem(fs_cluster_id_t fscid, T&& fn)
  {
    auto& fs = filesystems.at(fscid);
    fn(fs);
    fs->mds_map.epoch = epoch;
  }

  /**
   * Apply a mutation to the mds_info_t structure for a particular
   * daemon (identified by GID), and make appropriate updates to epochs.
   */
  template<typename T>
  void modify_daemon(mds_gid_t who, T&& fn)
  {
    const auto& fscid = mds_roles.at(who);
    if (fscid == FS_CLUSTER_ID_NONE) {
      auto& info = standby_daemons.at(who);
      fn(info);
      ceph_assert(info.state == MDSMap::STATE_STANDBY);
      standby_epochs[who] = epoch;
    } else {
      auto& fs = filesystems.at(fscid);
      auto& info = fs->mds_map.mds_info.at(who);
      fn(info);
      fs->mds_map.epoch = epoch;
    }
  }
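
  // Usage sketch for the two mutator helpers above (illustrative only; the
  // fscid, gid and mutations are made-up examples, shown from a caller's
  // point of view):
  //
  //   fsmap.modify_filesystem(fscid, [](auto&& fs) {
  //     fs->mirror_info.enable_mirroring();  // example mutation
  //   });
  //   fsmap.modify_daemon(gid, [](auto& info) {
  //     info.export_targets.clear();         // example mutation
  //   });
  //
  // Both helpers route the change to the owning MDSMap (or to
  // standby_daemons for unassigned GIDs) and stamp the current epoch.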

  /**
   * Given that gid exists in a filesystem or as a standby, return
   * a reference to its info.
   */
  const mds_info_t& get_info_gid(mds_gid_t gid) const
  {
    auto fscid = mds_roles.at(gid);
    if (fscid == FS_CLUSTER_ID_NONE) {
      return standby_daemons.at(gid);
    } else {
      return filesystems.at(fscid)->mds_map.mds_info.at(gid);
    }
  }

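  // Returns the name of the filesystem that owns `gid`, or an empty view for
  // standby daemons (and for GIDs whose filesystem no longer exists). Looking
  // up an unknown GID throws std::out_of_range from mds_roles.at(), which is
  // what gid_exists() relies on above.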
  std::string_view fs_name_from_gid(mds_gid_t gid) const
  {
    auto fscid = mds_roles.at(gid);
    if (fscid == FS_CLUSTER_ID_NONE or !filesystem_exists(fscid)) {
      return std::string_view();
    } else {
      return get_filesystem(fscid)->mds_map.get_fs_name();
    }
  }

  bool is_standby_replay(mds_gid_t who) const
  {
    return filesystems.at(mds_roles.at(who))->is_standby_replay(who);
  }

  mds_gid_t get_standby_replay(mds_gid_t who) const
  {
    return filesystems.at(mds_roles.at(who))->get_standby_replay(who);
  }

  Filesystem::const_ref get_legacy_filesystem()
  {
    if (legacy_client_fscid == FS_CLUSTER_ID_NONE) {
      return nullptr;
    } else {
      return filesystems.at(legacy_client_fscid);
    }
  }

  /**
   * A daemon has informed us of its offload targets
   */
  void update_export_targets(mds_gid_t who, const std::set<mds_rank_t> &targets)
  {
    auto fscid = mds_roles.at(who);
    modify_filesystem(fscid, [who, &targets](auto&& fs) {
      fs->mds_map.mds_info.at(who).export_targets = targets;
    });
  }

  epoch_t get_epoch() const { return epoch; }
  void inc_epoch() { epoch++; }

  version_t get_struct_version() const { return struct_version; }
  bool is_struct_old() const {
    return struct_version < STRUCT_VERSION_TRIM_TO;
  }

  size_t filesystem_count() const {return filesystems.size();}
  bool filesystem_exists(fs_cluster_id_t fscid) const {return filesystems.count(fscid) > 0;}
  Filesystem::const_ref get_filesystem(fs_cluster_id_t fscid) const {return std::const_pointer_cast<const Filesystem>(filesystems.at(fscid));}
  Filesystem::ref get_filesystem(fs_cluster_id_t fscid) {return filesystems.at(fscid);}
  Filesystem::ref get_filesystem(mds_gid_t gid) {
    return filesystems.at(mds_roles.at(gid));
  }
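  // The no-argument overload below returns the filesystem with the lowest
  // fscid; it dereferences filesystems.begin(), so callers should first check
  // filesystem_count() (or otherwise know the map is non-empty).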
  Filesystem::const_ref get_filesystem(void) const {return std::const_pointer_cast<const Filesystem>(filesystems.begin()->second);}
  Filesystem::const_ref get_filesystem(std::string_view name) const;
  Filesystem::const_ref get_filesystem(mds_gid_t gid) const {
    return filesystems.at(mds_roles.at(gid));
  }

  std::vector<Filesystem::const_ref> get_filesystems(void) const;

  int parse_filesystem(
      std::string_view ns_str,
      Filesystem::const_ref *result
      ) const;

  int parse_role(
      std::string_view role_str,
      mds_role_t *role,
      std::ostream &ss,
      const std::vector<std::string> &filter) const;

  int parse_role(
      std::string_view role_str,
      mds_role_t *role,
      std::ostream &ss) const;

  /**
   * Return true if this pool is in use by any of the filesystems
   */
  bool pool_in_use(int64_t poolid) const;

  const mds_info_t* find_replacement_for(mds_role_t role) const;

  void get_health(std::list<std::pair<health_status_t,std::string> >& summary,
                  std::list<std::pair<health_status_t,std::string> > *detail) const;

  void get_health_checks(health_check_map_t *checks) const;

  bool check_health(void);

  /**
   * Assert that the FSMap, Filesystem, MDSMap, mds_info_t relations are
   * all self-consistent.
   */
  void sanity(bool pending=false) const;

  void encode(ceph::buffer::list& bl, uint64_t features) const;
  void decode(ceph::buffer::list::const_iterator& p);
  void decode(ceph::buffer::list& bl) {
    auto p = bl.cbegin();
    decode(p);
  }
  void sanitize(const std::function<bool(int64_t pool)>& pool_exists);

  void print(std::ostream& out) const;
  void print_summary(ceph::Formatter *f, std::ostream *out) const;
  void print_daemon_summary(std::ostream& out) const;
  void print_fs_summary(std::ostream& out) const;

  void dump(ceph::Formatter *f) const;
  static void generate_test_instances(std::list<FSMap*>& ls);

protected:
  epoch_t epoch = 0;
  uint64_t next_filesystem_id = FS_CLUSTER_ID_ANONYMOUS + 1;
  fs_cluster_id_t legacy_client_fscid = FS_CLUSTER_ID_NONE;
  CompatSet default_compat;
  bool enable_multiple = true;
  bool ever_enabled_multiple = true; // < the cluster had multiple FS enabled once

  std::map<fs_cluster_id_t, Filesystem::ref> filesystems;

  // Remember which Filesystem an MDS daemon's info is stored in
  // (or in standby_daemons for FS_CLUSTER_ID_NONE)
  std::map<mds_gid_t, fs_cluster_id_t> mds_roles;

  // For MDS daemons not yet assigned to a Filesystem
  std::map<mds_gid_t, mds_info_t> standby_daemons;
  std::map<mds_gid_t, epoch_t> standby_epochs;

private:
  epoch_t struct_version = 0;
};
WRITE_CLASS_ENCODER_FEATURES(FSMap)

inline std::ostream& operator<<(std::ostream& out, const FSMap& m) {
  m.print_summary(NULL, &out);
  return out;
}

#endif