1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
23 #include <string_view>
27 #include "include/types.h"
28 #include "common/Clock.h"
29 #include "mds/MDSMap.h"
31 #include "include/CompatSet.h"
32 #include "include/ceph_features.h"
33 #include "include/common_fwd.h"
34 #include "common/Formatter.h"
35 #include "mds/mdstypes.h"
37 #if __cplusplus <= 201703L
// erase_if overload for std::map (only built under the __cplusplus <= 201703L
// guard that precedes it). Takes the map by non-const reference and the
// predicate by value; the return type is the map's size_type.
38 template<class Key
, class T
, class Compare
, class Alloc
, class Pred
>
39 typename
std::map
<Key
, T
, Compare
, Alloc
>::size_type
40 erase_if(std::map
<Key
, T
, Compare
, Alloc
>& c
, Pred pred
) {
// Element count before anything is removed.
41 auto old_size
= c
.size();
// The for-header has no increment clause, so the iterator has to be advanced
// inside the loop body (the body is not visible in this listing).
42 for (auto i
= c
.begin(), last
= c
.end(); i
!= last
; ) {
// Number of elements removed: size before minus size after.
49 return old_size
- c
.size();
53 class health_check_map_t
;
56 ClusterInfo() = default;
// Construct from three string views. The visible initializers copy
// client_name and cluster_name into the members of the same name; the rest
// of the initializer list and the body are not visible in this listing.
57 ClusterInfo(std::string_view client_name
, std::string_view cluster_name
,
58 std::string_view fs_name
)
59 : client_name(client_name
),
60 cluster_name(cluster_name
),
64 std::string client_name
;
65 std::string cluster_name
;
// Equality: true only when client_name, cluster_name and fs_name all compare
// equal to the corresponding fields of cluster_info.
68 bool operator==(const ClusterInfo
&cluster_info
) const {
69 return client_name
== cluster_info
.client_name
&&
70 cluster_name
== cluster_info
.cluster_name
&&
71 fs_name
== cluster_info
.fs_name
;
// Declarations only (no bodies here).
// dump/print take a ceph::Formatter* and a std::ostream& respectively and do
// not modify the object (const).
74 void dump(ceph::Formatter
*f
) const;
75 void print(std::ostream
& out
) const;
// encode writes into a ceph buffer list (const); decode reads through a
// buffer-list const_iterator passed by reference.
77 void encode(ceph::buffer::list
&bl
) const;
78 void decode(ceph::buffer::list::const_iterator
&iter
);
// Stream a ClusterInfo in the form
//   {client_name=<...>, cluster_name=<...>, fs_name=<...>}
81 inline std::ostream
& operator<<(std::ostream
& out
, const ClusterInfo
&cluster_info
) {
82 out
<< "{client_name=" << cluster_info
.client_name
<< ", cluster_name="
83 << cluster_info
.cluster_name
<< ", fs_name=" << cluster_info
.fs_name
<< "}";
// Construct a Peer from its uuid alone (initializer list and body are not
// visible in this listing).
89 Peer(std::string_view uuid
)
// Construct a Peer from its uuid plus the ClusterInfo of the remote side
// (initializer list and body are not visible in this listing).
92 Peer(std::string_view uuid
,
93 const ClusterInfo
&remote
)
// Equality is decided by uuid alone; no other field is compared.
101 bool operator==(const Peer
&rhs
) const {
102 return uuid
== rhs
.uuid
;
// Strict ordering by uuid alone. This is the comparison std::set<Peer>
// (the Peers typedef) uses by default.
105 bool operator<(const Peer
&rhs
) const {
106 return uuid
< rhs
.uuid
;
// Declarations only (no bodies here).
// dump/print take a ceph::Formatter* and a std::ostream& respectively and do
// not modify the object (const).
109 void dump(ceph::Formatter
*f
) const;
110 void print(std::ostream
& out
) const;
// encode writes into a ceph buffer list (const); decode reads through a
// buffer-list const_iterator passed by reference.
112 void encode(ceph::buffer::list
&bl
) const;
113 void decode(ceph::buffer::list::const_iterator
&iter
);
116 typedef std::set
<Peer
> Peers
;
// Stream a Peer in the form {uuid=<...>, remote_cluster=<...>}; the remote
// part is streamed through the ClusterInfo operator<<.
117 inline std::ostream
& operator<<(std::ostream
& out
, const Peer
&peer
) {
118 out
<< "{uuid=" << peer
.uuid
<< ", remote_cluster=" << peer
.remote
<< "}";
123 MirrorInfo() = default;
// NOTE(review): the bodies of these three members are not visible in this
// listing. Presumably they read / set / clear the `mirrored` flag declared
// further down -- confirm against the full header.
// Query (const).
125 bool is_mirrored() const {
// Turn mirroring on.
128 void enable_mirroring() {
// Turn mirroring off.
131 void disable_mirroring() {
136 // uuid variant check
// Looks up a Peer built from the uuid alone. That is sufficient as a search
// key because Peer::operator< compares uuids only.
137 bool has_peer(std::string_view uuid
) const {
138 return peers
.find(Peer(uuid
)) != peers
.end();
140 // client_name/cluster_name/fs_name variant check
// Builds a ClusterInfo from the three arguments and scans peers linearly,
// comparing each peer's `remote` against it with ClusterInfo::operator==.
// The return statements are not visible in this listing.
141 bool has_peer(std::string_view client_name
,
142 std::string_view cluster_name
,
143 std::string_view fs_name
) const {
144 ClusterInfo
cluster_info(client_name
, cluster_name
, fs_name
);
145 for (auto &peer
: peers
) {
146 if (peer
.remote
== cluster_info
) {
// True when at least one peer is registered.
152 bool has_peers() const {
153 return !peers
.empty();
// Register a peer: builds Peer(uuid, ClusterInfo(client_name, cluster_name,
// fs_name)) and emplaces it into peers.
156 void peer_add(std::string_view uuid
,
157 std::string_view client_name
,
158 std::string_view cluster_name
,
159 std::string_view fs_name
) {
160 peers
.emplace(Peer(uuid
, ClusterInfo(client_name
, cluster_name
, fs_name
)));
// NOTE(review): body not visible in this listing; presumably removes the
// peer identified by uuid from peers -- confirm against the full header.
162 void peer_remove(std::string_view uuid
) {
166 bool mirrored
= false;
// Declarations only (no bodies here).
// dump/print take a ceph::Formatter* and a std::ostream& respectively and do
// not modify the object (const).
169 void dump(ceph::Formatter
*f
) const;
170 void print(std::ostream
& out
) const;
// encode writes into a ceph buffer list (const); decode reads through a
// buffer-list const_iterator passed by reference.
172 void encode(ceph::buffer::list
&bl
) const;
173 void decode(ceph::buffer::list::const_iterator
&iter
);
// Stream a MirrorInfo in the form {peers=<...>}. Only the peers member is
// written by the visible statement.
176 inline std::ostream
& operator<<(std::ostream
& out
, const MirrorInfo
&mirror_info
) {
177 out
<< "{peers=" << mirror_info
.peers
<< "}";
181 WRITE_CLASS_ENCODER(ClusterInfo
)
182 WRITE_CLASS_ENCODER(Peer
)
183 WRITE_CLASS_ENCODER(MirrorInfo
)
186 * The MDSMap and any additional fields describing a particular
187 * filesystem (a unique fs_cluster_id_t).
192 using ref
= std::shared_ptr
<Filesystem
>;
193 using const_ref
= std::shared_ptr
<Filesystem
const>;
// Variadic factory: perfect-forwards every argument to a Filesystem
// constructor through std::make_shared and hands back the result as `ref`
// (std::shared_ptr<Filesystem>).
195 template<typename
... Args
>
196 static ref
create(Args
&&... args
)
198 return std::make_shared
<Filesystem
>(std::forward
<Args
>(args
)...);
// Declarations only (no bodies here).
// encode additionally takes a 64-bit `features` value; decode reads through
// a buffer-list const_iterator passed by reference.
201 void encode(ceph::buffer::list
& bl
, uint64_t features
) const;
202 void decode(ceph::buffer::list::const_iterator
& p
);
// dump/print take a ceph::Formatter* and a std::ostream& respectively and do
// not modify the object (const).
204 void dump(ceph::Formatter
*f
) const;
205 void print(std::ostream
& out
) const;
// Decide from mds_map whether this filesystem is in an upgradeable state.
// Only the tail of the returned boolean expression is visible in this
// listing; both visible alternatives require standby-replay to be disallowed.
207 bool is_upgradeable() const {
// asr: whether the MDSMap allows standby-replay daemons.
208 bool asr
= mds_map
.allows_standby_replay();
// Counts of "in" and "up" MDS daemons as reported by the MDSMap.
209 auto in_mds
= mds_map
.get_num_in_mds();
210 auto up_mds
= mds_map
.get_num_up_mds();
214 /* max_mds was set to 1; asr must be disabled */
215 || (!asr
&& in_mds
== 1)
216 /* max_mds any value and all MDS were failed; asr must be disabled */
217 || (!asr
&& up_mds
== 0);
221 * Return true if a daemon is already assigned as
222 * STANDBY_REPLAY for the gid `who`
// True when get_standby_replay(who) yields a gid other than MDS_GID_NONE.
224 bool has_standby_replay(mds_gid_t who
) const
226 return get_standby_replay(who
) != MDS_GID_NONE
;
228 mds_gid_t
get_standby_replay(mds_gid_t who
) const;
// Looks `who` up in mds_map.mds_info and tests whether the entry exists and
// its state is MDSMap::STATE_STANDBY_REPLAY. The return statements are not
// visible in this listing.
229 bool is_standby_replay(mds_gid_t who
) const
231 auto p
= mds_map
.mds_info
.find(who
);
// find() instead of at(): an unknown gid is a normal outcome, not an error.
232 if (p
!= mds_map
.mds_info
.end() &&
233 p
->second
.state
== MDSMap::STATE_STANDBY_REPLAY
) {
239 fs_cluster_id_t fscid
= FS_CLUSTER_ID_NONE
;
241 MirrorInfo mirror_info
;
243 WRITE_CLASS_ENCODER_FEATURES(Filesystem
)
247 friend class MDSMonitor
;
248 friend class PaxosFSMap
;
249 using mds_info_t
= MDSMap::mds_info_t
;
251 static const version_t STRUCT_VERSION
= 7;
252 static const version_t STRUCT_VERSION_TRIM_TO
= 7;
254 FSMap() : default_compat(MDSMap::get_compat_set_default()) {}
// Copy constructor. The scalar and map members shown below are copied
// member-wise (the first lines of the initializer list are not visible in
// this listing). Filesystems are deep-copied rather than shared.
256 FSMap(const FSMap
&rhs
)
259 next_filesystem_id(rhs
.next_filesystem_id
),
260 legacy_client_fscid(rhs
.legacy_client_fscid
),
261 default_compat(rhs
.default_compat
),
262 enable_multiple(rhs
.enable_multiple
),
263 ever_enabled_multiple(rhs
.ever_enabled_multiple
),
264 mds_roles(rhs
.mds_roles
),
265 standby_daemons(rhs
.standby_daemons
),
266 standby_epochs(rhs
.standby_epochs
),
267 struct_version(rhs
.struct_version
)
// Clone each Filesystem into a fresh shared_ptr, keyed by its own fscid, so
// the copy does not alias rhs's Filesystem objects.
270 for (const auto &i
: rhs
.filesystems
) {
271 const auto &fs
= i
.second
;
272 filesystems
[fs
->fscid
] = std::make_shared
<Filesystem
>(*fs
);
276 FSMap
&operator=(const FSMap
&rhs
);
278 const CompatSet
&get_default_compat() const {return default_compat
;}
// Restrict the map to the filesystems named in `allowed`.
// NOTE(review): the body of the `allowed.empty()` branch is not visible in
// this listing; presumably an early return (no filtering) -- confirm.
280 void filter(const std::vector
<std::string
>& allowed
)
282 if (allowed
.empty()) {
// Drop every filesystem whose name is NOT found in `allowed`.
286 erase_if(filesystems
, [&](const auto& f
) {
287 return std::find(allowed
.begin(), allowed
.end(), f
.second
->mds_map
.get_fs_name()) == allowed
.end();
// Drop every gid->fscid role whose filesystem name is NOT found in `allowed`.
// fs_name_from_gid() yields an empty string_view for standbys and for
// filesystems that no longer exist.
290 erase_if(mds_roles
, [&](const auto& r
) {
291 return std::find(allowed
.begin(), allowed
.end(), fs_name_from_gid(r
.first
)) == allowed
.end();
// Setter for the enable_multiple flag.
// NOTE(review): only the assignment to ever_enabled_multiple is visible in
// this listing; the assignment of `v` and any condition guarding the line
// below are not shown -- confirm against the full header.
295 void set_enable_multiple(const bool v
)
299 ever_enabled_multiple
= true;
// Current value of the enable_multiple flag.
303 bool get_enable_multiple() const
305 return enable_multiple
;
// Record which filesystem legacy clients are pointed at.
// Precondition (asserted): fscid is FS_CLUSTER_ID_NONE or the id of a
// filesystem present in `filesystems`.
308 void set_legacy_client_fscid(fs_cluster_id_t fscid
)
310 ceph_assert(fscid
== FS_CLUSTER_ID_NONE
|| filesystems
.count(fscid
));
311 legacy_client_fscid
= fscid
;
// Current legacy-client filesystem id (FS_CLUSTER_ID_NONE is its in-class
// default).
314 fs_cluster_id_t
get_legacy_client_fscid() const
316 return legacy_client_fscid
;
// Number of entries in standby_daemons.
319 size_t get_num_standby() const {
320 return standby_daemons
.size();
323 bool is_any_degraded() const;
326 * Get state of all daemons (for all filesystems, including all standbys)
328 std::map
<mds_gid_t
, mds_info_t
> get_mds_info() const;
330 const mds_info_t
* get_available_standby(const Filesystem
& fs
) const;
333 * Resolve daemon name to GID
335 mds_gid_t
find_mds_gid_by_name(std::string_view s
) const;
338 * Resolve daemon name to status
340 const mds_info_t
* find_by_name(std::string_view name
) const;
343 * Does a daemon exist with this GID?
// Is `gid` known to this map, optionally restricted to the filesystem names
// listed in `in` (an empty `in` means "no restriction")?
345 bool gid_exists(mds_gid_t gid
,
346 const std::vector
<std::string
>& in
= {}) const
// fs_name_from_gid() looks the gid up with mds_roles.at(), which throws
// std::out_of_range for an unknown gid; that is what the catch below handles.
349 std::string_view m
= fs_name_from_gid(gid
);
350 return in
.empty() || std::find(in
.begin(), in
.end(), m
) != in
.end();
// Handler body is not visible in this listing.
351 } catch (const std::out_of_range
&) {
357 * Does a daemon with this GID exist, *and* have an MDS rank assigned?
// True when the gid exists and its recorded role is a real filesystem id
// (anything other than FS_CLUSTER_ID_NONE). The gid_exists() check comes
// first, so mds_roles.at() is only reached for known gids.
359 bool gid_has_rank(mds_gid_t gid
) const
361 return gid_exists(gid
) && mds_roles
.at(gid
) != FS_CLUSTER_ID_NONE
;
365 * Which filesystem owns this GID?
// Filesystem id recorded for `gid` in mds_roles; FS_CLUSTER_ID_NONE when
// gid_exists() reports the gid as unknown.
367 fs_cluster_id_t
fscid_from_gid(mds_gid_t gid
) const {
368 if (!gid_exists(gid
)) {
369 return FS_CLUSTER_ID_NONE
;
371 return mds_roles
.at(gid
);
375 * Insert a new MDS daemon, as a standby
377 void insert(const MDSMap::mds_info_t
&new_info
);
380 * Assign an MDS cluster standby replay rank to a standby daemon
382 void assign_standby_replay(
383 const mds_gid_t standby_gid
,
384 const fs_cluster_id_t leader_ns
,
385 const mds_rank_t leader_rank
);
388 * Assign an MDS cluster rank to a standby daemon
391 mds_gid_t standby_gid
,
392 Filesystem
& filesystem
,
393 mds_rank_t assigned_rank
);
396 * A daemon reports that it is STATE_STOPPED: remove it,
397 * and the rank it held.
399 * @returns a list of any additional GIDs that were removed from the map
400 * as a side effect (like standby replays)
402 std::vector
<mds_gid_t
> stop(mds_gid_t who
);
405 * The rank held by 'who', if any, is to be relinquished, and
406 * the state for the daemon GID is to be forgotten.
408 void erase(mds_gid_t who
, epoch_t blocklist_epoch
);
411 * Update to indicate that the rank held by 'who' is damaged
413 void damaged(mds_gid_t who
, epoch_t blocklist_epoch
);
416 * Update to indicate that the rank `rank` is to be removed
417 * from the damaged list of the filesystem `fscid`
419 bool undamaged(const fs_cluster_id_t fscid
, const mds_rank_t rank
);
422 * Initialize a Filesystem and assign a fscid. Update legacy_client_fscid
423 * to point to the new filesystem if it's the only one.
425 * Caller must already have validated all arguments vs. the existing
426 * FSMap and OSDMap contents.
428 Filesystem::ref
create_filesystem(
429 std::string_view name
, int64_t metadata_pool
,
430 int64_t data_pool
, uint64_t features
,
431 fs_cluster_id_t fscid
, bool recover
);
434 * Remove the filesystem (it must exist). Caller should already
435 * have failed out any MDSs that were assigned to the filesystem.
437 void erase_filesystem(fs_cluster_id_t fscid
);
440 * Reset all the state information (not configuration information)
441 * in a particular filesystem. Caller must have verified that
442 * the filesystem already exists.
444 void reset_filesystem(fs_cluster_id_t fscid
);
447 * Mutator helper for Filesystem objects: expose a non-const
448 * Filesystem pointer to `fn` and update epochs appropriately.
// Mutate one filesystem through the callable `fn`, then stamp the
// filesystem's MDSMap with the FSMap's current epoch.
// filesystems.at() throws std::out_of_range when fscid is unknown.
// (The template header and the call to fn are not visible in this listing.)
451 void modify_filesystem(fs_cluster_id_t fscid
, T
&& fn
)
453 auto& fs
= filesystems
.at(fscid
);
455 fs
->mds_map
.epoch
= epoch
;
459 * Apply a mutation to the mds_info_t structure for a particular
460 * daemon (identified by GID), and make appropriate updates to epochs.
// Mutate the mds_info_t of daemon `who` through the callable `fn` and record
// the current epoch in the appropriate place.
// (The template header and the calls to fn are not visible in this listing.)
463 void modify_daemon(mds_gid_t who
, T
&& fn
)
// mds_roles.at() throws std::out_of_range for an unknown gid.
465 const auto& fscid
= mds_roles
.at(who
);
// Standby path: the info lives in standby_daemons; the daemon must be in
// STATE_STANDBY, and its entry in standby_epochs is stamped.
466 if (fscid
== FS_CLUSTER_ID_NONE
) {
467 auto& info
= standby_daemons
.at(who
);
469 ceph_assert(info
.state
== MDSMap::STATE_STANDBY
);
470 standby_epochs
[who
] = epoch
;
// Assigned path: the info lives in the owning filesystem's MDSMap, whose
// epoch is stamped instead.
472 auto& fs
= filesystems
.at(fscid
);
473 auto& info
= fs
->mds_map
.mds_info
.at(who
);
475 fs
->mds_map
.epoch
= epoch
;
480 * Given that gid exists in a filesystem or as a standby, return
481 * a reference to its info.
// Info record for `gid`: taken from standby_daemons when its role is
// FS_CLUSTER_ID_NONE, otherwise from the owning filesystem's MDSMap.
// Every lookup uses at(), so an unknown gid or fscid raises
// std::out_of_range.
483 const mds_info_t
& get_info_gid(mds_gid_t gid
) const
485 auto fscid
= mds_roles
.at(gid
);
486 if (fscid
== FS_CLUSTER_ID_NONE
) {
487 return standby_daemons
.at(gid
);
489 return filesystems
.at(fscid
)->mds_map
.mds_info
.at(gid
);
// Name of the filesystem that `gid` is assigned to.
// Returns an empty string_view when the gid is a standby
// (FS_CLUSTER_ID_NONE) or when its fscid no longer names an existing
// filesystem. Throws std::out_of_range (from mds_roles.at) for an unknown gid.
493 std::string_view
fs_name_from_gid(mds_gid_t gid
) const
495 auto fscid
= mds_roles
.at(gid
);
496 if (fscid
== FS_CLUSTER_ID_NONE
or !filesystem_exists(fscid
)) {
497 return std::string_view();
499 return get_filesystem(fscid
)->mds_map
.get_fs_name();
// Forwards to Filesystem::is_standby_replay() on the filesystem recorded for
// `who`. Both lookups use at(), so an unknown gid or fscid raises
// std::out_of_range.
503 bool is_standby_replay(mds_gid_t who
) const
505 return filesystems
.at(mds_roles
.at(who
))->is_standby_replay(who
);
// Forwards to Filesystem::get_standby_replay() on the filesystem recorded
// for `who`. Both lookups use at(), so an unknown gid or fscid raises
// std::out_of_range.
508 mds_gid_t
get_standby_replay(mds_gid_t who
) const
510 return filesystems
.at(mds_roles
.at(who
))->get_standby_replay(who
);
// Filesystem that legacy clients are pointed at.
// NOTE(review): the body of the FS_CLUSTER_ID_NONE branch is not visible in
// this listing -- confirm what is returned when no legacy fscid is set.
513 Filesystem::const_ref
get_legacy_filesystem()
515 if (legacy_client_fscid
== FS_CLUSTER_ID_NONE
) {
// at() throws std::out_of_range if the recorded id has no filesystem.
518 return filesystems
.at(legacy_client_fscid
);
523 * A daemon has informed us of its offload targets
// Replace the export_targets set stored for daemon `who`.
// Goes through modify_filesystem(), which also stamps the filesystem's
// MDSMap epoch. mds_roles.at() throws std::out_of_range for an unknown gid.
525 void update_export_targets(mds_gid_t who
, const std::set
<mds_rank_t
> &targets
)
527 auto fscid
= mds_roles
.at(who
);
// `who` is captured by value, `targets` by reference.
528 modify_filesystem(fscid
, [who
, &targets
](auto&& fs
) {
529 fs
->mds_map
.mds_info
.at(who
).export_targets
= targets
;
533 epoch_t
get_epoch() const { return epoch
; }
534 void inc_epoch() { epoch
++; }
536 version_t
get_struct_version() const { return struct_version
; }
// True when struct_version is below STRUCT_VERSION_TRIM_TO.
537 bool is_struct_old() const {
538 return struct_version
< STRUCT_VERSION_TRIM_TO
;
541 size_t filesystem_count() const {return filesystems
.size();}
542 bool filesystem_exists(fs_cluster_id_t fscid
) const {return filesystems
.count(fscid
) > 0;}
543 Filesystem::const_ref
get_filesystem(fs_cluster_id_t fscid
) const {return std::const_pointer_cast
<const Filesystem
>(filesystems
.at(fscid
));}
544 Filesystem::ref
get_filesystem(fs_cluster_id_t fscid
) {return filesystems
.at(fscid
);}
// Mutable handle to the filesystem that daemon `gid` is recorded under.
// Both lookups use at(), so an unknown gid or fscid raises
// std::out_of_range.
545 Filesystem::ref
get_filesystem(mds_gid_t gid
) {
546 return filesystems
.at(mds_roles
.at(gid
));
548 Filesystem::const_ref
get_filesystem(void) const {return std::const_pointer_cast
<const Filesystem
>(filesystems
.begin()->second
);}
549 Filesystem::const_ref
get_filesystem(std::string_view name
) const;
// Const handle to the filesystem that daemon `gid` is recorded under.
// Both lookups use at(), so an unknown gid or fscid raises
// std::out_of_range.
550 Filesystem::const_ref
get_filesystem(mds_gid_t gid
) const {
551 return filesystems
.at(mds_roles
.at(gid
));
554 std::vector
<Filesystem::const_ref
> get_filesystems(void) const;
556 int parse_filesystem(
557 std::string_view ns_str
,
558 Filesystem::const_ref
*result
562 std::string_view role_str
,
565 const std::vector
<std::string
> &filter
) const;
568 std::string_view role_str
,
570 std::ostream
&ss
) const;
573 * Return true if this pool is in use by any of the filesystems
575 bool pool_in_use(int64_t poolid
) const;
577 const mds_info_t
* find_replacement_for(mds_role_t role
) const;
579 void get_health(std::list
<std::pair
<health_status_t
,std::string
> >& summary
,
580 std::list
<std::pair
<health_status_t
,std::string
> > *detail
) const;
582 void get_health_checks(health_check_map_t
*checks
) const;
584 bool check_health(void);
587 * Assert that the FSMap, Filesystem, MDSMap, mds_info_t relations are
588 * all self-consistent.
590 void sanity(bool pending
=false) const;
592 void encode(ceph::buffer::list
& bl
, uint64_t features
) const;
593 void decode(ceph::buffer::list::const_iterator
& p
);
// Convenience overload taking a whole buffer list: obtains a const iterator
// at the start of `bl`.
// NOTE(review): the rest of the body is not visible in this listing;
// presumably it forwards the iterator to decode(const_iterator&) -- confirm.
594 void decode(ceph::buffer::list
& bl
) {
595 auto p
= bl
.cbegin();
598 void sanitize(const std::function
<bool(int64_t pool
)>& pool_exists
);
600 void print(std::ostream
& out
) const;
601 void print_summary(ceph::Formatter
*f
, std::ostream
*out
) const;
602 void print_daemon_summary(std::ostream
& out
) const;
603 void print_fs_summary(std::ostream
& out
) const;
605 void dump(ceph::Formatter
*f
) const;
606 static void generate_test_instances(std::list
<FSMap
*>& ls
);
// Starts one past FS_CLUSTER_ID_ANONYMOUS.
610 uint64_t next_filesystem_id
= FS_CLUSTER_ID_ANONYMOUS
+ 1;
// Filesystem id for legacy clients; FS_CLUSTER_ID_NONE means "not set".
611 fs_cluster_id_t legacy_client_fscid
= FS_CLUSTER_ID_NONE
;
// Seeded from MDSMap::get_compat_set_default() by the default constructor.
612 CompatSet default_compat
;
613 bool enable_multiple
= true;
614 bool ever_enabled_multiple
= true; ///< the cluster had multiple FS enabled once
// All filesystems, keyed by fscid (ordered map: begin() is the lowest id).
616 std::map
<fs_cluster_id_t
, Filesystem::ref
> filesystems
;
618 // Remember which Filesystem an MDS daemon's info is stored in
619 // (or in standby_daemons for FS_CLUSTER_ID_NONE)
620 std::map
<mds_gid_t
, fs_cluster_id_t
> mds_roles
;
622 // For MDS daemons not yet assigned to a Filesystem
623 std::map
<mds_gid_t
, mds_info_t
> standby_daemons
;
// Epoch recorded per standby gid; written by modify_daemon() on its standby
// path.
624 std::map
<mds_gid_t
, epoch_t
> standby_epochs
;
// NOTE(review): declared as epoch_t, while get_struct_version() returns
// version_t and it is compared against the version_t constant
// STRUCT_VERSION_TRIM_TO -- confirm whether the type should be version_t.
627 epoch_t struct_version
= 0;
629 WRITE_CLASS_ENCODER_FEATURES(FSMap
)
// Stream an FSMap by calling print_summary() with no Formatter (NULL) and
// the address of `out` as the text sink. The remainder of the body is not
// visible in this listing.
631 inline std::ostream
& operator<<(std::ostream
& out
, const FSMap
& m
) {
632 m
.print_summary(NULL
, &out
);