1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
23 #include <string_view>
27 #include "include/types.h"
28 #include "common/Clock.h"
29 #include "mds/MDSMap.h"
31 #include "include/CompatSet.h"
32 #include "include/ceph_features.h"
33 #include "include/common_fwd.h"
34 #include "common/Formatter.h"
35 #include "mds/mdstypes.h"
37 class health_check_map_t
;
40 ClusterInfo() = default;
41 ClusterInfo(std::string_view client_name
, std::string_view cluster_name
,
42 std::string_view fs_name
)
43 : client_name(client_name
),
44 cluster_name(cluster_name
),
48 std::string client_name
;
49 std::string cluster_name
;
52 bool operator==(const ClusterInfo
&cluster_info
) const {
53 return client_name
== cluster_info
.client_name
&&
54 cluster_name
== cluster_info
.cluster_name
&&
55 fs_name
== cluster_info
.fs_name
;
58 void dump(ceph::Formatter
*f
) const;
59 void print(std::ostream
& out
) const;
61 void encode(ceph::buffer::list
&bl
) const;
62 void decode(ceph::buffer::list::const_iterator
&iter
);
65 inline std::ostream
& operator<<(std::ostream
& out
, const ClusterInfo
&cluster_info
) {
66 out
<< "{client_name=" << cluster_info
.client_name
<< ", cluster_name="
67 << cluster_info
.cluster_name
<< ", fs_name=" << cluster_info
.fs_name
<< "}";
73 Peer(std::string_view uuid
)
76 Peer(std::string_view uuid
,
77 const ClusterInfo
&remote
)
85 bool operator==(const Peer
&rhs
) const {
86 return uuid
== rhs
.uuid
;
89 bool operator<(const Peer
&rhs
) const {
90 return uuid
< rhs
.uuid
;
93 void dump(ceph::Formatter
*f
) const;
94 void print(std::ostream
& out
) const;
96 void encode(ceph::buffer::list
&bl
) const;
97 void decode(ceph::buffer::list::const_iterator
&iter
);
100 typedef std::set
<Peer
> Peers
;
101 inline std::ostream
& operator<<(std::ostream
& out
, const Peer
&peer
) {
102 out
<< "{uuid=" << peer
.uuid
<< ", remote_cluster=" << peer
.remote
<< "}";
107 MirrorInfo() = default;
109 bool is_mirrored() const {
112 void enable_mirroring() {
115 void disable_mirroring() {
120 // uuid variant check
121 bool has_peer(std::string_view uuid
) const {
122 return peers
.find(Peer(uuid
)) != peers
.end();
124 // client_name/cluster_name/fs_name variant check
125 bool has_peer(std::string_view client_name
,
126 std::string_view cluster_name
,
127 std::string_view fs_name
) const {
128 ClusterInfo
cluster_info(client_name
, cluster_name
, fs_name
);
129 for (auto &peer
: peers
) {
130 if (peer
.remote
== cluster_info
) {
136 bool has_peers() const {
137 return !peers
.empty();
140 void peer_add(std::string_view uuid
,
141 std::string_view client_name
,
142 std::string_view cluster_name
,
143 std::string_view fs_name
) {
144 peers
.emplace(Peer(uuid
, ClusterInfo(client_name
, cluster_name
, fs_name
)));
146 void peer_remove(std::string_view uuid
) {
150 bool mirrored
= false;
153 void dump(ceph::Formatter
*f
) const;
154 void print(std::ostream
& out
) const;
156 void encode(ceph::buffer::list
&bl
) const;
157 void decode(ceph::buffer::list::const_iterator
&iter
);
160 inline std::ostream
& operator<<(std::ostream
& out
, const MirrorInfo
&mirror_info
) {
161 out
<< "{peers=" << mirror_info
.peers
<< "}";
165 WRITE_CLASS_ENCODER(ClusterInfo
)
166 WRITE_CLASS_ENCODER(Peer
)
167 WRITE_CLASS_ENCODER(MirrorInfo
)
170 * The MDSMap and any additional fields describing a particular
171 * filesystem (a unique fs_cluster_id_t).
176 using ref
= std::shared_ptr
<Filesystem
>;
177 using const_ref
= std::shared_ptr
<Filesystem
const>;
179 template<typename
... Args
>
180 static ref
create(Args
&&... args
)
182 return std::make_shared
<Filesystem
>(std::forward
<Args
>(args
)...);
185 void encode(ceph::buffer::list
& bl
, uint64_t features
) const;
186 void decode(ceph::buffer::list::const_iterator
& p
);
188 void dump(ceph::Formatter
*f
) const;
189 void print(std::ostream
& out
) const;
191 bool is_upgradeable() const {
192 return (mds_map
.allows_standby_replay() && mds_map
.get_num_in_mds() == 0)
193 || (!mds_map
.allows_standby_replay() && mds_map
.get_num_in_mds() <= 1);
197 * Return true if a daemon is already assigned as
198 * STANDBY_REPLAY for the gid `who`
200 bool has_standby_replay(mds_gid_t who
) const
202 return get_standby_replay(who
) != MDS_GID_NONE
;
204 mds_gid_t
get_standby_replay(mds_gid_t who
) const;
205 bool is_standby_replay(mds_gid_t who
) const
207 auto p
= mds_map
.mds_info
.find(who
);
208 if (p
!= mds_map
.mds_info
.end() &&
209 p
->second
.state
== MDSMap::STATE_STANDBY_REPLAY
) {
215 fs_cluster_id_t fscid
= FS_CLUSTER_ID_NONE
;
217 MirrorInfo mirror_info
;
219 WRITE_CLASS_ENCODER_FEATURES(Filesystem
)
223 friend class MDSMonitor
;
224 friend class PaxosFSMap
;
225 using mds_info_t
= MDSMap::mds_info_t
;
227 static const version_t STRUCT_VERSION
= 7;
228 static const version_t STRUCT_VERSION_TRIM_TO
= 7;
230 FSMap() : default_compat(MDSMap::get_compat_set_default()) {}
232 FSMap(const FSMap
&rhs
)
235 next_filesystem_id(rhs
.next_filesystem_id
),
236 legacy_client_fscid(rhs
.legacy_client_fscid
),
237 default_compat(rhs
.default_compat
),
238 enable_multiple(rhs
.enable_multiple
),
239 ever_enabled_multiple(rhs
.ever_enabled_multiple
),
240 mds_roles(rhs
.mds_roles
),
241 standby_daemons(rhs
.standby_daemons
),
242 standby_epochs(rhs
.standby_epochs
),
243 struct_version(rhs
.struct_version
)
246 for (const auto &i
: rhs
.filesystems
) {
247 const auto &fs
= i
.second
;
248 filesystems
[fs
->fscid
] = std::make_shared
<Filesystem
>(*fs
);
252 FSMap
&operator=(const FSMap
&rhs
);
254 const CompatSet
&get_default_compat() const {return default_compat
;}
256 void filter(const std::vector
<std::string
>& allowed
)
258 if (allowed
.empty()) {
262 for (auto &f
: filesystems
) {
263 std::string_view fs_name
= f
.second
->mds_map
.get_fs_name();
264 if (std::find(allowed
.begin(), allowed
.end(), fs_name
) == allowed
.end()) {
265 filesystems
.erase(f
.first
);
269 for (auto r
: mds_roles
) {
270 std::string_view fs_name
= fs_name_from_gid(r
.first
);
271 if (std::find(allowed
.begin(), allowed
.end(), fs_name
) == allowed
.end()) {
272 mds_roles
.erase(r
.first
);
277 void set_enable_multiple(const bool v
)
281 ever_enabled_multiple
= true;
285 bool get_enable_multiple() const
287 return enable_multiple
;
290 void set_legacy_client_fscid(fs_cluster_id_t fscid
)
292 ceph_assert(fscid
== FS_CLUSTER_ID_NONE
|| filesystems
.count(fscid
));
293 legacy_client_fscid
= fscid
;
296 fs_cluster_id_t
get_legacy_client_fscid() const
298 return legacy_client_fscid
;
301 size_t get_num_standby() const {
302 return standby_daemons
.size();
305 bool is_any_degraded() const;
308 * Get state of all daemons (for all filesystems, including all standbys)
310 std::map
<mds_gid_t
, mds_info_t
> get_mds_info() const;
312 const mds_info_t
* get_available_standby(const Filesystem
& fs
) const;
315 * Resolve daemon name to GID
317 mds_gid_t
find_mds_gid_by_name(std::string_view s
) const;
320 * Resolve daemon name to status
322 const mds_info_t
* find_by_name(std::string_view name
) const;
325 * Does a daemon exist with this GID?
327 bool gid_exists(mds_gid_t gid
,
328 const std::vector
<std::string
>& in
= {}) const
331 std::string_view m
= fs_name_from_gid(gid
);
332 return in
.empty() || std::find(in
.begin(), in
.end(), m
) != in
.end();
333 } catch (const std::out_of_range
&) {
339 * Does a daemon with this GID exist, *and* have an MDS rank assigned?
341 bool gid_has_rank(mds_gid_t gid
) const
343 return gid_exists(gid
) && mds_roles
.at(gid
) != FS_CLUSTER_ID_NONE
;
347 * Which filesystem owns this GID?
349 fs_cluster_id_t
fscid_from_gid(mds_gid_t gid
) const {
350 if (!gid_exists(gid
)) {
351 return FS_CLUSTER_ID_NONE
;
353 return mds_roles
.at(gid
);
357 * Insert a new MDS daemon, as a standby
359 void insert(const MDSMap::mds_info_t
&new_info
);
362 * Assign an MDS cluster standby replay rank to a standby daemon
364 void assign_standby_replay(
365 const mds_gid_t standby_gid
,
366 const fs_cluster_id_t leader_ns
,
367 const mds_rank_t leader_rank
);
370 * Assign an MDS cluster rank to a standby daemon
373 mds_gid_t standby_gid
,
374 Filesystem
& filesystem
,
375 mds_rank_t assigned_rank
);
378 * A daemon reports that it is STATE_STOPPED: remove it,
379 * and the rank it held.
381 * @returns a list of any additional GIDs that were removed from the map
382 * as a side effect (like standby replays)
384 std::vector
<mds_gid_t
> stop(mds_gid_t who
);
387 * The rank held by 'who', if any, is to be relinquished, and
388 * the state for the daemon GID is to be forgotten.
390 void erase(mds_gid_t who
, epoch_t blocklist_epoch
);
393 * Update to indicate that the rank held by 'who' is damaged
395 void damaged(mds_gid_t who
, epoch_t blocklist_epoch
);
398 * Update to indicate that the rank `rank` is to be removed
399 * from the damaged list of the filesystem `fscid`
401 bool undamaged(const fs_cluster_id_t fscid
, const mds_rank_t rank
);
404 * Initialize a Filesystem and assign a fscid. Update legacy_client_fscid
405 * to point to the new filesystem if it's the only one.
407 * Caller must already have validated all arguments vs. the existing
408 * FSMap and OSDMap contents.
410 Filesystem::ref
create_filesystem(
411 std::string_view name
, int64_t metadata_pool
,
412 int64_t data_pool
, uint64_t features
,
413 fs_cluster_id_t fscid
, bool recover
);
416 * Remove the filesystem (it must exist). Caller should already
417 * have failed out any MDSs that were assigned to the filesystem.
419 void erase_filesystem(fs_cluster_id_t fscid
);
422 * Reset all the state information (not configuration information)
423 * in a particular filesystem. Caller must have verified that
424 * the filesystem already exists.
426 void reset_filesystem(fs_cluster_id_t fscid
);
429 * Mutator helper for Filesystem objects: expose a non-const
430 * Filesystem pointer to `fn` and update epochs appropriately.
433 void modify_filesystem(fs_cluster_id_t fscid
, T
&& fn
)
435 auto& fs
= filesystems
.at(fscid
);
437 fs
->mds_map
.epoch
= epoch
;
441 * Apply a mutation to the mds_info_t structure for a particular
442 * daemon (identified by GID), and make appropriate updates to epochs.
445 void modify_daemon(mds_gid_t who
, T
&& fn
)
447 const auto& fscid
= mds_roles
.at(who
);
448 if (fscid
== FS_CLUSTER_ID_NONE
) {
449 auto& info
= standby_daemons
.at(who
);
451 ceph_assert(info
.state
== MDSMap::STATE_STANDBY
);
452 standby_epochs
[who
] = epoch
;
454 auto& fs
= filesystems
.at(fscid
);
455 auto& info
= fs
->mds_map
.mds_info
.at(who
);
457 fs
->mds_map
.epoch
= epoch
;
462 * Given that gid exists in a filesystem or as a standby, return
463 * a reference to its info.
465 const mds_info_t
& get_info_gid(mds_gid_t gid
) const
467 auto fscid
= mds_roles
.at(gid
);
468 if (fscid
== FS_CLUSTER_ID_NONE
) {
469 return standby_daemons
.at(gid
);
471 return filesystems
.at(fscid
)->mds_map
.mds_info
.at(gid
);
475 std::string_view
fs_name_from_gid(mds_gid_t gid
) const
477 auto fscid
= mds_roles
.at(gid
);
478 if (fscid
== FS_CLUSTER_ID_NONE
or !filesystem_exists(fscid
)) {
479 return std::string_view();
481 return get_filesystem(fscid
)->mds_map
.get_fs_name();
485 bool is_standby_replay(mds_gid_t who
) const
487 return filesystems
.at(mds_roles
.at(who
))->is_standby_replay(who
);
490 mds_gid_t
get_standby_replay(mds_gid_t who
) const
492 return filesystems
.at(mds_roles
.at(who
))->get_standby_replay(who
);
495 Filesystem::const_ref
get_legacy_filesystem()
497 if (legacy_client_fscid
== FS_CLUSTER_ID_NONE
) {
500 return filesystems
.at(legacy_client_fscid
);
505 * A daemon has informed us of its offload targets
507 void update_export_targets(mds_gid_t who
, const std::set
<mds_rank_t
> &targets
)
509 auto fscid
= mds_roles
.at(who
);
510 modify_filesystem(fscid
, [who
, &targets
](auto&& fs
) {
511 fs
->mds_map
.mds_info
.at(who
).export_targets
= targets
;
515 epoch_t
get_epoch() const { return epoch
; }
516 void inc_epoch() { epoch
++; }
518 version_t
get_struct_version() const { return struct_version
; }
519 bool is_struct_old() const {
520 return struct_version
< STRUCT_VERSION_TRIM_TO
;
523 size_t filesystem_count() const {return filesystems
.size();}
524 bool filesystem_exists(fs_cluster_id_t fscid
) const {return filesystems
.count(fscid
) > 0;}
525 Filesystem::const_ref
get_filesystem(fs_cluster_id_t fscid
) const {return std::const_pointer_cast
<const Filesystem
>(filesystems
.at(fscid
));}
526 Filesystem::ref
get_filesystem(fs_cluster_id_t fscid
) {return filesystems
.at(fscid
);}
527 Filesystem::ref
get_filesystem(mds_gid_t gid
) {
528 return filesystems
.at(mds_roles
.at(gid
));
530 Filesystem::const_ref
get_filesystem(void) const {return std::const_pointer_cast
<const Filesystem
>(filesystems
.begin()->second
);}
531 Filesystem::const_ref
get_filesystem(std::string_view name
) const;
532 Filesystem::const_ref
get_filesystem(mds_gid_t gid
) const {
533 return filesystems
.at(mds_roles
.at(gid
));
536 std::vector
<Filesystem::const_ref
> get_filesystems(void) const;
538 int parse_filesystem(
539 std::string_view ns_str
,
540 Filesystem::const_ref
*result
544 std::string_view role_str
,
547 const std::vector
<std::string
> &filter
) const;
550 std::string_view role_str
,
552 std::ostream
&ss
) const;
555 * Return true if this pool is in use by any of the filesystems
557 bool pool_in_use(int64_t poolid
) const;
559 const mds_info_t
* find_replacement_for(mds_role_t role
) const;
561 void get_health(std::list
<std::pair
<health_status_t
,std::string
> >& summary
,
562 std::list
<std::pair
<health_status_t
,std::string
> > *detail
) const;
564 void get_health_checks(health_check_map_t
*checks
) const;
566 bool check_health(void);
569 * Assert that the FSMap, Filesystem, MDSMap, mds_info_t relations are
570 * all self-consistent.
572 void sanity(bool pending
=false) const;
574 void encode(ceph::buffer::list
& bl
, uint64_t features
) const;
575 void decode(ceph::buffer::list::const_iterator
& p
);
576 void decode(ceph::buffer::list
& bl
) {
577 auto p
= bl
.cbegin();
580 void sanitize(const std::function
<bool(int64_t pool
)>& pool_exists
);
582 void print(std::ostream
& out
) const;
583 void print_summary(ceph::Formatter
*f
, std::ostream
*out
) const;
584 void print_daemon_summary(std::ostream
& out
) const;
585 void print_fs_summary(std::ostream
& out
) const;
587 void dump(ceph::Formatter
*f
) const;
588 static void generate_test_instances(std::list
<FSMap
*>& ls
);
592 uint64_t next_filesystem_id
= FS_CLUSTER_ID_ANONYMOUS
+ 1;
593 fs_cluster_id_t legacy_client_fscid
= FS_CLUSTER_ID_NONE
;
594 CompatSet default_compat
;
595 bool enable_multiple
= true;
596 bool ever_enabled_multiple
= true; // < the cluster had multiple FS enabled once
598 std::map
<fs_cluster_id_t
, Filesystem::ref
> filesystems
;
600 // Remember which Filesystem an MDS daemon's info is stored in
601 // (or in standby_daemons for FS_CLUSTER_ID_NONE)
602 std::map
<mds_gid_t
, fs_cluster_id_t
> mds_roles
;
604 // For MDS daemons not yet assigned to a Filesystem
605 std::map
<mds_gid_t
, mds_info_t
> standby_daemons
;
606 std::map
<mds_gid_t
, epoch_t
> standby_epochs
;
609 epoch_t struct_version
= 0;
611 WRITE_CLASS_ENCODER_FEATURES(FSMap
)
613 inline std::ostream
& operator<<(std::ostream
& out
, const FSMap
& m
) {
614 m
.print_summary(NULL
, &out
);