// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 *
 */
21 #include "include/types.h"
22 #include "common/Clock.h"
23 #include "msg/Message.h"
24 #include "mds/MDSMap.h"
30 #include "common/config.h"
32 #include "include/CompatSet.h"
33 #include "include/ceph_features.h"
34 #include "common/Formatter.h"
35 #include "mds/mdstypes.h"
// Incompat feature entries for the MDS.  Each one pairs a numeric feature id
// with a human-readable description, wrapped in a CompatSet::Feature.
#define MDS_FEATURE_INCOMPAT_BASE          CompatSet::Feature(1, "base v0.20")
#define MDS_FEATURE_INCOMPAT_CLIENTRANGES  CompatSet::Feature(2, "client writeable ranges")
#define MDS_FEATURE_INCOMPAT_FILELAYOUT    CompatSet::Feature(3, "default file layouts on dirs")
#define MDS_FEATURE_INCOMPAT_DIRINODE      CompatSet::Feature(4, "dir inode in separate object")
#define MDS_FEATURE_INCOMPAT_ENCODING      CompatSet::Feature(5, "mds uses versioned encoding")
#define MDS_FEATURE_INCOMPAT_OMAPDIRFRAG   CompatSet::Feature(6, "dirfrag is stored in omap")
#define MDS_FEATURE_INCOMPAT_INLINE        CompatSet::Feature(7, "mds uses inline data")
#define MDS_FEATURE_INCOMPAT_NOANCHOR      CompatSet::Feature(8, "no anchor table")

// Default filesystem name.
#define MDS_FS_NAME_DEFAULT "cephfs"
51 * The MDSMap and any additional fields describing a particular
52 * filesystem (a unique fs_cluster_id_t).
57 fs_cluster_id_t fscid
;
60 void encode(bufferlist
& bl
, uint64_t features
) const;
61 void decode(bufferlist::iterator
& p
);
65 fscid(FS_CLUSTER_ID_NONE
)
69 void dump(Formatter
*f
) const;
70 void print(std::ostream
& out
) const;
73 * Return true if a daemon is already assigned as
74 * STANDBY_REPLAY for the gid `who`
76 bool has_standby_replay(mds_gid_t who
) const
78 for (const auto &i
: mds_map
.mds_info
) {
79 const auto &info
= i
.second
;
80 if (info
.state
== MDSMap::STATE_STANDBY_REPLAY
81 && info
.rank
== mds_map
.mds_info
.at(who
).rank
) {
89 WRITE_CLASS_ENCODER_FEATURES(Filesystem
)
94 uint64_t next_filesystem_id
;
95 fs_cluster_id_t legacy_client_fscid
;
98 bool ever_enabled_multiple
; // < the cluster had multiple MDSes enabled once
100 std::map
<fs_cluster_id_t
, std::shared_ptr
<Filesystem
> > filesystems
;
102 // Remember which Filesystem an MDS daemon's info is stored in
103 // (or in standby_daemons for FS_CLUSTER_ID_NONE)
104 std::map
<mds_gid_t
, fs_cluster_id_t
> mds_roles
;
106 // For MDS daemons not yet assigned to a Filesystem
107 std::map
<mds_gid_t
, MDSMap::mds_info_t
> standby_daemons
;
108 std::map
<mds_gid_t
, epoch_t
> standby_epochs
;
112 friend class MDSMonitor
;
116 next_filesystem_id(FS_CLUSTER_ID_ANONYMOUS
+ 1),
117 legacy_client_fscid(FS_CLUSTER_ID_NONE
),
118 compat(get_mdsmap_compat_set_default()),
119 enable_multiple(false), ever_enabled_multiple(false)
122 FSMap(const FSMap
&rhs
)
125 next_filesystem_id(rhs
.next_filesystem_id
),
126 legacy_client_fscid(rhs
.legacy_client_fscid
),
128 enable_multiple(rhs
.enable_multiple
),
129 ever_enabled_multiple(rhs
.ever_enabled_multiple
),
130 mds_roles(rhs
.mds_roles
),
131 standby_daemons(rhs
.standby_daemons
),
132 standby_epochs(rhs
.standby_epochs
)
134 for (const auto &i
: rhs
.filesystems
) {
135 const auto &fs
= i
.second
;
136 filesystems
[fs
->fscid
] = std::make_shared
<Filesystem
>(*fs
);
140 FSMap
&operator=(const FSMap
&rhs
)
143 next_filesystem_id
= rhs
.next_filesystem_id
;
144 legacy_client_fscid
= rhs
.legacy_client_fscid
;
146 enable_multiple
= rhs
.enable_multiple
;
147 mds_roles
= rhs
.mds_roles
;
148 standby_daemons
= rhs
.standby_daemons
;
149 standby_epochs
= rhs
.standby_epochs
;
151 for (const auto &i
: rhs
.filesystems
) {
152 const auto &fs
= i
.second
;
153 filesystems
[fs
->fscid
] = std::make_shared
<Filesystem
>(*fs
);
159 const CompatSet
&get_compat() const {return compat
;}
161 void set_enable_multiple(const bool v
)
165 ever_enabled_multiple
= true;
169 bool get_enable_multiple() const
171 return enable_multiple
;
174 void set_legacy_client_fscid(fs_cluster_id_t fscid
)
176 assert(fscid
== FS_CLUSTER_ID_NONE
|| filesystems
.count(fscid
));
177 legacy_client_fscid
= fscid
;
180 fs_cluster_id_t
get_legacy_client_fscid() const
182 return legacy_client_fscid
;
186 * Get state of all daemons (for all filesystems, including all standbys)
188 std::map
<mds_gid_t
, MDSMap::mds_info_t
> get_mds_info() const
190 std::map
<mds_gid_t
, MDSMap::mds_info_t
> result
;
191 for (const auto &i
: standby_daemons
) {
192 result
[i
.first
] = i
.second
;
195 for (const auto &i
: filesystems
) {
196 const auto &fs_info
= i
.second
->mds_map
.get_mds_info();
197 for (const auto &j
: fs_info
) {
198 result
[j
.first
] = j
.second
;
206 * Resolve daemon name to GID
208 mds_gid_t
find_mds_gid_by_name(const std::string
& s
) const
210 const auto info
= get_mds_info();
211 for (const auto &p
: info
) {
212 if (p
.second
.name
== s
) {
220 * Resolve daemon name to status
222 const MDSMap::mds_info_t
* find_by_name(const std::string
& name
) const
224 std::map
<mds_gid_t
, MDSMap::mds_info_t
> result
;
225 for (const auto &i
: standby_daemons
) {
226 if (i
.second
.name
== name
) {
231 for (const auto &i
: filesystems
) {
232 const auto &fs_info
= i
.second
->mds_map
.get_mds_info();
233 for (const auto &j
: fs_info
) {
234 if (j
.second
.name
== name
) {
244 * Does a daemon exist with this GID?
246 bool gid_exists(mds_gid_t gid
) const
248 return mds_roles
.count(gid
) > 0;
252 * Does a daemon with this GID exist, *and* have an MDS rank assigned?
254 bool gid_has_rank(mds_gid_t gid
) const
256 return gid_exists(gid
) && mds_roles
.at(gid
) != FS_CLUSTER_ID_NONE
;
260 * Insert a new MDS daemon, as a standby
262 void insert(const MDSMap::mds_info_t
&new_info
);
265 * Assign an MDS cluster standby replay rank to a standby daemon
267 void assign_standby_replay(
268 const mds_gid_t standby_gid
,
269 const fs_cluster_id_t leader_ns
,
270 const mds_rank_t leader_rank
);
273 * Assign an MDS cluster rank to a standby daemon
276 mds_gid_t standby_gid
,
277 const std::shared_ptr
<Filesystem
> &filesystem
,
278 mds_rank_t assigned_rank
);
281 * A daemon reports that it is STATE_STOPPED: remove it,
282 * and the rank it held.
284 * @returns a list of any additional GIDs that were removed from the map
285 * as a side effect (like standby replays)
287 std::list
<mds_gid_t
> stop(mds_gid_t who
);
290 * The rank held by 'who', if any, is to be relinquished, and
291 * the state for the daemon GID is to be forgotten.
293 void erase(mds_gid_t who
, epoch_t blacklist_epoch
);
296 * Update to indicate that the rank held by 'who' is damaged
298 void damaged(mds_gid_t who
, epoch_t blacklist_epoch
);
301 * Update to indicate that the rank `rank` is to be removed
302 * from the damaged list of the filesystem `fscid`
304 bool undamaged(const fs_cluster_id_t fscid
, const mds_rank_t rank
);
307 * Initialize a Filesystem and assign a fscid. Update legacy_client_fscid
308 * to point to the new filesystem if it's the only one.
310 * Caller must already have validated all arguments vs. the existing
311 * FSMap and OSDMap contents.
313 void create_filesystem(const std::string
&name
,
314 int64_t metadata_pool
, int64_t data_pool
,
318 * Remove the filesystem (it must exist). Caller should already
319 * have failed out any MDSs that were assigned to the filesystem.
321 void erase_filesystem(fs_cluster_id_t fscid
)
323 filesystems
.erase(fscid
);
327 * Reset all the state information (not configuration information)
328 * in a particular filesystem. Caller must have verified that
329 * the filesystem already exists.
331 void reset_filesystem(fs_cluster_id_t fscid
);
334 * Mutator helper for Filesystem objects: expose a non-const
335 * Filesystem pointer to `fn` and update epochs appropriately.
337 void modify_filesystem(
338 const fs_cluster_id_t fscid
,
339 std::function
<void(std::shared_ptr
<Filesystem
> )> fn
)
341 auto fs
= filesystems
.at(fscid
);
343 fs
->mds_map
.epoch
= epoch
;
347 * Apply a mutation to the mds_info_t structure for a particular
348 * daemon (identified by GID), and make appropriate updates to epochs.
352 std::function
<void(MDSMap::mds_info_t
*info
)> fn
)
354 if (mds_roles
.at(who
) == FS_CLUSTER_ID_NONE
) {
355 auto &info
= standby_daemons
.at(who
);
357 assert(info
.state
== MDSMap::STATE_STANDBY
);
358 standby_epochs
[who
] = epoch
;
360 const auto &fs
= filesystems
[mds_roles
.at(who
)];
361 auto &info
= fs
->mds_map
.mds_info
.at(who
);
364 fs
->mds_map
.epoch
= epoch
;
369 * Given that gid exists in a filesystem or as a standby, return
370 * a reference to its info.
372 const MDSMap::mds_info_t
& get_info_gid(mds_gid_t gid
) const
374 auto fscid
= mds_roles
.at(gid
);
375 if (fscid
== FS_CLUSTER_ID_NONE
) {
376 return standby_daemons
.at(gid
);
378 return filesystems
.at(fscid
)->mds_map
.mds_info
.at(gid
);
383 * A daemon has told us it's compat, and it's too new
384 * for the one we had previously. Impose the new one
385 * on all filesystems.
387 void update_compat(const CompatSet
&c
)
389 // We could do something more complicated here to enable
390 // different filesystems to be served by different MDS versions,
391 // but this is a lot simpler because it doesn't require us to
392 // track the compat versions for standby daemons.
394 for (const auto &i
: filesystems
) {
395 MDSMap
&mds_map
= i
.second
->mds_map
;
397 mds_map
.epoch
= epoch
;
401 std::shared_ptr
<const Filesystem
> get_legacy_filesystem()
403 if (legacy_client_fscid
== FS_CLUSTER_ID_NONE
) {
406 return filesystems
.at(legacy_client_fscid
);
411 * A daemon has informed us of its offload targets
413 void update_export_targets(mds_gid_t who
, const std::set
<mds_rank_t
> targets
)
415 auto fscid
= mds_roles
.at(who
);
416 modify_filesystem(fscid
, [who
, &targets
](std::shared_ptr
<Filesystem
> fs
) {
417 fs
->mds_map
.mds_info
.at(who
).export_targets
= targets
;
421 epoch_t
get_epoch() const { return epoch
; }
422 void inc_epoch() { epoch
++; }
424 size_t filesystem_count() const {return filesystems
.size();}
425 bool filesystem_exists(fs_cluster_id_t fscid
) const {return filesystems
.count(fscid
) > 0;}
426 std::shared_ptr
<const Filesystem
> get_filesystem(fs_cluster_id_t fscid
) const {return std::const_pointer_cast
<const Filesystem
>(filesystems
.at(fscid
));}
427 std::shared_ptr
<const Filesystem
> get_filesystem(void) const {return std::const_pointer_cast
<const Filesystem
>(filesystems
.begin()->second
);}
428 std::shared_ptr
<const Filesystem
> get_filesystem(const std::string
&name
) const
430 for (const auto &i
: filesystems
) {
431 if (i
.second
->mds_map
.fs_name
== name
) {
432 return std::const_pointer_cast
<const Filesystem
>(i
.second
);
437 std::list
<std::shared_ptr
<const Filesystem
> > get_filesystems(void) const
439 std::list
<std::shared_ptr
<const Filesystem
> > ret
;
440 for (const auto &i
: filesystems
) {
441 ret
.push_back(std::const_pointer_cast
<const Filesystem
>(i
.second
));
446 int parse_filesystem(
447 std::string
const &ns_str
,
448 std::shared_ptr
<const Filesystem
> *result
452 const std::string
&role_str
,
454 std::ostream
&ss
) const;
457 * Return true if this pool is in use by any of the filesystems
459 bool pool_in_use(int64_t poolid
) const {
460 for (auto const &i
: filesystems
) {
461 if (i
.second
->mds_map
.is_data_pool(poolid
)
462 || i
.second
->mds_map
.metadata_pool
== poolid
) {
469 mds_gid_t
find_standby_for(mds_role_t mds
, const std::string
& name
) const;
471 mds_gid_t
find_unused_for(mds_role_t mds
, bool force_standby_active
) const;
473 mds_gid_t
find_replacement_for(mds_role_t mds
, const std::string
& name
,
474 bool force_standby_active
) const;
476 void get_health(list
<pair
<health_status_t
,std::string
> >& summary
,
477 list
<pair
<health_status_t
,std::string
> > *detail
) const;
479 bool check_health(void);
482 * Assert that the FSMap, Filesystem, MDSMap, mds_info_t relations are
483 * all self-consistent.
487 void encode(bufferlist
& bl
, uint64_t features
) const;
488 void decode(bufferlist::iterator
& p
);
489 void decode(bufferlist
& bl
) {
490 bufferlist::iterator p
= bl
.begin();
494 void print(ostream
& out
) const;
495 void print_summary(Formatter
*f
, ostream
*out
) const;
497 void dump(Formatter
*f
) const;
498 static void generate_test_instances(list
<FSMap
*>& ls
);
500 WRITE_CLASS_ENCODER_FEATURES(FSMap
)
502 inline ostream
& operator<<(ostream
& out
, const FSMap
& m
) {
503 m
.print_summary(NULL
, &out
);