git.proxmox.com Git - ceph.git/blob - ceph/src/mds/FSMap.h
update sources to v12.1.1
[ceph.git] / ceph / src / mds / FSMap.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16 #ifndef CEPH_FSMAP_H
17 #define CEPH_FSMAP_H
18
19 #include <errno.h>
20
21 #include "include/types.h"
22 #include "common/Clock.h"
23 #include "msg/Message.h"
24 #include "mds/MDSMap.h"
25
26 #include <set>
27 #include <map>
28 #include <string>
29
30 #include "common/config.h"
31
32 #include "include/CompatSet.h"
33 #include "include/ceph_features.h"
34 #include "common/Formatter.h"
35 #include "mds/mdstypes.h"
36
37 class CephContext;
38 class health_check_map_t;
39
// MDS feature descriptors. Each macro expands to a CompatSet::Feature
// constructed from a numeric feature id (1..8, no gaps) and a
// human-readable description string.
// NOTE(review): the INCOMPAT naming suggests these are meant for the
// "incompat" part of a CompatSet -- confirm where they are registered.
40 #define MDS_FEATURE_INCOMPAT_BASE CompatSet::Feature(1, "base v0.20")
41 #define MDS_FEATURE_INCOMPAT_CLIENTRANGES CompatSet::Feature(2, "client writeable ranges")
42 #define MDS_FEATURE_INCOMPAT_FILELAYOUT CompatSet::Feature(3, "default file layouts on dirs")
43 #define MDS_FEATURE_INCOMPAT_DIRINODE CompatSet::Feature(4, "dir inode in separate object")
44 #define MDS_FEATURE_INCOMPAT_ENCODING CompatSet::Feature(5, "mds uses versioned encoding")
45 #define MDS_FEATURE_INCOMPAT_OMAPDIRFRAG CompatSet::Feature(6, "dirfrag is stored in omap")
46 #define MDS_FEATURE_INCOMPAT_INLINE CompatSet::Feature(7, "mds uses inline data")
47 #define MDS_FEATURE_INCOMPAT_NOANCHOR CompatSet::Feature(8, "no anchor table")
48
// String literal "cephfs".
// NOTE(review): presumably the name given to a filesystem when none is
// specified -- its users are not visible in this header, confirm.
49 #define MDS_FS_NAME_DEFAULT "cephfs"
50
51 /**
52 * The MDSMap and any additional fields describing a particular
53 * filesystem (a unique fs_cluster_id_t).
54 */
55 class Filesystem
56 {
57 public:
58 fs_cluster_id_t fscid;
59 MDSMap mds_map;
60
61 void encode(bufferlist& bl, uint64_t features) const;
62 void decode(bufferlist::iterator& p);
63
64 Filesystem()
65 :
66 fscid(FS_CLUSTER_ID_NONE)
67 {
68 }
69
70 void dump(Formatter *f) const;
71 void print(std::ostream& out) const;
72
73 /**
74 * Return true if a daemon is already assigned as
75 * STANDBY_REPLAY for the gid `who`
76 */
77 bool has_standby_replay(mds_gid_t who) const
78 {
79 for (const auto &i : mds_map.mds_info) {
80 const auto &info = i.second;
81 if (info.state == MDSMap::STATE_STANDBY_REPLAY
82 && info.rank == mds_map.mds_info.at(who).rank) {
83 return true;
84 }
85 }
86
87 return false;
88 }
89 };
90 WRITE_CLASS_ENCODER_FEATURES(Filesystem)
91
92 class FSMap {
93 protected:
94 epoch_t epoch;
95 uint64_t next_filesystem_id;
96 fs_cluster_id_t legacy_client_fscid;
97 CompatSet compat;
98 bool enable_multiple;
99 bool ever_enabled_multiple; // < the cluster had multiple MDSes enabled once
100
101 std::map<fs_cluster_id_t, std::shared_ptr<Filesystem> > filesystems;
102
103 // Remember which Filesystem an MDS daemon's info is stored in
104 // (or in standby_daemons for FS_CLUSTER_ID_NONE)
105 std::map<mds_gid_t, fs_cluster_id_t> mds_roles;
106
107 // For MDS daemons not yet assigned to a Filesystem
108 std::map<mds_gid_t, MDSMap::mds_info_t> standby_daemons;
109 std::map<mds_gid_t, epoch_t> standby_epochs;
110
111 public:
112
113 friend class MDSMonitor;
114
115 FSMap()
116 : epoch(0),
117 next_filesystem_id(FS_CLUSTER_ID_ANONYMOUS + 1),
118 legacy_client_fscid(FS_CLUSTER_ID_NONE),
119 compat(get_mdsmap_compat_set_default()),
120 enable_multiple(false), ever_enabled_multiple(false)
121 { }
122
123 FSMap(const FSMap &rhs)
124 :
125 epoch(rhs.epoch),
126 next_filesystem_id(rhs.next_filesystem_id),
127 legacy_client_fscid(rhs.legacy_client_fscid),
128 compat(rhs.compat),
129 enable_multiple(rhs.enable_multiple),
130 ever_enabled_multiple(rhs.ever_enabled_multiple),
131 mds_roles(rhs.mds_roles),
132 standby_daemons(rhs.standby_daemons),
133 standby_epochs(rhs.standby_epochs)
134 {
135 for (const auto &i : rhs.filesystems) {
136 const auto &fs = i.second;
137 filesystems[fs->fscid] = std::make_shared<Filesystem>(*fs);
138 }
139 }
140
141 FSMap &operator=(const FSMap &rhs)
142 {
143 epoch = rhs.epoch;
144 next_filesystem_id = rhs.next_filesystem_id;
145 legacy_client_fscid = rhs.legacy_client_fscid;
146 compat = rhs.compat;
147 enable_multiple = rhs.enable_multiple;
148 mds_roles = rhs.mds_roles;
149 standby_daemons = rhs.standby_daemons;
150 standby_epochs = rhs.standby_epochs;
151
152 for (const auto &i : rhs.filesystems) {
153 const auto &fs = i.second;
154 filesystems[fs->fscid] = std::make_shared<Filesystem>(*fs);
155 }
156
157 return *this;
158 }
159
160 const CompatSet &get_compat() const {return compat;}
161
162 void set_enable_multiple(const bool v)
163 {
164 enable_multiple = v;
165 if (true == v) {
166 ever_enabled_multiple = true;
167 }
168 }
169
170 bool get_enable_multiple() const
171 {
172 return enable_multiple;
173 }
174
175 void set_legacy_client_fscid(fs_cluster_id_t fscid)
176 {
177 assert(fscid == FS_CLUSTER_ID_NONE || filesystems.count(fscid));
178 legacy_client_fscid = fscid;
179 }
180
181 fs_cluster_id_t get_legacy_client_fscid() const
182 {
183 return legacy_client_fscid;
184 }
185
186 /**
187 * Get state of all daemons (for all filesystems, including all standbys)
188 */
189 std::map<mds_gid_t, MDSMap::mds_info_t> get_mds_info() const
190 {
191 std::map<mds_gid_t, MDSMap::mds_info_t> result;
192 for (const auto &i : standby_daemons) {
193 result[i.first] = i.second;
194 }
195
196 for (const auto &i : filesystems) {
197 const auto &fs_info = i.second->mds_map.get_mds_info();
198 for (const auto &j : fs_info) {
199 result[j.first] = j.second;
200 }
201 }
202
203 return result;
204 }
205
206 /**
207 * Resolve daemon name to GID
208 */
209 mds_gid_t find_mds_gid_by_name(const std::string& s) const
210 {
211 const auto info = get_mds_info();
212 for (const auto &p : info) {
213 if (p.second.name == s) {
214 return p.first;
215 }
216 }
217 return MDS_GID_NONE;
218 }
219
220 /**
221 * Resolve daemon name to status
222 */
223 const MDSMap::mds_info_t* find_by_name(const std::string& name) const
224 {
225 std::map<mds_gid_t, MDSMap::mds_info_t> result;
226 for (const auto &i : standby_daemons) {
227 if (i.second.name == name) {
228 return &(i.second);
229 }
230 }
231
232 for (const auto &i : filesystems) {
233 const auto &fs_info = i.second->mds_map.get_mds_info();
234 for (const auto &j : fs_info) {
235 if (j.second.name == name) {
236 return &(j.second);
237 }
238 }
239 }
240
241 return nullptr;
242 }
243
244 /**
245 * Does a daemon exist with this GID?
246 */
247 bool gid_exists(mds_gid_t gid) const
248 {
249 return mds_roles.count(gid) > 0;
250 }
251
252 /**
253 * Does a daemon with this GID exist, *and* have an MDS rank assigned?
254 */
255 bool gid_has_rank(mds_gid_t gid) const
256 {
257 return gid_exists(gid) && mds_roles.at(gid) != FS_CLUSTER_ID_NONE;
258 }
259
260 /**
261 * Insert a new MDS daemon, as a standby
262 */
263 void insert(const MDSMap::mds_info_t &new_info);
264
265 /**
266 * Assign an MDS cluster standby replay rank to a standby daemon
267 */
268 void assign_standby_replay(
269 const mds_gid_t standby_gid,
270 const fs_cluster_id_t leader_ns,
271 const mds_rank_t leader_rank);
272
273 /**
274 * Assign an MDS cluster rank to a standby daemon
275 */
276 void promote(
277 mds_gid_t standby_gid,
278 const std::shared_ptr<Filesystem> &filesystem,
279 mds_rank_t assigned_rank);
280
281 /**
282 * A daemon reports that it is STATE_STOPPED: remove it,
283 * and the rank it held.
284 *
285 * @returns a list of any additional GIDs that were removed from the map
286 * as a side effect (like standby replays)
287 */
288 std::list<mds_gid_t> stop(mds_gid_t who);
289
290 /**
291 * The rank held by 'who', if any, is to be relinquished, and
292 * the state for the daemon GID is to be forgotten.
293 */
294 void erase(mds_gid_t who, epoch_t blacklist_epoch);
295
296 /**
297 * Update to indicate that the rank held by 'who' is damaged
298 */
299 void damaged(mds_gid_t who, epoch_t blacklist_epoch);
300
301 /**
302 * Update to indicate that the rank `rank` is to be removed
303 * from the damaged list of the filesystem `fscid`
304 */
305 bool undamaged(const fs_cluster_id_t fscid, const mds_rank_t rank);
306
307 /**
308 * Initialize a Filesystem and assign a fscid. Update legacy_client_fscid
309 * to point to the new filesystem if it's the only one.
310 *
311 * Caller must already have validated all arguments vs. the existing
312 * FSMap and OSDMap contents.
313 */
314 void create_filesystem(const std::string &name,
315 int64_t metadata_pool, int64_t data_pool,
316 uint64_t features);
317
318 /**
319 * Remove the filesystem (it must exist). Caller should already
320 * have failed out any MDSs that were assigned to the filesystem.
321 */
322 void erase_filesystem(fs_cluster_id_t fscid)
323 {
324 filesystems.erase(fscid);
325 }
326
327 /**
328 * Reset all the state information (not configuration information)
329 * in a particular filesystem. Caller must have verified that
330 * the filesystem already exists.
331 */
332 void reset_filesystem(fs_cluster_id_t fscid);
333
334 /**
335 * Mutator helper for Filesystem objects: expose a non-const
336 * Filesystem pointer to `fn` and update epochs appropriately.
337 */
338 void modify_filesystem(
339 const fs_cluster_id_t fscid,
340 std::function<void(std::shared_ptr<Filesystem> )> fn)
341 {
342 auto fs = filesystems.at(fscid);
343 fn(fs);
344 fs->mds_map.epoch = epoch;
345 }
346
347 /**
348 * Apply a mutation to the mds_info_t structure for a particular
349 * daemon (identified by GID), and make appropriate updates to epochs.
350 */
351 void modify_daemon(
352 mds_gid_t who,
353 std::function<void(MDSMap::mds_info_t *info)> fn)
354 {
355 if (mds_roles.at(who) == FS_CLUSTER_ID_NONE) {
356 auto &info = standby_daemons.at(who);
357 fn(&info);
358 assert(info.state == MDSMap::STATE_STANDBY);
359 standby_epochs[who] = epoch;
360 } else {
361 const auto &fs = filesystems[mds_roles.at(who)];
362 auto &info = fs->mds_map.mds_info.at(who);
363 fn(&info);
364
365 fs->mds_map.epoch = epoch;
366 }
367 }
368
369 /**
370 * Given that gid exists in a filesystem or as a standby, return
371 * a reference to its info.
372 */
373 const MDSMap::mds_info_t& get_info_gid(mds_gid_t gid) const
374 {
375 auto fscid = mds_roles.at(gid);
376 if (fscid == FS_CLUSTER_ID_NONE) {
377 return standby_daemons.at(gid);
378 } else {
379 return filesystems.at(fscid)->mds_map.mds_info.at(gid);
380 }
381 }
382
383 /**
384 * A daemon has told us it's compat, and it's too new
385 * for the one we had previously. Impose the new one
386 * on all filesystems.
387 */
388 void update_compat(const CompatSet &c)
389 {
390 // We could do something more complicated here to enable
391 // different filesystems to be served by different MDS versions,
392 // but this is a lot simpler because it doesn't require us to
393 // track the compat versions for standby daemons.
394 compat = c;
395 for (const auto &i : filesystems) {
396 MDSMap &mds_map = i.second->mds_map;
397 mds_map.compat = c;
398 mds_map.epoch = epoch;
399 }
400 }
401
402 std::shared_ptr<const Filesystem> get_legacy_filesystem()
403 {
404 if (legacy_client_fscid == FS_CLUSTER_ID_NONE) {
405 return nullptr;
406 } else {
407 return filesystems.at(legacy_client_fscid);
408 }
409 }
410
411 /**
412 * A daemon has informed us of its offload targets
413 */
414 void update_export_targets(mds_gid_t who, const std::set<mds_rank_t> targets)
415 {
416 auto fscid = mds_roles.at(who);
417 modify_filesystem(fscid, [who, &targets](std::shared_ptr<Filesystem> fs) {
418 fs->mds_map.mds_info.at(who).export_targets = targets;
419 });
420 }
421
422 epoch_t get_epoch() const { return epoch; }
423 void inc_epoch() { epoch++; }
424
425 size_t filesystem_count() const {return filesystems.size();}
426 bool filesystem_exists(fs_cluster_id_t fscid) const {return filesystems.count(fscid) > 0;}
427 std::shared_ptr<const Filesystem> get_filesystem(fs_cluster_id_t fscid) const {return std::const_pointer_cast<const Filesystem>(filesystems.at(fscid));}
428 std::shared_ptr<const Filesystem> get_filesystem(void) const {return std::const_pointer_cast<const Filesystem>(filesystems.begin()->second);}
429 std::shared_ptr<const Filesystem> get_filesystem(const std::string &name) const
430 {
431 for (const auto &i : filesystems) {
432 if (i.second->mds_map.fs_name == name) {
433 return std::const_pointer_cast<const Filesystem>(i.second);
434 }
435 }
436 return nullptr;
437 }
438 std::list<std::shared_ptr<const Filesystem> > get_filesystems(void) const
439 {
440 std::list<std::shared_ptr<const Filesystem> > ret;
441 for (const auto &i : filesystems) {
442 ret.push_back(std::const_pointer_cast<const Filesystem>(i.second));
443 }
444 return ret;
445 }
446
447 int parse_filesystem(
448 std::string const &ns_str,
449 std::shared_ptr<const Filesystem> *result
450 ) const;
451
452 int parse_role(
453 const std::string &role_str,
454 mds_role_t *role,
455 std::ostream &ss) const;
456
457 /**
458 * Return true if this pool is in use by any of the filesystems
459 */
460 bool pool_in_use(int64_t poolid) const {
461 for (auto const &i : filesystems) {
462 if (i.second->mds_map.is_data_pool(poolid)
463 || i.second->mds_map.metadata_pool == poolid) {
464 return true;
465 }
466 }
467 return false;
468 }
469
470 mds_gid_t find_standby_for(mds_role_t mds, const std::string& name) const;
471
472 mds_gid_t find_unused_for(mds_role_t mds, bool force_standby_active) const;
473
474 mds_gid_t find_replacement_for(mds_role_t mds, const std::string& name,
475 bool force_standby_active) const;
476
477 void get_health(list<pair<health_status_t,std::string> >& summary,
478 list<pair<health_status_t,std::string> > *detail) const;
479
480 void get_health_checks(health_check_map_t *checks) const;
481
482 bool check_health(void);
483
484 /**
485 * Assert that the FSMap, Filesystem, MDSMap, mds_info_t relations are
486 * all self-consistent.
487 */
488 void sanity() const;
489
490 void encode(bufferlist& bl, uint64_t features) const;
491 void decode(bufferlist::iterator& p);
492 void decode(bufferlist& bl) {
493 bufferlist::iterator p = bl.begin();
494 decode(p);
495 }
496
497 void print(ostream& out) const;
498 void print_summary(Formatter *f, ostream *out) const;
499
500 void dump(Formatter *f) const;
501 static void generate_test_instances(list<FSMap*>& ls);
502 };
503 WRITE_CLASS_ENCODER_FEATURES(FSMap)
504
505 inline ostream& operator<<(ostream& out, const FSMap& m) {
506 m.print_summary(NULL, &out);
507 return out;
508 }
509
510 #endif