]> git.proxmox.com Git - ceph.git/blob - ceph/src/mds/FSMap.h
3d389c48885b5751a8c99b8d79799c0c79ccf5a1
[ceph.git] / ceph / src / mds / FSMap.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16 #ifndef CEPH_FSMAP_H
17 #define CEPH_FSMAP_H
18
19 #include <errno.h>
20
21 #include "include/types.h"
22 #include "common/Clock.h"
23 #include "msg/Message.h"
24 #include "mds/MDSMap.h"
25
26 #include <set>
27 #include <map>
28 #include <string>
29
30 #include "common/config.h"
31
32 #include "include/CompatSet.h"
33 #include "include/ceph_features.h"
34 #include "common/Formatter.h"
35 #include "mds/mdstypes.h"
36
37 class CephContext;
38
// MDS "incompat" feature descriptors.  Each macro expands to a
// CompatSet::Feature built from a numeric id and a human-readable
// description string.
#define MDS_FEATURE_INCOMPAT_BASE \
  CompatSet::Feature(1, "base v0.20")
#define MDS_FEATURE_INCOMPAT_CLIENTRANGES \
  CompatSet::Feature(2, "client writeable ranges")
#define MDS_FEATURE_INCOMPAT_FILELAYOUT \
  CompatSet::Feature(3, "default file layouts on dirs")
#define MDS_FEATURE_INCOMPAT_DIRINODE \
  CompatSet::Feature(4, "dir inode in separate object")
#define MDS_FEATURE_INCOMPAT_ENCODING \
  CompatSet::Feature(5, "mds uses versioned encoding")
#define MDS_FEATURE_INCOMPAT_OMAPDIRFRAG \
  CompatSet::Feature(6, "dirfrag is stored in omap")
#define MDS_FEATURE_INCOMPAT_INLINE \
  CompatSet::Feature(7, "mds uses inline data")
#define MDS_FEATURE_INCOMPAT_NOANCHOR \
  CompatSet::Feature(8, "no anchor table")

// Default filesystem name.
#define MDS_FS_NAME_DEFAULT "cephfs"
49
50 /**
51 * The MDSMap and any additional fields describing a particular
52 * filesystem (a unique fs_cluster_id_t).
53 */
54 class Filesystem
55 {
56 public:
57 fs_cluster_id_t fscid;
58 MDSMap mds_map;
59
60 void encode(bufferlist& bl, uint64_t features) const;
61 void decode(bufferlist::iterator& p);
62
63 Filesystem()
64 :
65 fscid(FS_CLUSTER_ID_NONE)
66 {
67 }
68
69 void dump(Formatter *f) const;
70 void print(std::ostream& out) const;
71
72 /**
73 * Return true if a daemon is already assigned as
74 * STANDBY_REPLAY for the gid `who`
75 */
76 bool has_standby_replay(mds_gid_t who) const
77 {
78 for (const auto &i : mds_map.mds_info) {
79 const auto &info = i.second;
80 if (info.state == MDSMap::STATE_STANDBY_REPLAY
81 && info.rank == mds_map.mds_info.at(who).rank) {
82 return true;
83 }
84 }
85
86 return false;
87 }
88 };
89 WRITE_CLASS_ENCODER_FEATURES(Filesystem)
90
91 class FSMap {
92 protected:
93 epoch_t epoch;
94 uint64_t next_filesystem_id;
95 fs_cluster_id_t legacy_client_fscid;
96 CompatSet compat;
97 bool enable_multiple;
98 bool ever_enabled_multiple; // < the cluster had multiple MDSes enabled once
99
100 std::map<fs_cluster_id_t, std::shared_ptr<Filesystem> > filesystems;
101
102 // Remember which Filesystem an MDS daemon's info is stored in
103 // (or in standby_daemons for FS_CLUSTER_ID_NONE)
104 std::map<mds_gid_t, fs_cluster_id_t> mds_roles;
105
106 // For MDS daemons not yet assigned to a Filesystem
107 std::map<mds_gid_t, MDSMap::mds_info_t> standby_daemons;
108 std::map<mds_gid_t, epoch_t> standby_epochs;
109
110 public:
111
112 friend class MDSMonitor;
113
114 FSMap()
115 : epoch(0),
116 next_filesystem_id(FS_CLUSTER_ID_ANONYMOUS + 1),
117 legacy_client_fscid(FS_CLUSTER_ID_NONE),
118 compat(get_mdsmap_compat_set_default()),
119 enable_multiple(false), ever_enabled_multiple(false)
120 { }
121
122 FSMap(const FSMap &rhs)
123 :
124 epoch(rhs.epoch),
125 next_filesystem_id(rhs.next_filesystem_id),
126 legacy_client_fscid(rhs.legacy_client_fscid),
127 compat(rhs.compat),
128 enable_multiple(rhs.enable_multiple),
129 ever_enabled_multiple(rhs.ever_enabled_multiple),
130 mds_roles(rhs.mds_roles),
131 standby_daemons(rhs.standby_daemons),
132 standby_epochs(rhs.standby_epochs)
133 {
134 for (const auto &i : rhs.filesystems) {
135 const auto &fs = i.second;
136 filesystems[fs->fscid] = std::make_shared<Filesystem>(*fs);
137 }
138 }
139
140 FSMap &operator=(const FSMap &rhs)
141 {
142 epoch = rhs.epoch;
143 next_filesystem_id = rhs.next_filesystem_id;
144 legacy_client_fscid = rhs.legacy_client_fscid;
145 compat = rhs.compat;
146 enable_multiple = rhs.enable_multiple;
147 mds_roles = rhs.mds_roles;
148 standby_daemons = rhs.standby_daemons;
149 standby_epochs = rhs.standby_epochs;
150
151 for (const auto &i : rhs.filesystems) {
152 const auto &fs = i.second;
153 filesystems[fs->fscid] = std::make_shared<Filesystem>(*fs);
154 }
155
156 return *this;
157 }
158
159 const CompatSet &get_compat() const {return compat;}
160
161 void set_enable_multiple(const bool v)
162 {
163 enable_multiple = v;
164 if (true == v) {
165 ever_enabled_multiple = true;
166 }
167 }
168
169 bool get_enable_multiple() const
170 {
171 return enable_multiple;
172 }
173
174 void set_legacy_client_fscid(fs_cluster_id_t fscid)
175 {
176 assert(fscid == FS_CLUSTER_ID_NONE || filesystems.count(fscid));
177 legacy_client_fscid = fscid;
178 }
179
180 fs_cluster_id_t get_legacy_client_fscid() const
181 {
182 return legacy_client_fscid;
183 }
184
185 /**
186 * Get state of all daemons (for all filesystems, including all standbys)
187 */
188 std::map<mds_gid_t, MDSMap::mds_info_t> get_mds_info() const
189 {
190 std::map<mds_gid_t, MDSMap::mds_info_t> result;
191 for (const auto &i : standby_daemons) {
192 result[i.first] = i.second;
193 }
194
195 for (const auto &i : filesystems) {
196 const auto &fs_info = i.second->mds_map.get_mds_info();
197 for (const auto &j : fs_info) {
198 result[j.first] = j.second;
199 }
200 }
201
202 return result;
203 }
204
205 /**
206 * Resolve daemon name to GID
207 */
208 mds_gid_t find_mds_gid_by_name(const std::string& s) const
209 {
210 const auto info = get_mds_info();
211 for (const auto &p : info) {
212 if (p.second.name == s) {
213 return p.first;
214 }
215 }
216 return MDS_GID_NONE;
217 }
218
219 /**
220 * Resolve daemon name to status
221 */
222 const MDSMap::mds_info_t* find_by_name(const std::string& name) const
223 {
224 std::map<mds_gid_t, MDSMap::mds_info_t> result;
225 for (const auto &i : standby_daemons) {
226 if (i.second.name == name) {
227 return &(i.second);
228 }
229 }
230
231 for (const auto &i : filesystems) {
232 const auto &fs_info = i.second->mds_map.get_mds_info();
233 for (const auto &j : fs_info) {
234 if (j.second.name == name) {
235 return &(j.second);
236 }
237 }
238 }
239
240 return nullptr;
241 }
242
243 /**
244 * Does a daemon exist with this GID?
245 */
246 bool gid_exists(mds_gid_t gid) const
247 {
248 return mds_roles.count(gid) > 0;
249 }
250
251 /**
252 * Does a daemon with this GID exist, *and* have an MDS rank assigned?
253 */
254 bool gid_has_rank(mds_gid_t gid) const
255 {
256 return gid_exists(gid) && mds_roles.at(gid) != FS_CLUSTER_ID_NONE;
257 }
258
259 /**
260 * Insert a new MDS daemon, as a standby
261 */
262 void insert(const MDSMap::mds_info_t &new_info);
263
264 /**
265 * Assign an MDS cluster standby replay rank to a standby daemon
266 */
267 void assign_standby_replay(
268 const mds_gid_t standby_gid,
269 const fs_cluster_id_t leader_ns,
270 const mds_rank_t leader_rank);
271
272 /**
273 * Assign an MDS cluster rank to a standby daemon
274 */
275 void promote(
276 mds_gid_t standby_gid,
277 const std::shared_ptr<Filesystem> &filesystem,
278 mds_rank_t assigned_rank);
279
280 /**
281 * A daemon reports that it is STATE_STOPPED: remove it,
282 * and the rank it held.
283 *
284 * @returns a list of any additional GIDs that were removed from the map
285 * as a side effect (like standby replays)
286 */
287 std::list<mds_gid_t> stop(mds_gid_t who);
288
289 /**
290 * The rank held by 'who', if any, is to be relinquished, and
291 * the state for the daemon GID is to be forgotten.
292 */
293 void erase(mds_gid_t who, epoch_t blacklist_epoch);
294
295 /**
296 * Update to indicate that the rank held by 'who' is damaged
297 */
298 void damaged(mds_gid_t who, epoch_t blacklist_epoch);
299
300 /**
301 * Update to indicate that the rank `rank` is to be removed
302 * from the damaged list of the filesystem `fscid`
303 */
304 bool undamaged(const fs_cluster_id_t fscid, const mds_rank_t rank);
305
306 /**
307 * Initialize a Filesystem and assign a fscid. Update legacy_client_fscid
308 * to point to the new filesystem if it's the only one.
309 *
310 * Caller must already have validated all arguments vs. the existing
311 * FSMap and OSDMap contents.
312 */
313 void create_filesystem(const std::string &name,
314 int64_t metadata_pool, int64_t data_pool,
315 uint64_t features);
316
317 /**
318 * Remove the filesystem (it must exist). Caller should already
319 * have failed out any MDSs that were assigned to the filesystem.
320 */
321 void erase_filesystem(fs_cluster_id_t fscid)
322 {
323 filesystems.erase(fscid);
324 }
325
326 /**
327 * Reset all the state information (not configuration information)
328 * in a particular filesystem. Caller must have verified that
329 * the filesystem already exists.
330 */
331 void reset_filesystem(fs_cluster_id_t fscid);
332
333 /**
334 * Mutator helper for Filesystem objects: expose a non-const
335 * Filesystem pointer to `fn` and update epochs appropriately.
336 */
337 void modify_filesystem(
338 const fs_cluster_id_t fscid,
339 std::function<void(std::shared_ptr<Filesystem> )> fn)
340 {
341 auto fs = filesystems.at(fscid);
342 fn(fs);
343 fs->mds_map.epoch = epoch;
344 }
345
346 /**
347 * Apply a mutation to the mds_info_t structure for a particular
348 * daemon (identified by GID), and make appropriate updates to epochs.
349 */
350 void modify_daemon(
351 mds_gid_t who,
352 std::function<void(MDSMap::mds_info_t *info)> fn)
353 {
354 if (mds_roles.at(who) == FS_CLUSTER_ID_NONE) {
355 auto &info = standby_daemons.at(who);
356 fn(&info);
357 assert(info.state == MDSMap::STATE_STANDBY);
358 standby_epochs[who] = epoch;
359 } else {
360 const auto &fs = filesystems[mds_roles.at(who)];
361 auto &info = fs->mds_map.mds_info.at(who);
362 fn(&info);
363
364 fs->mds_map.epoch = epoch;
365 }
366 }
367
368 /**
369 * Given that gid exists in a filesystem or as a standby, return
370 * a reference to its info.
371 */
372 const MDSMap::mds_info_t& get_info_gid(mds_gid_t gid) const
373 {
374 auto fscid = mds_roles.at(gid);
375 if (fscid == FS_CLUSTER_ID_NONE) {
376 return standby_daemons.at(gid);
377 } else {
378 return filesystems.at(fscid)->mds_map.mds_info.at(gid);
379 }
380 }
381
382 /**
383 * A daemon has told us it's compat, and it's too new
384 * for the one we had previously. Impose the new one
385 * on all filesystems.
386 */
387 void update_compat(const CompatSet &c)
388 {
389 // We could do something more complicated here to enable
390 // different filesystems to be served by different MDS versions,
391 // but this is a lot simpler because it doesn't require us to
392 // track the compat versions for standby daemons.
393 compat = c;
394 for (const auto &i : filesystems) {
395 MDSMap &mds_map = i.second->mds_map;
396 mds_map.compat = c;
397 mds_map.epoch = epoch;
398 }
399 }
400
401 std::shared_ptr<const Filesystem> get_legacy_filesystem()
402 {
403 if (legacy_client_fscid == FS_CLUSTER_ID_NONE) {
404 return nullptr;
405 } else {
406 return filesystems.at(legacy_client_fscid);
407 }
408 }
409
410 /**
411 * A daemon has informed us of its offload targets
412 */
413 void update_export_targets(mds_gid_t who, const std::set<mds_rank_t> targets)
414 {
415 auto fscid = mds_roles.at(who);
416 modify_filesystem(fscid, [who, &targets](std::shared_ptr<Filesystem> fs) {
417 fs->mds_map.mds_info.at(who).export_targets = targets;
418 });
419 }
420
421 epoch_t get_epoch() const { return epoch; }
422 void inc_epoch() { epoch++; }
423
424 size_t filesystem_count() const {return filesystems.size();}
425 bool filesystem_exists(fs_cluster_id_t fscid) const {return filesystems.count(fscid) > 0;}
426 std::shared_ptr<const Filesystem> get_filesystem(fs_cluster_id_t fscid) const {return std::const_pointer_cast<const Filesystem>(filesystems.at(fscid));}
427 std::shared_ptr<const Filesystem> get_filesystem(void) const {return std::const_pointer_cast<const Filesystem>(filesystems.begin()->second);}
428 std::shared_ptr<const Filesystem> get_filesystem(const std::string &name) const
429 {
430 for (const auto &i : filesystems) {
431 if (i.second->mds_map.fs_name == name) {
432 return std::const_pointer_cast<const Filesystem>(i.second);
433 }
434 }
435 return nullptr;
436 }
437 std::list<std::shared_ptr<const Filesystem> > get_filesystems(void) const
438 {
439 std::list<std::shared_ptr<const Filesystem> > ret;
440 for (const auto &i : filesystems) {
441 ret.push_back(std::const_pointer_cast<const Filesystem>(i.second));
442 }
443 return ret;
444 }
445
446 int parse_filesystem(
447 std::string const &ns_str,
448 std::shared_ptr<const Filesystem> *result
449 ) const;
450
451 int parse_role(
452 const std::string &role_str,
453 mds_role_t *role,
454 std::ostream &ss) const;
455
456 /**
457 * Return true if this pool is in use by any of the filesystems
458 */
459 bool pool_in_use(int64_t poolid) const {
460 for (auto const &i : filesystems) {
461 if (i.second->mds_map.is_data_pool(poolid)
462 || i.second->mds_map.metadata_pool == poolid) {
463 return true;
464 }
465 }
466 return false;
467 }
468
469 mds_gid_t find_standby_for(mds_role_t mds, const std::string& name) const;
470
471 mds_gid_t find_unused_for(mds_role_t mds, bool force_standby_active) const;
472
473 mds_gid_t find_replacement_for(mds_role_t mds, const std::string& name,
474 bool force_standby_active) const;
475
476 void get_health(list<pair<health_status_t,std::string> >& summary,
477 list<pair<health_status_t,std::string> > *detail) const;
478
479 bool check_health(void);
480
481 /**
482 * Assert that the FSMap, Filesystem, MDSMap, mds_info_t relations are
483 * all self-consistent.
484 */
485 void sanity() const;
486
487 void encode(bufferlist& bl, uint64_t features) const;
488 void decode(bufferlist::iterator& p);
489 void decode(bufferlist& bl) {
490 bufferlist::iterator p = bl.begin();
491 decode(p);
492 }
493
494 void print(ostream& out) const;
495 void print_summary(Formatter *f, ostream *out) const;
496
497 void dump(Formatter *f) const;
498 static void generate_test_instances(list<FSMap*>& ls);
499 };
500 WRITE_CLASS_ENCODER_FEATURES(FSMap)
501
502 inline ostream& operator<<(ostream& out, const FSMap& m) {
503 m.print_summary(NULL, &out);
504 return out;
505 }
506
507 #endif