]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/FSMap.h
import ceph 15.2.14
[ceph.git] / ceph / src / mds / FSMap.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16#ifndef CEPH_FSMAP_H
17#define CEPH_FSMAP_H
18
94b18763 19#include <map>
11fdf7f2 20#include <memory>
94b18763
FG
21#include <set>
22#include <string>
11fdf7f2 23#include <string_view>
94b18763 24
7c673cae
FG
25#include <errno.h>
26
27#include "include/types.h"
28#include "common/Clock.h"
7c673cae
FG
29#include "mds/MDSMap.h"
30
7c673cae
FG
31#include "include/CompatSet.h"
32#include "include/ceph_features.h"
9f95a23c 33#include "include/common_fwd.h"
7c673cae
FG
34#include "common/Formatter.h"
35#include "mds/mdstypes.h"
36
224ce89b 37class health_check_map_t;
7c673cae 38
7c673cae
FG
39/**
40 * The MDSMap and any additional fields describing a particular
41 * filesystem (a unique fs_cluster_id_t).
42 */
43class Filesystem
44{
1adf2230 45public:
11fdf7f2
TL
46 using ref = std::shared_ptr<Filesystem>;
47 using const_ref = std::shared_ptr<Filesystem const>;
48
49 template<typename... Args>
50 static ref create(Args&&... args)
51 {
52 return std::make_shared<Filesystem>(std::forward<Args>(args)...);
53 }
54
7c673cae 55 void encode(bufferlist& bl, uint64_t features) const;
11fdf7f2 56 void decode(bufferlist::const_iterator& p);
7c673cae 57
7c673cae
FG
58 void dump(Formatter *f) const;
59 void print(std::ostream& out) const;
60
61 /**
62 * Return true if a daemon is already assigned as
63 * STANDBY_REPLAY for the gid `who`
64 */
65 bool has_standby_replay(mds_gid_t who) const
11fdf7f2
TL
66 {
67 return get_standby_replay(who) != MDS_GID_NONE;
68 }
9f95a23c 69 mds_gid_t get_standby_replay(mds_gid_t who) const;
11fdf7f2
TL
70 bool is_standby_replay(mds_gid_t who) const
71 {
72 auto p = mds_map.mds_info.find(who);
73 if (p != mds_map.mds_info.end() &&
74 p->second.state == MDSMap::STATE_STANDBY_REPLAY) {
75 return true;
76 }
7c673cae
FG
77 return false;
78 }
1adf2230
AA
79
80 fs_cluster_id_t fscid = FS_CLUSTER_ID_NONE;
81 MDSMap mds_map;
7c673cae
FG
82};
83WRITE_CLASS_ENCODER_FEATURES(Filesystem)
84
85class FSMap {
7c673cae 86public:
7c673cae 87 friend class MDSMonitor;
28e407b8 88 friend class PaxosFSMap;
9f95a23c 89 using mds_info_t = MDSMap::mds_info_t;
7c673cae 90
6d8e3169
FG
91 static const version_t STRUCT_VERSION = 7;
92 static const version_t STRUCT_VERSION_TRIM_TO = 7;
93
1adf2230 94 FSMap() : compat(MDSMap::get_compat_set_default()) {}
7c673cae
FG
95
96 FSMap(const FSMap &rhs)
97 :
98 epoch(rhs.epoch),
99 next_filesystem_id(rhs.next_filesystem_id),
100 legacy_client_fscid(rhs.legacy_client_fscid),
101 compat(rhs.compat),
102 enable_multiple(rhs.enable_multiple),
103 ever_enabled_multiple(rhs.ever_enabled_multiple),
104 mds_roles(rhs.mds_roles),
105 standby_daemons(rhs.standby_daemons),
6d8e3169
FG
106 standby_epochs(rhs.standby_epochs),
107 struct_version(rhs.struct_version)
7c673cae 108 {
b32b8144 109 filesystems.clear();
7c673cae
FG
110 for (const auto &i : rhs.filesystems) {
111 const auto &fs = i.second;
112 filesystems[fs->fscid] = std::make_shared<Filesystem>(*fs);
113 }
114 }
115
9f95a23c 116 FSMap &operator=(const FSMap &rhs);
7c673cae
FG
117
118 const CompatSet &get_compat() const {return compat;}
119
120 void set_enable_multiple(const bool v)
121 {
122 enable_multiple = v;
123 if (true == v) {
124 ever_enabled_multiple = true;
125 }
126 }
127
128 bool get_enable_multiple() const
129 {
130 return enable_multiple;
131 }
132
133 void set_legacy_client_fscid(fs_cluster_id_t fscid)
134 {
11fdf7f2 135 ceph_assert(fscid == FS_CLUSTER_ID_NONE || filesystems.count(fscid));
7c673cae
FG
136 legacy_client_fscid = fscid;
137 }
138
139 fs_cluster_id_t get_legacy_client_fscid() const
140 {
141 return legacy_client_fscid;
142 }
143
11fdf7f2
TL
144 size_t get_num_standby() const {
145 return standby_daemons.size();
146 }
147
9f95a23c 148 bool is_any_degraded() const;
11fdf7f2 149
7c673cae
FG
150 /**
151 * Get state of all daemons (for all filesystems, including all standbys)
152 */
9f95a23c 153 std::map<mds_gid_t, mds_info_t> get_mds_info() const;
7c673cae 154
9f95a23c 155 const mds_info_t* get_available_standby(fs_cluster_id_t fscid) const;
11fdf7f2 156
7c673cae
FG
157 /**
158 * Resolve daemon name to GID
159 */
9f95a23c 160 mds_gid_t find_mds_gid_by_name(std::string_view s) const;
7c673cae
FG
161
162 /**
163 * Resolve daemon name to status
164 */
9f95a23c 165 const mds_info_t* find_by_name(std::string_view name) const;
7c673cae
FG
166
167 /**
168 * Does a daemon exist with this GID?
169 */
170 bool gid_exists(mds_gid_t gid) const
171 {
172 return mds_roles.count(gid) > 0;
173 }
174
175 /**
176 * Does a daemon with this GID exist, *and* have an MDS rank assigned?
177 */
178 bool gid_has_rank(mds_gid_t gid) const
179 {
180 return gid_exists(gid) && mds_roles.at(gid) != FS_CLUSTER_ID_NONE;
181 }
182
9f95a23c
TL
183 fs_cluster_id_t gid_fscid(mds_gid_t gid) const
184 {
185 return mds_roles.at(gid);
186 }
187
7c673cae
FG
188 /**
189 * Insert a new MDS daemon, as a standby
190 */
9f95a23c 191 void insert(const mds_info_t& new_info);
7c673cae
FG
192
193 /**
194 * Assign an MDS cluster standby replay rank to a standby daemon
195 */
196 void assign_standby_replay(
197 const mds_gid_t standby_gid,
198 const fs_cluster_id_t leader_ns,
199 const mds_rank_t leader_rank);
200
201 /**
202 * Assign an MDS cluster rank to a standby daemon
203 */
204 void promote(
205 mds_gid_t standby_gid,
11fdf7f2 206 Filesystem& filesystem,
7c673cae
FG
207 mds_rank_t assigned_rank);
208
209 /**
210 * A daemon reports that it is STATE_STOPPED: remove it,
211 * and the rank it held.
212 *
213 * @returns a list of any additional GIDs that were removed from the map
214 * as a side effect (like standby replays)
215 */
9f95a23c 216 std::vector<mds_gid_t> stop(mds_gid_t who);
7c673cae
FG
217
218 /**
219 * The rank held by 'who', if any, is to be relinquished, and
220 * the state for the daemon GID is to be forgotten.
221 */
222 void erase(mds_gid_t who, epoch_t blacklist_epoch);
223
224 /**
225 * Update to indicate that the rank held by 'who' is damaged
226 */
227 void damaged(mds_gid_t who, epoch_t blacklist_epoch);
228
229 /**
230 * Update to indicate that the rank `rank` is to be removed
231 * from the damaged list of the filesystem `fscid`
232 */
233 bool undamaged(const fs_cluster_id_t fscid, const mds_rank_t rank);
234
235 /**
236 * Initialize a Filesystem and assign a fscid. Update legacy_client_fscid
237 * to point to the new filesystem if it's the only one.
238 *
239 * Caller must already have validated all arguments vs. the existing
240 * FSMap and OSDMap contents.
241 */
11fdf7f2
TL
242 Filesystem::ref create_filesystem(
243 std::string_view name, int64_t metadata_pool,
244 int64_t data_pool, uint64_t features);
7c673cae
FG
245
246 /**
247 * Remove the filesystem (it must exist). Caller should already
248 * have failed out any MDSs that were assigned to the filesystem.
249 */
9f95a23c 250 void erase_filesystem(fs_cluster_id_t fscid);
7c673cae
FG
251
252 /**
253 * Reset all the state information (not configuration information)
254 * in a particular filesystem. Caller must have verified that
255 * the filesystem already exists.
256 */
257 void reset_filesystem(fs_cluster_id_t fscid);
258
259 /**
260 * Mutator helper for Filesystem objects: expose a non-const
261 * Filesystem pointer to `fn` and update epochs appropriately.
262 */
11fdf7f2
TL
263 template<typename T>
264 void modify_filesystem(fs_cluster_id_t fscid, T&& fn)
7c673cae 265 {
11fdf7f2 266 auto& fs = filesystems.at(fscid);
7c673cae
FG
267 fn(fs);
268 fs->mds_map.epoch = epoch;
269 }
270
271 /**
272 * Apply a mutation to the mds_info_t structure for a particular
273 * daemon (identified by GID), and make appropriate updates to epochs.
274 */
11fdf7f2
TL
275 template<typename T>
276 void modify_daemon(mds_gid_t who, T&& fn)
7c673cae 277 {
11fdf7f2
TL
278 const auto& fscid = mds_roles.at(who);
279 if (fscid == FS_CLUSTER_ID_NONE) {
280 auto& info = standby_daemons.at(who);
281 fn(info);
282 ceph_assert(info.state == MDSMap::STATE_STANDBY);
7c673cae
FG
283 standby_epochs[who] = epoch;
284 } else {
11fdf7f2
TL
285 auto& fs = filesystems.at(fscid);
286 auto& info = fs->mds_map.mds_info.at(who);
287 fn(info);
7c673cae
FG
288 fs->mds_map.epoch = epoch;
289 }
290 }
291
292 /**
293 * Given that gid exists in a filesystem or as a standby, return
294 * a reference to its info.
295 */
9f95a23c 296 const mds_info_t& get_info_gid(mds_gid_t gid) const
7c673cae
FG
297 {
298 auto fscid = mds_roles.at(gid);
299 if (fscid == FS_CLUSTER_ID_NONE) {
300 return standby_daemons.at(gid);
301 } else {
302 return filesystems.at(fscid)->mds_map.mds_info.at(gid);
303 }
304 }
305
11fdf7f2
TL
306 bool is_standby_replay(mds_gid_t who) const
307 {
308 return filesystems.at(mds_roles.at(who))->is_standby_replay(who);
309 }
310
311 mds_gid_t get_standby_replay(mds_gid_t who) const
312 {
313 return filesystems.at(mds_roles.at(who))->get_standby_replay(who);
314 }
315
7c673cae
FG
316 /**
317 * A daemon has told us it's compat, and it's too new
318 * for the one we had previously. Impose the new one
319 * on all filesystems.
320 */
9f95a23c 321 void update_compat(const CompatSet &c);
7c673cae 322
11fdf7f2 323 Filesystem::const_ref get_legacy_filesystem()
7c673cae
FG
324 {
325 if (legacy_client_fscid == FS_CLUSTER_ID_NONE) {
326 return nullptr;
327 } else {
328 return filesystems.at(legacy_client_fscid);
329 }
330 }
331
332 /**
333 * A daemon has informed us of its offload targets
334 */
11fdf7f2 335 void update_export_targets(mds_gid_t who, const std::set<mds_rank_t> &targets)
7c673cae
FG
336 {
337 auto fscid = mds_roles.at(who);
11fdf7f2 338 modify_filesystem(fscid, [who, &targets](auto&& fs) {
7c673cae
FG
339 fs->mds_map.mds_info.at(who).export_targets = targets;
340 });
341 }
342
343 epoch_t get_epoch() const { return epoch; }
344 void inc_epoch() { epoch++; }
345
6d8e3169
FG
346 version_t get_struct_version() const { return struct_version; }
347 bool is_struct_old() const {
348 return struct_version < STRUCT_VERSION_TRIM_TO;
349 }
350
7c673cae
FG
351 size_t filesystem_count() const {return filesystems.size();}
352 bool filesystem_exists(fs_cluster_id_t fscid) const {return filesystems.count(fscid) > 0;}
11fdf7f2
TL
353 Filesystem::const_ref get_filesystem(fs_cluster_id_t fscid) const {return std::const_pointer_cast<const Filesystem>(filesystems.at(fscid));}
354 Filesystem::ref get_filesystem(fs_cluster_id_t fscid) {return filesystems.at(fscid);}
355 Filesystem::const_ref get_filesystem(void) const {return std::const_pointer_cast<const Filesystem>(filesystems.begin()->second);}
9f95a23c
TL
356 Filesystem::const_ref get_filesystem(std::string_view name) const;
357
358 std::vector<Filesystem::const_ref> get_filesystems(void) const;
7c673cae
FG
359
360 int parse_filesystem(
11fdf7f2
TL
361 std::string_view ns_str,
362 Filesystem::const_ref *result
7c673cae
FG
363 ) const;
364
365 int parse_role(
11fdf7f2 366 std::string_view role_str,
7c673cae
FG
367 mds_role_t *role,
368 std::ostream &ss) const;
369
370 /**
371 * Return true if this pool is in use by any of the filesystems
372 */
9f95a23c 373 bool pool_in_use(int64_t poolid) const;
7c673cae 374
9f95a23c 375 const mds_info_t* find_replacement_for(mds_role_t role) const;
7c673cae
FG
376
377 void get_health(list<pair<health_status_t,std::string> >& summary,
378 list<pair<health_status_t,std::string> > *detail) const;
379
224ce89b
WB
380 void get_health_checks(health_check_map_t *checks) const;
381
7c673cae
FG
382 bool check_health(void);
383
384 /**
385 * Assert that the FSMap, Filesystem, MDSMap, mds_info_t relations are
386 * all self-consistent.
387 */
388 void sanity() const;
389
390 void encode(bufferlist& bl, uint64_t features) const;
11fdf7f2 391 void decode(bufferlist::const_iterator& p);
7c673cae 392 void decode(bufferlist& bl) {
11fdf7f2 393 auto p = bl.cbegin();
7c673cae
FG
394 decode(p);
395 }
11fdf7f2 396 void sanitize(const std::function<bool(int64_t pool)>& pool_exists);
7c673cae
FG
397
398 void print(ostream& out) const;
399 void print_summary(Formatter *f, ostream *out) const;
400
401 void dump(Formatter *f) const;
9f95a23c
TL
402 static void generate_test_instances(std::list<FSMap*>& ls);
403
404protected:
405 epoch_t epoch = 0;
406 uint64_t next_filesystem_id = FS_CLUSTER_ID_ANONYMOUS + 1;
407 fs_cluster_id_t legacy_client_fscid = FS_CLUSTER_ID_NONE;
408 CompatSet compat;
409 bool enable_multiple = false;
410 bool ever_enabled_multiple = false; // < the cluster had multiple MDSes enabled once
411
412 std::map<fs_cluster_id_t, Filesystem::ref> filesystems;
413
414 // Remember which Filesystem an MDS daemon's info is stored in
415 // (or in standby_daemons for FS_CLUSTER_ID_NONE)
416 std::map<mds_gid_t, fs_cluster_id_t> mds_roles;
417
418 // For MDS daemons not yet assigned to a Filesystem
419 std::map<mds_gid_t, mds_info_t> standby_daemons;
420 std::map<mds_gid_t, epoch_t> standby_epochs;
6d8e3169
FG
421
422private:
423 epoch_t struct_version = 0;
7c673cae
FG
424};
425WRITE_CLASS_ENCODER_FEATURES(FSMap)
426
427inline ostream& operator<<(ostream& out, const FSMap& m) {
428 m.print_summary(NULL, &out);
429 return out;
430}
431
432#endif