]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/FSMap.h
import ceph 16.2.7
[ceph.git] / ceph / src / mds / FSMap.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16#ifndef CEPH_FSMAP_H
17#define CEPH_FSMAP_H
18
94b18763 19#include <map>
11fdf7f2 20#include <memory>
94b18763
FG
21#include <set>
22#include <string>
11fdf7f2 23#include <string_view>
94b18763 24
7c673cae
FG
25#include <errno.h>
26
27#include "include/types.h"
28#include "common/Clock.h"
7c673cae
FG
29#include "mds/MDSMap.h"
30
7c673cae
FG
31#include "include/CompatSet.h"
32#include "include/ceph_features.h"
9f95a23c 33#include "include/common_fwd.h"
7c673cae
FG
34#include "common/Formatter.h"
35#include "mds/mdstypes.h"
36
224ce89b 37class health_check_map_t;
7c673cae 38
f67539c2
TL
39struct ClusterInfo {
40 ClusterInfo() = default;
41 ClusterInfo(std::string_view client_name, std::string_view cluster_name,
42 std::string_view fs_name)
43 : client_name(client_name),
44 cluster_name(cluster_name),
45 fs_name(fs_name) {
46 }
47
48 std::string client_name;
49 std::string cluster_name;
50 std::string fs_name;
51
52 bool operator==(const ClusterInfo &cluster_info) const {
53 return client_name == cluster_info.client_name &&
54 cluster_name == cluster_info.cluster_name &&
55 fs_name == cluster_info.fs_name;
56 }
57
58 void dump(ceph::Formatter *f) const;
59 void print(std::ostream& out) const;
60
61 void encode(ceph::buffer::list &bl) const;
62 void decode(ceph::buffer::list::const_iterator &iter);
63};
64
65inline std::ostream& operator<<(std::ostream& out, const ClusterInfo &cluster_info) {
66 out << "{client_name=" << cluster_info.client_name << ", cluster_name="
67 << cluster_info.cluster_name << ", fs_name=" << cluster_info.fs_name << "}";
68 return out;
69}
70
71struct Peer {
72 Peer() = default;
73 Peer(std::string_view uuid)
74 : uuid(uuid) {
75 }
76 Peer(std::string_view uuid,
77 const ClusterInfo &remote)
78 : uuid(uuid),
79 remote(remote) {
80 }
81
82 std::string uuid;
83 ClusterInfo remote;
84
85 bool operator==(const Peer &rhs) const {
86 return uuid == rhs.uuid;
87 }
88
89 bool operator<(const Peer &rhs) const {
90 return uuid < rhs.uuid;
91 }
92
93 void dump(ceph::Formatter *f) const;
94 void print(std::ostream& out) const;
95
96 void encode(ceph::buffer::list &bl) const;
97 void decode(ceph::buffer::list::const_iterator &iter);
98};
99
100typedef std::set<Peer> Peers;
101inline std::ostream& operator<<(std::ostream& out, const Peer &peer) {
102 out << "{uuid=" << peer.uuid << ", remote_cluster=" << peer.remote << "}";
103 return out;
104}
105
106struct MirrorInfo {
107 MirrorInfo() = default;
108
109 bool is_mirrored() const {
110 return mirrored;
111 }
112 void enable_mirroring() {
113 mirrored = true;
114 }
115 void disable_mirroring() {
116 peers.clear();
117 mirrored = false;
118 }
119
120 // uuid variant check
121 bool has_peer(std::string_view uuid) const {
122 return peers.find(Peer(uuid)) != peers.end();
123 }
124 // client_name/cluster_name/fs_name variant check
125 bool has_peer(std::string_view client_name,
126 std::string_view cluster_name,
127 std::string_view fs_name) const {
128 ClusterInfo cluster_info(client_name, cluster_name, fs_name);
129 for (auto &peer : peers) {
130 if (peer.remote == cluster_info) {
131 return true;
132 }
133 }
134 return false;
135 }
136 bool has_peers() const {
137 return !peers.empty();
138 }
139
140 void peer_add(std::string_view uuid,
141 std::string_view client_name,
142 std::string_view cluster_name,
143 std::string_view fs_name) {
144 peers.emplace(Peer(uuid, ClusterInfo(client_name, cluster_name, fs_name)));
145 }
146 void peer_remove(std::string_view uuid) {
147 peers.erase(uuid);
148 }
149
150 bool mirrored = false;
151 Peers peers;
152
153 void dump(ceph::Formatter *f) const;
154 void print(std::ostream& out) const;
155
156 void encode(ceph::buffer::list &bl) const;
157 void decode(ceph::buffer::list::const_iterator &iter);
158};
159
160inline std::ostream& operator<<(std::ostream& out, const MirrorInfo &mirror_info) {
161 out << "{peers=" << mirror_info.peers << "}";
162 return out;
163}
164
165WRITE_CLASS_ENCODER(ClusterInfo)
166WRITE_CLASS_ENCODER(Peer)
167WRITE_CLASS_ENCODER(MirrorInfo)
168
7c673cae
FG
169/**
170 * The MDSMap and any additional fields describing a particular
171 * filesystem (a unique fs_cluster_id_t).
172 */
173class Filesystem
174{
1adf2230 175public:
11fdf7f2
TL
176 using ref = std::shared_ptr<Filesystem>;
177 using const_ref = std::shared_ptr<Filesystem const>;
178
179 template<typename... Args>
180 static ref create(Args&&... args)
181 {
182 return std::make_shared<Filesystem>(std::forward<Args>(args)...);
183 }
184
f67539c2
TL
185 void encode(ceph::buffer::list& bl, uint64_t features) const;
186 void decode(ceph::buffer::list::const_iterator& p);
7c673cae 187
f67539c2 188 void dump(ceph::Formatter *f) const;
7c673cae
FG
189 void print(std::ostream& out) const;
190
522d829b 191 bool is_upgradeable() const {
a4b75251
TL
192 return (mds_map.allows_standby_replay() && mds_map.get_num_in_mds() == 0)
193 || (!mds_map.allows_standby_replay() && mds_map.get_num_in_mds() <= 1);
522d829b
TL
194 }
195
7c673cae
FG
196 /**
197 * Return true if a daemon is already assigned as
198 * STANDBY_REPLAY for the gid `who`
199 */
200 bool has_standby_replay(mds_gid_t who) const
11fdf7f2
TL
201 {
202 return get_standby_replay(who) != MDS_GID_NONE;
203 }
9f95a23c 204 mds_gid_t get_standby_replay(mds_gid_t who) const;
11fdf7f2
TL
205 bool is_standby_replay(mds_gid_t who) const
206 {
207 auto p = mds_map.mds_info.find(who);
208 if (p != mds_map.mds_info.end() &&
209 p->second.state == MDSMap::STATE_STANDBY_REPLAY) {
210 return true;
211 }
7c673cae
FG
212 return false;
213 }
1adf2230
AA
214
215 fs_cluster_id_t fscid = FS_CLUSTER_ID_NONE;
216 MDSMap mds_map;
f67539c2 217 MirrorInfo mirror_info;
7c673cae
FG
218};
219WRITE_CLASS_ENCODER_FEATURES(Filesystem)
220
221class FSMap {
7c673cae 222public:
7c673cae 223 friend class MDSMonitor;
28e407b8 224 friend class PaxosFSMap;
9f95a23c 225 using mds_info_t = MDSMap::mds_info_t;
7c673cae 226
522d829b
TL
227 static const version_t STRUCT_VERSION = 7;
228 static const version_t STRUCT_VERSION_TRIM_TO = 7;
229
230 FSMap() : default_compat(MDSMap::get_compat_set_default()) {}
7c673cae
FG
231
232 FSMap(const FSMap &rhs)
233 :
234 epoch(rhs.epoch),
235 next_filesystem_id(rhs.next_filesystem_id),
236 legacy_client_fscid(rhs.legacy_client_fscid),
522d829b 237 default_compat(rhs.default_compat),
7c673cae
FG
238 enable_multiple(rhs.enable_multiple),
239 ever_enabled_multiple(rhs.ever_enabled_multiple),
240 mds_roles(rhs.mds_roles),
241 standby_daemons(rhs.standby_daemons),
522d829b
TL
242 standby_epochs(rhs.standby_epochs),
243 struct_version(rhs.struct_version)
7c673cae 244 {
b32b8144 245 filesystems.clear();
7c673cae
FG
246 for (const auto &i : rhs.filesystems) {
247 const auto &fs = i.second;
248 filesystems[fs->fscid] = std::make_shared<Filesystem>(*fs);
249 }
250 }
251
9f95a23c 252 FSMap &operator=(const FSMap &rhs);
7c673cae 253
522d829b 254 const CompatSet &get_default_compat() const {return default_compat;}
7c673cae 255
f67539c2
TL
256 void filter(const std::vector<string>& allowed)
257 {
258 if (allowed.empty()) {
259 return;
260 }
261
262 for (auto &f : filesystems) {
263 string_view fs_name = f.second->mds_map.get_fs_name();
264 if (std::find(allowed.begin(), allowed.end(), fs_name) == allowed.end()) {
265 filesystems.erase(f.first);
266 }
267 }
268
269 for (auto r : mds_roles) {
270 string_view fs_name = fs_name_from_gid(r.first);
271 if (std::find(allowed.begin(), allowed.end(), fs_name) == allowed.end()) {
272 mds_roles.erase(r.first);
273 }
274 }
275 }
276
7c673cae
FG
277 void set_enable_multiple(const bool v)
278 {
279 enable_multiple = v;
280 if (true == v) {
281 ever_enabled_multiple = true;
282 }
283 }
284
285 bool get_enable_multiple() const
286 {
287 return enable_multiple;
288 }
289
290 void set_legacy_client_fscid(fs_cluster_id_t fscid)
291 {
11fdf7f2 292 ceph_assert(fscid == FS_CLUSTER_ID_NONE || filesystems.count(fscid));
7c673cae
FG
293 legacy_client_fscid = fscid;
294 }
295
296 fs_cluster_id_t get_legacy_client_fscid() const
297 {
298 return legacy_client_fscid;
299 }
300
11fdf7f2
TL
301 size_t get_num_standby() const {
302 return standby_daemons.size();
303 }
304
9f95a23c 305 bool is_any_degraded() const;
11fdf7f2 306
7c673cae
FG
307 /**
308 * Get state of all daemons (for all filesystems, including all standbys)
309 */
9f95a23c 310 std::map<mds_gid_t, mds_info_t> get_mds_info() const;
7c673cae 311
522d829b 312 const mds_info_t* get_available_standby(const Filesystem& fs) const;
11fdf7f2 313
7c673cae
FG
314 /**
315 * Resolve daemon name to GID
316 */
9f95a23c 317 mds_gid_t find_mds_gid_by_name(std::string_view s) const;
7c673cae
FG
318
319 /**
320 * Resolve daemon name to status
321 */
9f95a23c 322 const mds_info_t* find_by_name(std::string_view name) const;
7c673cae
FG
323
324 /**
325 * Does a daemon exist with this GID?
326 */
f67539c2
TL
327 bool gid_exists(mds_gid_t gid,
328 const std::vector<string>& in = {}) const
7c673cae 329 {
f67539c2
TL
330 try {
331 string_view m = fs_name_from_gid(gid);
332 return in.empty() || std::find(in.begin(), in.end(), m) != in.end();
333 } catch (const std::out_of_range&) {
334 return false;
335 }
7c673cae
FG
336 }
337
338 /**
339 * Does a daemon with this GID exist, *and* have an MDS rank assigned?
340 */
341 bool gid_has_rank(mds_gid_t gid) const
342 {
343 return gid_exists(gid) && mds_roles.at(gid) != FS_CLUSTER_ID_NONE;
344 }
345
f67539c2
TL
346 /**
347 * Which filesystem owns this GID?
348 */
349 fs_cluster_id_t fscid_from_gid(mds_gid_t gid) const {
350 if (!gid_exists(gid)) {
351 return FS_CLUSTER_ID_NONE;
352 }
9f95a23c
TL
353 return mds_roles.at(gid);
354 }
355
7c673cae
FG
356 /**
357 * Insert a new MDS daemon, as a standby
358 */
f67539c2 359 void insert(const MDSMap::mds_info_t &new_info);
7c673cae
FG
360
361 /**
362 * Assign an MDS cluster standby replay rank to a standby daemon
363 */
364 void assign_standby_replay(
365 const mds_gid_t standby_gid,
366 const fs_cluster_id_t leader_ns,
367 const mds_rank_t leader_rank);
368
369 /**
370 * Assign an MDS cluster rank to a standby daemon
371 */
372 void promote(
373 mds_gid_t standby_gid,
11fdf7f2 374 Filesystem& filesystem,
7c673cae
FG
375 mds_rank_t assigned_rank);
376
377 /**
378 * A daemon reports that it is STATE_STOPPED: remove it,
379 * and the rank it held.
380 *
381 * @returns a list of any additional GIDs that were removed from the map
382 * as a side effect (like standby replays)
383 */
9f95a23c 384 std::vector<mds_gid_t> stop(mds_gid_t who);
7c673cae
FG
385
386 /**
387 * The rank held by 'who', if any, is to be relinquished, and
388 * the state for the daemon GID is to be forgotten.
389 */
f67539c2 390 void erase(mds_gid_t who, epoch_t blocklist_epoch);
7c673cae
FG
391
392 /**
393 * Update to indicate that the rank held by 'who' is damaged
394 */
f67539c2 395 void damaged(mds_gid_t who, epoch_t blocklist_epoch);
7c673cae
FG
396
397 /**
398 * Update to indicate that the rank `rank` is to be removed
399 * from the damaged list of the filesystem `fscid`
400 */
401 bool undamaged(const fs_cluster_id_t fscid, const mds_rank_t rank);
402
403 /**
404 * Initialize a Filesystem and assign a fscid. Update legacy_client_fscid
405 * to point to the new filesystem if it's the only one.
406 *
407 * Caller must already have validated all arguments vs. the existing
408 * FSMap and OSDMap contents.
409 */
11fdf7f2
TL
410 Filesystem::ref create_filesystem(
411 std::string_view name, int64_t metadata_pool,
522d829b
TL
412 int64_t data_pool, uint64_t features,
413 fs_cluster_id_t fscid);
7c673cae
FG
414
415 /**
416 * Remove the filesystem (it must exist). Caller should already
417 * have failed out any MDSs that were assigned to the filesystem.
418 */
9f95a23c 419 void erase_filesystem(fs_cluster_id_t fscid);
7c673cae
FG
420
421 /**
422 * Reset all the state information (not configuration information)
423 * in a particular filesystem. Caller must have verified that
424 * the filesystem already exists.
425 */
426 void reset_filesystem(fs_cluster_id_t fscid);
427
428 /**
429 * Mutator helper for Filesystem objects: expose a non-const
430 * Filesystem pointer to `fn` and update epochs appropriately.
431 */
11fdf7f2
TL
432 template<typename T>
433 void modify_filesystem(fs_cluster_id_t fscid, T&& fn)
7c673cae 434 {
11fdf7f2 435 auto& fs = filesystems.at(fscid);
7c673cae
FG
436 fn(fs);
437 fs->mds_map.epoch = epoch;
438 }
439
440 /**
441 * Apply a mutation to the mds_info_t structure for a particular
442 * daemon (identified by GID), and make appropriate updates to epochs.
443 */
11fdf7f2
TL
444 template<typename T>
445 void modify_daemon(mds_gid_t who, T&& fn)
7c673cae 446 {
11fdf7f2
TL
447 const auto& fscid = mds_roles.at(who);
448 if (fscid == FS_CLUSTER_ID_NONE) {
449 auto& info = standby_daemons.at(who);
450 fn(info);
451 ceph_assert(info.state == MDSMap::STATE_STANDBY);
7c673cae
FG
452 standby_epochs[who] = epoch;
453 } else {
11fdf7f2
TL
454 auto& fs = filesystems.at(fscid);
455 auto& info = fs->mds_map.mds_info.at(who);
456 fn(info);
7c673cae
FG
457 fs->mds_map.epoch = epoch;
458 }
459 }
460
461 /**
462 * Given that gid exists in a filesystem or as a standby, return
463 * a reference to its info.
464 */
9f95a23c 465 const mds_info_t& get_info_gid(mds_gid_t gid) const
7c673cae
FG
466 {
467 auto fscid = mds_roles.at(gid);
468 if (fscid == FS_CLUSTER_ID_NONE) {
469 return standby_daemons.at(gid);
470 } else {
471 return filesystems.at(fscid)->mds_map.mds_info.at(gid);
472 }
473 }
474
f67539c2
TL
475 std::string_view fs_name_from_gid(mds_gid_t gid) const
476 {
477 auto fscid = mds_roles.at(gid);
478 if (fscid == FS_CLUSTER_ID_NONE or !filesystem_exists(fscid)) {
479 return std::string_view();
480 } else {
481 return get_filesystem(fscid)->mds_map.get_fs_name();
482 }
483 }
484
11fdf7f2
TL
485 bool is_standby_replay(mds_gid_t who) const
486 {
487 return filesystems.at(mds_roles.at(who))->is_standby_replay(who);
488 }
489
490 mds_gid_t get_standby_replay(mds_gid_t who) const
491 {
492 return filesystems.at(mds_roles.at(who))->get_standby_replay(who);
493 }
494
11fdf7f2 495 Filesystem::const_ref get_legacy_filesystem()
7c673cae
FG
496 {
497 if (legacy_client_fscid == FS_CLUSTER_ID_NONE) {
498 return nullptr;
499 } else {
500 return filesystems.at(legacy_client_fscid);
501 }
502 }
503
504 /**
505 * A daemon has informed us of its offload targets
506 */
11fdf7f2 507 void update_export_targets(mds_gid_t who, const std::set<mds_rank_t> &targets)
7c673cae
FG
508 {
509 auto fscid = mds_roles.at(who);
11fdf7f2 510 modify_filesystem(fscid, [who, &targets](auto&& fs) {
7c673cae
FG
511 fs->mds_map.mds_info.at(who).export_targets = targets;
512 });
513 }
514
515 epoch_t get_epoch() const { return epoch; }
516 void inc_epoch() { epoch++; }
517
522d829b
TL
518 version_t get_struct_version() const { return struct_version; }
519 bool is_struct_old() const {
520 return struct_version < STRUCT_VERSION_TRIM_TO;
521 }
522
7c673cae
FG
523 size_t filesystem_count() const {return filesystems.size();}
524 bool filesystem_exists(fs_cluster_id_t fscid) const {return filesystems.count(fscid) > 0;}
11fdf7f2
TL
525 Filesystem::const_ref get_filesystem(fs_cluster_id_t fscid) const {return std::const_pointer_cast<const Filesystem>(filesystems.at(fscid));}
526 Filesystem::ref get_filesystem(fs_cluster_id_t fscid) {return filesystems.at(fscid);}
a4b75251
TL
527 Filesystem::ref get_filesystem(mds_gid_t gid) {
528 return filesystems.at(mds_roles.at(gid));
529 }
11fdf7f2 530 Filesystem::const_ref get_filesystem(void) const {return std::const_pointer_cast<const Filesystem>(filesystems.begin()->second);}
9f95a23c 531 Filesystem::const_ref get_filesystem(std::string_view name) const;
a4b75251
TL
532 Filesystem::const_ref get_filesystem(mds_gid_t gid) const {
533 return filesystems.at(mds_roles.at(gid));
534 }
9f95a23c
TL
535
536 std::vector<Filesystem::const_ref> get_filesystems(void) const;
7c673cae
FG
537
538 int parse_filesystem(
11fdf7f2
TL
539 std::string_view ns_str,
540 Filesystem::const_ref *result
7c673cae
FG
541 ) const;
542
f67539c2
TL
543 int parse_role(
544 std::string_view role_str,
545 mds_role_t *role,
546 std::ostream &ss,
547 const std::vector<string> &filter) const;
548
7c673cae 549 int parse_role(
11fdf7f2 550 std::string_view role_str,
7c673cae
FG
551 mds_role_t *role,
552 std::ostream &ss) const;
553
554 /**
555 * Return true if this pool is in use by any of the filesystems
556 */
9f95a23c 557 bool pool_in_use(int64_t poolid) const;
7c673cae 558
9f95a23c 559 const mds_info_t* find_replacement_for(mds_role_t role) const;
7c673cae 560
f67539c2
TL
561 void get_health(std::list<std::pair<health_status_t,std::string> >& summary,
562 std::list<std::pair<health_status_t,std::string> > *detail) const;
7c673cae 563
224ce89b
WB
564 void get_health_checks(health_check_map_t *checks) const;
565
7c673cae
FG
566 bool check_health(void);
567
568 /**
569 * Assert that the FSMap, Filesystem, MDSMap, mds_info_t relations are
570 * all self-consistent.
571 */
a4b75251 572 void sanity(bool pending=false) const;
7c673cae 573
f67539c2
TL
574 void encode(ceph::buffer::list& bl, uint64_t features) const;
575 void decode(ceph::buffer::list::const_iterator& p);
576 void decode(ceph::buffer::list& bl) {
11fdf7f2 577 auto p = bl.cbegin();
7c673cae
FG
578 decode(p);
579 }
11fdf7f2 580 void sanitize(const std::function<bool(int64_t pool)>& pool_exists);
7c673cae 581
f67539c2
TL
582 void print(std::ostream& out) const;
583 void print_summary(ceph::Formatter *f, std::ostream *out) const;
584 void print_daemon_summary(std::ostream& out) const;
585 void print_fs_summary(std::ostream& out) const;
7c673cae 586
f67539c2 587 void dump(ceph::Formatter *f) const;
9f95a23c
TL
588 static void generate_test_instances(std::list<FSMap*>& ls);
589
590protected:
591 epoch_t epoch = 0;
592 uint64_t next_filesystem_id = FS_CLUSTER_ID_ANONYMOUS + 1;
593 fs_cluster_id_t legacy_client_fscid = FS_CLUSTER_ID_NONE;
522d829b 594 CompatSet default_compat;
f67539c2
TL
595 bool enable_multiple = true;
596 bool ever_enabled_multiple = true; // < the cluster had multiple FS enabled once
9f95a23c
TL
597
598 std::map<fs_cluster_id_t, Filesystem::ref> filesystems;
599
600 // Remember which Filesystem an MDS daemon's info is stored in
601 // (or in standby_daemons for FS_CLUSTER_ID_NONE)
602 std::map<mds_gid_t, fs_cluster_id_t> mds_roles;
603
604 // For MDS daemons not yet assigned to a Filesystem
605 std::map<mds_gid_t, mds_info_t> standby_daemons;
606 std::map<mds_gid_t, epoch_t> standby_epochs;
522d829b
TL
607
608private:
609 epoch_t struct_version = 0;
7c673cae
FG
610};
611WRITE_CLASS_ENCODER_FEATURES(FSMap)
612
f67539c2 613inline std::ostream& operator<<(std::ostream& out, const FSMap& m) {
7c673cae
FG
614 m.print_summary(NULL, &out);
615 return out;
616}
617
618#endif