]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | #ifndef CEPH_FSMAP_H | |
17 | #define CEPH_FSMAP_H | |
18 | ||
94b18763 | 19 | #include <map> |
11fdf7f2 | 20 | #include <memory> |
94b18763 FG |
21 | #include <set> |
22 | #include <string> | |
11fdf7f2 | 23 | #include <string_view> |
94b18763 | 24 | |
7c673cae FG |
25 | #include <errno.h> |
26 | ||
27 | #include "include/types.h" | |
28 | #include "common/Clock.h" | |
7c673cae FG |
29 | #include "mds/MDSMap.h" |
30 | ||
7c673cae FG |
31 | #include "include/CompatSet.h" |
32 | #include "include/ceph_features.h" | |
9f95a23c | 33 | #include "include/common_fwd.h" |
7c673cae FG |
34 | #include "common/Formatter.h" |
35 | #include "mds/mdstypes.h" | |
36 | ||
224ce89b | 37 | class health_check_map_t; |
7c673cae | 38 | |
7c673cae FG |
39 | /** |
40 | * The MDSMap and any additional fields describing a particular | |
41 | * filesystem (a unique fs_cluster_id_t). | |
42 | */ | |
43 | class Filesystem | |
44 | { | |
1adf2230 | 45 | public: |
11fdf7f2 TL |
46 | using ref = std::shared_ptr<Filesystem>; |
47 | using const_ref = std::shared_ptr<Filesystem const>; | |
48 | ||
49 | template<typename... Args> | |
50 | static ref create(Args&&... args) | |
51 | { | |
52 | return std::make_shared<Filesystem>(std::forward<Args>(args)...); | |
53 | } | |
54 | ||
7c673cae | 55 | void encode(bufferlist& bl, uint64_t features) const; |
11fdf7f2 | 56 | void decode(bufferlist::const_iterator& p); |
7c673cae | 57 | |
7c673cae FG |
58 | void dump(Formatter *f) const; |
59 | void print(std::ostream& out) const; | |
60 | ||
61 | /** | |
62 | * Return true if a daemon is already assigned as | |
63 | * STANDBY_REPLAY for the gid `who` | |
64 | */ | |
65 | bool has_standby_replay(mds_gid_t who) const | |
11fdf7f2 TL |
66 | { |
67 | return get_standby_replay(who) != MDS_GID_NONE; | |
68 | } | |
9f95a23c | 69 | mds_gid_t get_standby_replay(mds_gid_t who) const; |
11fdf7f2 TL |
70 | bool is_standby_replay(mds_gid_t who) const |
71 | { | |
72 | auto p = mds_map.mds_info.find(who); | |
73 | if (p != mds_map.mds_info.end() && | |
74 | p->second.state == MDSMap::STATE_STANDBY_REPLAY) { | |
75 | return true; | |
76 | } | |
7c673cae FG |
77 | return false; |
78 | } | |
1adf2230 AA |
79 | |
80 | fs_cluster_id_t fscid = FS_CLUSTER_ID_NONE; | |
81 | MDSMap mds_map; | |
7c673cae FG |
82 | }; |
83 | WRITE_CLASS_ENCODER_FEATURES(Filesystem) | |
84 | ||
85 | class FSMap { | |
7c673cae | 86 | public: |
7c673cae | 87 | friend class MDSMonitor; |
28e407b8 | 88 | friend class PaxosFSMap; |
9f95a23c | 89 | using mds_info_t = MDSMap::mds_info_t; |
7c673cae | 90 | |
6d8e3169 FG |
91 | static const version_t STRUCT_VERSION = 7; |
92 | static const version_t STRUCT_VERSION_TRIM_TO = 7; | |
93 | ||
1adf2230 | 94 | FSMap() : compat(MDSMap::get_compat_set_default()) {} |
7c673cae FG |
95 | |
96 | FSMap(const FSMap &rhs) | |
97 | : | |
98 | epoch(rhs.epoch), | |
99 | next_filesystem_id(rhs.next_filesystem_id), | |
100 | legacy_client_fscid(rhs.legacy_client_fscid), | |
101 | compat(rhs.compat), | |
102 | enable_multiple(rhs.enable_multiple), | |
103 | ever_enabled_multiple(rhs.ever_enabled_multiple), | |
104 | mds_roles(rhs.mds_roles), | |
105 | standby_daemons(rhs.standby_daemons), | |
6d8e3169 FG |
106 | standby_epochs(rhs.standby_epochs), |
107 | struct_version(rhs.struct_version) | |
7c673cae | 108 | { |
b32b8144 | 109 | filesystems.clear(); |
7c673cae FG |
110 | for (const auto &i : rhs.filesystems) { |
111 | const auto &fs = i.second; | |
112 | filesystems[fs->fscid] = std::make_shared<Filesystem>(*fs); | |
113 | } | |
114 | } | |
115 | ||
9f95a23c | 116 | FSMap &operator=(const FSMap &rhs); |
7c673cae FG |
117 | |
118 | const CompatSet &get_compat() const {return compat;} | |
119 | ||
120 | void set_enable_multiple(const bool v) | |
121 | { | |
122 | enable_multiple = v; | |
123 | if (true == v) { | |
124 | ever_enabled_multiple = true; | |
125 | } | |
126 | } | |
127 | ||
128 | bool get_enable_multiple() const | |
129 | { | |
130 | return enable_multiple; | |
131 | } | |
132 | ||
133 | void set_legacy_client_fscid(fs_cluster_id_t fscid) | |
134 | { | |
11fdf7f2 | 135 | ceph_assert(fscid == FS_CLUSTER_ID_NONE || filesystems.count(fscid)); |
7c673cae FG |
136 | legacy_client_fscid = fscid; |
137 | } | |
138 | ||
139 | fs_cluster_id_t get_legacy_client_fscid() const | |
140 | { | |
141 | return legacy_client_fscid; | |
142 | } | |
143 | ||
11fdf7f2 TL |
144 | size_t get_num_standby() const { |
145 | return standby_daemons.size(); | |
146 | } | |
147 | ||
9f95a23c | 148 | bool is_any_degraded() const; |
11fdf7f2 | 149 | |
7c673cae FG |
150 | /** |
151 | * Get state of all daemons (for all filesystems, including all standbys) | |
152 | */ | |
9f95a23c | 153 | std::map<mds_gid_t, mds_info_t> get_mds_info() const; |
7c673cae | 154 | |
9f95a23c | 155 | const mds_info_t* get_available_standby(fs_cluster_id_t fscid) const; |
11fdf7f2 | 156 | |
7c673cae FG |
157 | /** |
158 | * Resolve daemon name to GID | |
159 | */ | |
9f95a23c | 160 | mds_gid_t find_mds_gid_by_name(std::string_view s) const; |
7c673cae FG |
161 | |
162 | /** | |
163 | * Resolve daemon name to status | |
164 | */ | |
9f95a23c | 165 | const mds_info_t* find_by_name(std::string_view name) const; |
7c673cae FG |
166 | |
167 | /** | |
168 | * Does a daemon exist with this GID? | |
169 | */ | |
170 | bool gid_exists(mds_gid_t gid) const | |
171 | { | |
172 | return mds_roles.count(gid) > 0; | |
173 | } | |
174 | ||
175 | /** | |
176 | * Does a daemon with this GID exist, *and* have an MDS rank assigned? | |
177 | */ | |
178 | bool gid_has_rank(mds_gid_t gid) const | |
179 | { | |
180 | return gid_exists(gid) && mds_roles.at(gid) != FS_CLUSTER_ID_NONE; | |
181 | } | |
182 | ||
9f95a23c TL |
183 | fs_cluster_id_t gid_fscid(mds_gid_t gid) const |
184 | { | |
185 | return mds_roles.at(gid); | |
186 | } | |
187 | ||
7c673cae FG |
188 | /** |
189 | * Insert a new MDS daemon, as a standby | |
190 | */ | |
9f95a23c | 191 | void insert(const mds_info_t& new_info); |
7c673cae FG |
192 | |
193 | /** | |
194 | * Assign an MDS cluster standby replay rank to a standby daemon | |
195 | */ | |
196 | void assign_standby_replay( | |
197 | const mds_gid_t standby_gid, | |
198 | const fs_cluster_id_t leader_ns, | |
199 | const mds_rank_t leader_rank); | |
200 | ||
201 | /** | |
202 | * Assign an MDS cluster rank to a standby daemon | |
203 | */ | |
204 | void promote( | |
205 | mds_gid_t standby_gid, | |
11fdf7f2 | 206 | Filesystem& filesystem, |
7c673cae FG |
207 | mds_rank_t assigned_rank); |
208 | ||
209 | /** | |
210 | * A daemon reports that it is STATE_STOPPED: remove it, | |
211 | * and the rank it held. | |
212 | * | |
213 | * @returns a list of any additional GIDs that were removed from the map | |
214 | * as a side effect (like standby replays) | |
215 | */ | |
9f95a23c | 216 | std::vector<mds_gid_t> stop(mds_gid_t who); |
7c673cae FG |
217 | |
218 | /** | |
219 | * The rank held by 'who', if any, is to be relinquished, and | |
220 | * the state for the daemon GID is to be forgotten. | |
221 | */ | |
222 | void erase(mds_gid_t who, epoch_t blacklist_epoch); | |
223 | ||
224 | /** | |
225 | * Update to indicate that the rank held by 'who' is damaged | |
226 | */ | |
227 | void damaged(mds_gid_t who, epoch_t blacklist_epoch); | |
228 | ||
229 | /** | |
230 | * Update to indicate that the rank `rank` is to be removed | |
231 | * from the damaged list of the filesystem `fscid` | |
232 | */ | |
233 | bool undamaged(const fs_cluster_id_t fscid, const mds_rank_t rank); | |
234 | ||
235 | /** | |
236 | * Initialize a Filesystem and assign a fscid. Update legacy_client_fscid | |
237 | * to point to the new filesystem if it's the only one. | |
238 | * | |
239 | * Caller must already have validated all arguments vs. the existing | |
240 | * FSMap and OSDMap contents. | |
241 | */ | |
11fdf7f2 TL |
242 | Filesystem::ref create_filesystem( |
243 | std::string_view name, int64_t metadata_pool, | |
244 | int64_t data_pool, uint64_t features); | |
7c673cae FG |
245 | |
246 | /** | |
247 | * Remove the filesystem (it must exist). Caller should already | |
248 | * have failed out any MDSs that were assigned to the filesystem. | |
249 | */ | |
9f95a23c | 250 | void erase_filesystem(fs_cluster_id_t fscid); |
7c673cae FG |
251 | |
252 | /** | |
253 | * Reset all the state information (not configuration information) | |
254 | * in a particular filesystem. Caller must have verified that | |
255 | * the filesystem already exists. | |
256 | */ | |
257 | void reset_filesystem(fs_cluster_id_t fscid); | |
258 | ||
259 | /** | |
260 | * Mutator helper for Filesystem objects: expose a non-const | |
261 | * Filesystem pointer to `fn` and update epochs appropriately. | |
262 | */ | |
11fdf7f2 TL |
263 | template<typename T> |
264 | void modify_filesystem(fs_cluster_id_t fscid, T&& fn) | |
7c673cae | 265 | { |
11fdf7f2 | 266 | auto& fs = filesystems.at(fscid); |
7c673cae FG |
267 | fn(fs); |
268 | fs->mds_map.epoch = epoch; | |
269 | } | |
270 | ||
271 | /** | |
272 | * Apply a mutation to the mds_info_t structure for a particular | |
273 | * daemon (identified by GID), and make appropriate updates to epochs. | |
274 | */ | |
11fdf7f2 TL |
275 | template<typename T> |
276 | void modify_daemon(mds_gid_t who, T&& fn) | |
7c673cae | 277 | { |
11fdf7f2 TL |
278 | const auto& fscid = mds_roles.at(who); |
279 | if (fscid == FS_CLUSTER_ID_NONE) { | |
280 | auto& info = standby_daemons.at(who); | |
281 | fn(info); | |
282 | ceph_assert(info.state == MDSMap::STATE_STANDBY); | |
7c673cae FG |
283 | standby_epochs[who] = epoch; |
284 | } else { | |
11fdf7f2 TL |
285 | auto& fs = filesystems.at(fscid); |
286 | auto& info = fs->mds_map.mds_info.at(who); | |
287 | fn(info); | |
7c673cae FG |
288 | fs->mds_map.epoch = epoch; |
289 | } | |
290 | } | |
291 | ||
292 | /** | |
293 | * Given that gid exists in a filesystem or as a standby, return | |
294 | * a reference to its info. | |
295 | */ | |
9f95a23c | 296 | const mds_info_t& get_info_gid(mds_gid_t gid) const |
7c673cae FG |
297 | { |
298 | auto fscid = mds_roles.at(gid); | |
299 | if (fscid == FS_CLUSTER_ID_NONE) { | |
300 | return standby_daemons.at(gid); | |
301 | } else { | |
302 | return filesystems.at(fscid)->mds_map.mds_info.at(gid); | |
303 | } | |
304 | } | |
305 | ||
11fdf7f2 TL |
306 | bool is_standby_replay(mds_gid_t who) const |
307 | { | |
308 | return filesystems.at(mds_roles.at(who))->is_standby_replay(who); | |
309 | } | |
310 | ||
311 | mds_gid_t get_standby_replay(mds_gid_t who) const | |
312 | { | |
313 | return filesystems.at(mds_roles.at(who))->get_standby_replay(who); | |
314 | } | |
315 | ||
7c673cae FG |
316 | /** |
317 | * A daemon has told us it's compat, and it's too new | |
318 | * for the one we had previously. Impose the new one | |
319 | * on all filesystems. | |
320 | */ | |
9f95a23c | 321 | void update_compat(const CompatSet &c); |
7c673cae | 322 | |
11fdf7f2 | 323 | Filesystem::const_ref get_legacy_filesystem() |
7c673cae FG |
324 | { |
325 | if (legacy_client_fscid == FS_CLUSTER_ID_NONE) { | |
326 | return nullptr; | |
327 | } else { | |
328 | return filesystems.at(legacy_client_fscid); | |
329 | } | |
330 | } | |
331 | ||
332 | /** | |
333 | * A daemon has informed us of its offload targets | |
334 | */ | |
11fdf7f2 | 335 | void update_export_targets(mds_gid_t who, const std::set<mds_rank_t> &targets) |
7c673cae FG |
336 | { |
337 | auto fscid = mds_roles.at(who); | |
11fdf7f2 | 338 | modify_filesystem(fscid, [who, &targets](auto&& fs) { |
7c673cae FG |
339 | fs->mds_map.mds_info.at(who).export_targets = targets; |
340 | }); | |
341 | } | |
342 | ||
343 | epoch_t get_epoch() const { return epoch; } | |
344 | void inc_epoch() { epoch++; } | |
345 | ||
6d8e3169 FG |
346 | version_t get_struct_version() const { return struct_version; } |
347 | bool is_struct_old() const { | |
348 | return struct_version < STRUCT_VERSION_TRIM_TO; | |
349 | } | |
350 | ||
7c673cae FG |
351 | size_t filesystem_count() const {return filesystems.size();} |
352 | bool filesystem_exists(fs_cluster_id_t fscid) const {return filesystems.count(fscid) > 0;} | |
11fdf7f2 TL |
353 | Filesystem::const_ref get_filesystem(fs_cluster_id_t fscid) const {return std::const_pointer_cast<const Filesystem>(filesystems.at(fscid));} |
354 | Filesystem::ref get_filesystem(fs_cluster_id_t fscid) {return filesystems.at(fscid);} | |
355 | Filesystem::const_ref get_filesystem(void) const {return std::const_pointer_cast<const Filesystem>(filesystems.begin()->second);} | |
9f95a23c TL |
356 | Filesystem::const_ref get_filesystem(std::string_view name) const; |
357 | ||
358 | std::vector<Filesystem::const_ref> get_filesystems(void) const; | |
7c673cae FG |
359 | |
360 | int parse_filesystem( | |
11fdf7f2 TL |
361 | std::string_view ns_str, |
362 | Filesystem::const_ref *result | |
7c673cae FG |
363 | ) const; |
364 | ||
365 | int parse_role( | |
11fdf7f2 | 366 | std::string_view role_str, |
7c673cae FG |
367 | mds_role_t *role, |
368 | std::ostream &ss) const; | |
369 | ||
370 | /** | |
371 | * Return true if this pool is in use by any of the filesystems | |
372 | */ | |
9f95a23c | 373 | bool pool_in_use(int64_t poolid) const; |
7c673cae | 374 | |
9f95a23c | 375 | const mds_info_t* find_replacement_for(mds_role_t role) const; |
7c673cae FG |
376 | |
377 | void get_health(list<pair<health_status_t,std::string> >& summary, | |
378 | list<pair<health_status_t,std::string> > *detail) const; | |
379 | ||
224ce89b WB |
380 | void get_health_checks(health_check_map_t *checks) const; |
381 | ||
7c673cae FG |
382 | bool check_health(void); |
383 | ||
384 | /** | |
385 | * Assert that the FSMap, Filesystem, MDSMap, mds_info_t relations are | |
386 | * all self-consistent. | |
387 | */ | |
388 | void sanity() const; | |
389 | ||
390 | void encode(bufferlist& bl, uint64_t features) const; | |
11fdf7f2 | 391 | void decode(bufferlist::const_iterator& p); |
7c673cae | 392 | void decode(bufferlist& bl) { |
11fdf7f2 | 393 | auto p = bl.cbegin(); |
7c673cae FG |
394 | decode(p); |
395 | } | |
11fdf7f2 | 396 | void sanitize(const std::function<bool(int64_t pool)>& pool_exists); |
7c673cae FG |
397 | |
398 | void print(ostream& out) const; | |
399 | void print_summary(Formatter *f, ostream *out) const; | |
400 | ||
401 | void dump(Formatter *f) const; | |
9f95a23c TL |
402 | static void generate_test_instances(std::list<FSMap*>& ls); |
403 | ||
404 | protected: | |
405 | epoch_t epoch = 0; | |
406 | uint64_t next_filesystem_id = FS_CLUSTER_ID_ANONYMOUS + 1; | |
407 | fs_cluster_id_t legacy_client_fscid = FS_CLUSTER_ID_NONE; | |
408 | CompatSet compat; | |
409 | bool enable_multiple = false; | |
410 | bool ever_enabled_multiple = false; // < the cluster had multiple MDSes enabled once | |
411 | ||
412 | std::map<fs_cluster_id_t, Filesystem::ref> filesystems; | |
413 | ||
414 | // Remember which Filesystem an MDS daemon's info is stored in | |
415 | // (or in standby_daemons for FS_CLUSTER_ID_NONE) | |
416 | std::map<mds_gid_t, fs_cluster_id_t> mds_roles; | |
417 | ||
418 | // For MDS daemons not yet assigned to a Filesystem | |
419 | std::map<mds_gid_t, mds_info_t> standby_daemons; | |
420 | std::map<mds_gid_t, epoch_t> standby_epochs; | |
6d8e3169 FG |
421 | |
422 | private: | |
423 | epoch_t struct_version = 0; | |
7c673cae FG |
424 | }; |
425 | WRITE_CLASS_ENCODER_FEATURES(FSMap) | |
426 | ||
427 | inline ostream& operator<<(ostream& out, const FSMap& m) { | |
428 | m.print_summary(NULL, &out); | |
429 | return out; | |
430 | } | |
431 | ||
432 | #endif |