]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | #ifndef CEPH_FSMAP_H | |
17 | #define CEPH_FSMAP_H | |
18 | ||
94b18763 FG |
19 | #include <map> |
20 | #include <set> | |
21 | #include <string> | |
22 | #include <boost/utility/string_view.hpp> | |
23 | ||
7c673cae FG |
24 | #include <errno.h> |
25 | ||
26 | #include "include/types.h" | |
27 | #include "common/Clock.h" | |
28 | #include "msg/Message.h" | |
29 | #include "mds/MDSMap.h" | |
30 | ||
7c673cae FG |
31 | #include "common/config.h" |
32 | ||
33 | #include "include/CompatSet.h" | |
34 | #include "include/ceph_features.h" | |
35 | #include "common/Formatter.h" | |
36 | #include "mds/mdstypes.h" | |
37 | ||
38 | class CephContext; | |
224ce89b | 39 | class health_check_map_t; |
7c673cae | 40 | |
7c673cae FG |
41 | #define MDS_FS_NAME_DEFAULT "cephfs" |
42 | ||
43 | /** | |
44 | * The MDSMap and any additional fields describing a particular | |
45 | * filesystem (a unique fs_cluster_id_t). | |
46 | */ | |
47 | class Filesystem | |
48 | { | |
1adf2230 | 49 | public: |
7c673cae FG |
50 | void encode(bufferlist& bl, uint64_t features) const; |
51 | void decode(bufferlist::iterator& p); | |
52 | ||
7c673cae FG |
53 | void dump(Formatter *f) const; |
54 | void print(std::ostream& out) const; | |
55 | ||
56 | /** | |
57 | * Return true if a daemon is already assigned as | |
58 | * STANDBY_REPLAY for the gid `who` | |
59 | */ | |
60 | bool has_standby_replay(mds_gid_t who) const | |
61 | { | |
62 | for (const auto &i : mds_map.mds_info) { | |
63 | const auto &info = i.second; | |
64 | if (info.state == MDSMap::STATE_STANDBY_REPLAY | |
65 | && info.rank == mds_map.mds_info.at(who).rank) { | |
66 | return true; | |
67 | } | |
68 | } | |
69 | ||
70 | return false; | |
71 | } | |
1adf2230 AA |
72 | |
73 | fs_cluster_id_t fscid = FS_CLUSTER_ID_NONE; | |
74 | MDSMap mds_map; | |
7c673cae FG |
75 | }; |
76 | WRITE_CLASS_ENCODER_FEATURES(Filesystem) | |
77 | ||
78 | class FSMap { | |
79 | protected: | |
1adf2230 AA |
80 | epoch_t epoch = 0; |
81 | uint64_t next_filesystem_id = FS_CLUSTER_ID_ANONYMOUS + 1; | |
82 | fs_cluster_id_t legacy_client_fscid = FS_CLUSTER_ID_NONE; | |
7c673cae | 83 | CompatSet compat; |
1adf2230 AA |
84 | bool enable_multiple = false; |
85 | bool ever_enabled_multiple = false; // < the cluster had multiple MDSes enabled once | |
7c673cae FG |
86 | |
87 | std::map<fs_cluster_id_t, std::shared_ptr<Filesystem> > filesystems; | |
88 | ||
89 | // Remember which Filesystem an MDS daemon's info is stored in | |
90 | // (or in standby_daemons for FS_CLUSTER_ID_NONE) | |
91 | std::map<mds_gid_t, fs_cluster_id_t> mds_roles; | |
92 | ||
93 | // For MDS daemons not yet assigned to a Filesystem | |
94 | std::map<mds_gid_t, MDSMap::mds_info_t> standby_daemons; | |
95 | std::map<mds_gid_t, epoch_t> standby_epochs; | |
96 | ||
97 | public: | |
98 | ||
99 | friend class MDSMonitor; | |
28e407b8 | 100 | friend class PaxosFSMap; |
7c673cae | 101 | |
1adf2230 | 102 | FSMap() : compat(MDSMap::get_compat_set_default()) {} |
7c673cae FG |
103 | |
104 | FSMap(const FSMap &rhs) | |
105 | : | |
106 | epoch(rhs.epoch), | |
107 | next_filesystem_id(rhs.next_filesystem_id), | |
108 | legacy_client_fscid(rhs.legacy_client_fscid), | |
109 | compat(rhs.compat), | |
110 | enable_multiple(rhs.enable_multiple), | |
111 | ever_enabled_multiple(rhs.ever_enabled_multiple), | |
112 | mds_roles(rhs.mds_roles), | |
113 | standby_daemons(rhs.standby_daemons), | |
114 | standby_epochs(rhs.standby_epochs) | |
115 | { | |
b32b8144 | 116 | filesystems.clear(); |
7c673cae FG |
117 | for (const auto &i : rhs.filesystems) { |
118 | const auto &fs = i.second; | |
119 | filesystems[fs->fscid] = std::make_shared<Filesystem>(*fs); | |
120 | } | |
121 | } | |
122 | ||
123 | FSMap &operator=(const FSMap &rhs) | |
124 | { | |
125 | epoch = rhs.epoch; | |
126 | next_filesystem_id = rhs.next_filesystem_id; | |
127 | legacy_client_fscid = rhs.legacy_client_fscid; | |
128 | compat = rhs.compat; | |
129 | enable_multiple = rhs.enable_multiple; | |
130 | mds_roles = rhs.mds_roles; | |
131 | standby_daemons = rhs.standby_daemons; | |
132 | standby_epochs = rhs.standby_epochs; | |
133 | ||
b32b8144 | 134 | filesystems.clear(); |
7c673cae FG |
135 | for (const auto &i : rhs.filesystems) { |
136 | const auto &fs = i.second; | |
137 | filesystems[fs->fscid] = std::make_shared<Filesystem>(*fs); | |
138 | } | |
139 | ||
140 | return *this; | |
141 | } | |
142 | ||
143 | const CompatSet &get_compat() const {return compat;} | |
144 | ||
145 | void set_enable_multiple(const bool v) | |
146 | { | |
147 | enable_multiple = v; | |
148 | if (true == v) { | |
149 | ever_enabled_multiple = true; | |
150 | } | |
151 | } | |
152 | ||
153 | bool get_enable_multiple() const | |
154 | { | |
155 | return enable_multiple; | |
156 | } | |
157 | ||
158 | void set_legacy_client_fscid(fs_cluster_id_t fscid) | |
159 | { | |
160 | assert(fscid == FS_CLUSTER_ID_NONE || filesystems.count(fscid)); | |
161 | legacy_client_fscid = fscid; | |
162 | } | |
163 | ||
164 | fs_cluster_id_t get_legacy_client_fscid() const | |
165 | { | |
166 | return legacy_client_fscid; | |
167 | } | |
168 | ||
169 | /** | |
170 | * Get state of all daemons (for all filesystems, including all standbys) | |
171 | */ | |
172 | std::map<mds_gid_t, MDSMap::mds_info_t> get_mds_info() const | |
173 | { | |
174 | std::map<mds_gid_t, MDSMap::mds_info_t> result; | |
175 | for (const auto &i : standby_daemons) { | |
176 | result[i.first] = i.second; | |
177 | } | |
178 | ||
179 | for (const auto &i : filesystems) { | |
180 | const auto &fs_info = i.second->mds_map.get_mds_info(); | |
181 | for (const auto &j : fs_info) { | |
182 | result[j.first] = j.second; | |
183 | } | |
184 | } | |
185 | ||
186 | return result; | |
187 | } | |
188 | ||
189 | /** | |
190 | * Resolve daemon name to GID | |
191 | */ | |
94b18763 | 192 | mds_gid_t find_mds_gid_by_name(boost::string_view s) const |
7c673cae FG |
193 | { |
194 | const auto info = get_mds_info(); | |
195 | for (const auto &p : info) { | |
196 | if (p.second.name == s) { | |
197 | return p.first; | |
198 | } | |
199 | } | |
200 | return MDS_GID_NONE; | |
201 | } | |
202 | ||
203 | /** | |
204 | * Resolve daemon name to status | |
205 | */ | |
94b18763 | 206 | const MDSMap::mds_info_t* find_by_name(boost::string_view name) const |
7c673cae FG |
207 | { |
208 | std::map<mds_gid_t, MDSMap::mds_info_t> result; | |
209 | for (const auto &i : standby_daemons) { | |
210 | if (i.second.name == name) { | |
211 | return &(i.second); | |
212 | } | |
213 | } | |
214 | ||
215 | for (const auto &i : filesystems) { | |
216 | const auto &fs_info = i.second->mds_map.get_mds_info(); | |
217 | for (const auto &j : fs_info) { | |
218 | if (j.second.name == name) { | |
219 | return &(j.second); | |
220 | } | |
221 | } | |
222 | } | |
223 | ||
224 | return nullptr; | |
225 | } | |
226 | ||
227 | /** | |
228 | * Does a daemon exist with this GID? | |
229 | */ | |
230 | bool gid_exists(mds_gid_t gid) const | |
231 | { | |
232 | return mds_roles.count(gid) > 0; | |
233 | } | |
234 | ||
235 | /** | |
236 | * Does a daemon with this GID exist, *and* have an MDS rank assigned? | |
237 | */ | |
238 | bool gid_has_rank(mds_gid_t gid) const | |
239 | { | |
240 | return gid_exists(gid) && mds_roles.at(gid) != FS_CLUSTER_ID_NONE; | |
241 | } | |
242 | ||
243 | /** | |
244 | * Insert a new MDS daemon, as a standby | |
245 | */ | |
246 | void insert(const MDSMap::mds_info_t &new_info); | |
247 | ||
248 | /** | |
249 | * Assign an MDS cluster standby replay rank to a standby daemon | |
250 | */ | |
251 | void assign_standby_replay( | |
252 | const mds_gid_t standby_gid, | |
253 | const fs_cluster_id_t leader_ns, | |
254 | const mds_rank_t leader_rank); | |
255 | ||
256 | /** | |
257 | * Assign an MDS cluster rank to a standby daemon | |
258 | */ | |
259 | void promote( | |
260 | mds_gid_t standby_gid, | |
261 | const std::shared_ptr<Filesystem> &filesystem, | |
262 | mds_rank_t assigned_rank); | |
263 | ||
264 | /** | |
265 | * A daemon reports that it is STATE_STOPPED: remove it, | |
266 | * and the rank it held. | |
267 | * | |
268 | * @returns a list of any additional GIDs that were removed from the map | |
269 | * as a side effect (like standby replays) | |
270 | */ | |
271 | std::list<mds_gid_t> stop(mds_gid_t who); | |
272 | ||
273 | /** | |
274 | * The rank held by 'who', if any, is to be relinquished, and | |
275 | * the state for the daemon GID is to be forgotten. | |
276 | */ | |
277 | void erase(mds_gid_t who, epoch_t blacklist_epoch); | |
278 | ||
279 | /** | |
280 | * Update to indicate that the rank held by 'who' is damaged | |
281 | */ | |
282 | void damaged(mds_gid_t who, epoch_t blacklist_epoch); | |
283 | ||
284 | /** | |
285 | * Update to indicate that the rank `rank` is to be removed | |
286 | * from the damaged list of the filesystem `fscid` | |
287 | */ | |
288 | bool undamaged(const fs_cluster_id_t fscid, const mds_rank_t rank); | |
289 | ||
290 | /** | |
291 | * Initialize a Filesystem and assign a fscid. Update legacy_client_fscid | |
292 | * to point to the new filesystem if it's the only one. | |
293 | * | |
294 | * Caller must already have validated all arguments vs. the existing | |
295 | * FSMap and OSDMap contents. | |
296 | */ | |
94b18763 | 297 | void create_filesystem(boost::string_view name, |
7c673cae FG |
298 | int64_t metadata_pool, int64_t data_pool, |
299 | uint64_t features); | |
300 | ||
301 | /** | |
302 | * Remove the filesystem (it must exist). Caller should already | |
303 | * have failed out any MDSs that were assigned to the filesystem. | |
304 | */ | |
305 | void erase_filesystem(fs_cluster_id_t fscid) | |
306 | { | |
307 | filesystems.erase(fscid); | |
308 | } | |
309 | ||
310 | /** | |
311 | * Reset all the state information (not configuration information) | |
312 | * in a particular filesystem. Caller must have verified that | |
313 | * the filesystem already exists. | |
314 | */ | |
315 | void reset_filesystem(fs_cluster_id_t fscid); | |
316 | ||
317 | /** | |
318 | * Mutator helper for Filesystem objects: expose a non-const | |
319 | * Filesystem pointer to `fn` and update epochs appropriately. | |
320 | */ | |
321 | void modify_filesystem( | |
322 | const fs_cluster_id_t fscid, | |
323 | std::function<void(std::shared_ptr<Filesystem> )> fn) | |
324 | { | |
325 | auto fs = filesystems.at(fscid); | |
326 | fn(fs); | |
327 | fs->mds_map.epoch = epoch; | |
328 | } | |
329 | ||
330 | /** | |
331 | * Apply a mutation to the mds_info_t structure for a particular | |
332 | * daemon (identified by GID), and make appropriate updates to epochs. | |
333 | */ | |
334 | void modify_daemon( | |
335 | mds_gid_t who, | |
336 | std::function<void(MDSMap::mds_info_t *info)> fn) | |
337 | { | |
338 | if (mds_roles.at(who) == FS_CLUSTER_ID_NONE) { | |
339 | auto &info = standby_daemons.at(who); | |
340 | fn(&info); | |
341 | assert(info.state == MDSMap::STATE_STANDBY); | |
342 | standby_epochs[who] = epoch; | |
343 | } else { | |
344 | const auto &fs = filesystems[mds_roles.at(who)]; | |
345 | auto &info = fs->mds_map.mds_info.at(who); | |
346 | fn(&info); | |
347 | ||
348 | fs->mds_map.epoch = epoch; | |
349 | } | |
350 | } | |
351 | ||
352 | /** | |
353 | * Given that gid exists in a filesystem or as a standby, return | |
354 | * a reference to its info. | |
355 | */ | |
356 | const MDSMap::mds_info_t& get_info_gid(mds_gid_t gid) const | |
357 | { | |
358 | auto fscid = mds_roles.at(gid); | |
359 | if (fscid == FS_CLUSTER_ID_NONE) { | |
360 | return standby_daemons.at(gid); | |
361 | } else { | |
362 | return filesystems.at(fscid)->mds_map.mds_info.at(gid); | |
363 | } | |
364 | } | |
365 | ||
366 | /** | |
367 | * A daemon has told us it's compat, and it's too new | |
368 | * for the one we had previously. Impose the new one | |
369 | * on all filesystems. | |
370 | */ | |
371 | void update_compat(const CompatSet &c) | |
372 | { | |
373 | // We could do something more complicated here to enable | |
374 | // different filesystems to be served by different MDS versions, | |
375 | // but this is a lot simpler because it doesn't require us to | |
376 | // track the compat versions for standby daemons. | |
377 | compat = c; | |
378 | for (const auto &i : filesystems) { | |
379 | MDSMap &mds_map = i.second->mds_map; | |
380 | mds_map.compat = c; | |
381 | mds_map.epoch = epoch; | |
382 | } | |
383 | } | |
384 | ||
385 | std::shared_ptr<const Filesystem> get_legacy_filesystem() | |
386 | { | |
387 | if (legacy_client_fscid == FS_CLUSTER_ID_NONE) { | |
388 | return nullptr; | |
389 | } else { | |
390 | return filesystems.at(legacy_client_fscid); | |
391 | } | |
392 | } | |
393 | ||
394 | /** | |
395 | * A daemon has informed us of its offload targets | |
396 | */ | |
397 | void update_export_targets(mds_gid_t who, const std::set<mds_rank_t> targets) | |
398 | { | |
399 | auto fscid = mds_roles.at(who); | |
400 | modify_filesystem(fscid, [who, &targets](std::shared_ptr<Filesystem> fs) { | |
401 | fs->mds_map.mds_info.at(who).export_targets = targets; | |
402 | }); | |
403 | } | |
404 | ||
405 | epoch_t get_epoch() const { return epoch; } | |
406 | void inc_epoch() { epoch++; } | |
407 | ||
408 | size_t filesystem_count() const {return filesystems.size();} | |
409 | bool filesystem_exists(fs_cluster_id_t fscid) const {return filesystems.count(fscid) > 0;} | |
1adf2230 | 410 | const std::shared_ptr<Filesystem> &get_filesystem(fs_cluster_id_t fscid) {return filesystems.at(fscid);} |
7c673cae FG |
411 | std::shared_ptr<const Filesystem> get_filesystem(fs_cluster_id_t fscid) const {return std::const_pointer_cast<const Filesystem>(filesystems.at(fscid));} |
412 | std::shared_ptr<const Filesystem> get_filesystem(void) const {return std::const_pointer_cast<const Filesystem>(filesystems.begin()->second);} | |
94b18763 | 413 | std::shared_ptr<const Filesystem> get_filesystem(boost::string_view name) const |
7c673cae FG |
414 | { |
415 | for (const auto &i : filesystems) { | |
416 | if (i.second->mds_map.fs_name == name) { | |
417 | return std::const_pointer_cast<const Filesystem>(i.second); | |
418 | } | |
419 | } | |
420 | return nullptr; | |
421 | } | |
422 | std::list<std::shared_ptr<const Filesystem> > get_filesystems(void) const | |
423 | { | |
424 | std::list<std::shared_ptr<const Filesystem> > ret; | |
425 | for (const auto &i : filesystems) { | |
426 | ret.push_back(std::const_pointer_cast<const Filesystem>(i.second)); | |
427 | } | |
428 | return ret; | |
429 | } | |
430 | ||
431 | int parse_filesystem( | |
94b18763 | 432 | boost::string_view ns_str, |
7c673cae FG |
433 | std::shared_ptr<const Filesystem> *result |
434 | ) const; | |
435 | ||
436 | int parse_role( | |
94b18763 | 437 | boost::string_view role_str, |
7c673cae FG |
438 | mds_role_t *role, |
439 | std::ostream &ss) const; | |
440 | ||
441 | /** | |
442 | * Return true if this pool is in use by any of the filesystems | |
443 | */ | |
444 | bool pool_in_use(int64_t poolid) const { | |
445 | for (auto const &i : filesystems) { | |
446 | if (i.second->mds_map.is_data_pool(poolid) | |
447 | || i.second->mds_map.metadata_pool == poolid) { | |
448 | return true; | |
449 | } | |
450 | } | |
451 | return false; | |
452 | } | |
453 | ||
94b18763 | 454 | mds_gid_t find_standby_for(mds_role_t mds, boost::string_view name) const; |
7c673cae | 455 | |
31f18b77 | 456 | mds_gid_t find_unused_for(mds_role_t mds, bool force_standby_active) const; |
7c673cae | 457 | |
94b18763 | 458 | mds_gid_t find_replacement_for(mds_role_t mds, boost::string_view name, |
7c673cae FG |
459 | bool force_standby_active) const; |
460 | ||
461 | void get_health(list<pair<health_status_t,std::string> >& summary, | |
462 | list<pair<health_status_t,std::string> > *detail) const; | |
463 | ||
224ce89b WB |
464 | void get_health_checks(health_check_map_t *checks) const; |
465 | ||
7c673cae FG |
466 | bool check_health(void); |
467 | ||
468 | /** | |
469 | * Assert that the FSMap, Filesystem, MDSMap, mds_info_t relations are | |
470 | * all self-consistent. | |
471 | */ | |
472 | void sanity() const; | |
473 | ||
474 | void encode(bufferlist& bl, uint64_t features) const; | |
475 | void decode(bufferlist::iterator& p); | |
476 | void decode(bufferlist& bl) { | |
477 | bufferlist::iterator p = bl.begin(); | |
478 | decode(p); | |
479 | } | |
3efd9988 | 480 | void sanitize(std::function<bool(int64_t pool)> pool_exists); |
7c673cae FG |
481 | |
482 | void print(ostream& out) const; | |
483 | void print_summary(Formatter *f, ostream *out) const; | |
484 | ||
485 | void dump(Formatter *f) const; | |
486 | static void generate_test_instances(list<FSMap*>& ls); | |
487 | }; | |
488 | WRITE_CLASS_ENCODER_FEATURES(FSMap) | |
489 | ||
490 | inline ostream& operator<<(ostream& out, const FSMap& m) { | |
491 | m.print_summary(NULL, &out); | |
492 | return out; | |
493 | } | |
494 | ||
495 | #endif |