]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | #ifndef CEPH_FSMAP_H | |
17 | #define CEPH_FSMAP_H | |
18 | ||
94b18763 FG |
19 | #include <map> |
20 | #include <set> | |
21 | #include <string> | |
22 | #include <boost/utility/string_view.hpp> | |
23 | ||
7c673cae FG |
24 | #include <errno.h> |
25 | ||
26 | #include "include/types.h" | |
27 | #include "common/Clock.h" | |
28 | #include "msg/Message.h" | |
29 | #include "mds/MDSMap.h" | |
30 | ||
7c673cae FG |
31 | #include "common/config.h" |
32 | ||
33 | #include "include/CompatSet.h" | |
34 | #include "include/ceph_features.h" | |
35 | #include "common/Formatter.h" | |
36 | #include "mds/mdstypes.h" | |
37 | ||
38 | class CephContext; | |
224ce89b | 39 | class health_check_map_t; |
7c673cae | 40 | |
7c673cae FG |
41 | #define MDS_FS_NAME_DEFAULT "cephfs" |
42 | ||
43 | /** | |
44 | * The MDSMap and any additional fields describing a particular | |
45 | * filesystem (a unique fs_cluster_id_t). | |
46 | */ | |
47 | class Filesystem | |
48 | { | |
49 | public: | |
50 | fs_cluster_id_t fscid; | |
51 | MDSMap mds_map; | |
52 | ||
53 | void encode(bufferlist& bl, uint64_t features) const; | |
54 | void decode(bufferlist::iterator& p); | |
55 | ||
56 | Filesystem() | |
57 | : | |
58 | fscid(FS_CLUSTER_ID_NONE) | |
59 | { | |
60 | } | |
61 | ||
62 | void dump(Formatter *f) const; | |
63 | void print(std::ostream& out) const; | |
64 | ||
65 | /** | |
66 | * Return true if a daemon is already assigned as | |
67 | * STANDBY_REPLAY for the gid `who` | |
68 | */ | |
69 | bool has_standby_replay(mds_gid_t who) const | |
70 | { | |
71 | for (const auto &i : mds_map.mds_info) { | |
72 | const auto &info = i.second; | |
73 | if (info.state == MDSMap::STATE_STANDBY_REPLAY | |
74 | && info.rank == mds_map.mds_info.at(who).rank) { | |
75 | return true; | |
76 | } | |
77 | } | |
78 | ||
79 | return false; | |
80 | } | |
81 | }; | |
82 | WRITE_CLASS_ENCODER_FEATURES(Filesystem) | |
83 | ||
84 | class FSMap { | |
85 | protected: | |
86 | epoch_t epoch; | |
87 | uint64_t next_filesystem_id; | |
88 | fs_cluster_id_t legacy_client_fscid; | |
89 | CompatSet compat; | |
90 | bool enable_multiple; | |
91 | bool ever_enabled_multiple; // < the cluster had multiple MDSes enabled once | |
92 | ||
93 | std::map<fs_cluster_id_t, std::shared_ptr<Filesystem> > filesystems; | |
94 | ||
95 | // Remember which Filesystem an MDS daemon's info is stored in | |
96 | // (or in standby_daemons for FS_CLUSTER_ID_NONE) | |
97 | std::map<mds_gid_t, fs_cluster_id_t> mds_roles; | |
98 | ||
99 | // For MDS daemons not yet assigned to a Filesystem | |
100 | std::map<mds_gid_t, MDSMap::mds_info_t> standby_daemons; | |
101 | std::map<mds_gid_t, epoch_t> standby_epochs; | |
102 | ||
103 | public: | |
104 | ||
105 | friend class MDSMonitor; | |
28e407b8 | 106 | friend class PaxosFSMap; |
7c673cae FG |
107 | |
108 | FSMap() | |
109 | : epoch(0), | |
110 | next_filesystem_id(FS_CLUSTER_ID_ANONYMOUS + 1), | |
111 | legacy_client_fscid(FS_CLUSTER_ID_NONE), | |
112 | compat(get_mdsmap_compat_set_default()), | |
113 | enable_multiple(false), ever_enabled_multiple(false) | |
114 | { } | |
115 | ||
116 | FSMap(const FSMap &rhs) | |
117 | : | |
118 | epoch(rhs.epoch), | |
119 | next_filesystem_id(rhs.next_filesystem_id), | |
120 | legacy_client_fscid(rhs.legacy_client_fscid), | |
121 | compat(rhs.compat), | |
122 | enable_multiple(rhs.enable_multiple), | |
123 | ever_enabled_multiple(rhs.ever_enabled_multiple), | |
124 | mds_roles(rhs.mds_roles), | |
125 | standby_daemons(rhs.standby_daemons), | |
126 | standby_epochs(rhs.standby_epochs) | |
127 | { | |
b32b8144 | 128 | filesystems.clear(); |
7c673cae FG |
129 | for (const auto &i : rhs.filesystems) { |
130 | const auto &fs = i.second; | |
131 | filesystems[fs->fscid] = std::make_shared<Filesystem>(*fs); | |
132 | } | |
133 | } | |
134 | ||
135 | FSMap &operator=(const FSMap &rhs) | |
136 | { | |
137 | epoch = rhs.epoch; | |
138 | next_filesystem_id = rhs.next_filesystem_id; | |
139 | legacy_client_fscid = rhs.legacy_client_fscid; | |
140 | compat = rhs.compat; | |
141 | enable_multiple = rhs.enable_multiple; | |
142 | mds_roles = rhs.mds_roles; | |
143 | standby_daemons = rhs.standby_daemons; | |
144 | standby_epochs = rhs.standby_epochs; | |
145 | ||
b32b8144 | 146 | filesystems.clear(); |
7c673cae FG |
147 | for (const auto &i : rhs.filesystems) { |
148 | const auto &fs = i.second; | |
149 | filesystems[fs->fscid] = std::make_shared<Filesystem>(*fs); | |
150 | } | |
151 | ||
152 | return *this; | |
153 | } | |
154 | ||
155 | const CompatSet &get_compat() const {return compat;} | |
156 | ||
157 | void set_enable_multiple(const bool v) | |
158 | { | |
159 | enable_multiple = v; | |
160 | if (true == v) { | |
161 | ever_enabled_multiple = true; | |
162 | } | |
163 | } | |
164 | ||
165 | bool get_enable_multiple() const | |
166 | { | |
167 | return enable_multiple; | |
168 | } | |
169 | ||
170 | void set_legacy_client_fscid(fs_cluster_id_t fscid) | |
171 | { | |
172 | assert(fscid == FS_CLUSTER_ID_NONE || filesystems.count(fscid)); | |
173 | legacy_client_fscid = fscid; | |
174 | } | |
175 | ||
176 | fs_cluster_id_t get_legacy_client_fscid() const | |
177 | { | |
178 | return legacy_client_fscid; | |
179 | } | |
180 | ||
181 | /** | |
182 | * Get state of all daemons (for all filesystems, including all standbys) | |
183 | */ | |
184 | std::map<mds_gid_t, MDSMap::mds_info_t> get_mds_info() const | |
185 | { | |
186 | std::map<mds_gid_t, MDSMap::mds_info_t> result; | |
187 | for (const auto &i : standby_daemons) { | |
188 | result[i.first] = i.second; | |
189 | } | |
190 | ||
191 | for (const auto &i : filesystems) { | |
192 | const auto &fs_info = i.second->mds_map.get_mds_info(); | |
193 | for (const auto &j : fs_info) { | |
194 | result[j.first] = j.second; | |
195 | } | |
196 | } | |
197 | ||
198 | return result; | |
199 | } | |
200 | ||
201 | /** | |
202 | * Resolve daemon name to GID | |
203 | */ | |
94b18763 | 204 | mds_gid_t find_mds_gid_by_name(boost::string_view s) const |
7c673cae FG |
205 | { |
206 | const auto info = get_mds_info(); | |
207 | for (const auto &p : info) { | |
208 | if (p.second.name == s) { | |
209 | return p.first; | |
210 | } | |
211 | } | |
212 | return MDS_GID_NONE; | |
213 | } | |
214 | ||
215 | /** | |
216 | * Resolve daemon name to status | |
217 | */ | |
94b18763 | 218 | const MDSMap::mds_info_t* find_by_name(boost::string_view name) const |
7c673cae FG |
219 | { |
220 | std::map<mds_gid_t, MDSMap::mds_info_t> result; | |
221 | for (const auto &i : standby_daemons) { | |
222 | if (i.second.name == name) { | |
223 | return &(i.second); | |
224 | } | |
225 | } | |
226 | ||
227 | for (const auto &i : filesystems) { | |
228 | const auto &fs_info = i.second->mds_map.get_mds_info(); | |
229 | for (const auto &j : fs_info) { | |
230 | if (j.second.name == name) { | |
231 | return &(j.second); | |
232 | } | |
233 | } | |
234 | } | |
235 | ||
236 | return nullptr; | |
237 | } | |
238 | ||
239 | /** | |
240 | * Does a daemon exist with this GID? | |
241 | */ | |
242 | bool gid_exists(mds_gid_t gid) const | |
243 | { | |
244 | return mds_roles.count(gid) > 0; | |
245 | } | |
246 | ||
247 | /** | |
248 | * Does a daemon with this GID exist, *and* have an MDS rank assigned? | |
249 | */ | |
250 | bool gid_has_rank(mds_gid_t gid) const | |
251 | { | |
252 | return gid_exists(gid) && mds_roles.at(gid) != FS_CLUSTER_ID_NONE; | |
253 | } | |
254 | ||
255 | /** | |
256 | * Insert a new MDS daemon, as a standby | |
257 | */ | |
258 | void insert(const MDSMap::mds_info_t &new_info); | |
259 | ||
260 | /** | |
261 | * Assign an MDS cluster standby replay rank to a standby daemon | |
262 | */ | |
263 | void assign_standby_replay( | |
264 | const mds_gid_t standby_gid, | |
265 | const fs_cluster_id_t leader_ns, | |
266 | const mds_rank_t leader_rank); | |
267 | ||
268 | /** | |
269 | * Assign an MDS cluster rank to a standby daemon | |
270 | */ | |
271 | void promote( | |
272 | mds_gid_t standby_gid, | |
273 | const std::shared_ptr<Filesystem> &filesystem, | |
274 | mds_rank_t assigned_rank); | |
275 | ||
276 | /** | |
277 | * A daemon reports that it is STATE_STOPPED: remove it, | |
278 | * and the rank it held. | |
279 | * | |
280 | * @returns a list of any additional GIDs that were removed from the map | |
281 | * as a side effect (like standby replays) | |
282 | */ | |
283 | std::list<mds_gid_t> stop(mds_gid_t who); | |
284 | ||
285 | /** | |
286 | * The rank held by 'who', if any, is to be relinquished, and | |
287 | * the state for the daemon GID is to be forgotten. | |
288 | */ | |
289 | void erase(mds_gid_t who, epoch_t blacklist_epoch); | |
290 | ||
291 | /** | |
292 | * Update to indicate that the rank held by 'who' is damaged | |
293 | */ | |
294 | void damaged(mds_gid_t who, epoch_t blacklist_epoch); | |
295 | ||
296 | /** | |
297 | * Update to indicate that the rank `rank` is to be removed | |
298 | * from the damaged list of the filesystem `fscid` | |
299 | */ | |
300 | bool undamaged(const fs_cluster_id_t fscid, const mds_rank_t rank); | |
301 | ||
302 | /** | |
303 | * Initialize a Filesystem and assign a fscid. Update legacy_client_fscid | |
304 | * to point to the new filesystem if it's the only one. | |
305 | * | |
306 | * Caller must already have validated all arguments vs. the existing | |
307 | * FSMap and OSDMap contents. | |
308 | */ | |
94b18763 | 309 | void create_filesystem(boost::string_view name, |
7c673cae FG |
310 | int64_t metadata_pool, int64_t data_pool, |
311 | uint64_t features); | |
312 | ||
313 | /** | |
314 | * Remove the filesystem (it must exist). Caller should already | |
315 | * have failed out any MDSs that were assigned to the filesystem. | |
316 | */ | |
317 | void erase_filesystem(fs_cluster_id_t fscid) | |
318 | { | |
319 | filesystems.erase(fscid); | |
320 | } | |
321 | ||
322 | /** | |
323 | * Reset all the state information (not configuration information) | |
324 | * in a particular filesystem. Caller must have verified that | |
325 | * the filesystem already exists. | |
326 | */ | |
327 | void reset_filesystem(fs_cluster_id_t fscid); | |
328 | ||
329 | /** | |
330 | * Mutator helper for Filesystem objects: expose a non-const | |
331 | * Filesystem pointer to `fn` and update epochs appropriately. | |
332 | */ | |
333 | void modify_filesystem( | |
334 | const fs_cluster_id_t fscid, | |
335 | std::function<void(std::shared_ptr<Filesystem> )> fn) | |
336 | { | |
337 | auto fs = filesystems.at(fscid); | |
338 | fn(fs); | |
339 | fs->mds_map.epoch = epoch; | |
340 | } | |
341 | ||
342 | /** | |
343 | * Apply a mutation to the mds_info_t structure for a particular | |
344 | * daemon (identified by GID), and make appropriate updates to epochs. | |
345 | */ | |
346 | void modify_daemon( | |
347 | mds_gid_t who, | |
348 | std::function<void(MDSMap::mds_info_t *info)> fn) | |
349 | { | |
350 | if (mds_roles.at(who) == FS_CLUSTER_ID_NONE) { | |
351 | auto &info = standby_daemons.at(who); | |
352 | fn(&info); | |
353 | assert(info.state == MDSMap::STATE_STANDBY); | |
354 | standby_epochs[who] = epoch; | |
355 | } else { | |
356 | const auto &fs = filesystems[mds_roles.at(who)]; | |
357 | auto &info = fs->mds_map.mds_info.at(who); | |
358 | fn(&info); | |
359 | ||
360 | fs->mds_map.epoch = epoch; | |
361 | } | |
362 | } | |
363 | ||
364 | /** | |
365 | * Given that gid exists in a filesystem or as a standby, return | |
366 | * a reference to its info. | |
367 | */ | |
368 | const MDSMap::mds_info_t& get_info_gid(mds_gid_t gid) const | |
369 | { | |
370 | auto fscid = mds_roles.at(gid); | |
371 | if (fscid == FS_CLUSTER_ID_NONE) { | |
372 | return standby_daemons.at(gid); | |
373 | } else { | |
374 | return filesystems.at(fscid)->mds_map.mds_info.at(gid); | |
375 | } | |
376 | } | |
377 | ||
378 | /** | |
379 | * A daemon has told us it's compat, and it's too new | |
380 | * for the one we had previously. Impose the new one | |
381 | * on all filesystems. | |
382 | */ | |
383 | void update_compat(const CompatSet &c) | |
384 | { | |
385 | // We could do something more complicated here to enable | |
386 | // different filesystems to be served by different MDS versions, | |
387 | // but this is a lot simpler because it doesn't require us to | |
388 | // track the compat versions for standby daemons. | |
389 | compat = c; | |
390 | for (const auto &i : filesystems) { | |
391 | MDSMap &mds_map = i.second->mds_map; | |
392 | mds_map.compat = c; | |
393 | mds_map.epoch = epoch; | |
394 | } | |
395 | } | |
396 | ||
397 | std::shared_ptr<const Filesystem> get_legacy_filesystem() | |
398 | { | |
399 | if (legacy_client_fscid == FS_CLUSTER_ID_NONE) { | |
400 | return nullptr; | |
401 | } else { | |
402 | return filesystems.at(legacy_client_fscid); | |
403 | } | |
404 | } | |
405 | ||
406 | /** | |
407 | * A daemon has informed us of its offload targets | |
408 | */ | |
409 | void update_export_targets(mds_gid_t who, const std::set<mds_rank_t> targets) | |
410 | { | |
411 | auto fscid = mds_roles.at(who); | |
412 | modify_filesystem(fscid, [who, &targets](std::shared_ptr<Filesystem> fs) { | |
413 | fs->mds_map.mds_info.at(who).export_targets = targets; | |
414 | }); | |
415 | } | |
416 | ||
417 | epoch_t get_epoch() const { return epoch; } | |
418 | void inc_epoch() { epoch++; } | |
419 | ||
420 | size_t filesystem_count() const {return filesystems.size();} | |
421 | bool filesystem_exists(fs_cluster_id_t fscid) const {return filesystems.count(fscid) > 0;} | |
422 | std::shared_ptr<const Filesystem> get_filesystem(fs_cluster_id_t fscid) const {return std::const_pointer_cast<const Filesystem>(filesystems.at(fscid));} | |
423 | std::shared_ptr<const Filesystem> get_filesystem(void) const {return std::const_pointer_cast<const Filesystem>(filesystems.begin()->second);} | |
94b18763 | 424 | std::shared_ptr<const Filesystem> get_filesystem(boost::string_view name) const |
7c673cae FG |
425 | { |
426 | for (const auto &i : filesystems) { | |
427 | if (i.second->mds_map.fs_name == name) { | |
428 | return std::const_pointer_cast<const Filesystem>(i.second); | |
429 | } | |
430 | } | |
431 | return nullptr; | |
432 | } | |
433 | std::list<std::shared_ptr<const Filesystem> > get_filesystems(void) const | |
434 | { | |
435 | std::list<std::shared_ptr<const Filesystem> > ret; | |
436 | for (const auto &i : filesystems) { | |
437 | ret.push_back(std::const_pointer_cast<const Filesystem>(i.second)); | |
438 | } | |
439 | return ret; | |
440 | } | |
441 | ||
442 | int parse_filesystem( | |
94b18763 | 443 | boost::string_view ns_str, |
7c673cae FG |
444 | std::shared_ptr<const Filesystem> *result |
445 | ) const; | |
446 | ||
447 | int parse_role( | |
94b18763 | 448 | boost::string_view role_str, |
7c673cae FG |
449 | mds_role_t *role, |
450 | std::ostream &ss) const; | |
451 | ||
452 | /** | |
453 | * Return true if this pool is in use by any of the filesystems | |
454 | */ | |
455 | bool pool_in_use(int64_t poolid) const { | |
456 | for (auto const &i : filesystems) { | |
457 | if (i.second->mds_map.is_data_pool(poolid) | |
458 | || i.second->mds_map.metadata_pool == poolid) { | |
459 | return true; | |
460 | } | |
461 | } | |
462 | return false; | |
463 | } | |
464 | ||
94b18763 | 465 | mds_gid_t find_standby_for(mds_role_t mds, boost::string_view name) const; |
7c673cae | 466 | |
31f18b77 | 467 | mds_gid_t find_unused_for(mds_role_t mds, bool force_standby_active) const; |
7c673cae | 468 | |
94b18763 | 469 | mds_gid_t find_replacement_for(mds_role_t mds, boost::string_view name, |
7c673cae FG |
470 | bool force_standby_active) const; |
471 | ||
472 | void get_health(list<pair<health_status_t,std::string> >& summary, | |
473 | list<pair<health_status_t,std::string> > *detail) const; | |
474 | ||
224ce89b WB |
475 | void get_health_checks(health_check_map_t *checks) const; |
476 | ||
7c673cae FG |
477 | bool check_health(void); |
478 | ||
479 | /** | |
480 | * Assert that the FSMap, Filesystem, MDSMap, mds_info_t relations are | |
481 | * all self-consistent. | |
482 | */ | |
483 | void sanity() const; | |
484 | ||
485 | void encode(bufferlist& bl, uint64_t features) const; | |
486 | void decode(bufferlist::iterator& p); | |
487 | void decode(bufferlist& bl) { | |
488 | bufferlist::iterator p = bl.begin(); | |
489 | decode(p); | |
490 | } | |
3efd9988 | 491 | void sanitize(std::function<bool(int64_t pool)> pool_exists); |
7c673cae FG |
492 | |
493 | void print(ostream& out) const; | |
494 | void print_summary(Formatter *f, ostream *out) const; | |
495 | ||
496 | void dump(Formatter *f) const; | |
497 | static void generate_test_instances(list<FSMap*>& ls); | |
498 | }; | |
499 | WRITE_CLASS_ENCODER_FEATURES(FSMap) | |
500 | ||
501 | inline ostream& operator<<(ostream& out, const FSMap& m) { | |
502 | m.print_summary(NULL, &out); | |
503 | return out; | |
504 | } | |
505 | ||
506 | #endif |