]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2015 Red Hat | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #ifndef MDS_RANK_H_ | |
16 | #define MDS_RANK_H_ | |
17 | ||
11fdf7f2 | 18 | #include <string_view> |
94b18763 | 19 | |
7c673cae FG |
20 | #include "common/DecayCounter.h" |
21 | #include "common/LogClient.h" | |
22 | #include "common/Timer.h" | |
23 | #include "common/TrackedOp.h" | |
24 | ||
11fdf7f2 | 25 | #include "messages/MClientRequest.h" |
7c673cae | 26 | #include "messages/MCommand.h" |
11fdf7f2 | 27 | #include "messages/MMDSMap.h" |
7c673cae FG |
28 | |
29 | #include "Beacon.h" | |
30 | #include "DamageTable.h" | |
31 | #include "MDSMap.h" | |
32 | #include "SessionMap.h" | |
33 | #include "MDCache.h" | |
7c673cae | 34 | #include "MDLog.h" |
11fdf7f2 | 35 | #include "MDSContext.h" |
7c673cae | 36 | #include "PurgeQueue.h" |
91327a77 | 37 | #include "Server.h" |
7c673cae FG |
38 | #include "osdc/Journaler.h" |
39 | ||
40 | // Full .h import instead of forward declaration for PerfCounter, for the | |
41 | // benefit of those including this header and using MDSRank::logger | |
42 | #include "common/perf_counters.h" | |
43 | ||
// Performance-counter indices for the per-rank "mds" PerfCounters set
// (MDSRank::logger).  l_mds_first/l_mds_last delimit the registered range;
// the values in between are assigned implicitly by enumeration order, so
// new counters must be appended before l_mds_last, never inserted mid-list.
enum {
  l_mds_first = 2000,
  l_mds_request,
  l_mds_reply,
  l_mds_reply_latency,
  l_mds_forward,
  l_mds_dir_fetch,
  l_mds_dir_commit,
  l_mds_dir_split,
  l_mds_dir_merge,
  l_mds_inode_max,
  l_mds_inodes,
  l_mds_inodes_top,
  l_mds_inodes_bottom,
  l_mds_inodes_pin_tail,
  l_mds_inodes_pinned,
  l_mds_inodes_expired,
  l_mds_inodes_with_caps,
  l_mds_caps,
  l_mds_subtrees,
  // path-traversal counters (presumably hit/forward/discover outcomes of
  // lookup — confirm against MDCache::path_traverse)
  l_mds_traverse,
  l_mds_traverse_hit,
  l_mds_traverse_forward,
  l_mds_traverse_discover,
  l_mds_traverse_dir_fetch,
  l_mds_traverse_remote_ino,
  l_mds_traverse_lock,
  l_mds_load_cent,
  l_mds_dispatch_queue_len,
  // subtree export/import accounting
  l_mds_exported,
  l_mds_exported_inodes,
  l_mds_imported,
  l_mds_imported_inodes,
  l_mds_openino_dir_fetch,
  l_mds_openino_backtrace_fetch,
  l_mds_openino_peer_discover,
  // recursive stats of the filesystem root
  l_mds_root_rfiles,
  l_mds_root_rbytes,
  l_mds_root_rsnaps,
  l_mds_last,
};
85 | ||
// memory utilization
// Counter indices for the per-rank memory PerfCounters set
// (MDSRank::mlogger).  l_mdm_first/l_mdm_last delimit the range; values are
// implicit from enumeration order, so append new entries before l_mdm_last.
enum {
  l_mdm_first = 2500,
  l_mdm_ino,      // inodes (ino/inoa/inos appear to be current/allocated/size — TODO confirm at registration site)
  l_mdm_inoa,
  l_mdm_inos,
  l_mdm_dir,      // dirfrags
  l_mdm_dira,
  l_mdm_dirs,
  l_mdm_dn,       // dentries
  l_mdm_dna,
  l_mdm_dns,
  l_mdm_cap,      // client capabilities
  l_mdm_capa,
  l_mdm_caps,
  l_mdm_rss,      // process resident set size
  l_mdm_heap,     // heap usage
  l_mdm_last,
};
105 | ||
106 | namespace ceph { | |
107 | struct heartbeat_handle_d; | |
108 | } | |
109 | ||
7c673cae FG |
110 | class Locker; |
111 | class MDCache; | |
112 | class MDLog; | |
113 | class MDBalancer; | |
114 | class InoTable; | |
115 | class SnapServer; | |
116 | class SnapClient; | |
117 | class MDSTableServer; | |
118 | class MDSTableClient; | |
119 | class Messenger; | |
120 | class Objecter; | |
121 | class MonClient; | |
122 | class Finisher; | |
7c673cae | 123 | class ScrubStack; |
f64942e4 | 124 | class C_MDS_Send_Command_Reply; |
11fdf7f2 | 125 | class C_ExecAndReply; |
7c673cae FG |
126 | |
/**
 * The public part of this class's interface is what's exposed to all
 * the various subsystems (server, mdcache, etc), such as pointers
 * to the other subsystems, and message-sending calls.
 */
class MDSRank {
  protected:
    // Which rank in the MDS cluster this object embodies; fixed for the
    // lifetime of the MDSRank.
    const mds_rank_t whoami;

    // Incarnation as seen in MDSMap at the point where a rank is
    // assigned.
    int incarnation;

  public:

    friend class C_Flush_Journal;
    friend class C_Drop_Cache;

    friend class C_CacheDropExecAndReply;
    friend class C_ScrubExecAndReply;
    friend class C_ScrubControlExecAndReply;

    mds_rank_t get_nodeid() const { return whoami; }
    int64_t get_metadata_pool();

    // Reference to global MDS::mds_lock, so that users of MDSRank don't
    // carry around references to the outer MDS, and we can substitute
    // a separate lock here in future potentially.
    Mutex &mds_lock;

    mono_time get_starttime() const {
      return starttime;
    }
    // Seconds since this rank started (monotonic clock).
    chrono::duration<double> get_uptime() const {
      mono_time now = mono_clock::now();
      return chrono::duration<double>(now-starttime);
    }

    class CephContext *cct;

    bool is_daemon_stopping() const;

    // Reference to global cluster log client, just to avoid initialising
    // a separate one here.
    LogChannelRef &clog;

    // Reference to global timer utility, because MDSRank and MDSDaemon
    // currently both use the same mds_lock, so it makes sense for them
    // to share a timer.
    SafeTimer &timer;

    std::unique_ptr<MDSMap> &mdsmap; /* MDSDaemon::mdsmap */

    Objecter *objecter;

    // sub systems (non-owning pointers to peer components of this rank)
    Server *server;
    MDCache *mdcache;
    Locker *locker;
    MDLog *mdlog;
    MDBalancer *balancer;
    ScrubStack *scrubstack;
    DamageTable damage_table;


    InoTable *inotable;

    SnapServer *snapserver;
    SnapClient *snapclient;

    MDSTableClient *get_table_client(int t);
    MDSTableServer *get_table_server(int t);

    SessionMap sessionmap;
    // Look up a client session by client id; returns nullptr if absent.
    Session *get_session(client_t client) {
      return sessionmap.get_session(entity_name_t::CLIENT(client.v));
    }
    Session *get_session(const Message::const_ref &m);

    PerfCounters *logger, *mlogger;
    OpTracker op_tracker;

    // The last different state I held before current
    MDSMap::DaemonState last_state;
    // The state assigned to me by the MDSMap
    MDSMap::DaemonState state;

    bool cluster_degraded;

    MDSMap::DaemonState get_state() const { return state; }
    MDSMap::DaemonState get_want_state() const { return beacon.get_want_state(); }

    // Predicates over the current MDSMap-assigned state.
    bool is_creating() const { return state == MDSMap::STATE_CREATING; }
    bool is_starting() const { return state == MDSMap::STATE_STARTING; }
    bool is_standby() const { return state == MDSMap::STATE_STANDBY; }
    bool is_replay() const { return state == MDSMap::STATE_REPLAY; }
    bool is_standby_replay() const { return state == MDSMap::STATE_STANDBY_REPLAY; }
    bool is_resolve() const { return state == MDSMap::STATE_RESOLVE; }
    bool is_reconnect() const { return state == MDSMap::STATE_RECONNECT; }
    bool is_rejoin() const { return state == MDSMap::STATE_REJOIN; }
    bool is_clientreplay() const { return state == MDSMap::STATE_CLIENTREPLAY; }
    bool is_active() const { return state == MDSMap::STATE_ACTIVE; }
    bool is_stopping() const { return state == MDSMap::STATE_STOPPING; }
    bool is_any_replay() const { return (is_replay() || is_standby_replay()); }
    bool is_stopped() const { return mdsmap->is_stopped(whoami); }
    bool is_cluster_degraded() const { return cluster_degraded; }
    bool allows_multimds_snaps() const { return mdsmap->allows_multimds_snaps(); }

    bool is_cache_trimmable() const {
      return is_clientreplay() || is_active() || is_stopping();
    }

    void handle_write_error(int err);

    // Fan a configuration change out to the sub systems that cache config
    // values (sessionmap, server, mdcache, purge queue).
    void handle_conf_change(const ConfigProxy& conf,
                            const std::set <std::string> &changed)
    {
      sessionmap.handle_conf_change(conf, changed);
      server->handle_conf_change(conf, changed);
      mdcache->handle_conf_change(conf, changed, *mdsmap);
      purge_queue.handle_conf_change(conf, changed, *mdsmap);
    }

    void update_mlogger();
  protected:
    // Flag to indicate we entered shutdown: anyone seeing this to be true
    // after taking mds_lock must drop out.
    bool stopping;

    // PurgeQueue is only used by StrayManager, but it is owned by MDSRank
    // because its init/shutdown happens at the top level.
    PurgeQueue purge_queue;

    // Background thread that drains finished_queue; woken via signal().
    class ProgressThread : public Thread {
      MDSRank *mds;
      Cond cond;
      public:
      explicit ProgressThread(MDSRank *mds_) : mds(mds_) {}
      void * entry() override;
      void shutdown();
      void signal() {cond.Signal();}
    } progress_thread;

    list<Message::const_ref> waiting_for_nolaggy;
    MDSContext::que finished_queue;
    // Dispatch, retry, queues
    int dispatch_depth;
    void inc_dispatch_depth() { ++dispatch_depth; }
    void dec_dispatch_depth() { --dispatch_depth; }
    void retry_dispatch(const Message::const_ref &m);
    bool handle_deferrable_message(const Message::const_ref &m);
    void _advance_queues();
    bool _dispatch(const Message::const_ref &m, bool new_msg);

    ceph::heartbeat_handle_d *hb;  // Heartbeat for threads using mds_lock

    bool is_stale_message(const Message::const_ref &m) const;

    map<mds_rank_t, version_t> peer_mdsmap_epoch;

    ceph_tid_t last_tid;    // for mds-initiated requests (e.g. stray rename)

    // Contexts parked until the rank reaches the corresponding state.
    MDSContext::vec waiting_for_active, waiting_for_replay, waiting_for_rejoin,
                    waiting_for_reconnect, waiting_for_resolve;
    MDSContext::vec waiting_for_any_client_connection;
    MDSContext::que replay_queue;
    bool replaying_requests_done = false;

    map<mds_rank_t, MDSContext::vec > waiting_for_active_peer;
    map<epoch_t, MDSContext::vec > waiting_for_mdsmap;

    epoch_t osd_epoch_barrier;

    // Const reference to the beacon so that we can behave differently
    // when it's laggy.
    Beacon &beacon;

    /**
     * Emit clog warnings for any ops reported as warnings by optracker
     */
    void check_ops_in_flight();

    int mds_slow_req_count;

    /**
     * Share MDSMap with clients
     */
    void bcast_mds_map();  // to mounted clients
    epoch_t last_client_mdsmap_bcast;

    map<mds_rank_t,DecayCounter> export_targets; /* targets this MDS is exporting to or wants/tries to */

    void create_logger();
  public:

    // Queue a context for completion on the progress thread.
    void queue_waiter(MDSContext *c) {
      finished_queue.push_back(c);
      progress_thread.signal();
    }
    void queue_waiter_front(MDSContext *c) {
      finished_queue.push_front(c);
      progress_thread.signal();
    }
    // Takes ownership of the caller's list (leaves `ls` empty) and appends
    // its contexts to the finished queue.
    void queue_waiters(MDSContext::vec& ls) {
      MDSContext::vec v;
      v.swap(ls);
      std::copy(v.begin(), v.end(), std::back_inserter(finished_queue));
      progress_thread.signal();
    }
    // As queue_waiters, but the contexts run before anything already queued;
    // reverse iteration + push_front preserves their relative order.
    void queue_waiters_front(MDSContext::vec& ls) {
      MDSContext::vec v;
      v.swap(ls);
      std::copy(v.rbegin(), v.rend(), std::front_inserter(finished_queue));
      progress_thread.signal();
    }

    MDSRank(
        mds_rank_t whoami_,
        Mutex &mds_lock_,
        LogChannelRef &clog_,
        SafeTimer &timer_,
        Beacon &beacon_,
        std::unique_ptr<MDSMap> & mdsmap_,
        Messenger *msgr,
        MonClient *monc_,
        Context *respawn_hook_,
        Context *suicide_hook_);

  protected:
    // Protected: lifetime is managed by the owning daemon, not by users of
    // the public interface.
    ~MDSRank();

  public:

    // Daemon lifetime functions: these guys break the abstraction
    // and call up into the parent MDSDaemon instance.  It's kind
    // of unavoidable: if we want any depth into our calls
    // to be able to e.g. tear down the whole process, we have to
    // have a reference going all the way down.
    // >>>
    void suicide();
    void respawn();
    // <<<

    /**
     * Call this periodically if inside a potentially long running piece
     * of code while holding the mds_lock
     */
    void heartbeat_reset();

    /**
     * Report state DAMAGED to the mon, and then pass on to respawn().  Call
     * this when an unrecoverable error is encountered while attempting
     * to load an MDS rank's data structures.  This is *not* for use with
     * errors affecting normal dirfrag/inode objects -- they should be handled
     * through cleaner scrub/repair mechanisms.
     *
     * Callers must already hold mds_lock.
     */
    void damaged();

    /**
     * Wrapper around `damaged` for users who are not
     * already holding mds_lock.
     *
     * Callers must not already hold mds_lock.
     */
    void damaged_unlocked();

    double last_cleared_laggy() const {
      return beacon.last_cleared_laggy();
    }

    double get_dispatch_queue_max_age(utime_t now) const;

    // Message-sending helpers.  The *_counted variants are presumably
    // tracked against the session for flow control — confirm in MDSRank.cc.
    void send_message_mds(const Message::ref& m, mds_rank_t mds);
    void forward_message_mds(const MClientRequest::const_ref& req, mds_rank_t mds);
    void send_message_client_counted(const Message::ref& m, client_t client);
    void send_message_client_counted(const Message::ref& m, Session* session);
    void send_message_client_counted(const Message::ref& m, const ConnectionRef& connection);
    void send_message_client(const Message::ref& m, Session* session);
    void send_message(const Message::ref& m, const ConnectionRef& c);

    // Waiter registration: each parks a context to be finished when the
    // named condition is reached.
    void wait_for_active_peer(mds_rank_t who, MDSContext *c) {
      waiting_for_active_peer[who].push_back(c);
    }
    void wait_for_cluster_recovered(MDSContext *c) {
      ceph_assert(cluster_degraded);
      // MDS_RANK_NONE entry means "any peer recovery / cluster recovered".
      waiting_for_active_peer[MDS_RANK_NONE].push_back(c);
    }

    void wait_for_any_client_connection(MDSContext *c) {
      waiting_for_any_client_connection.push_back(c);
    }
    void kick_waiters_for_any_client_connection(void) {
      finish_contexts(g_ceph_context, waiting_for_any_client_connection);
    }
    void wait_for_active(MDSContext *c) {
      waiting_for_active.push_back(c);
    }
    void wait_for_replay(MDSContext *c) {
      waiting_for_replay.push_back(c);
    }
    void wait_for_rejoin(MDSContext *c) {
      waiting_for_rejoin.push_back(c);
    }
    void wait_for_reconnect(MDSContext *c) {
      waiting_for_reconnect.push_back(c);
    }
    void wait_for_resolve(MDSContext *c) {
      waiting_for_resolve.push_back(c);
    }
    void wait_for_mdsmap(epoch_t e, MDSContext *c) {
      waiting_for_mdsmap[e].push_back(c);
    }
    void enqueue_replay(MDSContext *c) {
      replay_queue.push_back(c);
    }

    bool queue_one_replay();
    void maybe_clientreplay_done();

    void set_osd_epoch_barrier(epoch_t e);
    epoch_t get_osd_epoch_barrier() const {return osd_epoch_barrier;}
    epoch_t get_osd_epoch() const;

    // Monotonically increasing tid for mds-initiated requests.
    ceph_tid_t issue_tid() { return ++last_tid; }

    Finisher *finisher;

    MDSMap *get_mds_map() { return mdsmap.get(); }

    uint64_t get_num_requests() const { return logger->get(l_mds_request); }

    int get_mds_slow_req_count() const { return mds_slow_req_count; }

    void dump_status(Formatter *f) const;

    void hit_export_target(mds_rank_t rank, double amount=-1.0);
    // True if the MDSMap already lists `rank` among our export targets.
    bool is_export_target(mds_rank_t rank) {
      const set<mds_rank_t>& map_targets = mdsmap->get_mds_info(get_nodeid()).export_targets;
      return map_targets.count(rank);
    }

    bool evict_client(int64_t session_id, bool wait, bool blacklist,
                      std::ostream& ss, Context *on_killed=nullptr);

    void mark_base_recursively_scrubbed(inodeno_t ino);

  protected:
    // Admin-socket / tell command implementations.
    void dump_clientreplay_status(Formatter *f) const;
    void command_scrub_start(Formatter *f,
                             std::string_view path, std::string_view tag,
                             const vector<string>& scrubop_vec, Context *on_finish);
    void command_tag_path(Formatter *f, std::string_view path,
                          std::string_view tag);
    // scrub control commands
    void command_scrub_abort(Formatter *f, Context *on_finish);
    void command_scrub_pause(Formatter *f, Context *on_finish);
    void command_scrub_resume(Formatter *f);
    void command_scrub_status(Formatter *f);

    void command_flush_path(Formatter *f, std::string_view path);
    void command_flush_journal(Formatter *f);
    void command_get_subtrees(Formatter *f);
    void command_export_dir(Formatter *f,
        std::string_view path, mds_rank_t dest);
    bool command_dirfrag_split(
        cmdmap_t cmdmap,
        std::ostream &ss);
    bool command_dirfrag_merge(
        cmdmap_t cmdmap,
        std::ostream &ss);
    bool command_dirfrag_ls(
        cmdmap_t cmdmap,
        std::ostream &ss,
        Formatter *f);
    int _command_export_dir(std::string_view path, mds_rank_t dest);
    CDir *_command_dirfrag_get(
        const cmdmap_t &cmdmap,
        std::ostream &ss);
    void command_openfiles_ls(Formatter *f);
    void command_dump_tree(const cmdmap_t &cmdmap, std::ostream &ss, Formatter *f);
    void command_dump_inode(Formatter *f, const cmdmap_t &cmdmap, std::ostream &ss);
    void command_cache_drop(uint64_t timeout, Formatter *f, Context *on_finish);

  protected:
    Messenger    *messenger;
    MonClient    *monc;

    Context *respawn_hook;
    Context *suicide_hook;

    // Friended to access retry_dispatch
    friend class C_MDS_RetryMessage;

    // FIXME the state machine logic should be separable from the dispatch
    // logic that calls it.
    // >>>
    void calc_recovery_set();
    void request_state(MDSMap::DaemonState s);

    bool standby_replaying;  // true if current replay pass is in standby-replay mode

    typedef enum {
      // The MDSMap is available, configure default layouts and structures
      MDS_BOOT_INITIAL = 0,
      // We are ready to open some inodes
      MDS_BOOT_OPEN_ROOT,
      // We are ready to do a replay if needed
      MDS_BOOT_PREPARE_LOG,
      // Replay is complete
      MDS_BOOT_REPLAY_DONE
    } BootStep;
    friend class C_MDS_BootStart;
    friend class C_MDS_InternalBootStart;
    void boot_create();             // i am new mds.
    void boot_start(BootStep step=MDS_BOOT_INITIAL, int r=0);    // starting|replay

    void replay_start();
    void creating_done();
    void starting_done();
    void replay_done();
    void standby_replay_restart();
    void _standby_replay_restart_finish(int r, uint64_t old_read_pos);
    class C_MDS_StandbyReplayRestart;
    class C_MDS_StandbyReplayRestartFinish;

    void reopen_log();

    // State-transition entry/exit hooks invoked as the rank moves through
    // resolve/reconnect/rejoin/clientreplay/active/stopping.
    void resolve_start();
    void resolve_done();
    void reconnect_start();
    void reconnect_done();
    void rejoin_joint_start();
    void rejoin_start();
    void rejoin_done();
    void recovery_done(int oldstate);
    void clientreplay_start();
    void clientreplay_done();
    void active_start();
    void stopping_start();
    void stopping_done();

    void validate_sessions();
    // <<<

    // >>>
    void handle_mds_recovery(mds_rank_t who);
    void handle_mds_failure(mds_rank_t who);
    // <<<

    /* Update MDSMap export_targets for this rank. Called on ::tick(). */
    void update_targets();

    friend class C_MDS_MonCommand;
    void _mon_command_finish(int r, std::string_view cmd, std::string_view outs);
    void set_mdsmap_multimds_snaps_allowed();
  private:
    mono_time starttime = mono_clock::zero();

  protected:
    Context *create_async_exec_context(C_ExecAndReply *ctx);
};
590 | ||
/* This expects to be given a reference which it is responsible for.
 * The finish function calls functions which
 * will put the Message exactly once.*/
// Context that re-dispatches a message on completion: finish() hands the
// stored message back to MDSRank::retry_dispatch (MDSRank friends this
// class to expose that protected method).
class C_MDS_RetryMessage : public MDSInternalContext {
public:
  C_MDS_RetryMessage(MDSRank *mds, const Message::const_ref &m)
    : MDSInternalContext(mds), m(m) {}
  // `r` is ignored: the retry happens regardless of the completion code.
  void finish(int r) override {
    get_mds()->retry_dispatch(m);
  }
protected:
  Message::const_ref m;  // message to re-dispatch; held until finish()
};
604 | ||
// Factory that manufactures C_MDS_RetryMessage contexts for a fixed
// (mds, message) pair; each build() call allocates a fresh context
// (ownership passes to the caller, per the MDSContext convention).
class CF_MDS_RetryMessageFactory : public MDSContextFactory {
public:
  CF_MDS_RetryMessageFactory(MDSRank *mds, const Message::const_ref &m)
    : mds(mds), m(m) {}

  MDSContext *build() {
    return new C_MDS_RetryMessage(mds, m);
  }

private:
  MDSRank *mds;           // non-owning; rank the retry is dispatched on
  Message::const_ref m;   // message to re-dispatch
};
618 | ||
/**
 * The aspect of MDSRank exposed to MDSDaemon but not subsystems: i.e.
 * the service/dispatcher stuff like init/shutdown that subsystems should
 * never touch.
 */
class MDSRankDispatcher : public MDSRank
{
public:
  // Lifecycle hooks driven by MDSDaemon.
  void init();
  void tick();
  void shutdown();
  bool handle_asok_command(std::string_view command, const cmdmap_t& cmdmap,
                           Formatter *f, std::ostream& ss);
  void handle_mds_map(const MMDSMap::const_ref &m, const MDSMap &oldmap);
  void handle_osd_map();
  void update_log_config();

  // Handle a MCommand; out-params carry the result code, data/status
  // streams, an optional deferred-completion context, and whether the
  // caller should send a reply.
  bool handle_command(
    const cmdmap_t &cmdmap,
    const MCommand::const_ref &m,
    int *r,
    std::stringstream *ds,
    std::stringstream *ss,
    Context **run_later,
    bool *need_reply);

  void dump_sessions(const SessionFilter &filter, Formatter *f) const;
  void evict_clients(const SessionFilter &filter, const MCommand::const_ref &m);

  // Call into me from MDS::ms_dispatch
  bool ms_dispatch(const Message::const_ref &m);

  // Forwards all arguments to the MDSRank base constructor.
  MDSRankDispatcher(
    mds_rank_t whoami_,
    Mutex &mds_lock_,
    LogChannelRef &clog_,
    SafeTimer &timer_,
    Beacon &beacon_,
    std::unique_ptr<MDSMap> &mdsmap_,
    Messenger *msgr,
    MonClient *monc_,
    Context *respawn_hook_,
    Context *suicide_hook_);
};
663 | ||
// This utility for MDS and MDSRank dispatchers.
// Expects `m` (a message) in the enclosing scope; if the message's peer type
// is not in the `peers` bitmask, logs the rejection and makes the enclosing
// dispatch function `return true` (message filtered out / consumed).
#define ALLOW_MESSAGES_FROM(peers) \
do { \
  if (m->get_connection() && (m->get_connection()->get_peer_type() & (peers)) == 0) { \
    dout(0) << __FILE__ << "." << __LINE__ << ": filtered out request, peer=" << m->get_connection()->get_peer_type() \
           << " allowing=" << #peers << " message=" << *m << dendl; \
    return true; \
  } \
} while (0)
673 | ||
674 | #endif // MDS_RANK_H_ | |
675 |