]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/MDSRank.h
import ceph quincy 17.2.4
[ceph.git] / ceph / src / mds / MDSRank.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2015 Red Hat
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#ifndef MDS_RANK_H_
16#define MDS_RANK_H_
17
11fdf7f2 18#include <string_view>
94b18763 19
f67539c2
TL
20#include <boost/asio/io_context.hpp>
21
7c673cae
FG
22#include "common/DecayCounter.h"
23#include "common/LogClient.h"
24#include "common/Timer.h"
a4b75251 25#include "common/fair_mutex.h"
7c673cae 26#include "common/TrackedOp.h"
a4b75251 27#include "common/ceph_mutex.h"
7c673cae 28
9f95a23c
TL
29#include "include/common_fwd.h"
30
11fdf7f2 31#include "messages/MClientRequest.h"
7c673cae 32#include "messages/MCommand.h"
11fdf7f2 33#include "messages/MMDSMap.h"
7c673cae
FG
34
35#include "Beacon.h"
36#include "DamageTable.h"
37#include "MDSMap.h"
38#include "SessionMap.h"
39#include "MDCache.h"
7c673cae 40#include "MDLog.h"
11fdf7f2 41#include "MDSContext.h"
7c673cae 42#include "PurgeQueue.h"
91327a77 43#include "Server.h"
f67539c2 44#include "MetricsHandler.h"
7c673cae
FG
45#include "osdc/Journaler.h"
46
// Full .h include instead of a forward declaration for PerfCounters, for the
// benefit of those including this header and using MDSRank::logger
49#include "common/perf_counters.h"
50
// Counter identifiers used with MDSRank::logger (e.g.
// logger->get(l_mds_request)).  l_mds_first and l_mds_last are the first and
// last enumerators of the range; the numeric value of every entry is
// determined by its position, so existing entries must not be reordered.
enum {
  l_mds_first = 2000,
  l_mds_request,
  l_mds_reply,
  l_mds_reply_latency,
  l_mds_slow_reply,
  l_mds_forward,
  l_mds_dir_fetch,
  l_mds_dir_commit,
  l_mds_dir_split,
  l_mds_dir_merge,
  l_mds_inodes,
  l_mds_inodes_top,
  l_mds_inodes_bottom,
  l_mds_inodes_pin_tail,
  l_mds_inodes_pinned,
  l_mds_inodes_expired,
  l_mds_inodes_with_caps,
  l_mds_caps,
  l_mds_subtrees,
  l_mds_traverse,
  l_mds_traverse_hit,
  l_mds_traverse_forward,
  l_mds_traverse_discover,
  l_mds_traverse_dir_fetch,
  l_mds_traverse_remote_ino,
  l_mds_traverse_lock,
  l_mds_load_cent,
  l_mds_dispatch_queue_len,
  l_mds_exported,
  l_mds_exported_inodes,
  l_mds_imported,
  l_mds_imported_inodes,
  l_mds_openino_dir_fetch,
  l_mds_openino_backtrace_fetch,
  l_mds_openino_peer_discover,
  l_mds_root_rfiles,
  l_mds_root_rbytes,
  l_mds_root_rsnaps,
  l_mds_scrub_backtrace_fetch,
  l_mds_scrub_set_tag,
  l_mds_scrub_backtrace_repaired,
  l_mds_scrub_inotable_repaired,
  l_mds_scrub_dir_inodes,
  l_mds_scrub_dir_base_inodes,
  l_mds_scrub_dirfrag_rstats,
  l_mds_scrub_file_inodes,
  l_mdss_handle_inode_file_caps,
  l_mdss_ceph_cap_op_revoke,
  l_mdss_ceph_cap_op_grant,
  l_mdss_ceph_cap_op_trunc,
  l_mdss_ceph_cap_op_flushsnap_ack,
  l_mdss_ceph_cap_op_flush_ack,
  l_mdss_handle_client_caps,
  l_mdss_handle_client_caps_dirty,
  l_mdss_handle_client_cap_release,
  l_mdss_process_request_cap_release,
  l_mds_last,
};
110
// memory utilization
//
// Counter identifiers for the memory-utilization group.  l_mdm_first and
// l_mdm_last are the first and last enumerators of the range; values are
// positional, so existing entries must not be reordered.
enum {
  l_mdm_first = 2500,
  l_mdm_ino,
  l_mdm_inoa,
  l_mdm_inos,
  l_mdm_dir,
  l_mdm_dira,
  l_mdm_dirs,
  l_mdm_dn,
  l_mdm_dna,
  l_mdm_dns,
  l_mdm_cap,
  l_mdm_capa,
  l_mdm_caps,
  l_mdm_rss,
  l_mdm_heap,
  l_mdm_last,
};
130
131namespace ceph {
132 struct heartbeat_handle_d;
133}
134
7c673cae
FG
135class Locker;
136class MDCache;
137class MDLog;
138class MDBalancer;
139class InoTable;
140class SnapServer;
141class SnapClient;
142class MDSTableServer;
143class MDSTableClient;
144class Messenger;
f67539c2 145class MetricAggregator;
7c673cae
FG
146class Objecter;
147class MonClient;
9f95a23c 148class MgrClient;
7c673cae 149class Finisher;
7c673cae 150class ScrubStack;
11fdf7f2 151class C_ExecAndReply;
7c673cae
FG
152
153/**
154 * The public part of this class's interface is what's exposed to all
155 * the various subsystems (server, mdcache, etc), such as pointers
156 * to the other subsystems, and message-sending calls.
157 */
158class MDSRank {
7c673cae 159 public:
f64942e4
AA
160 friend class C_Flush_Journal;
161 friend class C_Drop_Cache;
11fdf7f2
TL
162 friend class C_CacheDropExecAndReply;
163 friend class C_ScrubExecAndReply;
164 friend class C_ScrubControlExecAndReply;
165
9f95a23c
TL
166 CephContext *cct;
167
168 MDSRank(
169 mds_rank_t whoami_,
a4b75251 170 ceph::fair_mutex &mds_lock_,
9f95a23c 171 LogChannelRef &clog_,
a4b75251 172 CommonSafeTimer<ceph::fair_mutex> &timer_,
9f95a23c
TL
173 Beacon &beacon_,
174 std::unique_ptr<MDSMap> & mdsmap_,
175 Messenger *msgr,
176 MonClient *monc_,
177 MgrClient *mgrc,
178 Context *respawn_hook_,
f67539c2
TL
179 Context *suicide_hook_,
180 boost::asio::io_context& ioc);
9f95a23c 181
7c673cae 182 mds_rank_t get_nodeid() const { return whoami; }
b3b6e05e
TL
183 int64_t get_metadata_pool() const
184 {
185 return metadata_pool;
186 }
7c673cae 187
94b18763
FG
188 mono_time get_starttime() const {
189 return starttime;
190 }
20effc67 191 std::chrono::duration<double> get_uptime() const {
94b18763 192 mono_time now = mono_clock::now();
20effc67 193 return std::chrono::duration<double>(now-starttime);
94b18763
FG
194 }
195
7c673cae
FG
196 bool is_daemon_stopping() const;
197
7c673cae
FG
198 MDSTableClient *get_table_client(int t);
199 MDSTableServer *get_table_server(int t);
200
7c673cae
FG
201 Session *get_session(client_t client) {
202 return sessionmap.get_session(entity_name_t::CLIENT(client.v));
203 }
9f95a23c 204 Session *get_session(const cref_t<Message> &m);
7c673cae
FG
205
206 MDSMap::DaemonState get_state() const { return state; }
207 MDSMap::DaemonState get_want_state() const { return beacon.get_want_state(); }
208
209 bool is_creating() const { return state == MDSMap::STATE_CREATING; }
210 bool is_starting() const { return state == MDSMap::STATE_STARTING; }
211 bool is_standby() const { return state == MDSMap::STATE_STANDBY; }
212 bool is_replay() const { return state == MDSMap::STATE_REPLAY; }
213 bool is_standby_replay() const { return state == MDSMap::STATE_STANDBY_REPLAY; }
214 bool is_resolve() const { return state == MDSMap::STATE_RESOLVE; }
215 bool is_reconnect() const { return state == MDSMap::STATE_RECONNECT; }
216 bool is_rejoin() const { return state == MDSMap::STATE_REJOIN; }
217 bool is_clientreplay() const { return state == MDSMap::STATE_CLIENTREPLAY; }
218 bool is_active() const { return state == MDSMap::STATE_ACTIVE; }
219 bool is_stopping() const { return state == MDSMap::STATE_STOPPING; }
220 bool is_any_replay() const { return (is_replay() || is_standby_replay()); }
221 bool is_stopped() const { return mdsmap->is_stopped(whoami); }
222 bool is_cluster_degraded() const { return cluster_degraded; }
11fdf7f2 223 bool allows_multimds_snaps() const { return mdsmap->allows_multimds_snaps(); }
7c673cae 224
eafe8130 225 bool is_cache_trimmable() const {
b3b6e05e 226 return is_standby_replay() || is_clientreplay() || is_active() || is_stopping();
eafe8130
TL
227 }
228
7c673cae 229 void handle_write_error(int err);
f67539c2 230 void handle_write_error_with_lock(int err);
7c673cae 231
c07f9fc5 232 void update_mlogger();
7c673cae 233
11fdf7f2 234 void queue_waiter(MDSContext *c) {
91327a77
AA
235 finished_queue.push_back(c);
236 progress_thread.signal();
237 }
494da23a
TL
238 void queue_waiter_front(MDSContext *c) {
239 finished_queue.push_front(c);
240 progress_thread.signal();
241 }
11fdf7f2
TL
242 void queue_waiters(MDSContext::vec& ls) {
243 MDSContext::vec v;
244 v.swap(ls);
245 std::copy(v.begin(), v.end(), std::back_inserter(finished_queue));
7c673cae
FG
246 progress_thread.signal();
247 }
11fdf7f2
TL
248 void queue_waiters_front(MDSContext::vec& ls) {
249 MDSContext::vec v;
250 v.swap(ls);
251 std::copy(v.rbegin(), v.rend(), std::front_inserter(finished_queue));
91327a77
AA
252 progress_thread.signal();
253 }
7c673cae 254
7c673cae
FG
255 // Daemon lifetime functions: these guys break the abstraction
256 // and call up into the parent MDSDaemon instance. It's kind
257 // of unavoidable: if we want any depth into our calls
258 // to be able to e.g. tear down the whole process, we have to
259 // have a reference going all the way down.
260 // >>>
261 void suicide();
262 void respawn();
263 // <<<
264
265 /**
266 * Call this periodically if inside a potentially long running piece
267 * of code while holding the mds_lock
268 */
269 void heartbeat_reset();
33c7a0ef
TL
270 int heartbeat_reset_grace(int count=1) {
271 return count * _heartbeat_reset_grace;
272 }
7c673cae
FG
273
274 /**
275 * Report state DAMAGED to the mon, and then pass on to respawn(). Call
276 * this when an unrecoverable error is encountered while attempting
277 * to load an MDS rank's data structures. This is *not* for use with
278 * errors affecting normal dirfrag/inode objects -- they should be handled
279 * through cleaner scrub/repair mechanisms.
280 *
281 * Callers must already hold mds_lock.
282 */
283 void damaged();
284
285 /**
286 * Wrapper around `damaged` for users who are not
287 * already holding mds_lock.
288 *
289 * Callers must not already hold mds_lock.
290 */
291 void damaged_unlocked();
292
91327a77
AA
293 double last_cleared_laggy() const {
294 return beacon.last_cleared_laggy();
295 }
296
297 double get_dispatch_queue_max_age(utime_t now) const;
7c673cae 298
9f95a23c 299 void send_message_mds(const ref_t<Message>& m, mds_rank_t mds);
f67539c2 300 void send_message_mds(const ref_t<Message>& m, const entity_addrvec_t &addr);
9f95a23c
TL
301 void forward_message_mds(const cref_t<MClientRequest>& req, mds_rank_t mds);
302 void send_message_client_counted(const ref_t<Message>& m, client_t client);
303 void send_message_client_counted(const ref_t<Message>& m, Session* session);
304 void send_message_client_counted(const ref_t<Message>& m, const ConnectionRef& connection);
305 void send_message_client(const ref_t<Message>& m, Session* session);
306 void send_message(const ref_t<Message>& m, const ConnectionRef& c);
7c673cae 307
20effc67
TL
308 void wait_for_bootstrapped_peer(mds_rank_t who, MDSContext *c) {
309 waiting_for_bootstrapping_peer[who].push_back(c);
310 }
11fdf7f2 311 void wait_for_active_peer(mds_rank_t who, MDSContext *c) {
7c673cae
FG
312 waiting_for_active_peer[who].push_back(c);
313 }
11fdf7f2
TL
314 void wait_for_cluster_recovered(MDSContext *c) {
315 ceph_assert(cluster_degraded);
7c673cae
FG
316 waiting_for_active_peer[MDS_RANK_NONE].push_back(c);
317 }
318
11fdf7f2 319 void wait_for_any_client_connection(MDSContext *c) {
28e407b8
AA
320 waiting_for_any_client_connection.push_back(c);
321 }
322 void kick_waiters_for_any_client_connection(void) {
323 finish_contexts(g_ceph_context, waiting_for_any_client_connection);
324 }
11fdf7f2 325 void wait_for_active(MDSContext *c) {
7c673cae
FG
326 waiting_for_active.push_back(c);
327 }
11fdf7f2 328 void wait_for_replay(MDSContext *c) {
7c673cae
FG
329 waiting_for_replay.push_back(c);
330 }
11fdf7f2 331 void wait_for_rejoin(MDSContext *c) {
a8e16298
TL
332 waiting_for_rejoin.push_back(c);
333 }
11fdf7f2 334 void wait_for_reconnect(MDSContext *c) {
7c673cae
FG
335 waiting_for_reconnect.push_back(c);
336 }
11fdf7f2 337 void wait_for_resolve(MDSContext *c) {
7c673cae
FG
338 waiting_for_resolve.push_back(c);
339 }
11fdf7f2 340 void wait_for_mdsmap(epoch_t e, MDSContext *c) {
7c673cae
FG
341 waiting_for_mdsmap[e].push_back(c);
342 }
11fdf7f2 343 void enqueue_replay(MDSContext *c) {
7c673cae
FG
344 replay_queue.push_back(c);
345 }
346
347 bool queue_one_replay();
11fdf7f2 348 void maybe_clientreplay_done();
7c673cae
FG
349
350 void set_osd_epoch_barrier(epoch_t e);
351 epoch_t get_osd_epoch_barrier() const {return osd_epoch_barrier;}
352 epoch_t get_osd_epoch() const;
353
354 ceph_tid_t issue_tid() { return ++last_tid; }
355
11fdf7f2 356 MDSMap *get_mds_map() { return mdsmap.get(); }
7c673cae 357
28e407b8 358 uint64_t get_num_requests() const { return logger->get(l_mds_request); }
7c673cae
FG
359
360 int get_mds_slow_req_count() const { return mds_slow_req_count; }
361
362 void dump_status(Formatter *f) const;
363
11fdf7f2 364 void hit_export_target(mds_rank_t rank, double amount=-1.0);
7c673cae 365 bool is_export_target(mds_rank_t rank) {
20effc67 366 const std::set<mds_rank_t>& map_targets = mdsmap->get_mds_info(get_nodeid()).export_targets;
7c673cae
FG
367 return map_targets.count(rank);
368 }
369
f67539c2 370 bool evict_client(int64_t session_id, bool wait, bool blocklist,
11fdf7f2 371 std::ostream& ss, Context *on_killed=nullptr);
92f5a8d4
TL
372 int config_client(int64_t session_id, bool remove,
373 const std::string& option, const std::string& value,
374 std::ostream& ss);
11fdf7f2 375
9f95a23c
TL
376 // Reference to global MDS::mds_lock, so that users of MDSRank don't
377 // carry around references to the outer MDS, and we can substitute
378 // a separate lock here in future potentially.
a4b75251 379 ceph::fair_mutex &mds_lock;
9f95a23c
TL
380
381 // Reference to global cluster log client, just to avoid initialising
382 // a separate one here.
383 LogChannelRef &clog;
384
385 // Reference to global timer utility, because MDSRank and MDSDaemon
386 // currently both use the same mds_lock, so it makes sense for them
387 // to share a timer.
a4b75251 388 CommonSafeTimer<ceph::fair_mutex> &timer;
9f95a23c
TL
389
390 std::unique_ptr<MDSMap> &mdsmap; /* MDSDaemon::mdsmap */
391
392 Objecter *objecter;
393
394 // sub systems
395 Server *server = nullptr;
396 MDCache *mdcache = nullptr;
397 Locker *locker = nullptr;
398 MDLog *mdlog = nullptr;
399 MDBalancer *balancer = nullptr;
400 ScrubStack *scrubstack = nullptr;
401 DamageTable damage_table;
402
403 InoTable *inotable = nullptr;
404
405 SnapServer *snapserver = nullptr;
406 SnapClient *snapclient = nullptr;
407
408 SessionMap sessionmap;
409
410 PerfCounters *logger = nullptr, *mlogger = nullptr;
411 OpTracker op_tracker;
412
413 // The last different state I held before current
414 MDSMap::DaemonState last_state = MDSMap::STATE_BOOT;
415 // The state assigned to me by the MDSMap
2a845540 416 MDSMap::DaemonState state = MDSMap::STATE_STANDBY;
9f95a23c
TL
417
418 bool cluster_degraded = false;
419
420 Finisher *finisher;
7c673cae 421 protected:
9f95a23c
TL
422 typedef enum {
423 // The MDSMap is available, configure default layouts and structures
424 MDS_BOOT_INITIAL = 0,
425 // We are ready to open some inodes
426 MDS_BOOT_OPEN_ROOT,
427 // We are ready to do a replay if needed
428 MDS_BOOT_PREPARE_LOG,
429 // Replay is complete
430 MDS_BOOT_REPLAY_DONE
431 } BootStep;
432
433 class ProgressThread : public Thread {
434 public:
435 explicit ProgressThread(MDSRank *mds_) : mds(mds_) {}
436 void * entry() override;
437 void shutdown();
438 void signal() {cond.notify_all();}
439 private:
440 MDSRank *mds;
a4b75251 441 std::condition_variable_any cond;
9f95a23c
TL
442 } progress_thread;
443
444 class C_MDS_StandbyReplayRestart;
445 class C_MDS_StandbyReplayRestartFinish;
446 // Friended to access retry_dispatch
447 friend class C_MDS_RetryMessage;
448 friend class C_MDS_BootStart;
449 friend class C_MDS_InternalBootStart;
450 friend class C_MDS_MonCommand;
451
452 const mds_rank_t whoami;
453
454 ~MDSRank();
455
456 void inc_dispatch_depth() { ++dispatch_depth; }
457 void dec_dispatch_depth() { --dispatch_depth; }
458 void retry_dispatch(const cref_t<Message> &m);
f6b5b4d7
TL
459 bool is_valid_message(const cref_t<Message> &m);
460 void handle_message(const cref_t<Message> &m);
9f95a23c
TL
461 void _advance_queues();
462 bool _dispatch(const cref_t<Message> &m, bool new_msg);
463 bool is_stale_message(const cref_t<Message> &m) const;
464
465 /**
466 * Emit clog warnings for any ops reported as warnings by optracker
467 */
468 void check_ops_in_flight();
469
470 /**
471 * Share MDSMap with clients
472 */
9f95a23c
TL
473 void create_logger();
474
7c673cae 475 void dump_clientreplay_status(Formatter *f) const;
11fdf7f2
TL
476 void command_scrub_start(Formatter *f,
477 std::string_view path, std::string_view tag,
20effc67 478 const std::vector<std::string>& scrubop_vec, Context *on_finish);
11fdf7f2
TL
479 void command_tag_path(Formatter *f, std::string_view path,
480 std::string_view tag);
481 // scrub control commands
482 void command_scrub_abort(Formatter *f, Context *on_finish);
483 void command_scrub_pause(Formatter *f, Context *on_finish);
484 void command_scrub_resume(Formatter *f);
485 void command_scrub_status(Formatter *f);
486
487 void command_flush_path(Formatter *f, std::string_view path);
7c673cae
FG
488 void command_flush_journal(Formatter *f);
489 void command_get_subtrees(Formatter *f);
490 void command_export_dir(Formatter *f,
11fdf7f2 491 std::string_view path, mds_rank_t dest);
7c673cae
FG
492 bool command_dirfrag_split(
493 cmdmap_t cmdmap,
494 std::ostream &ss);
495 bool command_dirfrag_merge(
496 cmdmap_t cmdmap,
497 std::ostream &ss);
498 bool command_dirfrag_ls(
499 cmdmap_t cmdmap,
500 std::ostream &ss,
501 Formatter *f);
11fdf7f2 502 int _command_export_dir(std::string_view path, mds_rank_t dest);
7c673cae
FG
503 CDir *_command_dirfrag_get(
504 const cmdmap_t &cmdmap,
505 std::ostream &ss);
11fdf7f2
TL
506 void command_openfiles_ls(Formatter *f);
507 void command_dump_tree(const cmdmap_t &cmdmap, std::ostream &ss, Formatter *f);
508 void command_dump_inode(Formatter *f, const cmdmap_t &cmdmap, std::ostream &ss);
f64942e4
AA
509 void command_cache_drop(uint64_t timeout, Formatter *f, Context *on_finish);
510
7c673cae
FG
511 // FIXME the state machine logic should be separable from the dispatch
512 // logic that calls it.
513 // >>>
514 void calc_recovery_set();
515 void request_state(MDSMap::DaemonState s);
516
7c673cae
FG
517 void boot_create(); // i am new mds.
518 void boot_start(BootStep step=MDS_BOOT_INITIAL, int r=0); // starting|replay
519
520 void replay_start();
521 void creating_done();
522 void starting_done();
523 void replay_done();
524 void standby_replay_restart();
525 void _standby_replay_restart_finish(int r, uint64_t old_read_pos);
7c673cae
FG
526
527 void reopen_log();
528
529 void resolve_start();
530 void resolve_done();
531 void reconnect_start();
532 void reconnect_done();
533 void rejoin_joint_start();
534 void rejoin_start();
535 void rejoin_done();
536 void recovery_done(int oldstate);
537 void clientreplay_start();
538 void clientreplay_done();
539 void active_start();
540 void stopping_start();
541 void stopping_done();
542
543 void validate_sessions();
9f95a23c 544
7c673cae
FG
545 void handle_mds_recovery(mds_rank_t who);
546 void handle_mds_failure(mds_rank_t who);
7c673cae
FG
547
548 /* Update MDSMap export_targets for this rank. Called on ::tick(). */
11fdf7f2 549 void update_targets();
94b18763 550
11fdf7f2
TL
551 void _mon_command_finish(int r, std::string_view cmd, std::string_view outs);
552 void set_mdsmap_multimds_snaps_allowed();
9f95a23c
TL
553
554 Context *create_async_exec_context(C_ExecAndReply *ctx);
555
a4b75251
TL
556 // blocklist the provided addrs and set OSD epoch barrier
557 // with the provided epoch.
558 void apply_blocklist(const std::set<entity_addr_t> &addrs, epoch_t epoch);
559
9f95a23c
TL
560 // Incarnation as seen in MDSMap at the point where a rank is
561 // assigned.
562 int incarnation = 0;
563
564 // Flag to indicate we entered shutdown: anyone seeing this to be true
565 // after taking mds_lock must drop out.
566 bool stopping = false;
567
568 // PurgeQueue is only used by StrayManager, but it is owned by MDSRank
569 // because its init/shutdown happens at the top level.
570 PurgeQueue purge_queue;
571
f67539c2
TL
572 MetricsHandler metrics_handler;
573 std::unique_ptr<MetricAggregator> metric_aggregator;
574
20effc67 575 std::list<cref_t<Message>> waiting_for_nolaggy;
9f95a23c
TL
576 MDSContext::que finished_queue;
577 // Dispatch, retry, queues
578 int dispatch_depth = 0;
579
580 ceph::heartbeat_handle_d *hb = nullptr; // Heartbeat for threads using mds_lock
f67539c2 581 double heartbeat_grace;
33c7a0ef 582 int _heartbeat_reset_grace;
9f95a23c 583
20effc67 584 std::map<mds_rank_t, version_t> peer_mdsmap_epoch;
9f95a23c
TL
585
586 ceph_tid_t last_tid = 0; // for mds-initiated requests (e.g. stray rename)
587
588 MDSContext::vec waiting_for_active, waiting_for_replay, waiting_for_rejoin,
589 waiting_for_reconnect, waiting_for_resolve;
590 MDSContext::vec waiting_for_any_client_connection;
591 MDSContext::que replay_queue;
592 bool replaying_requests_done = false;
593
20effc67
TL
594 std::map<mds_rank_t, MDSContext::vec> waiting_for_active_peer;
595 std::map<mds_rank_t, MDSContext::vec> waiting_for_bootstrapping_peer;
596 std::map<epoch_t, MDSContext::vec> waiting_for_mdsmap;
9f95a23c
TL
597
598 epoch_t osd_epoch_barrier = 0;
599
600 // Const reference to the beacon so that we can behave differently
601 // when it's laggy.
602 Beacon &beacon;
603
604 int mds_slow_req_count = 0;
605
20effc67 606 std::map<mds_rank_t,DecayCounter> export_targets; /* targets this MDS is exporting to or wants/tries to */
9f95a23c
TL
607
608 Messenger *messenger;
609 MonClient *monc;
610 MgrClient *mgrc;
611
612 Context *respawn_hook;
613 Context *suicide_hook;
614
615 bool standby_replaying = false; // true if current replay pass is in standby-replay mode
94b18763 616private:
f91f0fd5
TL
617 bool send_status = true;
618
b3b6e05e
TL
619 // The metadata pool won't change in the whole life time of the fs,
620 // with this we can get rid of the mds_lock in many places too.
621 int64_t metadata_pool = -1;
622
9f95a23c
TL
623 // "task" string that gets displayed in ceph status
624 inline static const std::string SCRUB_STATUS_KEY = "scrub status";
11fdf7f2 625
9f95a23c
TL
626 void get_task_status(std::map<std::string, std::string> *status);
627 void schedule_update_timer_task();
628 void send_task_status();
629
f67539c2
TL
630 bool is_rank0() const {
631 return whoami == (mds_rank_t)0;
632 }
633
9f95a23c 634 mono_time starttime = mono_clock::zero();
f67539c2 635 boost::asio::io_context& ioc;
7c673cae
FG
636};
637
638/* This expects to be given a reference which it is responsible for.
639 * The finish function calls functions which
640 * will put the Message exactly once.*/
641class C_MDS_RetryMessage : public MDSInternalContext {
7c673cae 642public:
9f95a23c 643 C_MDS_RetryMessage(MDSRank *mds, const cref_t<Message> &m)
11fdf7f2 644 : MDSInternalContext(mds), m(m) {}
7c673cae 645 void finish(int r) override {
11fdf7f2
TL
646 get_mds()->retry_dispatch(m);
647 }
648protected:
9f95a23c 649 cref_t<Message> m;
11fdf7f2
TL
650};
651
652class CF_MDS_RetryMessageFactory : public MDSContextFactory {
653public:
9f95a23c 654 CF_MDS_RetryMessageFactory(MDSRank *mds, const cref_t<Message> &m)
11fdf7f2
TL
655 : mds(mds), m(m) {}
656
657 MDSContext *build() {
658 return new C_MDS_RetryMessage(mds, m);
7c673cae 659 }
11fdf7f2
TL
660private:
661 MDSRank *mds;
9f95a23c 662 cref_t<Message> m;
7c673cae
FG
663};
664
665/**
666 * The aspect of MDSRank exposed to MDSDaemon but not subsystems: i.e.
667 * the service/dispatcher stuff like init/shutdown that subsystems should
668 * never touch.
669 */
92f5a8d4 670class MDSRankDispatcher : public MDSRank, public md_config_obs_t
7c673cae
FG
671{
672public:
9f95a23c
TL
673 MDSRankDispatcher(
674 mds_rank_t whoami_,
a4b75251 675 ceph::fair_mutex &mds_lock_,
9f95a23c 676 LogChannelRef &clog_,
a4b75251 677 CommonSafeTimer<ceph::fair_mutex> &timer_,
9f95a23c
TL
678 Beacon &beacon_,
679 std::unique_ptr<MDSMap> &mdsmap_,
680 Messenger *msgr,
681 MonClient *monc_,
682 MgrClient *mgrc,
683 Context *respawn_hook_,
f67539c2
TL
684 Context *suicide_hook_,
685 boost::asio::io_context& ioc);
9f95a23c 686
7c673cae
FG
687 void init();
688 void tick();
689 void shutdown();
9f95a23c
TL
690 void handle_asok_command(
691 std::string_view command,
692 const cmdmap_t& cmdmap,
693 Formatter *f,
694 const bufferlist &inbl,
695 std::function<void(int,const std::string&,bufferlist&)> on_finish);
696 void handle_mds_map(const cref_t<MMDSMap> &m, const MDSMap &oldmap);
7c673cae 697 void handle_osd_map();
7c673cae
FG
698 void update_log_config();
699
92f5a8d4
TL
700 const char** get_tracked_conf_keys() const override final;
701 void handle_conf_change(const ConfigProxy& conf, const std::set<std::string>& changed) override;
702
adb31ebb 703 void dump_sessions(const SessionFilter &filter, Formatter *f, bool cap_dump=false) const;
9f95a23c
TL
704 void evict_clients(const SessionFilter &filter,
705 std::function<void(int,const std::string&,bufferlist&)> on_finish);
7c673cae
FG
706
707 // Call into me from MDS::ms_dispatch
9f95a23c 708 bool ms_dispatch(const cref_t<Message> &m);
7c673cae
FG
709};
710
7c673cae 711#endif // MDS_RANK_H_